Add enhanced text sanitization (#83)
* Add enhanced text sanitization
* Format code with prettier
* Refactor tests to remove redundancy and improve structure
  - Remove redundant 'mixed input patterns' test from sanitizer.test.ts
  - Consolidate integration tests into 2 focused real-world scenarios
  - Add HTML comment stripping to sanitizeContent function
  - Update test expectations to match sanitization behavior
  - Maintain full coverage with fewer, more focused tests
* Fix prettier formatting
* Remove rendered.html from repository
* Remove test-markdown.json and update .gitignore
* Revert .gitignore changes
This commit is contained in:
65
src/github/utils/sanitizer.ts
Normal file
65
src/github/utils/sanitizer.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
 * Removes characters that occupy no visible space from `content`.
 *
 * Four groups are deleted, in order: zero-width characters (U+200B–U+200D,
 * U+FEFF), C0/C1 control characters and DEL, the soft hyphen (U+00AD), and
 * bidirectional formatting controls (U+202A–U+202E, U+2066–U+2069).
 *
 * @param content - Text to clean.
 * @returns `content` with every matching character deleted.
 */
export function stripInvisibleCharacters(content: string): string {
  return (
    content
      // Zero-width space / non-joiner / joiner and the byte-order mark.
      .replace(/[\u200B\u200C\u200D\uFEFF]/g, "")
      // Control characters; tab (U+0009), LF (U+000A) and CR (U+000D) are
      // deliberately outside the ranges so ordinary whitespace survives.
      .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, "")
      // Soft hyphen.
      .replace(/\u00AD/g, "")
      // Bidirectional embedding, override and isolate controls.
      .replace(/[\u202A-\u202E\u2066-\u2069]/g, "")
  );
}
|
||||
|
||||
/**
 * Empties the alt text of every markdown image in `content`.
 *
 * Each `![...](` prefix is rewritten to `![](`; the image destination that
 * follows the opening parenthesis is left untouched.
 *
 * @param content - Markdown text to clean.
 * @returns `content` with all image alt text removed.
 */
export function stripMarkdownImageAltText(content: string): string {
  return content.replace(/!\[[^\]]*\]\(/g, "![](");
}
|
||||
|
||||
/**
 * Removes the optional quoted title from markdown links and images.
 *
 * `[text](url "title")` and `[text](url 'title')` both become `[text](url)`.
 * Double-quoted titles are removed first, then single-quoted ones.
 *
 * @param content - Markdown text to clean.
 * @returns `content` with quoted link titles removed.
 */
export function stripMarkdownLinkTitles(content: string): string {
  // Capture group 1 is everything up to the whitespace that precedes the
  // title, so replacing with "$1" drops only the whitespace and the title.
  const doubleQuotedTitle = /(\[[^\]]*\]\([^)]+)\s+"[^"]*"/g;
  const singleQuotedTitle = /(\[[^\]]*\]\([^)]+)\s+'[^']*'/g;

  return content
    .replace(doubleQuotedTitle, "$1")
    .replace(singleQuotedTitle, "$1");
}
|
||||
|
||||
/**
 * Removes HTML attributes whose values are not shown as page text:
 * `alt`, `title`, `aria-label`, any `data-*` attribute, and `placeholder`.
 *
 * For each attribute, quoted values are removed first and unquoted values
 * second. Matching is case-insensitive and requires whitespace before the
 * attribute name.
 *
 * @param content - HTML or markdown-with-HTML text to clean.
 * @returns `content` with the listed attributes deleted.
 */
export function stripHiddenAttributes(content: string): string {
  // Regex source fragments for the attribute names, in processing order.
  const attributeNames = [
    "alt",
    "title",
    "aria-label",
    "data-[a-zA-Z0-9-]+",
    "placeholder",
  ];

  let result = content;
  for (const name of attributeNames) {
    // A quoted value must close with the quote that opened it. A shared
    // ["'] class on both ends would end `alt="it's hidden"` at the
    // apostrophe and leave the rest of the value in the output.
    const quoted = new RegExp(`\\s${name}\\s*=\\s*(?:"[^"]*"|'[^']*')`, "gi");
    // An unquoted value runs up to the next whitespace or `>`.
    const unquoted = new RegExp(`\\s${name}\\s*=\\s*[^\\s>]+`, "gi");
    result = result.replace(quoted, "").replace(unquoted, "");
  }
  return result;
}
|
||||
|
||||
/**
 * Resolves numeric HTML character references in `content`.
 *
 * Decimal (`&#65;`) and hexadecimal (`&#x41;` / `&#X41;`) references are
 * replaced by the character they denote when it is printable ASCII
 * (code 32–126); references to any other code are removed entirely.
 * Named references such as `&amp;` are left as they are.
 *
 * @param content - Text to normalize.
 * @returns `content` with numeric character references decoded or removed.
 */
export function normalizeHtmlEntities(content: string): string {
  // Shared by both passes: keep printable ASCII, drop everything else.
  const decodePrintableAscii = (code: number): string =>
    code >= 32 && code <= 126 ? String.fromCharCode(code) : "";

  return (
    content
      .replace(/&#(\d+);/g, (_match, dec: string) =>
        decodePrintableAscii(parseInt(dec, 10)),
      )
      // The hex marker may be written in either case (`&#x41;` or `&#X41;`).
      .replace(/&#[xX]([0-9a-fA-F]+);/g, (_match, hex: string) =>
        decodePrintableAscii(parseInt(hex, 16)),
      )
  );
}
|
||||
|
||||
/**
 * Runs every sanitizer in this module over `content`, in a fixed order:
 * HTML comments, invisible characters, markdown image alt text, markdown
 * link titles, hidden HTML attributes, and finally numeric HTML entities.
 *
 * @param content - Text to sanitize.
 * @returns The text after all sanitization steps have been applied.
 */
export function sanitizeContent(content: string): string {
  const steps: Array<(text: string) => string> = [
    stripHtmlComments,
    stripInvisibleCharacters,
    stripMarkdownImageAltText,
    stripMarkdownLinkTitles,
    stripHiddenAttributes,
    normalizeHtmlEntities,
  ];
  // Each step receives the previous step's output.
  return steps.reduce((text, step) => step(text), content);
}
|
||||
|
||||
/**
 * Removes every `<!-- ... -->` HTML comment from `content`, including
 * comments that span multiple lines.
 *
 * Removal is repeated until the text stops changing. A single pass over
 * `<!-<!-- x -->- hidden -->` would delete the inner comment and thereby
 * assemble a new `<!-- hidden -->` from the surrounding fragments.
 *
 * @param content - Text to clean.
 * @returns `content` with no complete HTML comment remaining.
 */
export const stripHtmlComments = (content: string): string => {
  let current = content;
  let previous: string;
  do {
    previous = current;
    current = previous.replace(/<!--[\s\S]*?-->/g, "");
    // Every successful pass shortens the text, so this loop terminates.
  } while (current !== previous);
  return current;
};
|
||||
Reference in New Issue
Block a user