// Test suite (bun:test) for the helpers imported from ../src/github/utils/sanitizer.
// There is one describe() group per helper:
//   - stripInvisibleCharacters: expects zero-width characters (U+200B, U+200C,
//     U+200D, U+FEFF), control characters (U+0000, U+001F, U+007F), the soft
//     hyphen (U+00AD) and the bidirectional controls the tests call "direction
//     overrides" (U+202A, U+202B, U+2066, U+2069) to be removed, while \n, \t
//     and \r are kept.
//   - stripMarkdownImageAltText: expects ![alt](src) to become ![](src).
//   - stripMarkdownLinkTitles: expects a single- or double-quoted title after
//     the link URL to be removed and title-less links to be left unchanged.
//   - stripHiddenAttributes, normalizeHtmlEntities, sanitizeContent and
//     stripHtmlComments (legacy): read the NOTE below before relying on them.
//
// NOTE(review): from the stripHiddenAttributes group onward the fixtures look as
// if their HTML tags, HTML comments and character entities were stripped from
// this copy of the file - TODO confirm against version control and restore them.
// Visible symptoms:
//   - several '...' and "..." literals contain raw line breaks, which quoted
//     JavaScript string literals do not allow, so the file does not parse as is;
//   - the "decode numeric entities" / "decode hex entities" cases pass the same
//     plain text as input and as expected value, so no decoding is exercised;
//   - the stripHtmlComments cases likewise use identical input and expected text;
//   - the "apply all sanitization measures" case asserts not.toContain("") with
//     an empty needle, and checks for "example data", "example label",
//     data-prompt= and aria-label=, none of which occur in the fixture any more.
import { describe, expect, it } from "bun:test"; import { stripInvisibleCharacters, stripMarkdownImageAltText, stripMarkdownLinkTitles, stripHiddenAttributes, normalizeHtmlEntities, sanitizeContent, stripHtmlComments, } from "../src/github/utils/sanitizer"; describe("stripInvisibleCharacters", () => { it("should remove zero-width characters", () => { expect(stripInvisibleCharacters("Hello\u200BWorld")).toBe("HelloWorld"); expect(stripInvisibleCharacters("Text\u200C\u200D")).toBe("Text"); expect(stripInvisibleCharacters("\uFEFFStart")).toBe("Start"); }); it("should remove control characters", () => { expect(stripInvisibleCharacters("Hello\u0000World")).toBe("HelloWorld"); expect(stripInvisibleCharacters("Text\u001F\u007F")).toBe("Text"); }); it("should preserve common whitespace", () => { expect(stripInvisibleCharacters("Hello\nWorld")).toBe("Hello\nWorld"); expect(stripInvisibleCharacters("Tab\there")).toBe("Tab\there"); expect(stripInvisibleCharacters("Carriage\rReturn")).toBe( "Carriage\rReturn", ); }); it("should remove soft hyphens", () => { expect(stripInvisibleCharacters("Soft\u00ADHyphen")).toBe("SoftHyphen"); }); it("should remove Unicode direction overrides", () => { expect(stripInvisibleCharacters("Text\u202A\u202BMore")).toBe("TextMore"); expect(stripInvisibleCharacters("\u2066Isolated\u2069")).toBe("Isolated"); }); }); describe("stripMarkdownImageAltText", () => { it("should remove alt text from markdown images", () => { expect(stripMarkdownImageAltText("![example alt text](image.png)")).toBe( "![](image.png)", ); expect( stripMarkdownImageAltText("Text ![description](pic.jpg) more text"), ).toBe("Text ![](pic.jpg) more text"); }); it("should handle multiple images", () => { expect(stripMarkdownImageAltText("![one](1.png) ![two](2.png)")).toBe( "![](1.png) ![](2.png)", ); }); it("should handle empty alt text", () => { expect(stripMarkdownImageAltText("![](image.png)")).toBe("![](image.png)"); }); }); describe("stripMarkdownLinkTitles", () => { 
it("should remove titles from markdown links", () => { expect(stripMarkdownLinkTitles('[Link](url.com "example title")')).toBe( "[Link](url.com)", ); expect(stripMarkdownLinkTitles("[Link](url.com 'example title')")).toBe( "[Link](url.com)", ); }); it("should handle multiple links", () => { expect( stripMarkdownLinkTitles('[One](1.com "first") [Two](2.com "second")'), ).toBe("[One](1.com) [Two](2.com)"); }); it("should preserve links without titles", () => { expect(stripMarkdownLinkTitles("[Link](url.com)")).toBe("[Link](url.com)"); }); }); describe("stripHiddenAttributes", () => { it("should remove alt attributes", () => { expect( stripHiddenAttributes(' example text

'), ).toBe('

'); expect(stripHiddenAttributes(" example

")).toBe( '

', ); expect(stripHiddenAttributes(' example

')).toBe( '

', ); }); it("should remove title attributes", () => { expect( stripHiddenAttributes('Link'), ).toBe('Link'); expect(stripHiddenAttributes("

Content

")).toBe( "

Content

", ); }); it("should remove aria-label attributes", () => { expect( stripHiddenAttributes(''), ).toBe(""); }); it("should remove data-* attributes", () => { expect( stripHiddenAttributes( '

Text

', ), ).toBe("

Text

"); }); it("should remove placeholder attributes", () => { expect( stripHiddenAttributes(''), ).toBe(''); }); it("should handle multiple attributes", () => { expect( stripHiddenAttributes( ' example

', ), ).toBe('

'); }); }); describe("normalizeHtmlEntities", () => { it("should decode numeric entities", () => { expect(normalizeHtmlEntities("Hello")).toBe( "Hello", ); expect(normalizeHtmlEntities("ABC")).toBe("ABC"); }); it("should decode hex entities", () => { expect(normalizeHtmlEntities("Hello")).toBe( "Hello", ); expect(normalizeHtmlEntities("ABC")).toBe("ABC"); }); it("should remove non-printable entities", () => { expect(normalizeHtmlEntities("�")).toBe(""); expect(normalizeHtmlEntities("�")).toBe(""); }); it("should preserve normal text", () => { expect(normalizeHtmlEntities("Normal text")).toBe("Normal text"); }); }); describe("sanitizeContent", () => { it("should apply all sanitization measures", () => { const testContent = ` example alt text

![example image description](screenshot.png) [click here](https://example.com "example title")

Normal text with hidden\u200Bcharacters

Hidden message `; const sanitized = sanitizeContent(testContent); expect(sanitized).not.toContain(""); expect(sanitized).not.toContain("example alt text"); expect(sanitized).not.toContain("example image description"); expect(sanitized).not.toContain("example title"); expect(sanitized).not.toContain("example data"); expect(sanitized).not.toContain("example label"); expect(sanitized).not.toContain("\u200B"); expect(sanitized).not.toContain("alt="); expect(sanitized).not.toContain("data-prompt="); expect(sanitized).not.toContain("aria-label="); expect(sanitized).toContain("Normal text with hiddencharacters"); expect(sanitized).toContain("Hidden message"); expect(sanitized).toContain('

'); expect(sanitized).toContain("![](screenshot.png)"); expect(sanitized).toContain("[click here](https://example.com)"); }); it("should handle complex nested patterns", () => { const complexContent = ` Text with ![alt \u200B text](image.png) and more. Link

Content

`; const sanitized = sanitizeContent(complexContent); expect(sanitized).not.toContain("\u200B"); expect(sanitized).not.toContain("\u00AD"); expect(sanitized).not.toContain("alt "); expect(sanitized).not.toContain('title="'); expect(sanitized).not.toContain('data-x="'); expect(sanitized).toContain("![](image.png)"); expect(sanitized).toContain('Link'); }); it("should preserve legitimate markdown and HTML", () => { const legitimateContent = ` # Heading This is **bold** and *italic* text. Here's a normal image: ![](normal.jpg) And a normal link: [Click here](https://example.com)

Normal paragraph

`; const sanitized = sanitizeContent(legitimateContent); expect(sanitized).toBe(legitimateContent); }); it("should handle entity-encoded text", () => { const encodedText = ` Hidden message

Test

`; const sanitized = sanitizeContent(encodedText); expect(sanitized).toContain("Hidden message"); expect(sanitized).not.toContain('title="'); expect(sanitized).toContain("

Test

"); }); }); describe("stripHtmlComments (legacy)", () => { it("should remove HTML comments", () => { expect(stripHtmlComments("Hello World")).toBe( "Hello World", ); expect(stripHtmlComments("Text")).toBe("Text"); expect(stripHtmlComments("Text")).toBe("Text"); }); it("should handle multiline comments", () => { expect(stripHtmlComments("Hello World")).toBe( "Hello World", ); }); });