import { describe, expect, it } from "bun:test";
import {
stripInvisibleCharacters,
stripMarkdownImageAltText,
stripMarkdownLinkTitles,
stripHiddenAttributes,
normalizeHtmlEntities,
sanitizeContent,
stripHtmlComments,
} from "../src/github/utils/sanitizer";
describe("stripInvisibleCharacters", () => {
it("should remove zero-width characters", () => {
expect(stripInvisibleCharacters("Hello\u200BWorld")).toBe("HelloWorld");
expect(stripInvisibleCharacters("Text\u200C\u200D")).toBe("Text");
expect(stripInvisibleCharacters("\uFEFFStart")).toBe("Start");
});
it("should remove control characters", () => {
expect(stripInvisibleCharacters("Hello\u0000World")).toBe("HelloWorld");
expect(stripInvisibleCharacters("Text\u001F\u007F")).toBe("Text");
});
it("should preserve common whitespace", () => {
expect(stripInvisibleCharacters("Hello\nWorld")).toBe("Hello\nWorld");
expect(stripInvisibleCharacters("Tab\there")).toBe("Tab\there");
expect(stripInvisibleCharacters("Carriage\rReturn")).toBe(
"Carriage\rReturn",
);
});
it("should remove soft hyphens", () => {
expect(stripInvisibleCharacters("Soft\u00ADHyphen")).toBe("SoftHyphen");
});
it("should remove Unicode direction overrides", () => {
expect(stripInvisibleCharacters("Text\u202A\u202BMore")).toBe("TextMore");
expect(stripInvisibleCharacters("\u2066Isolated\u2069")).toBe("Isolated");
});
});
describe("stripMarkdownImageAltText", () => {
it("should remove alt text from markdown images", () => {
expect(stripMarkdownImageAltText("")).toBe(
"",
);
expect(
stripMarkdownImageAltText("Text  more text"),
).toBe("Text  more text");
});
it("should handle multiple images", () => {
expect(stripMarkdownImageAltText(" ")).toBe(
" ",
);
});
it("should handle empty alt text", () => {
expect(stripMarkdownImageAltText("")).toBe("");
});
});
describe("stripMarkdownLinkTitles", () => {
it("should remove titles from markdown links", () => {
expect(stripMarkdownLinkTitles('[Link](url.com "example title")')).toBe(
"[Link](url.com)",
);
expect(stripMarkdownLinkTitles("[Link](url.com 'example title')")).toBe(
"[Link](url.com)",
);
});
it("should handle multiple links", () => {
expect(
stripMarkdownLinkTitles('[One](1.com "first") [Two](2.com "second")'),
).toBe("[One](1.com) [Two](2.com)");
});
it("should preserve links without titles", () => {
expect(stripMarkdownLinkTitles("[Link](url.com)")).toBe("[Link](url.com)");
});
});
describe("stripHiddenAttributes", () => {
it("should remove alt attributes", () => {
expect(
stripHiddenAttributes('
'),
).toBe('
');
expect(stripHiddenAttributes("
")).toBe(
'
',
);
expect(stripHiddenAttributes('
')).toBe(
'
',
);
});
it("should remove title attributes", () => {
expect(
stripHiddenAttributes('Link'),
).toBe('Link');
expect(stripHiddenAttributes("
',
),
).toBe('
');
});
});
describe("normalizeHtmlEntities", () => {
it("should decode numeric entities", () => {
expect(normalizeHtmlEntities("Hello")).toBe(
"Hello",
);
expect(normalizeHtmlEntities("ABC")).toBe("ABC");
});
it("should decode hex entities", () => {
expect(normalizeHtmlEntities("Hello")).toBe(
"Hello",
);
expect(normalizeHtmlEntities("ABC")).toBe("ABC");
});
it("should remove non-printable entities", () => {
expect(normalizeHtmlEntities("")).toBe("");
expect(normalizeHtmlEntities("")).toBe("");
});
it("should preserve normal text", () => {
expect(normalizeHtmlEntities("Normal text")).toBe("Normal text");
});
});
describe("sanitizeContent", () => {
it("should apply all sanitization measures", () => {
const testContent = `

[click here](https://example.com "example title")
');
expect(sanitized).toContain("");
expect(sanitized).toContain("[click here](https://example.com)");
});
it("should handle complex nested patterns", () => {
const complexContent = `
Text with  and more.
Link
Normal paragraph