();
-
- const result = formatBody(body, imageUrlMap);
- expect(result).toBe("Issue description visible text");
- });
-
- test("strips HTML comments and replaces images", () => {
- const body = `Check this `;
- const imageUrlMap = new Map([
- [
- "https://github.com/user-attachments/assets/test.png",
- "/tmp/github-images/image-1234-0.png",
- ],
- ]);
-
- const result = formatBody(body, imageUrlMap);
- expect(result).toBe(
- "Check this ",
- );
- });
-});
-
-describe("formatComments with HTML comment stripping", () => {
- test("strips HTML comments from comment bodies", () => {
- const comments: GitHubComment[] = [
- {
- id: "1",
- databaseId: "100001",
- body: "Good work on this PR",
- author: { login: "user1" },
- createdAt: "2023-01-01T00:00:00Z",
- },
- ];
-
- const result = formatComments(comments);
- expect(result).toBe(
- "[user1 at 2023-01-01T00:00:00Z]: Good work on this PR",
- );
- });
-});
-
-describe("formatReviewComments with HTML comment stripping", () => {
- test("strips HTML comments from review comment bodies", () => {
- const reviewData = {
- nodes: [
- {
- id: "review1",
- databaseId: "300001",
- author: { login: "reviewer1" },
- body: "LGTM",
- state: "APPROVED",
- submittedAt: "2023-01-01T00:00:00Z",
- comments: {
- nodes: [
- {
- id: "comment1",
- databaseId: "200001",
- body: "Nice work here",
- author: { login: "reviewer1" },
- createdAt: "2023-01-01T00:00:00Z",
- path: "src/index.ts",
- line: 42,
- },
- ],
- },
- },
- ],
- };
-
- const result = formatReviewComments(reviewData);
- expect(result).toBe(
- `[Review by reviewer1 at 2023-01-01T00:00:00Z]: APPROVED\n [Comment on src/index.ts:42]: Nice work here`,
- );
- });
-});
diff --git a/test/integration-sanitization.test.ts b/test/integration-sanitization.test.ts
new file mode 100644
index 0000000..13ba45a
--- /dev/null
+++ b/test/integration-sanitization.test.ts
@@ -0,0 +1,134 @@
+import { describe, expect, it } from "bun:test";
+import { formatBody, formatComments } from "../src/github/data/formatter";
+import type { GitHubComment } from "../src/github/types";
+
+describe("Sanitization Integration", () => {
+ it("should sanitize complete issue/PR body with various hidden content patterns", () => {
+ const issueBody = `
+# Feature Request: Add user dashboard
+
+## Description
+We need a new dashboard for users to track their activity.
+
+
+
+## Technical Details
+The dashboard should display:
+- User statistics 
+- Activity graphs
+- Recent actions
+
+## Implementation Notes
+See [documentation](https://docs.example.com "internal docs title") for API details.
+
+
+ The implementation should follow our standard patterns.
+
+
+Additional notes: Textwithsofthyphens and Hidden encoded content.
+
+
+
+Direction override test: reversed text should be normalized.`;
+
+ const imageUrlMap = new Map();
+ const result = formatBody(issueBody, imageUrlMap);
+
+ // Verify hidden content is removed
+ expect(result).not.toContain("
+
+I've updated the proposal based on your suggestions.
+
+Test note: All systems checked.
+
+Ready for implementation`,
+ author: { login: "author1" },
+ createdAt: "2023-01-01T12:00:00Z",
+ },
+ ];
+
+ const result = formatComments(comments);
+
+ // Verify hidden content is removed
+ expect(result).not.toContain("
+
+ 
+ [click here](https://example.com "example title")
+
+ Normal text with hidden\u200Bcharacters
+
+ Hidden message
+ `;
+
+ const sanitized = sanitizeContent(testContent);
+
+ expect(sanitized).not.toContain("");
+ expect(sanitized).not.toContain("example alt text");
+ expect(sanitized).not.toContain("example image description");
+ expect(sanitized).not.toContain("example title");
+ expect(sanitized).not.toContain("example data");
+ expect(sanitized).not.toContain("example label");
+ expect(sanitized).not.toContain("\u200B");
+ expect(sanitized).not.toContain("alt=");
+ expect(sanitized).not.toContain("data-prompt=");
+ expect(sanitized).not.toContain("aria-label=");
+
+ expect(sanitized).toContain("Normal text with hiddencharacters");
+ expect(sanitized).toContain("Hidden message");
+ expect(sanitized).toContain('
');
+ expect(sanitized).toContain("");
+ expect(sanitized).toContain("[click here](https://example.com)");
+ });
+
+ it("should handle complex nested patterns", () => {
+ const complexContent = `
+ Text with  and more.
+ Link
+ Content
+ `;
+
+ const sanitized = sanitizeContent(complexContent);
+
+ expect(sanitized).not.toContain("\u200B");
+ expect(sanitized).not.toContain("\u00AD");
+ expect(sanitized).not.toContain("alt ");
+ expect(sanitized).not.toContain('title="');
+ expect(sanitized).not.toContain('data-x="');
+ expect(sanitized).toContain("");
+ expect(sanitized).toContain('Link');
+ });
+
+ it("should preserve legitimate markdown and HTML", () => {
+ const legitimateContent = `
+ # Heading
+
+ This is **bold** and *italic* text.
+
+ Here's a normal image: 
+ And a normal link: [Click here](https://example.com)
+
+
+ `;
+
+ const sanitized = sanitizeContent(legitimateContent);
+
+ expect(sanitized).toBe(legitimateContent);
+ });
+
+ it("should handle entity-encoded text", () => {
+ const encodedText = `
+ Hidden message
+ Test
+ `;
+
+ const sanitized = sanitizeContent(encodedText);
+
+ expect(sanitized).toContain("Hidden message");
+ expect(sanitized).not.toContain('title="');
+ expect(sanitized).toContain("Test
");
+ });
+});
+
+describe("stripHtmlComments (legacy)", () => {
+ it("should remove HTML comments", () => {
+ expect(stripHtmlComments("Hello World")).toBe(
+ "Hello World",
+ );
+ expect(stripHtmlComments("Text")).toBe("Text");
+ expect(stripHtmlComments("Text")).toBe("Text");
+ });
+
+ it("should handle multiline comments", () => {
+ expect(stripHtmlComments("Hello World")).toBe(
+ "Hello World",
+ );
+ });
+});