/**
 * Tests for file-reference handling in the Telegram formatter: bare file
 * names and paths (README.md, main.go, squad/friday/HEARTBEAT.md, ...) versus
 * real URLs/domains, exercised through `wrapFileReferencesInHtml`,
 * `renderTelegramHtmlText`, `markdownToTelegramHtml` and
 * `markdownToTelegramChunks`.
 *
 * NOTE(review): this file had been flattened onto a few physical lines and had
 * its HTML markup stripped from string literals (expected values were identical
 * to their inputs, `toContain("")` stood where a tag used to be, and `//`
 * comments swallowed the statements after them). The layout and the
 * `<code>` / `<pre>` / `<b>` / `<i>` / `<a>` markup below were reconstructed
 * from the test names and the surviving text. The `href` values in particular
 * are reconstructions - confirm them against ./format.js before relying on
 * these assertions.
 */
import { describe, expect, it } from "vitest";
import {
  markdownToTelegramChunks,
  markdownToTelegramHtml,
  renderTelegramHtmlText,
  wrapFileReferencesInHtml,
} from "./format.js";

describe("wrapFileReferencesInHtml", () => {
  it("wraps supported file references and paths", () => {
    // [input, fragment the wrapped output must contain]
    const cases = [
      ["Check README.md", "Check <code>README.md</code>"],
      ["See HEARTBEAT.md for status", "See <code>HEARTBEAT.md</code> for status"],
      ["Check main.go", "Check <code>main.go</code>"],
      ["Run script.py", "Run <code>script.py</code>"],
      ["Check backup.pl", "Check <code>backup.pl</code>"],
      ["Run backup.sh", "Run <code>backup.sh</code>"],
      ["Look at squad/friday/HEARTBEAT.md", "Look at <code>squad/friday/HEARTBEAT.md</code>"],
    ] as const;
    for (const [input, expected] of cases) {
      expect(wrapFileReferencesInHtml(input), input).toContain(expected);
    }
  });

  it("does not wrap inside protected html contexts", () => {
    // Each input must come back unchanged.
    // NOTE(review): anchor hrefs below are reconstructed - confirm.
    const cases = [
      "Already <code>wrapped.md</code> here",
      "<pre><code>README.md</code></pre>",
      '<a href="README.md">Link</a>',
      'Visit <a href="https://example.com/README.md">example.com/README.md</a>',
    ] as const;
    for (const input of cases) {
      const result = wrapFileReferencesInHtml(input);
      expect(result, input).toBe(input);
    }
    // Already-wrapped content must not gain a second, nested code tag.
    expect(wrapFileReferencesInHtml(cases[0])).not.toContain("<code><code>");
  });

  it("handles mixed content correctly", () => {
    const result = wrapFileReferencesInHtml("Check README.md and CONTRIBUTING.md");
    expect(result).toContain("<code>README.md</code>");
    expect(result).toContain("<code>CONTRIBUTING.md</code>");
  });

  it("handles boundary and punctuation wrapping cases", () => {
    // `contains: undefined` marks the negative case: nothing should be wrapped.
    const cases = [
      { input: "No markdown files here", contains: undefined },
      { input: "File.md at start", contains: "<code>File.md</code>" },
      { input: "Ends with file.md", contains: "<code>file.md</code>" },
      { input: "See README.md.", contains: "<code>README.md</code>." },
      { input: "See README.md,", contains: "<code>README.md</code>," },
      { input: "(README.md)", contains: "(<code>README.md</code>)" },
      { input: "README.md:", contains: "<code>README.md</code>:" },
    ] as const;
    for (const testCase of cases) {
      const result = wrapFileReferencesInHtml(testCase.input);
      if (!testCase.contains) {
        expect(result).not.toContain("<code>");
        continue;
      }
      expect(result).toContain(testCase.contains);
    }
  });

  it("de-linkifies auto-linkified anchors for plain files and paths", () => {
    // NOTE(review): hrefs reconstructed as the auto-linkified form of the label - confirm.
    const cases = [
      {
        input: '<a href="http://README.md">README.md</a>',
        expected: "<code>README.md</code>",
      },
      {
        input: '<a href="http://squad/friday/HEARTBEAT.md">squad/friday/HEARTBEAT.md</a>',
        expected: "<code>squad/friday/HEARTBEAT.md</code>",
      },
    ] as const;
    for (const testCase of cases) {
      expect(wrapFileReferencesInHtml(testCase.input)).toBe(testCase.expected);
    }
  });

  it("preserves explicit links where label differs from href", () => {
    // NOTE(review): hrefs reconstructed - confirm.
    const cases = [
      '<a href="http://README.md">click here</a>',
      '<a href="http://other.md">README.md</a>',
    ] as const;
    for (const input of cases) {
      expect(wrapFileReferencesInHtml(input)).toBe(input);
    }
  });

  it("wraps file ref after closing anchor tag", () => {
    // NOTE(review): href reconstructed - confirm.
    const input = '<a href="https://example.com">link</a> then README.md';
    const result = wrapFileReferencesInHtml(input);
    expect(result).toContain("</a> then <code>README.md</code>");
  });
});

describe("renderTelegramHtmlText - file reference wrapping", () => {
  it("wraps file references in markdown mode", () => {
    const result = renderTelegramHtmlText("Check README.md");
    expect(result).toContain("<code>README.md</code>");
  });

  it("does not wrap in HTML mode (trusts caller markup)", () => {
    // textMode: "html" should pass through unchanged - caller owns the markup
    const result = renderTelegramHtmlText("Check README.md", { textMode: "html" });
    expect(result).toBe("Check README.md");
    expect(result).not.toContain("<code>");
  });

  it("does not double-wrap already code-formatted content", () => {
    const result = renderTelegramHtmlText("Already `wrapped.md` here");
    // Should have code tags but not nested
    expect(result).toContain("<code>");
    expect(result).not.toContain("<code><code>");
  });
});

describe("markdownToTelegramHtml - file reference wrapping", () => {
  it("wraps file references by default", () => {
    const result = markdownToTelegramHtml("Check README.md");
    expect(result).toContain("<code>README.md</code>");
  });

  it("can skip wrapping when requested", () => {
    const result = markdownToTelegramHtml("Check README.md", { wrapFileRefs: false });
    expect(result).not.toContain("<code>README.md</code>");
  });

  it("wraps multiple file types in a single message", () => {
    const result = markdownToTelegramHtml("Edit main.go and script.py");
    expect(result).toContain("<code>main.go</code>");
    expect(result).toContain("<code>script.py</code>");
  });

  it("preserves real URLs as anchor tags", () => {
    const result = markdownToTelegramHtml("Visit https://example.com");
    expect(result).toContain('<a href="https://example.com">');
  });

  it("preserves explicit markdown links even when href looks like a file ref", () => {
    const result = markdownToTelegramHtml("[docs](http://README.md)");
    expect(result).toContain('<a href="http://README.md">docs</a>');
  });

  it("wraps file ref after real URL in same message", () => {
    const result = markdownToTelegramHtml("Visit https://example.com and README.md");
    expect(result).toContain('<a href="https://example.com">');
    expect(result).toContain("<code>README.md</code>");
  });
});

describe("markdownToTelegramChunks - file reference wrapping", () => {
  it("wraps file references in chunked output", () => {
    const chunks = markdownToTelegramChunks("Check README.md and backup.sh", 4096);
    expect(chunks.length).toBeGreaterThan(0);
    expect(chunks[0].html).toContain("<code>README.md</code>");
    expect(chunks[0].html).toContain("<code>backup.sh</code>");
  });

  it("keeps rendered html chunks within the provided limit", () => {
    const input = "<".repeat(1500);
    const chunks = markdownToTelegramChunks(input, 512);
    expect(chunks.length).toBeGreaterThan(1);
    expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
    expect(chunks.every((chunk) => chunk.html.length <= 512)).toBe(true);
  });

  it("preserves whitespace when html-limit retry splitting runs", () => {
    const input = "a < b";
    const chunks = markdownToTelegramChunks(input, 5);
    expect(chunks.length).toBeGreaterThan(1);
    expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
    expect(chunks.every((chunk) => chunk.html.length <= 5)).toBe(true);
  });

  it("prefers word boundaries when html-limit retry splits formatted prose", () => {
    const input = "**Which of these**";
    const chunks = markdownToTelegramChunks(input, 16);
    expect(chunks.map((chunk) => chunk.text)).toEqual(["Which of ", "these"]);
    expect(chunks.every((chunk) => chunk.html.length <= 16)).toBe(true);
  });

  it("falls back to in-paren word boundaries when the parenthesis is unbalanced", () => {
    const input = "**foo (bar baz qux quux**";
    const chunks = markdownToTelegramChunks(input, 20);
    expect(chunks.map((chunk) => chunk.text)).toEqual(["foo", "(bar baz qux ", "quux"]);
    expect(chunks.every((chunk) => chunk.html.length <= 20)).toBe(true);
  });

  it("does not emit whitespace-only chunks during html-limit retry splitting", () => {
    const input = "**ab <<**";
    const chunks = markdownToTelegramChunks(input, 11);
    expect(chunks.map((chunk) => chunk.text).join("")).toBe("ab <<");
    expect(chunks.every((chunk) => chunk.text.trim().length > 0)).toBe(true);
    expect(chunks.every((chunk) => chunk.html.length <= 11)).toBe(true);
  });

  it("preserves paragraph separators when retry chunking produces whitespace-only spans", () => {
    const input = "ab\n\n<<";
    const chunks = markdownToTelegramChunks(input, 6);
    expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
    expect(chunks.every((chunk) => chunk.html.length <= 6)).toBe(true);
  });
});

describe("edge cases", () => {
  it("wraps file refs inside emphasis tags", () => {
    // NOTE(review): <b>/<i> are the reconstructed emphasis tags - confirm.
    const cases = [
      ["**README.md**", "<b><code>README.md</code></b>"],
      ["*script.py*", "<i><code>script.py</code></i>"],
    ] as const;
    for (const [input, expected] of cases) {
      expect(markdownToTelegramHtml(input), input).toBe(expected);
    }
  });

  it("does not wrap inside fenced code blocks", () => {
    const result = markdownToTelegramHtml("```\nREADME.md\n```");
    expect(result).toBe("<pre><code>README.md\n</code></pre>");
    expect(result).not.toContain("<code><code>");
  });

  it("preserves real URL/domain paths as anchors", () => {
    const cases = [
      {
        input: "example.com/README.md",
        href: 'href="http://example.com/README.md"',
      },
      {
        input: "https://github.com/foo/README.md",
        href: 'href="https://github.com/foo/README.md"',
      },
    ] as const;
    for (const testCase of cases) {
      const result = markdownToTelegramHtml(testCase.input);
      expect(result).toContain(`<a ${testCase.href}>`);
      expect(result).not.toContain("<code>");
    }
  });

  it("handles wrapFileRefs: false (plain text output)", () => {
    const result = markdownToTelegramHtml("README.md", { wrapFileRefs: false });
    // buildTelegramLink returns null, so no <a> tag; wrapFileRefs: false skips <code>
    expect(result).toBe("README.md");
  });

  it("classifies extension-like tokens as file refs or domains", () => {
    // NOTE(review): the anchor literals below were reconstructed as
    // http://<domain> - confirm against the linkifier's output.
    const cases = [
      {
        name: "supported file-style extensions",
        input: "Makefile.am and code.at and app.be and main.cc",
        contains: [
          "<code>Makefile.am</code>",
          "<code>code.at</code>",
          "<code>app.be</code>",
          "<code>main.cc</code>",
        ],
      },
      {
        name: "popular domain TLDs stay links",
        input: "Check x.ai and vercel.io and app.tv and radio.fm",
        contains: [
          '<a href="http://x.ai">',
          '<a href="http://vercel.io">',
          '<a href="http://app.tv">',
          '<a href="http://radio.fm">',
        ],
      },
      {
        name: ".co stays links",
        input: "Visit t.co and openclaw.co",
        contains: ['<a href="http://t.co">', '<a href="http://openclaw.co">'],
        notContains: ["<code>t.co</code>", "<code>openclaw.co</code>"],
      },
      {
        name: "non-target extensions stay plain text",
        input: "image.png and style.css and script.js",
        notContains: [
          "<code>image.png</code>",
          "<code>style.css</code>",
          "<code>script.js</code>",
        ],
      },
    ] as const;
    for (const testCase of cases) {
      const result = markdownToTelegramHtml(testCase.input);
      if ("contains" in testCase && testCase.contains) {
        for (const expected of testCase.contains) {
          expect(result, testCase.name).toContain(expected);
        }
      }
      if ("notContains" in testCase && testCase.notContains) {
        for (const unexpected of testCase.notContains) {
          expect(result, testCase.name).not.toContain(unexpected);
        }
      }
    }
  });

  it("wraps file refs across boundaries, sequences, and path variants", () => {
    const cases = [
      {
        name: "message start boundary",
        input: "README.md is important",
        expectedExact: "<code>README.md</code> is important",
      },
      {
        name: "message end boundary",
        input: "Check the README.md",
        expectedExact: "Check the <code>README.md</code>",
      },
      {
        name: "multiple file refs",
        input: "README.md CHANGELOG.md LICENSE.md",
        contains: [
          "<code>README.md</code>",
          "<code>CHANGELOG.md</code>",
          "<code>LICENSE.md</code>",
        ],
      },
      {
        name: "nested path",
        input: "src/utils/helpers/format.go",
        contains: ["<code>src/utils/helpers/format.go</code>"],
      },
      {
        name: "version-like non-domain path",
        input: "v1.0/README.md",
        contains: ["<code>v1.0/README.md</code>"],
      },
      {
        name: "domain with version path",
        input: "example.com/v1.0/README.md",
        // NOTE(review): reconstructed anchor literal - confirm.
        contains: ['<a href="http://example.com/v1.0/README.md">'],
      },
      {
        name: "hyphen underscore and uppercase extensions",
        input: "my-file_name.md README.MD and SCRIPT.PY",
        contains: [
          "<code>my-file_name.md</code>",
          "<code>README.MD</code>",
          "<code>SCRIPT.PY</code>",
        ],
      },
    ] as const;
    for (const testCase of cases) {
      const result = markdownToTelegramHtml(testCase.input);
      if ("expectedExact" in testCase) {
        expect(result, testCase.name).toBe(testCase.expectedExact);
      }
      if ("contains" in testCase && testCase.contains) {
        for (const expected of testCase.contains) {
          expect(result, testCase.name).toContain(expected);
        }
      }
    }
  });

  it("handles nested code tags (depth tracking)", () => {
    // Nested <code> inside <pre> - should not wrap inner content
    const input = "<pre><code>README.md</code></pre> then script.py";
    const result = wrapFileReferencesInHtml(input);
    expect(result).toBe("<pre><code>README.md</code></pre> then <code>script.py</code>");
  });

  it("handles multiple anchor tags in sequence", () => {
    // NOTE(review): hrefs reconstructed - confirm.
    const input =
      '<a href="https://a.com">link1</a> README.md <a href="https://b.com">link2</a> script.py';
    const result = wrapFileReferencesInHtml(input);
    expect(result).toContain("</a> <code>README.md</code> <a");
    expect(result).toContain("</a> <code>script.py</code>");
  });

  it("wraps orphaned TLD pattern after special character", () => {
    // R&D.md - the & breaks the main pattern, but D.md could be auto-linked
    // So we wrap the orphaned D.md part to prevent Telegram linking it
    // NOTE(review): the ampersand may originally have been written as an HTML
    // entity; the visible character is kept as found - confirm.
    const input = "R&D.md";
    const result = wrapFileReferencesInHtml(input);
    expect(result).toBe("R&<code>D.md</code>");
  });

  it("wraps orphaned single-letter TLD patterns", () => {
    // Use extensions still in the set (md, sh, py, go)
    const result1 = wrapFileReferencesInHtml("X.md is cool");
    expect(result1).toContain("<code>X.md</code>");
    const result2 = wrapFileReferencesInHtml("Check R.sh");
    expect(result2).toContain("<code>R.sh</code>");
  });

  // NOTE(review): the visible text ends inside the string literal below; the
  // rest of this test is not available here, so the fragment is left as found.
  it("does not match filenames containing angle brackets", () => {
    // The regex character class [a-zA-Z0-9_.\\-./] doesn't include < >
    // so these won't be matched and wrapped (which is correct/safe)
    const input = "file