import { describe, expect, it } from "vitest";
import {
markdownToTelegramChunks,
markdownToTelegramHtml,
renderTelegramHtmlText,
wrapFileReferencesInHtml,
} from "./format.js";
/**
 * Tests for `wrapFileReferencesInHtml`, which operates on already-rendered
 * HTML. The assertions pin three behaviors:
 *  - bare file references / paths get wrapped in `<code>…</code>`,
 *  - content already inside `<code>`, `<pre>` or an explicit `<a>` is left alone,
 *  - anchors whose label is just the auto-linkified file name are replaced by
 *    a `<code>` span.
 */
describe("wrapFileReferencesInHtml", () => {
  it("wraps supported file references and paths", () => {
    // [input, substring expected in the output]
    const cases = [
      ["Check README.md", "Check <code>README.md</code>"],
      ["See HEARTBEAT.md for status", "See <code>HEARTBEAT.md</code> for status"],
      ["Check main.go", "Check <code>main.go</code>"],
      ["Run script.py", "Run <code>script.py</code>"],
      ["Check backup.pl", "Check <code>backup.pl</code>"],
      ["Run backup.sh", "Run <code>backup.sh</code>"],
      ["Look at squad/friday/HEARTBEAT.md", "Look at <code>squad/friday/HEARTBEAT.md</code>"],
    ] as const;
    for (const [input, expected] of cases) {
      expect(wrapFileReferencesInHtml(input), input).toContain(expected);
    }
  });

  it("does not wrap inside protected html contexts", () => {
    // Each input must come back byte-for-byte unchanged.
    const cases = [
      "Already <code>wrapped.md</code> here",
      "<pre>README.md</pre>",
      '<a href="README.md">Link</a>',
      'Visit <a href="https://example.com/README.md">example.com/README.md</a>',
    ] as const;
    for (const input of cases) {
      const result = wrapFileReferencesInHtml(input);
      expect(result, input).toBe(input);
    }
    // Existing code spans must never be wrapped a second time.
    expect(wrapFileReferencesInHtml(cases[0])).not.toContain("<code><code>");
  });

  it("handles mixed content correctly", () => {
    const result = wrapFileReferencesInHtml("Check README.md and CONTRIBUTING.md");
    expect(result).toContain("<code>README.md</code>");
    expect(result).toContain("<code>CONTRIBUTING.md</code>");
  });

  it("handles boundary and punctuation wrapping cases", () => {
    // `contains: undefined` marks an input that must produce no <code> tag at all.
    // Trailing/surrounding punctuation must stay outside the <code> span.
    const cases = [
      { input: "No markdown files here", contains: undefined },
      { input: "File.md at start", contains: "<code>File.md</code>" },
      { input: "Ends with file.md", contains: "<code>file.md</code>" },
      { input: "See README.md.", contains: "<code>README.md</code>." },
      { input: "See README.md,", contains: "<code>README.md</code>," },
      { input: "(README.md)", contains: "(<code>README.md</code>)" },
      { input: "README.md:", contains: "<code>README.md</code>:" },
    ] as const;
    for (const testCase of cases) {
      const result = wrapFileReferencesInHtml(testCase.input);
      if (!testCase.contains) {
        expect(result, testCase.input).not.toContain("<code>");
        continue;
      }
      expect(result, testCase.input).toContain(testCase.contains);
    }
  });

  it("de-linkifies auto-linkified anchors for plain files and paths", () => {
    // The anchor label equals the href minus its scheme, i.e. the link was
    // produced by auto-linkification rather than written by the author.
    const cases = [
      {
        input: '<a href="http://README.md">README.md</a>',
        expected: "<code>README.md</code>",
      },
      {
        input: '<a href="http://squad/friday/HEARTBEAT.md">squad/friday/HEARTBEAT.md</a>',
        expected: "<code>squad/friday/HEARTBEAT.md</code>",
      },
    ] as const;
    for (const testCase of cases) {
      expect(wrapFileReferencesInHtml(testCase.input), testCase.input).toBe(testCase.expected);
    }
  });

  it("preserves explicit links where label differs from href", () => {
    const cases = [
      '<a href="http://README.md">click here</a>',
      '<a href="http://other.md">README.md</a>',
    ] as const;
    for (const input of cases) {
      expect(wrapFileReferencesInHtml(input), input).toBe(input);
    }
  });

  it("wraps file ref after closing anchor tag", () => {
    const input = '<a href="https://example.com">link</a> then README.md';
    const result = wrapFileReferencesInHtml(input);
    // Wrapping must resume once the anchor has been closed.
    expect(result).toContain("</a> then <code>README.md</code>");
  });
});
/**
 * Tests that `renderTelegramHtmlText` applies file-reference wrapping when it
 * renders markdown, and leaves caller-supplied HTML untouched.
 */
describe("renderTelegramHtmlText - file reference wrapping", () => {
  it("wraps file references in markdown mode", () => {
    const result = renderTelegramHtmlText("Check README.md");
    expect(result).toContain("<code>README.md</code>");
  });

  it("does not wrap in HTML mode (trusts caller markup)", () => {
    // textMode: "html" should pass through unchanged - caller owns the markup
    const result = renderTelegramHtmlText("Check README.md", { textMode: "html" });
    expect(result).toBe("Check README.md");
    expect(result).not.toContain("<code>");
  });

  it("does not double-wrap already code-formatted content", () => {
    const result = renderTelegramHtmlText("Already `wrapped.md` here");
    // Should have code tags but not nested
    expect(result).toContain("<code>wrapped.md</code>");
    expect(result).not.toContain("<code><code>");
  });
});
/**
 * Tests for file-reference wrapping in `markdownToTelegramHtml`: wrapping is
 * on by default, can be disabled with `wrapFileRefs: false`, and must not
 * interfere with real URLs or explicit markdown links.
 */
describe("markdownToTelegramHtml - file reference wrapping", () => {
  it("wraps file references by default", () => {
    const result = markdownToTelegramHtml("Check README.md");
    expect(result).toContain("<code>README.md</code>");
  });

  it("can skip wrapping when requested", () => {
    const result = markdownToTelegramHtml("Check README.md", { wrapFileRefs: false });
    // The file name itself is still present in the output; only the <code>
    // wrapper must be absent.
    expect(result).not.toContain("<code>README.md</code>");
  });

  it("wraps multiple file types in a single message", () => {
    const result = markdownToTelegramHtml("Edit main.go and script.py");
    expect(result).toContain("<code>main.go</code>");
    expect(result).toContain("<code>script.py</code>");
  });

  it("preserves real URLs as anchor tags", () => {
    const result = markdownToTelegramHtml("Visit https://example.com");
    expect(result).toContain('<a href="https://example.com">');
  });

  it("preserves explicit markdown links even when href looks like a file ref", () => {
    const result = markdownToTelegramHtml("[docs](http://README.md)");
    // Label ("docs") differs from the href, so this is an author-written link.
    expect(result).toContain('<a href="http://README.md">docs</a>');
  });

  it("wraps file ref after real URL in same message", () => {
    const result = markdownToTelegramHtml("Visit https://example.com and README.md");
    expect(result).toContain('<a href="https://example.com">');
    expect(result).toContain("<code>README.md</code>");
  });
});
/**
 * Tests for `markdownToTelegramChunks(markdown, limit)`. Each returned chunk
 * exposes `text` (source slice) and `html` (rendered slice). The tests pin:
 *  - file references are wrapped in the rendered html of a chunk,
 *  - every chunk's html length stays within the limit passed in,
 *  - concatenating the chunk texts reproduces the expected text.
 */
describe("markdownToTelegramChunks - file reference wrapping", () => {
  /** Asserts that the rendered html of every chunk fits within `limit`. */
  const expectHtmlWithinLimit = (chunks: ReadonlyArray<{ html: string }>, limit: number): void => {
    expect(chunks.every((chunk) => chunk.html.length <= limit)).toBe(true);
  };

  /** Joins the plain-text side of all chunks back into one string. */
  const joinText = (chunks: ReadonlyArray<{ text: string }>): string =>
    chunks.map((chunk) => chunk.text).join("");

  it("wraps file references in chunked output", () => {
    const chunks = markdownToTelegramChunks("Check README.md and backup.sh", 4096);
    expect(chunks.length).toBeGreaterThan(0);
    // Check for the <code> wrapper, not just the bare file name, so the test
    // fails when wrapping is skipped.
    expect(chunks[0].html).toContain("<code>README.md</code>");
    expect(chunks[0].html).toContain("<code>backup.sh</code>");
  });

  it("keeps rendered html chunks within the provided limit", () => {
    const input = "<".repeat(1500);
    const chunks = markdownToTelegramChunks(input, 512);
    expect(chunks.length).toBeGreaterThan(1);
    expect(joinText(chunks)).toBe(input);
    expectHtmlWithinLimit(chunks, 512);
  });

  it("preserves whitespace when html-limit retry splitting runs", () => {
    const input = "a < b";
    const chunks = markdownToTelegramChunks(input, 5);
    expect(chunks.length).toBeGreaterThan(1);
    expect(joinText(chunks)).toBe(input);
    expectHtmlWithinLimit(chunks, 5);
  });

  it("prefers word boundaries when html-limit retry splits formatted prose", () => {
    const input = "**Which of these**";
    const chunks = markdownToTelegramChunks(input, 16);
    expect(chunks.map((chunk) => chunk.text)).toEqual(["Which of ", "these"]);
    expectHtmlWithinLimit(chunks, 16);
  });

  it("falls back to in-paren word boundaries when the parenthesis is unbalanced", () => {
    const input = "**foo (bar baz qux quux**";
    const chunks = markdownToTelegramChunks(input, 20);
    expect(chunks.map((chunk) => chunk.text)).toEqual(["foo", "(bar baz qux ", "quux"]);
    expectHtmlWithinLimit(chunks, 20);
  });

  it("does not emit whitespace-only chunks during html-limit retry splitting", () => {
    const input = "**ab <<**";
    const chunks = markdownToTelegramChunks(input, 11);
    expect(joinText(chunks)).toBe("ab <<");
    expect(chunks.every((chunk) => chunk.text.trim().length > 0)).toBe(true);
    expectHtmlWithinLimit(chunks, 11);
  });

  it("preserves paragraph separators when retry chunking produces whitespace-only spans", () => {
    const input = "ab\n\n<<";
    const chunks = markdownToTelegramChunks(input, 6);
    expect(joinText(chunks)).toBe(input);
    expectHtmlWithinLimit(chunks, 6);
  });
});
describe("edge cases", () => {
it("wraps file refs inside emphasis tags", () => {
const cases = [
["**README.md**", "README.md"],
["*script.py*", "script.py"],
] as const;
for (const [input, expected] of cases) {
expect(markdownToTelegramHtml(input), input).toBe(expected);
}
});
it("does not wrap inside fenced code blocks", () => {
const result = markdownToTelegramHtml("```\nREADME.md\n```");
expect(result).toBe("README.md\n
");
expect(result).not.toContain("");
});
it("preserves real URL/domain paths as anchors", () => {
const cases = [
{
input: "example.com/README.md",
href: 'href="http://example.com/README.md"',
},
{
input: "https://github.com/foo/README.md",
href: 'href="https://github.com/foo/README.md"',
},
] as const;
for (const testCase of cases) {
const result = markdownToTelegramHtml(testCase.input);
expect(result).toContain(``);
expect(result).not.toContain("");
}
});
it("handles wrapFileRefs: false (plain text output)", () => {
const result = markdownToTelegramHtml("README.md", { wrapFileRefs: false });
// buildTelegramLink returns null, so no tag; wrapFileRefs: false skips
expect(result).toBe("README.md");
});
it("classifies extension-like tokens as file refs or domains", () => {
const cases = [
{
name: "supported file-style extensions",
input: "Makefile.am and code.at and app.be and main.cc",
contains: [
"Makefile.am",
"code.at",
"app.be",
"main.cc",
],
},
{
name: "popular domain TLDs stay links",
input: "Check x.ai and vercel.io and app.tv and radio.fm",
contains: [
'',
'',
'',
'',
],
},
{
name: ".co stays links",
input: "Visit t.co and openclaw.co",
contains: ['', ''],
notContains: ["t.co", "openclaw.co"],
},
{
name: "non-target extensions stay plain text",
input: "image.png and style.css and script.js",
notContains: ["image.png", "style.css", "script.js"],
},
] as const;
for (const testCase of cases) {
const result = markdownToTelegramHtml(testCase.input);
if ("contains" in testCase && testCase.contains) {
for (const expected of testCase.contains) {
expect(result, testCase.name).toContain(expected);
}
}
if ("notContains" in testCase && testCase.notContains) {
for (const unexpected of testCase.notContains) {
expect(result, testCase.name).not.toContain(unexpected);
}
}
}
});
it("wraps file refs across boundaries, sequences, and path variants", () => {
const cases = [
{
name: "message start boundary",
input: "README.md is important",
expectedExact: "README.md is important",
},
{
name: "message end boundary",
input: "Check the README.md",
expectedExact: "Check the README.md",
},
{
name: "multiple file refs",
input: "README.md CHANGELOG.md LICENSE.md",
contains: [
"README.md",
"CHANGELOG.md",
"LICENSE.md",
],
},
{
name: "nested path",
input: "src/utils/helpers/format.go",
contains: ["src/utils/helpers/format.go"],
},
{
name: "version-like non-domain path",
input: "v1.0/README.md",
contains: ["v1.0/README.md"],
},
{
name: "domain with version path",
input: "example.com/v1.0/README.md",
contains: [''],
},
{
name: "hyphen underscore and uppercase extensions",
input: "my-file_name.md README.MD and SCRIPT.PY",
contains: [
"my-file_name.md",
"README.MD",
"SCRIPT.PY",
],
},
] as const;
for (const testCase of cases) {
const result = markdownToTelegramHtml(testCase.input);
if ("expectedExact" in testCase) {
expect(result, testCase.name).toBe(testCase.expectedExact);
}
if ("contains" in testCase && testCase.contains) {
for (const expected of testCase.contains) {
expect(result, testCase.name).toContain(expected);
}
}
}
});
it("handles nested code tags (depth tracking)", () => {
// Nested inside - should not wrap inner content
const input = "README.md
then script.py";
const result = wrapFileReferencesInHtml(input);
expect(result).toBe("README.md
then script.py");
});
it("handles multiple anchor tags in sequence", () => {
const input =
'link1 README.md link2 script.py';
const result = wrapFileReferencesInHtml(input);
expect(result).toContain(" README.md script.py");
});
it("wraps orphaned TLD pattern after special character", () => {
// R&D.md - the & breaks the main pattern, but D.md could be auto-linked
// So we wrap the orphaned D.md part to prevent Telegram linking it
const input = "R&D.md";
const result = wrapFileReferencesInHtml(input);
expect(result).toBe("R&D.md");
});
it("wraps orphaned single-letter TLD patterns", () => {
// Use extensions still in the set (md, sh, py, go)
const result1 = wrapFileReferencesInHtml("X.md is cool");
expect(result1).toContain("X.md");
const result2 = wrapFileReferencesInHtml("Check R.sh");
expect(result2).toContain("R.sh");
});
it("does not match filenames containing angle brackets", () => {
// The regex character class [a-zA-Z0-9_.\\-./] doesn't include < >
// so these won't be matched and wrapped (which is correct/safe)
const input = "file