diff --git a/extensions/telegram/src/format.ts b/extensions/telegram/src/format.ts index 591e4c35a84..3fbe1224359 100644 --- a/extensions/telegram/src/format.ts +++ b/extensions/telegram/src/format.ts @@ -433,28 +433,21 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] { return chunks.length > 0 ? chunks : [html]; } -function splitTelegramChunkByHtmlLimit( - chunk: MarkdownIR, - htmlLimit: number, - renderedHtmlLength: number, -): MarkdownIR[] { +function splitTelegramChunkByHtmlLimit(chunk: MarkdownIR, htmlLimit: number): MarkdownIR[] { const currentTextLength = chunk.text.length; if (currentTextLength <= 1) { return [chunk]; } - const proportionalLimit = Math.floor( - (currentTextLength * htmlLimit) / Math.max(renderedHtmlLength, 1), - ); - const candidateLimit = Math.min(currentTextLength - 1, proportionalLimit); - const splitLimit = - Number.isFinite(candidateLimit) && candidateLimit > 0 - ? candidateLimit - : Math.max(1, Math.floor(currentTextLength / 2)); + const splitLimit = findLargestTelegramChunkTextLengthWithinHtmlLimit(chunk, htmlLimit); + if (splitLimit <= 0) { + return [chunk]; + } const split = splitMarkdownIRPreserveWhitespace(chunk, splitLimit); - if (split.length > 1) { + const firstChunk = split[0]; + if (firstChunk && renderTelegramChunkHtml(firstChunk).length <= htmlLimit) { return split; } - return splitMarkdownIRPreserveWhitespace(chunk, Math.max(1, Math.floor(currentTextLength / 2))); + return [sliceMarkdownIR(chunk, 0, splitLimit), sliceMarkdownIR(chunk, splitLimit, currentTextLength)]; } function sliceStyleSpans( @@ -554,6 +547,26 @@ function renderTelegramChunkHtml(ir: MarkdownIR): string { return wrapFileReferencesInHtml(renderTelegramHtml(ir)); } +function findLargestTelegramChunkTextLengthWithinHtmlLimit( + chunk: MarkdownIR, + htmlLimit: number, +): number { + const currentTextLength = chunk.text.length; + if (currentTextLength <= 1) { + return currentTextLength; + } + + // Prefix HTML length is not 
monotonic because a sliced auto-link can render as + // a long fragment, while a longer completed file ref de-linkifies to + // a shorter <code>...</code> wrapper. Search exact candidates instead. + for (let candidateLength = currentTextLength - 1; candidateLength >= 1; candidateLength -= 1) { + if (renderTelegramChunkHtml(sliceMarkdownIR(chunk, 0, candidateLength)).length <= htmlLimit) { + return candidateLength; + } + } + return 0; +} + function findMarkdownIRPreservedSplitIndex(text: string, start: number, limit: number): number { const maxEnd = Math.min(text.length, start + limit); if (maxEnd >= text.length) { @@ -735,7 +748,7 @@ function renderTelegramChunksWithinHtmlLimit( finalized.push(chunk); continue; } - const split = splitTelegramChunkByHtmlLimit(chunk, normalizedLimit, html.length); + const split = splitTelegramChunkByHtmlLimit(chunk, normalizedLimit); if (split.length <= 1) { // Worst-case safety: avoid retry loops, deliver the chunk as-is. finalized.push(chunk); diff --git a/extensions/telegram/src/format.wrap-md.test.ts b/extensions/telegram/src/format.wrap-md.test.ts index de3cab42056..f8cdbda85ac 100644 --- a/extensions/telegram/src/format.wrap-md.test.ts +++ b/extensions/telegram/src/format.wrap-md.test.ts @@ -175,6 +175,14 @@ describe("markdownToTelegramChunks - file reference wrapping", () => { expect(chunks.every((chunk) => chunk.html.length <= 5)).toBe(true); }); + it("prefers word boundaries when escaped html shrinks the retry window", () => { + const input = "alpha <<"; + const chunks = markdownToTelegramChunks(input, 8); + expect(chunks.map((chunk) => chunk.text).join("")).toBe(input); + expect(chunks[0]?.text).toBe("alpha "); + expect(chunks.every((chunk) => chunk.html.length <= 8)).toBe(true); + }); + it("prefers word boundaries when html-limit retry splits formatted prose", () => { const input = "**Which of these**"; const chunks = markdownToTelegramChunks(input, 16); @@ -182,6 +190,35 @@ describe("markdownToTelegramChunks - file reference 
wrapping", () => { expect(chunks.every((chunk) => chunk.html.length <= 16)).toBe(true); }); + it("preserves formatting while splitting at word boundaries", () => { + const input = "**alpha <<**"; + const chunks = markdownToTelegramChunks(input, 13); + expect(chunks.map((chunk) => chunk.text).join("")).toBe("alpha <<"); + expect(chunks[0]?.text).toBe("alpha "); + expect(chunks.every((chunk) => chunk.html.length <= 13)).toBe(true); + expect(chunks.every((chunk) => chunk.html.startsWith("<b>") && chunk.html.endsWith("</b>"))).toBe( + true, + ); + }); + it("does not rely on monotonic html length for sliced file refs", () => { + const input = "README.md<"; + const chunks = markdownToTelegramChunks(input, 22); + expect(chunks.map((chunk) => chunk.text).join("")).toBe(input); + expect(chunks[0]?.text).toBe("README.md"); + expect(chunks[0]?.html).toBe("<code>README.md</code>"); + expect(chunks.every((chunk) => chunk.html.length <= 22)).toBe(true); + }); + it("gracefully returns the original chunk when tag overhead exceeds the limit", () => { + const input = "**ab**"; + expect(() => markdownToTelegramChunks(input, 6)).not.toThrow(); + const chunks = markdownToTelegramChunks(input, 6); + expect(chunks).toHaveLength(1); + expect(chunks[0]?.text).toBe("ab"); + expect(chunks[0]?.html).toBe("<b>ab</b>"); + }); + it("falls back to in-paren word boundaries when the parenthesis is unbalanced", () => { const input = "**foo (bar baz qux quux**"; const chunks = markdownToTelegramChunks(input, 20); @@ -189,6 +226,13 @@ describe("markdownToTelegramChunks - file reference wrapping", () => { expect(chunks.every((chunk) => chunk.html.length <= 20)).toBe(true); }); + it("falls back to hard splits when a single word exceeds the limit", () => { + const input = "supercalifragilistic"; + const chunks = markdownToTelegramChunks(input, 8); + expect(chunks.map((chunk) => chunk.text)).toEqual(["supercal", "ifragili", "stic"]); + expect(chunks.every((chunk) => chunk.html.length <= 8)).toBe(true); + }); + it("does not emit 
whitespace-only chunks during html-limit retry splitting", () => { const input = "**ab <<**"; const chunks = markdownToTelegramChunks(input, 11);