From f93ccc344358009dd42d6f040aed25c33d6be773 Mon Sep 17 00:00:00 2001 From: jnuyao Date: Sun, 29 Mar 2026 09:49:29 +0800 Subject: [PATCH] fix(ui): prevent marked from auto-linking adjacent CJK characters (openclaw#48410) Verified: - ui: pnpm test -- --run src/ui/markdown.test.ts - local full gate relaxed for this run; no required GitHub checks reported on the branch Co-authored-by: jnuyao <2928523+jnuyao@users.noreply.github.com> Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com> --- CHANGELOG.md | 1 + ui/src/ui/markdown.test.ts | 18 +++++++++++++++ ui/src/ui/markdown.ts | 45 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fec2bdfd69f..c798c658527 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai - Gateway/health: carry webhook-vs-polling account mode from channel descriptors into runtime snapshots so passive channels like LINE and BlueBubbles skip false stale-socket health failures. (#47488) Thanks @karesansui-u. - Memory/QMD: honor `memory.qmd.update.embedInterval` even when regular QMD update cadence is disabled or slower by arming a dedicated embed-cadence maintenance timer, while avoiding redundant timers when regular updates are already frequent enough. (#37326) Thanks @barronlroth. - Agents/memory flush: keep daily memory flush files append-only during embedded attempts so compaction writes do not overwrite earlier notes. (#53725) Thanks @HPluseven. +- Web UI/markdown: stop bare auto-links from swallowing adjacent CJK text while preserving valid mixed-script path and query characters in rendered links. (#48410) Thanks @jnuyao. 
## 2026.3.28 diff --git a/ui/src/ui/markdown.test.ts b/ui/src/ui/markdown.test.ts index 8c2f37cbea4..e27faf8fbaa 100644 --- a/ui/src/ui/markdown.test.ts +++ b/ui/src/ui/markdown.test.ts @@ -162,4 +162,22 @@ describe("toSanitizedMarkdownHtml", () => { warnSpy.mockRestore(); } }); + + it("keeps adjacent trailing CJK text outside bare auto-links", () => { + const html = toSanitizedMarkdownHtml("https://example.com重新解读"); + expect(html).toContain('<a href="https://example.com"'); + expect(html).toContain(">https://example.com</a>重新解读"); + }); + + it("preserves valid mixed-script query parameters inside auto-links", () => { + const html = toSanitizedMarkdownHtml("https://api.example.com?q=重新&lang=en"); + expect(html).toContain('href="https://api.example.com?q=%E9%87%8D%E6%96%B0&lang=en"'); + expect(html).toContain(">https://api.example.com?q=重新&lang=en"); + }); + + it("preserves valid mixed-script path segments inside auto-links", () => { + const html = toSanitizedMarkdownHtml("https://example.com/path/重新/file"); + expect(html).toContain('href="https://example.com/path/%E9%87%8D%E6%96%B0/file"'); + expect(html).toContain(">https://example.com/path/重新/file"); + }); }); diff --git a/ui/src/ui/markdown.ts b/ui/src/ui/markdown.ts index 160d0e96399..6f5c69b2b30 100644 --- a/ui/src/ui/markdown.ts +++ b/ui/src/ui/markdown.ts @@ -63,6 +63,7 @@ const MARKDOWN_CACHE_MAX_CHARS = 50_000; const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i; const markdownCache = new Map(); const TAIL_LINK_BLUR_CLASS = "chat-link-tail-blur"; +const TRAILING_CJK_TAIL_RE = /([\u4E00-\u9FFF\u3000-\u303F\uFF01-\uFF5E\s]+)$/; function getCachedMarkdown(key: string): string | null { const cached = markdownCache.get(key); @@ -121,6 +122,50 @@ function installHooks() { }); } +// Extension to prevent auto-linking algorithms from swallowing adjacent CJK characters. +const cjkAutoLinkExtension = { + name: "url", + level: "inline", + // Indicate where an auto-link might start + start(src: string) { + const match = src.match(/https?:\/\//i); + return match ? 
match.index! : -1; + }, + tokenizer(src: string) { + // GFM standard regex for auto-links + const rule = /^https?:\/\/[^\s<]+[^<.,:;"')\]\s]/i; + const match = rule.exec(src); + if (match) { + let urlText = match[0]; + + // Stop before any CJK character or typical punctuation following CJK + // This stops link boundaries from bleeding into mixed-language paragraphs. + const cjkMatch = urlText.match(TRAILING_CJK_TAIL_RE); + if (cjkMatch) { + urlText = urlText.substring(0, urlText.length - cjkMatch[1].length); + } + + return { + type: "link", + raw: urlText, + text: urlText, + href: urlText, + tokens: [ + { + type: "text", + raw: urlText, + text: urlText, + }, + ], + }; + } + }, +}; + +marked.use({ + extensions: [cjkAutoLinkExtension as unknown as import("marked").TokenizerAndRendererExtension], +}); + export function toSanitizedMarkdownHtml(markdown: string): string { const input = markdown.trim(); if (!input) {