fix(ui): prevent marked from auto-linking adjacent CJK characters (openclaw#48410)

Verified:
- ui: pnpm test -- --run src/ui/markdown.test.ts
- local full gate relaxed for this run; no required GitHub checks reported on the branch

Co-authored-by: jnuyao <2928523+jnuyao@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
jnuyao 2026-03-29 09:49:29 +08:00 committed by GitHub
parent f9b1079283
commit f93ccc3443
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 64 additions and 0 deletions

View File

@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
- Gateway/health: carry webhook-vs-polling account mode from channel descriptors into runtime snapshots so passive channels like LINE and BlueBubbles skip false stale-socket health failures. (#47488) Thanks @karesansui-u.
- Memory/QMD: honor `memory.qmd.update.embedInterval` even when regular QMD update cadence is disabled or slower by arming a dedicated embed-cadence maintenance timer, while avoiding redundant timers when regular updates are already frequent enough. (#37326) Thanks @barronlroth.
- Agents/memory flush: keep daily memory flush files append-only during embedded attempts so compaction writes do not overwrite earlier notes. (#53725) Thanks @HPluseven.
- Web UI/markdown: stop bare auto-links from swallowing adjacent CJK text while preserving valid mixed-script path and query characters in rendered links. (#48410) Thanks @jnuyao.
## 2026.3.28

View File

@ -162,4 +162,22 @@ describe("toSanitizedMarkdownHtml", () => {
warnSpy.mockRestore();
}
});
it("keeps adjacent trailing CJK text outside bare auto-links", () => {
  // A bare URL immediately followed by CJK prose: only the URL itself may be
  // linked, and the prose has to appear right after the closing anchor tag.
  const rendered = toSanitizedMarkdownHtml("https://example.com重新解读");
  const expectedFragments = ['<a href="https://example.com"', ">https://example.com</a>重新解读"];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("preserves valid mixed-script query parameters inside auto-links", () => {
  // CJK text in the middle of a query string belongs to the URL: the href is
  // expected percent-encoded while the visible link text keeps the characters.
  const rendered = toSanitizedMarkdownHtml("https://api.example.com?q=重新&lang=en");
  const expectedFragments = [
    'href="https://api.example.com?q=%E9%87%8D%E6%96%B0&amp;lang=en"',
    ">https://api.example.com?q=重新&amp;lang=en</a>",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("preserves valid mixed-script path segments inside auto-links", () => {
  // A CJK path segment followed by further ASCII path text must stay inside
  // the link: percent-encoded in the href, verbatim in the link text.
  const rendered = toSanitizedMarkdownHtml("https://example.com/path/重新/file");
  const expectedFragments = [
    'href="https://example.com/path/%E9%87%8D%E6%96%B0/file"',
    ">https://example.com/path/重新/file</a>",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
});

View File

@ -63,6 +63,7 @@ const MARKDOWN_CACHE_MAX_CHARS = 50_000;
// Matches the prefix of an inline base64-encoded image data URI (case-insensitive).
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
// String-keyed cache of string values for markdown rendering.
const markdownCache = new Map<string, string>();
// CSS class name used for the link tail blur styling.
const TAIL_LINK_BLUR_CLASS = "chat-link-tail-blur";
// Matches a trailing run of CJK text (or whitespace) at the end of a string;
// capture group 1 is the whole run. Covered ranges:
//   U+3000-U+303F  CJK symbols and punctuation
//   U+3040-U+30FF  Hiragana and Katakana
//   U+3400-U+4DBF  CJK unified ideographs, extension A
//   U+4E00-U+9FFF  CJK unified ideographs
//   U+AC00-U+D7AF  Hangul syllables
//   U+FF01-U+FF5E  fullwidth ASCII variants
// Kana, extension A and Hangul are included so Japanese and Korean prose that
// directly follows a bare URL is treated the same way as Chinese prose.
const TRAILING_CJK_TAIL_RE =
  /([\u3000-\u303F\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uFF01-\uFF5E\s]+)$/;
function getCachedMarkdown(key: string): string | null {
const cached = markdownCache.get(key);
@ -121,6 +122,50 @@ function installHooks() {
});
}
// Locates the next place in the source where a bare http(s) link could begin.
const CJK_AUTO_LINK_START_RE = /https?:\/\//i;
// GFM-style bare URL rule: scheme, then a run of non-space / non-"<" characters
// whose final character is not sentence punctuation or a closing bracket/quote.
const CJK_AUTO_LINK_RULE_RE = /^https?:\/\/[^\s<]+[^<.,:;"')\]\s]/i;
// ASCII characters the rule above refuses as the last character of a link.
const CJK_AUTO_LINK_TRAILING_PUNCT_RE = /[.,:;"')\]]+$/;
// A URL that consists of nothing but its scheme.
const CJK_AUTO_LINK_SCHEME_ONLY_RE = /^https?:\/\/$/i;

/**
 * Trims a raw bare-URL match down to the text that should become the link.
 *
 * Removes the trailing CJK run and then any ASCII punctuation that the removal
 * exposed (e.g. the "," in "https://example.com,重新"), repeating until the
 * text is stable so mixed tails are fully removed.
 */
function trimCjkAutoLinkTail(rawUrl: string): string {
  let current = rawUrl;
  for (;;) {
    const next = current
      .replace(TRAILING_CJK_TAIL_RE, "")
      .replace(CJK_AUTO_LINK_TRAILING_PUNCT_RE, "");
    if (next === current) {
      return current;
    }
    current = next;
  }
}

// Extension to prevent auto-linking algorithms from swallowing adjacent CJK characters.
const cjkAutoLinkExtension = {
  name: "url",
  level: "inline",
  /**
   * Indicates where an auto-link might start.
   *
   * @param src Remaining inline source.
   * @returns Index of the first "http://" or "https://", or -1 when there is none.
   */
  start(src: string): number {
    return src.search(CJK_AUTO_LINK_START_RE);
  },
  /**
   * Tokenizes a bare URL at the very start of `src` into a link token whose
   * raw text, label and href all exclude any trailing CJK text.
   *
   * @param src Remaining inline source.
   * @returns A link token, or `undefined` when `src` does not start with a
   *   usable bare URL.
   */
  tokenizer(src: string) {
    const match = CJK_AUTO_LINK_RULE_RE.exec(src);
    if (!match) {
      return undefined;
    }
    // Stop before any CJK character or typical punctuation following CJK.
    // This stops link boundaries from bleeding into mixed-language paragraphs.
    const urlText = trimCjkAutoLinkTail(match[0]);
    if (CJK_AUTO_LINK_SCHEME_ONLY_RE.test(urlText)) {
      // Everything after the scheme was trimmed (e.g. "https://重新"); decline
      // rather than emit a link to a bare "https://".
      // NOTE(review): assumes marked treats an undefined result as "no match"
      // and moves on to its remaining tokenizers — confirm against marked docs.
      return undefined;
    }
    return {
      type: "link",
      raw: urlText,
      text: urlText,
      href: urlText,
      tokens: [
        {
          type: "text",
          raw: urlText,
          text: urlText,
        },
      ],
    };
  },
};
// Register the CJK-aware auto-link extension with marked when this module is loaded.
// NOTE(review): the double cast through `unknown` bypasses type checking of the
// extension object's shape — consider typing `cjkAutoLinkExtension` directly.
marked.use({
extensions: [cjkAutoLinkExtension as unknown as import("marked").TokenizerAndRendererExtension],
});
export function toSanitizedMarkdownHtml(markdown: string): string {
const input = markdown.trim();
if (!input) {