Merge 133fb1a300 into c4265a5f16

style: fix indentation
fix: preserve Telegram word boundaries when rechunking HTML (#47274)
2026-03-15 11:40:34 -03:00 · 2026-03-15 11:40:30 -03:00 · 2026-03-15 18:10:49 +05:30 · 2026-03-15 13:03:39 +01:00
7 changed files with 287 additions and 8 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -44,6 +44,7 @@ Docs: https://docs.openclaw.ai
 - Email/webhook wrapping: sanitize sender and subject metadata before external-content wrapping so metadata fields cannot break the wrapper structure. Thanks @vincentkoc.
 - Node/startup: remove leftover debug `console.log("node host PATH: ...")` that printed the resolved PATH on every `openclaw node run` invocation. (#46411)
 - Telegram/message send: forward `--force-document` through the `sendPayload` path as well as `sendMedia`, so Telegram payload sends with `channelData` keep uploading images as documents instead of silently falling back to compressed photo sends. (#47119) Thanks @thepagent.
+- Telegram/message chunking: preserve spaces, paragraph separators, and word boundaries when HTML overflow rechunking splits formatted replies. (#47274)

 ## 2026.3.13

--- a/extensions/telegram/src/format.ts
+++ b/extensions/telegram/src/format.ts
@@ -512,6 +512,146 @@ function sliceLinkSpans(
  });
 }

+function sliceMarkdownIR(ir: MarkdownIR, start: number, end: number): MarkdownIR {
+  return {
+    text: ir.text.slice(start, end),
+    styles: sliceStyleSpans(ir.styles, start, end),
+    links: sliceLinkSpans(ir.links, start, end),
+  };
+}
+
+function mergeAdjacentStyleSpans(styles: MarkdownIR["styles"]): MarkdownIR["styles"] {
+  const merged: MarkdownIR["styles"] = [];
+  for (const span of styles) {
+    const last = merged.at(-1);
+    if (last && last.style === span.style && span.start <= last.end) {
+      last.end = Math.max(last.end, span.end);
+      continue;
+    }
+    merged.push({ ...span });
+  }
+  return merged;
+}
+
+function mergeAdjacentLinkSpans(links: MarkdownIR["links"]): MarkdownIR["links"] {
+  const merged: MarkdownIR["links"] = [];
+  for (const link of links) {
+    const last = merged.at(-1);
+    if (last && last.href === link.href && link.start <= last.end) {
+      last.end = Math.max(last.end, link.end);
+      continue;
+    }
+    merged.push({ ...link });
+  }
+  return merged;
+}
+
+function mergeMarkdownIRChunks(left: MarkdownIR, right: MarkdownIR): MarkdownIR {
+  const offset = left.text.length;
+  return {
+    text: left.text + right.text,
+    styles: mergeAdjacentStyleSpans([
+      ...left.styles,
+      ...right.styles.map((span) => ({
+        ...span,
+        start: span.start + offset,
+        end: span.end + offset,
+      })),
+    ]),
+    links: mergeAdjacentLinkSpans([
+      ...left.links,
+      ...right.links.map((link) => ({
+        ...link,
+        start: link.start + offset,
+        end: link.end + offset,
+      })),
+    ]),
+  };
+}
+
+function renderTelegramChunkHtml(ir: MarkdownIR): string {
+  return wrapFileReferencesInHtml(renderTelegramHtml(ir));
+}
+
+function findMarkdownIRPreservedSplitIndex(text: string, start: number, limit: number): number {
+  const maxEnd = Math.min(text.length, start + limit);
+  if (maxEnd >= text.length) {
+    return text.length;
+  }
+
+  let lastOutsideParenNewlineBreak = -1;
+  let lastOutsideParenWhitespaceBreak = -1;
+  let lastOutsideParenWhitespaceRunStart = -1;
+  let lastAnyNewlineBreak = -1;
+  let lastAnyWhitespaceBreak = -1;
+  let lastAnyWhitespaceRunStart = -1;
+  let parenDepth = 0;
+  let sawNonWhitespace = false;
+
+  for (let index = start; index < maxEnd; index += 1) {
+    const char = text[index];
+    if (char === "(") {
+      sawNonWhitespace = true;
+      parenDepth += 1;
+      continue;
+    }
+    if (char === ")" && parenDepth > 0) {
+      sawNonWhitespace = true;
+      parenDepth -= 1;
+      continue;
+    }
+    if (!/\s/.test(char)) {
+      sawNonWhitespace = true;
+      continue;
+    }
+    if (!sawNonWhitespace) {
+      continue;
+    }
+    if (char === "\n") {
+      lastAnyNewlineBreak = index + 1;
+      if (parenDepth === 0) {
+        lastOutsideParenNewlineBreak = index + 1;
+      }
+      continue;
+    }
+    const whitespaceRunStart =
+      index === start || !/\s/.test(text[index - 1] ?? "") ? index : lastAnyWhitespaceRunStart;
+    lastAnyWhitespaceBreak = index + 1;
+    lastAnyWhitespaceRunStart = whitespaceRunStart;
+    if (parenDepth === 0) {
+      lastOutsideParenWhitespaceBreak = index + 1;
+      lastOutsideParenWhitespaceRunStart = whitespaceRunStart;
+    }
+  }
+
+  const resolveWhitespaceBreak = (breakIndex: number, runStart: number): number => {
+    if (breakIndex <= start) {
+      return breakIndex;
+    }
+    if (runStart <= start) {
+      return breakIndex;
+    }
+    return /\s/.test(text[breakIndex] ?? "") ? runStart : breakIndex;
+  };
+
+  if (lastOutsideParenNewlineBreak > start) {
+    return lastOutsideParenNewlineBreak;
+  }
+  if (lastOutsideParenWhitespaceBreak > start) {
+    return resolveWhitespaceBreak(
+      lastOutsideParenWhitespaceBreak,
+      lastOutsideParenWhitespaceRunStart,
+    );
+  }
+  if (lastAnyNewlineBreak > start) {
+    return lastAnyNewlineBreak;
+  }
+  if (lastAnyWhitespaceBreak > start) {
+    return resolveWhitespaceBreak(lastAnyWhitespaceBreak, lastAnyWhitespaceRunStart);
+  }
+  return maxEnd;
+}
+
 function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): MarkdownIR[] {
  if (!ir.text) {
    return [];
@@ -523,7 +663,7 @@ function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): Markd
  const chunks: MarkdownIR[] = [];
  let cursor = 0;
  while (cursor < ir.text.length) {
-    const end = Math.min(ir.text.length, cursor + normalizedLimit);
+    const end = findMarkdownIRPreservedSplitIndex(ir.text, cursor, normalizedLimit);
    chunks.push({
      text: ir.text.slice(cursor, end),
      styles: sliceStyleSpans(ir.styles, cursor, end),
@@ -534,32 +674,98 @@ function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): Markd
  return chunks;
 }

+function coalesceWhitespaceOnlyMarkdownIRChunks(chunks: MarkdownIR[], limit: number): MarkdownIR[] {
+  const coalesced: MarkdownIR[] = [];
+  let index = 0;
+
+  while (index < chunks.length) {
+    const chunk = chunks[index];
+    if (!chunk) {
+      index += 1;
+      continue;
+    }
+    if (chunk.text.trim().length > 0) {
+      coalesced.push(chunk);
+      index += 1;
+      continue;
+    }
+
+    const prev = coalesced.at(-1);
+    const next = chunks[index + 1];
+    const chunkLength = chunk.text.length;
+
+    const canMergePrev = (candidate: MarkdownIR) =>
+      renderTelegramChunkHtml(candidate).length <= limit;
+    const canMergeNext = (candidate: MarkdownIR) =>
+      renderTelegramChunkHtml(candidate).length <= limit;
+
+    if (prev) {
+      const mergedPrev = mergeMarkdownIRChunks(prev, chunk);
+      if (canMergePrev(mergedPrev)) {
+        coalesced[coalesced.length - 1] = mergedPrev;
+        index += 1;
+        continue;
+      }
+    }
+
+    if (next) {
+      const mergedNext = mergeMarkdownIRChunks(chunk, next);
+      if (canMergeNext(mergedNext)) {
+        chunks[index + 1] = mergedNext;
+        index += 1;
+        continue;
+      }
+    }
+
+    if (prev && next) {
+      for (let prefixLength = chunkLength - 1; prefixLength >= 1; prefixLength -= 1) {
+        const prefix = sliceMarkdownIR(chunk, 0, prefixLength);
+        const suffix = sliceMarkdownIR(chunk, prefixLength, chunkLength);
+        const mergedPrev = mergeMarkdownIRChunks(prev, prefix);
+        const mergedNext = mergeMarkdownIRChunks(suffix, next);
+        if (canMergePrev(mergedPrev) && canMergeNext(mergedNext)) {
+          coalesced[coalesced.length - 1] = mergedPrev;
+          chunks[index + 1] = mergedNext;
+          break;
+        }
+      }
+    }
+
+    index += 1;
+  }
+
+  return coalesced;
+}
+
 function renderTelegramChunksWithinHtmlLimit(
  ir: MarkdownIR,
  limit: number,
 ): TelegramFormattedChunk[] {
  const normalizedLimit = Math.max(1, Math.floor(limit));
  const pending = chunkMarkdownIR(ir, normalizedLimit);
-  const rendered: TelegramFormattedChunk[] = [];
+  const finalized: MarkdownIR[] = [];
  while (pending.length > 0) {
    const chunk = pending.shift();
    if (!chunk) {
      continue;
    }
-    const html = wrapFileReferencesInHtml(renderTelegramHtml(chunk));
+    const html = renderTelegramChunkHtml(chunk);
    if (html.length <= normalizedLimit || chunk.text.length <= 1) {
-      rendered.push({ html, text: chunk.text });
+      finalized.push(chunk);
      continue;
    }
    const split = splitTelegramChunkByHtmlLimit(chunk, normalizedLimit, html.length);
    if (split.length <= 1) {
      // Worst-case safety: avoid retry loops, deliver the chunk as-is.
-      rendered.push({ html, text: chunk.text });
+      finalized.push(chunk);
      continue;
    }
    pending.unshift(...split);
  }
-  return rendered;
+  return coalesceWhitespaceOnlyMarkdownIRChunks(finalized, normalizedLimit).map((chunk) => ({
+    html: renderTelegramChunkHtml(chunk),
+    text: chunk.text,
+  }));
 }

 export function markdownToTelegramChunks(
--- a/extensions/telegram/src/format.wrap-md.test.ts
+++ b/extensions/telegram/src/format.wrap-md.test.ts
@@ -174,6 +174,35 @@ describe("markdownToTelegramChunks - file reference wrapping", () => {
    expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
    expect(chunks.every((chunk) => chunk.html.length <= 5)).toBe(true);
  });
+
+  it("prefers word boundaries when html-limit retry splits formatted prose", () => {
+    const input = "**Which of these**";
+    const chunks = markdownToTelegramChunks(input, 16);
+    expect(chunks.map((chunk) => chunk.text)).toEqual(["Which of ", "these"]);
+    expect(chunks.every((chunk) => chunk.html.length <= 16)).toBe(true);
+  });
+
+  it("falls back to in-paren word boundaries when the parenthesis is unbalanced", () => {
+    const input = "**foo (bar baz qux quux**";
+    const chunks = markdownToTelegramChunks(input, 20);
+    expect(chunks.map((chunk) => chunk.text)).toEqual(["foo", "(bar baz qux ", "quux"]);
+    expect(chunks.every((chunk) => chunk.html.length <= 20)).toBe(true);
+  });
+
+  it("does not emit whitespace-only chunks during html-limit retry splitting", () => {
+    const input = "**ab  <<**";
+    const chunks = markdownToTelegramChunks(input, 11);
+    expect(chunks.map((chunk) => chunk.text).join("")).toBe("ab  <<");
+    expect(chunks.every((chunk) => chunk.text.trim().length > 0)).toBe(true);
+    expect(chunks.every((chunk) => chunk.html.length <= 11)).toBe(true);
+  });
+
+  it("preserves paragraph separators when retry chunking produces whitespace-only spans", () => {
+    const input = "ab\n\n<<";
+    const chunks = markdownToTelegramChunks(input, 6);
+    expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
+    expect(chunks.every((chunk) => chunk.html.length <= 6)).toBe(true);
+  });
 });

 describe("edge cases", () => {
--- a/src/gateway/server/ws-connection/connect-policy.test.ts
+++ b/src/gateway/server/ws-connection/connect-policy.test.ts
@@ -226,6 +226,30 @@ describe("ws connect policy", () => {
    expect(shouldSkipControlUiPairing(strict, "operator", true)).toBe(true);
  });

+  test("auth.mode=none skips pairing for operator control-ui only", () => {
+    const controlUi = resolveControlUiAuthPolicy({
+      isControlUi: true,
+      controlUiConfig: undefined,
+      deviceRaw: null,
+    });
+    const nonControlUi = resolveControlUiAuthPolicy({
+      isControlUi: false,
+      controlUiConfig: undefined,
+      deviceRaw: null,
+    });
+    // Control UI + operator + auth.mode=none: skip pairing (the fix for #42931)
+    expect(shouldSkipControlUiPairing(controlUi, "operator", false, "none")).toBe(true);
+    // Control UI + node role + auth.mode=none: still require pairing
+    expect(shouldSkipControlUiPairing(controlUi, "node", false, "none")).toBe(false);
+    // Non-Control-UI + operator + auth.mode=none: still require pairing
+    // (prevents #43478 regression where ALL clients bypassed pairing)
+    expect(shouldSkipControlUiPairing(nonControlUi, "operator", false, "none")).toBe(false);
+    // Control UI + operator + auth.mode=shared-key: no change
+    expect(shouldSkipControlUiPairing(controlUi, "operator", false, "shared-key")).toBe(false);
+    // Control UI + operator + no authMode: no change
+    expect(shouldSkipControlUiPairing(controlUi, "operator", false)).toBe(false);
+  });
+
  test("trusted-proxy control-ui bypass only applies to operator + trusted-proxy auth", () => {
    const cases: Array<{
      role: "operator" | "node";
--- a/src/gateway/server/ws-connection/connect-policy.ts
+++ b/src/gateway/server/ws-connection/connect-policy.ts
@@ -3,6 +3,7 @@ import type { GatewayRole } from "../../role-policy.js";
 import { roleCanSkipDeviceIdentity } from "../../role-policy.js";

 export type ControlUiAuthPolicy = {
+  isControlUi: boolean;
  allowInsecureAuthConfigured: boolean;
  dangerouslyDisableDeviceAuth: boolean;
  allowBypass: boolean;
@@ -24,6 +25,7 @@ export function resolveControlUiAuthPolicy(params: {
  const dangerouslyDisableDeviceAuth =
    params.isControlUi && params.controlUiConfig?.dangerouslyDisableDeviceAuth === true;
  return {
+    isControlUi: params.isControlUi,
    allowInsecureAuthConfigured,
    dangerouslyDisableDeviceAuth,
    // `allowInsecureAuth` must not bypass secure-context/device-auth requirements.
@@ -36,10 +38,21 @@ export function shouldSkipControlUiPairing(
  policy: ControlUiAuthPolicy,
  role: GatewayRole,
  trustedProxyAuthOk = false,
+  authMode?: string,
 ): boolean {
  if (trustedProxyAuthOk) {
    return true;
  }
+  // When auth is completely disabled (mode=none), there is no shared secret
+  // or token to gate pairing. Requiring pairing in this configuration adds
+  // friction without security value since any client can already connect
+  // without credentials. Guard with policy.isControlUi because this function
+  // is called for ALL clients (not just Control UI) at the call site.
+  // Scope to operator role so node-role sessions still need device identity
+  // (#43478 was reverted for skipping ALL clients).
+  if (policy.isControlUi && role === "operator" && authMode === "none") {
+    return true;
+  }
  // dangerouslyDisableDeviceAuth is the break-glass path for Control UI
  // operators. Keep pairing aligned with the missing-device bypass, including
  // open-auth deployments where there is no shared token/password to prove.
--- a/src/gateway/server/ws-connection/message-handler.ts
+++ b/src/gateway/server/ws-connection/message-handler.ts
@@ -681,7 +681,13 @@ export function attachGatewayWsMessageHandler(params: {
            hasBrowserOriginHeader,
            sharedAuthOk,
            authMethod,
-          }) || shouldSkipControlUiPairing(controlUiAuthPolicy, role, trustedProxyAuthOk);
+          }) ||
+          shouldSkipControlUiPairing(
+            controlUiAuthPolicy,
+            role,
+            trustedProxyAuthOk,
+            resolvedAuth.mode,
+          );
        if (device && devicePublicKey && !skipPairing) {
          const formatAuditList = (items: string[] | undefined): string => {
            if (!items || items.length === 0) {
--- a/src/tts/tts-core.ts
+++ b/src/tts/tts-core.ts
@@ -729,7 +729,7 @@ export async function edgeTTS(params: {
    ({ size } = statSync(outputPath));

    if (size === 0) {
-      throw new Error("Edge TTS produced empty audio file after retry");
+      throw new Error("Edge TTS produced empty audio file after retry.");
    }
  }
 }
Author	SHA1	Message	Date
Hiago Silva	d683c4e6a4	Merge `133fb1a300` into `c4265a5f16`	2026-03-15 11:40:34 -03:00
Hiago Silva	133fb1a300	style: fix indentation	2026-03-15 11:40:30 -03:00
Ayaan Zaidi	c4265a5f16	fix: preserve Telegram word boundaries when rechunking HTML (#47274) * fix: preserve Telegram chunk word boundaries * fix: address Telegram chunking review feedback * fix: preserve Telegram retry separators * fix: preserve Telegram chunking boundaries (#47274)	2026-03-15 18:10:49 +05:30
Andrew Demczuk	26e0a3ee9a	fix(gateway): skip Control UI pairing when auth.mode=none (closes #42931) (#47148) When auth is completely disabled (mode=none), requiring device pairing for Control UI operator sessions adds friction without security value since any client can already connect without credentials. Add authMode parameter to shouldSkipControlUiPairing so the bypass fires only for Control UI + operator role + auth.mode=none. This avoids the #43478 regression where a top-level OR disabled pairing for ALL websocket clients.	2026-03-15 13:03:39 +01:00