From 1c8758fbd51d2af197834dc2afdef2bac88f2b93 Mon Sep 17 00:00:00 2001
From: Edward-Qiang-2024 <zhangqiang2011@gmail.com>
Date: Sun, 29 Mar 2026 10:16:52 +0800
Subject: [PATCH] Fix: Correctly estimate CJK character token count in context
 pruner (openclaw#39985)

Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test -- src/agents/pi-extensions/context-pruning.test.ts src/utils/cjk-chars.test.ts

Co-authored-by: Edward-Qiang-2024 <176464463+Edward-Qiang-2024@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
---
 CHANGELOG.md                                  |  1 +
 .../pi-extensions/context-pruning.test.ts     | 28 +++++++++++++++++++
 .../pi-extensions/context-pruning/pruner.ts   | 14 ++++++----
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8efc07164cc..dffc3f39c6a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
 
 - LINE/ACP: add current-conversation binding and inbound binding-routing parity so `/acp spawn ... --thread here`, configured ACP bindings, and active conversation-bound ACP sessions work on LINE like the other conversation channels.
 - TTS/Microsoft: auto-switch the default Edge voice to Chinese for CJK-dominant text without overriding explicitly selected Microsoft voices. (#52355) Thanks @extrasmall0.
+- Agents/context pruning: count supplementary-plane CJK characters with the shared code-point-aware estimator so context pruning stops underestimating Japanese and Chinese text that uses Extension B ideographs. (#39985) Thanks @Edward-Qiang-2024.
 - macOS/local gateway: stop OpenClaw.app from killing healthy local gateway listeners after startup by recognizing the current `openclaw-gateway` process title and using the current `openclaw gateway` launch shape.
 - Memory/QMD: resolve slugified `memory_search` file hints back to the indexed filesystem path before returning search hits, so `memory_get` works again for mixed-case and spaced paths. (#50313) Thanks @erra9x.
 - Memory/QMD: weight CJK-heavy text correctly when estimating chunk sizes, preserve surrogate-pair characters during fine splits, and keep long Latin lines on the old chunk boundaries so memory indexing produces better-sized chunks for CJK notes. (#40271) Thanks @AaronLuo00.
diff --git a/src/agents/pi-extensions/context-pruning.test.ts b/src/agents/pi-extensions/context-pruning.test.ts
index 9dedff97def..76f39a60f08 100644
--- a/src/agents/pi-extensions/context-pruning.test.ts
+++ b/src/agents/pi-extensions/context-pruning.test.ts
@@ -269,6 +269,34 @@ describe("context-pruning", () => {
     expect(toolText(findToolResult(next, "t3"))).toContain("z".repeat(20_000));
   });
 
+  it("accounts for CJK Extension B text when deciding whether to prune", () => {
+    const extensionBText = "𠀀".repeat(50);
+    const messages: AgentMessage[] = [
+      makeUser(extensionBText),
+      makeToolResult({
+        toolCallId: "t1",
+        toolName: "exec",
+        text: "keep me",
+      }),
+    ];
+
+    const next = pruneContextMessages({
+      messages,
+      settings: makeAggressiveSettings({
+        keepLastAssistants: 0,
+        softTrimRatio: 1,
+        hardClearRatio: 1,
+        minPrunableToolChars: 0,
+        hardClear: { enabled: true, placeholder: "[cleared]" },
+      }),
+      ctx: CONTEXT_WINDOW_1000,
+      contextWindowTokensOverride: 40,
+      isToolPrunable: () => true,
+    });
+
+    expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]");
+  });
+
   it("uses contextWindow override when ctx.model is missing", () => {
     const messages = makeSimpleToolPruningMessages(true);
 
diff --git a/src/agents/pi-extensions/context-pruning/pruner.ts b/src/agents/pi-extensions/context-pruning/pruner.ts
index a0f4458f6d4..55a9da89b21 100644
--- a/src/agents/pi-extensions/context-pruning/pruner.ts
+++ b/src/agents/pi-extensions/context-pruning/pruner.ts
@@ -1,10 +1,10 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import type { ImageContent, TextContent, ToolResultMessage } from "@mariozechner/pi-ai";
 import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
+import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../../../utils/cjk-chars.js";
 import type { EffectiveContextPruningSettings } from "./settings.js";
 import { makeToolPrunablePredicate } from "./tools.js";
 
-const CHARS_PER_TOKEN_ESTIMATE = 4;
 const IMAGE_CHAR_ESTIMATE = 8_000;
 const PRUNED_CONTEXT_IMAGE_MARKER = "[image removed during context pruning]";
 
@@ -111,11 +111,15 @@ function hasImageBlocks(content: ReadonlyArray<TextContent | ImageContent>): boo
   return false;
 }
 
+function estimateWeightedTextChars(text: string): number {
+  return estimateStringChars(text);
+}
+
 function estimateTextAndImageChars(content: ReadonlyArray<TextContent | ImageContent>): number {
   let chars = 0;
   for (const block of content) {
     if (block.type === "text") {
-      chars += block.text.length;
+      chars += estimateWeightedTextChars(block.text);
     }
     if (block.type === "image") {
       chars += IMAGE_CHAR_ESTIMATE;
@@ -128,7 +132,7 @@ function estimateMessageChars(message: AgentMessage): number {
   if (message.role === "user") {
     const content = message.content;
     if (typeof content === "string") {
-      return content.length;
+      return estimateWeightedTextChars(content);
     }
     return estimateTextAndImageChars(content);
   }
@@ -140,10 +144,10 @@ function estimateMessageChars(message: AgentMessage): number {
         continue;
       }
       if (b.type === "text" && typeof b.text === "string") {
-        chars += b.text.length;
+        chars += estimateWeightedTextChars(b.text);
       }
       if (b.type === "thinking" && typeof b.thinking === "string") {
-        chars += b.thinking.length;
+        chars += estimateWeightedTextChars(b.thinking);
       }
       if (b.type === "toolCall") {
         try {