mirror of https://github.com/openclaw/openclaw.git
Fix: Correctly estimate CJK character token count in context pruner (openclaw#39985)
Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test -- src/agents/pi-extensions/context-pruning.test.ts src/utils/cjk-chars.test.ts

Co-authored-by: Edward-Qiang-2024 <176464463+Edward-Qiang-2024@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
parent
7cf87c4e53
commit
1c8758fbd5
|
|
@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
|
|||
|
||||
- LINE/ACP: add current-conversation binding and inbound binding-routing parity so `/acp spawn ... --thread here`, configured ACP bindings, and active conversation-bound ACP sessions work on LINE like the other conversation channels.
|
||||
- TTS/Microsoft: auto-switch the default Edge voice to Chinese for CJK-dominant text without overriding explicitly selected Microsoft voices. (#52355) Thanks @extrasmall0.
|
||||
- Agents/context pruning: count supplementary-plane CJK characters with the shared code-point-aware estimator so context pruning stops underestimating Japanese and Chinese text that uses Extension B ideographs. (#39985) Thanks @Edward-Qiang-2024.
|
||||
- macOS/local gateway: stop OpenClaw.app from killing healthy local gateway listeners after startup by recognizing the current `openclaw-gateway` process title and using the current `openclaw gateway` launch shape.
|
||||
- Memory/QMD: resolve slugified `memory_search` file hints back to the indexed filesystem path before returning search hits, so `memory_get` works again for mixed-case and spaced paths. (#50313) Thanks @erra9x.
|
||||
- Memory/QMD: weight CJK-heavy text correctly when estimating chunk sizes, preserve surrogate-pair characters during fine splits, and keep long Latin lines on the old chunk boundaries so memory indexing produces better-sized chunks for CJK notes. (#40271) Thanks @AaronLuo00.
|
||||
|
|
|
|||
|
|
@ -269,6 +269,34 @@ describe("context-pruning", () => {
|
|||
expect(toolText(findToolResult(next, "t3"))).toContain("z".repeat(20_000));
|
||||
});
|
||||
|
||||
// Regression test for CJK-aware size estimation in the context pruner:
// a user message made only of supplementary-plane ideographs must count
// heavily enough that the accompanying tool result gets hard-cleared.
it("accounts for CJK Extension B text when deciding whether to prune", () => {
|
||||
// U+20000 lies outside the BMP, so each repetition is a surrogate pair in a
// JS string (`.length` reports 2 per character, 100 for this string).
const extensionBText = "𠀀".repeat(50);
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser(extensionBText),
|
||||
makeToolResult({
|
||||
toolCallId: "t1",
|
||||
toolName: "exec",
|
||||
text: "keep me",
|
||||
}),
|
||||
];
|
||||
|
||||
const next = pruneContextMessages({
|
||||
messages,
|
||||
settings: makeAggressiveSettings({
|
||||
keepLastAssistants: 0,
|
||||
softTrimRatio: 1,
|
||||
hardClearRatio: 1,
|
||||
minPrunableToolChars: 0,
|
||||
hardClear: { enabled: true, placeholder: "[cleared]" },
|
||||
}),
|
||||
ctx: CONTEXT_WINDOW_1000,
|
||||
// NOTE(review): presumably 40 tokens maps to a character budget that the
|
||||
// raw UTF-16 length stays under but the weighted CJK estimate exceeds —
|
||||
// confirm against estimateStringChars and the pruner's ratio math.
contextWindowTokensOverride: 40,
|
||||
isToolPrunable: () => true,
|
||||
});
|
||||
|
||||
// The tool result text must have been swapped for the hard-clear placeholder
// configured above.
expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]");
|
||||
});
|
||||
|
||||
it("uses contextWindow override when ctx.model is missing", () => {
|
||||
const messages = makeSimpleToolPruningMessages(true);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { ImageContent, TextContent, ToolResultMessage } from "@mariozechner/pi-ai";
|
||||
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
||||
import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../../../utils/cjk-chars.js";
|
||||
import type { EffectiveContextPruningSettings } from "./settings.js";
|
||||
import { makeToolPrunablePredicate } from "./tools.js";
|
||||
|
||||
const CHARS_PER_TOKEN_ESTIMATE = 4;
|
||||
const IMAGE_CHAR_ESTIMATE = 8_000;
|
||||
const PRUNED_CONTEXT_IMAGE_MARKER = "[image removed during context pruning]";
|
||||
|
||||
|
|
@ -111,11 +111,15 @@ function hasImageBlocks(content: ReadonlyArray<TextContent | ImageContent>): boo
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Estimates the character weight of `text` for context-pruning size checks.
 *
 * Thin wrapper that delegates to `estimateStringChars`, imported from
 * `utils/cjk-chars.js`. The visible callers in this file use it where they
 * previously read `.length` directly.
 *
 * NOTE(review): the actual CJK weighting is implemented in
 * `estimateStringChars`, which is not shown here — confirm its contract there.
 *
 * @param text - Text to measure.
 * @returns The value returned by `estimateStringChars(text)`.
 */
function estimateWeightedTextChars(text: string): number {
|
||||
return estimateStringChars(text);
|
||||
}
|
||||
|
||||
function estimateTextAndImageChars(content: ReadonlyArray<TextContent | ImageContent>): number {
|
||||
let chars = 0;
|
||||
for (const block of content) {
|
||||
if (block.type === "text") {
|
||||
chars += block.text.length;
|
||||
chars += estimateWeightedTextChars(block.text);
|
||||
}
|
||||
if (block.type === "image") {
|
||||
chars += IMAGE_CHAR_ESTIMATE;
|
||||
|
|
@ -128,7 +132,7 @@ function estimateMessageChars(message: AgentMessage): number {
|
|||
if (message.role === "user") {
|
||||
const content = message.content;
|
||||
if (typeof content === "string") {
|
||||
return content.length;
|
||||
return estimateWeightedTextChars(content);
|
||||
}
|
||||
return estimateTextAndImageChars(content);
|
||||
}
|
||||
|
|
@ -140,10 +144,10 @@ function estimateMessageChars(message: AgentMessage): number {
|
|||
continue;
|
||||
}
|
||||
if (b.type === "text" && typeof b.text === "string") {
|
||||
chars += b.text.length;
|
||||
chars += estimateWeightedTextChars(b.text);
|
||||
}
|
||||
if (b.type === "thinking" && typeof b.thinking === "string") {
|
||||
chars += b.thinking.length;
|
||||
chars += estimateWeightedTextChars(b.thinking);
|
||||
}
|
||||
if (b.type === "toolCall") {
|
||||
try {
|
||||
|
|
|
|||
Loading…
Reference in New Issue