From 4fdcacdb2cb034d5e5237c2bc732a0ad44af727e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 6 Apr 2026 02:25:17 +0100 Subject: [PATCH] fix(agents): preserve latest read output during compaction --- CHANGELOG.md | 2 +- .../tool-result-context-guard.test.ts | 30 ++++++++++++------- .../tool-result-context-guard.ts | 28 ++++++++++++----- .../tool-result-truncation.test.ts | 16 +--------- .../tool-result-truncation.ts | 8 ++--- 5 files changed, 45 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a902024de12..84de0f548aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -130,7 +130,7 @@ Docs: https://docs.openclaw.ai - Discord: keep REST, webhook, and monitor traffic on the configured proxy, preserve component-only media sends, honor `@everyone` and `@here` mention gates, keep ACK reactions on the active account, and split voice connect/playback timeouts so auto-join is more reliable. (#57465, #60361, #60345) Thanks @geekhuashan. - WhatsApp: restore `channels.whatsapp.blockStreaming` and reset watchdog timeouts after reconnect so quiet chats stop falling into reconnect loops. (#60007, #60069) Thanks @MonkeyLeeT and @mcaxtr. - Memory: keep `memory-core` builtin embedding registration on the already-registered path so selecting `memory-core` no longer recurses through plugin discovery and crashes during startup. (#61402) Thanks @ngutman. -- Agents/tool results: keep larger `read` outputs visible on big-window models by raising the live tool-result ceiling instead of compacting normal file reads right after the first section. Thanks @vincentkoc. +- Agents/tool results: preserve the latest `read` output during tool-result context compaction so fresh file reads stop getting replaced by compacted stubs when older tool output can absorb the overflow budget. Thanks @vincentkoc. 
- Memory/QMD: prefer modern `qmd collection add --glob`, accept newer single-line JSON hit metadata while keeping legacy line fields, refresh QMD docs/doctor install guidance and model-override guidance, and keep older QMD releases working. Thanks @vincentkoc. - MS Teams: download inline DM images via Graph API and preserve channel reply threading in proactive fallback. (#52212, #55198) Thanks @Ted-developer and @hyojin. - MS Teams: replace the deprecated Teams SDK HttpPlugin stub with `httpServerAdapter` so recurring gateway deprecation warnings stop firing and the Express 5 compatibility workaround stays on the supported SDK path. (#60939) Thanks @coolramukaka-sys. diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts index 61107230289..fb80de18478 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts @@ -110,6 +110,13 @@ function expectReadableToolSlice(text: string, prefix: string) { ).toBe(true); } +function expectCompactedOrPlaceholder(text: string, prefix: string) { + if (text === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER) { + return; + } + expectReadableCompaction(text, prefix); +} + describe("installToolResultContextGuard", () => { it("returns a cloned guarded context so original tool output stays visible", async () => { const agent = makeGuardableAgent(); @@ -124,7 +131,7 @@ describe("installToolResultContextGuard", () => { expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000)); }); - it("keeps readable slices of overflowing tool results before using a placeholder", async () => { + it("keeps at least one readable older slice before falling back to a placeholder", async () => { const agent = makeGuardableAgent(); installToolResultContextGuard({ @@ -149,11 +156,13 @@ describe("installToolResultContextGuard", () => { const third = 
getToolResultText(transformed[3]); expectReadableCompaction(first, "a"); - expectReadableCompaction(second, "b"); - expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expectReadableCompaction(third, "c"); + expect( + second === "b".repeat(800) || second === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER, + ).toBe(true); }); - it("survives repeated large tool results by compacting the newest output each turn", async () => { + it("keeps the newest large tool result visible when an older one can absorb overflow", async () => { const agent = makeGuardableAgent(); installToolResultContextGuard({ @@ -175,11 +184,10 @@ describe("installToolResultContextGuard", () => { .filter((msg) => msg.role === "toolResult") .map((msg) => getToolResultText(msg as AgentMessage)); - // Large outputs are capped per-tool before aggregate compaction kicks in. - expect(toolResultTexts[0]?.length).toBe(50_000); expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); - expectReadableCompaction(toolResultTexts[3] ?? "", "4"); - expect(toolResultTexts[3]).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); + expectReadableCompaction(toolResultTexts[1] ?? "", "2"); + expectReadableCompaction(toolResultTexts[2] ?? "", "3"); + expectReadableToolSlice(toolResultTexts[3] ?? 
"", "4"); }); it("truncates an individually oversized tool result with a context-limit notice", async () => { @@ -202,7 +210,7 @@ describe("installToolResultContextGuard", () => { expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); }); - it("keeps compacting newest-first until overflow clears, reaching older tool results when needed", async () => { + it("falls back to compacting the newest tool result when older ones are insufficient", async () => { const agent = makeGuardableAgent(); installToolResultContextGuard({ @@ -220,8 +228,8 @@ describe("installToolResultContextGuard", () => { contextForNextCall, new AbortController().signal, )) as AgentMessage[]; - expectReadableCompaction(getToolResultText(transformed[1]), "x"); - expect(getToolResultText(transformed[2])).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expectCompactedOrPlaceholder(getToolResultText(transformed[1]), "x"); + expectCompactedOrPlaceholder(getToolResultText(transformed[2]), "y"); }); it("wraps an existing transformContext and guards the transformed output", async () => { diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts index 03c30912653..42177eca6e9 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts @@ -145,7 +145,7 @@ function compactToPlaceholderInPlace(params: { } let reduced = 0; - for (let i = messages.length - 1; i >= 0; i--) { + for (const i of resolveToolResultCompactionOrder(messages)) { const msg = messages[i]; if (!isToolResultMessage(msg)) { continue; @@ -215,11 +215,10 @@ function compactExistingToolResultsInPlace(params: { } let reduced = 0; - // Compact newest-first so more of the cached prefix survives: rewriting - // messages[k] for small k invalidates the provider prompt cache from that point onward. 
- // Keep a truncated slice of newer tool output before falling back to a - // full placeholder so recent, user-visible results remain readable when possible. - for (let i = messages.length - 1; i >= 0; i--) { + // Keep the most recent tool result visible as long as older tool outputs can + // absorb the overflow. Among older tool results, compact newest-first so we + // still preserve as much of the cached prefix as possible. + for (const i of resolveToolResultCompactionOrder(messages)) { const msg = messages[i]; if (!isToolResultMessage(msg)) { continue; @@ -264,6 +263,21 @@ function compactExistingToolResultsInPlace(params: { return reduced; } +function resolveToolResultCompactionOrder(messages: AgentMessage[]): number[] { + const toolResultIndexes: number[] = []; + for (let i = 0; i < messages.length; i += 1) { + if (isToolResultMessage(messages[i])) { + toolResultIndexes.push(i); + } + } + if (toolResultIndexes.length <= 1) { + return toolResultIndexes; + } + const newestIndex = toolResultIndexes[toolResultIndexes.length - 1]; + const olderIndexes = toolResultIndexes.slice(0, -1).toReversed(); + return [...olderIndexes, newestIndex]; +} + function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] { return messages.map( (msg) => ({ ...(msg as unknown as Record<string, unknown>) }) as unknown as AgentMessage, @@ -334,7 +348,7 @@ function enforceToolResultContextBudgetInPlace(params: { return; } - // Compact newest tool outputs first so more of the cached prefix survives; + // Prefer compacting older tool outputs before sacrificing the newest one; // stop once the context is back under budget. 
compactExistingToolResultsInPlace({ messages, diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts index a05b84fe378..556974b3047 100644 --- a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts @@ -199,7 +199,7 @@ describe("calculateMaxToolResultChars", () => { }); it("exports the live cap through both constant names", () => { - expect(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS).toBe(120_000); + expect(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS).toBe(40_000); expect(HARD_MAX_TOOL_RESULT_CHARS).toBe(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS); }); @@ -212,20 +212,6 @@ describe("calculateMaxToolResultChars", () => { const result = calculateMaxToolResultChars(128_000); expect(result).toBe(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS); }); - - it("keeps moderately large reads intact on 128K contexts", () => { - const messages: AgentMessage[] = [ - makeUserMessage("hello"), - makeAssistantMessage("reading changelog"), - makeToolResult("x".repeat(60_000)), - ]; - const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages( - messages, - 128_000, - ); - expect(truncatedCount).toBe(0); - expect(result).toEqual(messages); - }); }); describe("isOversizedToolResult", () => { diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.ts b/src/agents/pi-embedded-runner/tool-result-truncation.ts index 46436560a51..c9684c694c9 100644 --- a/src/agents/pi-embedded-runner/tool-result-truncation.ts +++ b/src/agents/pi-embedded-runner/tool-result-truncation.ts @@ -17,12 +17,10 @@ const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3; * Default hard cap for a single live tool result text block. * * Pi already truncates tool results aggressively when serializing old history - * for compaction summaries. 
For the live request path we keep a larger slice so - * the model can still act on recent tool output, especially large read results - * on modern 128K+ context models, while still keeping a bounded request-local - * ceiling that cannot dominate the next turn. + * for compaction summaries. For the live request path we still keep a bounded + * request-local ceiling so oversized tool output cannot dominate the next turn. */ -export const DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS = 120_000; +export const DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS = 40_000; /** * Backwards-compatible alias for older call sites/tests.