From 4fdcacdb2cb034d5e5237c2bc732a0ad44af727e Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Mon, 6 Apr 2026 02:25:17 +0100
Subject: [PATCH] fix(agents): preserve latest read output during compaction

---
 CHANGELOG.md                                  |  2 +-
 .../tool-result-context-guard.test.ts         | 30 ++++++++++++-------
 .../tool-result-context-guard.ts              | 28 ++++++++++++-----
 .../tool-result-truncation.test.ts            | 16 +---------
 .../tool-result-truncation.ts                 |  8 ++---
 5 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a902024de12..84de0f548aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -130,7 +130,7 @@ Docs: https://docs.openclaw.ai
 - Discord: keep REST, webhook, and monitor traffic on the configured proxy, preserve component-only media sends, honor `@everyone` and `@here` mention gates, keep ACK reactions on the active account, and split voice connect/playback timeouts so auto-join is more reliable. (#57465, #60361, #60345) Thanks @geekhuashan.
 - WhatsApp: restore `channels.whatsapp.blockStreaming` and reset watchdog timeouts after reconnect so quiet chats stop falling into reconnect loops. (#60007, #60069) Thanks @MonkeyLeeT and @mcaxtr.
 - Memory: keep `memory-core` builtin embedding registration on the already-registered path so selecting `memory-core` no longer recurses through plugin discovery and crashes during startup. (#61402) Thanks @ngutman.
-- Agents/tool results: keep larger `read` outputs visible on big-window models by raising the live tool-result ceiling instead of compacting normal file reads right after the first section. Thanks @vincentkoc.
+- Agents/tool results: preserve the latest `read` output during tool-result context compaction so fresh file reads stop getting replaced by compacted stubs when older tool output can absorb the overflow budget. Thanks @vincentkoc.
 - Memory/QMD: prefer modern `qmd collection add --glob`, accept newer single-line JSON hit metadata while keeping legacy line fields, refresh QMD docs/doctor install guidance and model-override guidance, and keep older QMD releases working. Thanks @vincentkoc.
 - MS Teams: download inline DM images via Graph API and preserve channel reply threading in proactive fallback. (#52212, #55198) Thanks @Ted-developer and @hyojin.
 - MS Teams: replace the deprecated Teams SDK HttpPlugin stub with `httpServerAdapter` so recurring gateway deprecation warnings stop firing and the Express 5 compatibility workaround stays on the supported SDK path. (#60939) Thanks @coolramukaka-sys.
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
index 61107230289..fb80de18478 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
@@ -110,6 +110,13 @@ function expectReadableToolSlice(text: string, prefix: string) {
   ).toBe(true);
 }
 
+/** Passes for the exact placeholder text; else defers to expectReadableCompaction. */
+function expectCompactedOrPlaceholder(text: string, prefix: string) {
+  if (text !== PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER) {
+    expectReadableCompaction(text, prefix);
+  }
+}
+
 describe("installToolResultContextGuard", () => {
   it("returns a cloned guarded context so original tool output stays visible", async () => {
     const agent = makeGuardableAgent();
@@ -124,7 +131,7 @@ describe("installToolResultContextGuard", () => {
     expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000));
   });
 
-  it("keeps readable slices of overflowing tool results before using a placeholder", async () => {
+  it("keeps at least one readable older slice before falling back to a placeholder", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
@@ -149,11 +156,13 @@ describe("installToolResultContextGuard", () => {
     const third = getToolResultText(transformed[3]);
 
     expectReadableCompaction(first, "a");
-    expectReadableCompaction(second, "b");
-    expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expectReadableCompaction(third, "c");
+    expect(
+      second === "b".repeat(800) || second === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
+    ).toBe(true);
   });
 
-  it("survives repeated large tool results by compacting the newest output each turn", async () => {
+  it("keeps the newest large tool result visible when an older one can absorb overflow", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
@@ -175,11 +184,10 @@ describe("installToolResultContextGuard", () => {
       .filter((msg) => msg.role === "toolResult")
       .map((msg) => getToolResultText(msg as AgentMessage));
 
-    // Large outputs are capped per-tool before aggregate compaction kicks in.
-    expect(toolResultTexts[0]?.length).toBe(50_000);
     expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
-    expectReadableCompaction(toolResultTexts[3] ?? "", "4");
-    expect(toolResultTexts[3]).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
+    expectReadableCompaction(toolResultTexts[1] ?? "", "2");
+    expectReadableCompaction(toolResultTexts[2] ?? "", "3");
+    expectReadableToolSlice(toolResultTexts[3] ?? "", "4");
   });
 
   it("truncates an individually oversized tool result with a context-limit notice", async () => {
@@ -202,7 +210,7 @@ describe("installToolResultContextGuard", () => {
     expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
   });
 
-  it("keeps compacting newest-first until overflow clears, reaching older tool results when needed", async () => {
+  it("falls back to compacting the newest tool result when older ones are insufficient", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
@@ -220,8 +228,8 @@ describe("installToolResultContextGuard", () => {
       contextForNextCall,
       new AbortController().signal,
     )) as AgentMessage[];
-    expectReadableCompaction(getToolResultText(transformed[1]), "x");
-    expect(getToolResultText(transformed[2])).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expectCompactedOrPlaceholder(getToolResultText(transformed[1]), "x");
+    expectCompactedOrPlaceholder(getToolResultText(transformed[2]), "y");
   });
 
   it("wraps an existing transformContext and guards the transformed output", async () => {
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
index 03c30912653..42177eca6e9 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
@@ -145,7 +145,7 @@ function compactToPlaceholderInPlace(params: {
   }
 
   let reduced = 0;
-  for (let i = messages.length - 1; i >= 0; i--) {
+  for (const i of resolveToolResultCompactionOrder(messages)) {
     const msg = messages[i];
     if (!isToolResultMessage(msg)) {
       continue;
@@ -215,11 +215,10 @@ function compactExistingToolResultsInPlace(params: {
   }
 
   let reduced = 0;
-  // Compact newest-first so more of the cached prefix survives: rewriting
-  // messages[k] for small k invalidates the provider prompt cache from that point onward.
-  // Keep a truncated slice of newer tool output before falling back to a
-  // full placeholder so recent, user-visible results remain readable when possible.
-  for (let i = messages.length - 1; i >= 0; i--) {
+  // Keep the most recent tool result visible as long as older tool outputs can
+  // absorb the overflow. Among older tool results, compact newest-first so we
+  // still preserve as much of the cached prefix as possible.
+  for (const i of resolveToolResultCompactionOrder(messages)) {
     const msg = messages[i];
     if (!isToolResultMessage(msg)) {
       continue;
@@ -264,6 +263,21 @@ function compactExistingToolResultsInPlace(params: {
   return reduced;
 }
 
+/** Compaction order: older tool results newest-first, then the newest tool result last. */
+function resolveToolResultCompactionOrder(messages: AgentMessage[]): number[] {
+  const newestFirst: number[] = [];
+  for (let index = messages.length - 1; index >= 0; index -= 1) {
+    if (isToolResultMessage(messages[index])) {
+      newestFirst.push(index);
+    }
+  }
+  const [newestIndex, ...olderNewestFirst] = newestFirst;
+  if (newestIndex === undefined) {
+    return [];
+  }
+  return [...olderNewestFirst, newestIndex];
+}
+
 function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
   return messages.map(
     (msg) => ({ ...(msg as unknown as Record<string, unknown>) }) as unknown as AgentMessage,
@@ -334,7 +348,7 @@ function enforceToolResultContextBudgetInPlace(params: {
     return;
   }
 
-  // Compact newest tool outputs first so more of the cached prefix survives;
+  // Prefer compacting older tool outputs before sacrificing the newest one;
   // stop once the context is back under budget.
   compactExistingToolResultsInPlace({
     messages,
diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts
index a05b84fe378..556974b3047 100644
--- a/src/agents/pi-embedded-runner/tool-result-truncation.test.ts
+++ b/src/agents/pi-embedded-runner/tool-result-truncation.test.ts
@@ -199,7 +199,7 @@ describe("calculateMaxToolResultChars", () => {
   });
 
   it("exports the live cap through both constant names", () => {
-    expect(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS).toBe(120_000);
+    expect(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS).toBe(40_000);
     expect(HARD_MAX_TOOL_RESULT_CHARS).toBe(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS);
   });
 
@@ -212,20 +212,6 @@ describe("calculateMaxToolResultChars", () => {
     const result = calculateMaxToolResultChars(128_000);
     expect(result).toBe(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS);
   });
-
-  it("keeps moderately large reads intact on 128K contexts", () => {
-    const messages: AgentMessage[] = [
-      makeUserMessage("hello"),
-      makeAssistantMessage("reading changelog"),
-      makeToolResult("x".repeat(60_000)),
-    ];
-    const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
-      messages,
-      128_000,
-    );
-    expect(truncatedCount).toBe(0);
-    expect(result).toEqual(messages);
-  });
 });
 
 describe("isOversizedToolResult", () => {
diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.ts b/src/agents/pi-embedded-runner/tool-result-truncation.ts
index 46436560a51..c9684c694c9 100644
--- a/src/agents/pi-embedded-runner/tool-result-truncation.ts
+++ b/src/agents/pi-embedded-runner/tool-result-truncation.ts
@@ -17,12 +17,10 @@ const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
  * Default hard cap for a single live tool result text block.
  *
  * Pi already truncates tool results aggressively when serializing old history
- * for compaction summaries. For the live request path we keep a larger slice so
- * the model can still act on recent tool output, especially large read results
- * on modern 128K+ context models, while still keeping a bounded request-local
- * ceiling that cannot dominate the next turn.
+ * for compaction summaries. For the live request path we still keep a bounded
+ * request-local ceiling so oversized tool output cannot dominate the next turn.
  */
-export const DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS = 120_000;
+export const DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS = 40_000;
 
 /**
  * Backwards-compatible alias for older call sites/tests.