From f6380ae4b7886f0cb5cc7dca45e9457017864c39 Mon Sep 17 00:00:00 2001
From: Boris Cherny <boris@performancejs.com>
Date: Fri, 3 Apr 2026 17:19:15 -0700
Subject: [PATCH] fix(cache): compact newest tool results first to preserve
 prompt cache prefix (#58036)

* fix(cache): compact newest tool results first to preserve prompt cache prefix

compactExistingToolResultsInPlace iterated front-to-back, replacing the
oldest tool results with placeholders when context exceeded 75%. This
rewrote messages[k] for small k, invalidating the provider prompt cache
from that point onward on every subsequent turn.

Reverse the loop to compact newest-first. The cached prefix stays intact;
the tradeoff is that the model loses recent tool output rather than older
output, which is acceptable because this guard only fires as an emergency
measure past the 75% threshold.

* fix(cache): compact newest tool results first to preserve prompt cache prefix (#58036) Thanks @bcherny

---------

Co-authored-by: George Zhang <georgezhangtj97@gmail.com>
---
 CHANGELOG.md                                       |  1 +
 .../tool-result-context-guard.test.ts              | 14 ++++++++------
 .../tool-result-context-guard.ts                   |  7 +++++--
 3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b2937d914ea..36e320e62c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -48,6 +48,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/runtime: reuse compatible active registries for `web_search` and `web_fetch` provider snapshot resolution so repeated runtime reads do not re-import the same bundled plugin set on each agent message. Related #48380.
 - Infra/tailscale: ignore `OPENCLAW_TEST_TAILSCALE_BINARY` outside explicit test environments and block it from workspace `.env`, so test-only binary overrides cannot be injected through trusted repository state. (#58468) Thanks @eleqtrizit.
 - Plugins/OpenAI: enable reference-image edits for `gpt-image-1` by routing edit calls to `/images/edits` with multipart image uploads, and update image-generation capability/docs metadata accordingly. Thanks @steipete.
+- Cache/context guard: compact newest tool results first so the cached prompt prefix stays byte-identical, avoiding full re-tokenization on every turn past the 75% context threshold. (#58036) Thanks @bcherny.
 - Agents/tools: include value-shape hints in missing-parameter tool errors so dropped, empty-string, and wrong-type write payloads are easier to diagnose from logs. (#55317) Thanks @priyansh19.
 - Android/assistant: keep queued App Actions prompts pending when auto-send enqueue is rejected, so transient chat-health drops do not silently lose the assistant request. Thanks @obviyus.
 - Plugins/startup: migrate legacy `tools.web.search.<provider>` config before strict startup validation, and record plugin failure phase/timestamp so degraded plugin startup is easier to diagnose from logs and `plugins list`.
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
index 9f265d3b56e..6fed7c61197 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
@@ -106,7 +106,7 @@ function expectCompactedToolResultsWithoutContextNotice(
 }
 
 describe("installToolResultContextGuard", () => {
-  it("compacts oldest-first when total context overflows, even if each result fits individually", async () => {
+  it("compacts newest-first when total context overflows, even if each result fits individually", async () => {
     const agent = makeGuardableAgent();
     const contextForNextCall = makeTwoToolResultOverflowContext();
     const transformed = await applyGuardToContext(agent, contextForNextCall);
@@ -115,7 +115,7 @@ describe("installToolResultContextGuard", () => {
     expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2);
   });
 
-  it("keeps compacting oldest-first until context is back under budget", async () => {
+  it("keeps compacting newest-first until context is back under budget", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
@@ -141,7 +141,7 @@ describe("installToolResultContextGuard", () => {
     expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
   });
 
-  it("survives repeated large tool results by compacting older outputs before later turns", async () => {
+  it("survives repeated large tool results by compacting the newest output each turn", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
@@ -159,8 +159,10 @@ describe("installToolResultContextGuard", () => {
       .filter((msg) => msg.role === "toolResult")
       .map((msg) => getToolResultText(msg as AgentMessage));
 
-    expect(toolResultTexts[0]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-    expect(toolResultTexts[3]?.length).toBe(95_000);
+    // Newest-first compaction: oldest results stay intact to preserve the
+    // cached prefix; the newest overflowing result is compacted.
+    expect(toolResultTexts[0]?.length).toBe(95_000);
+    expect(toolResultTexts[3]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
     expect(toolResultTexts.join("\n")).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
   });
 
@@ -181,7 +183,7 @@ describe("installToolResultContextGuard", () => {
     expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
   });
 
-  it("keeps compacting oldest-first until overflow clears, including the newest tool result when needed", async () => {
+  it("keeps compacting newest-first until overflow clears, reaching older tool results when needed", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
index 1ab23ede3cf..35aa50828d6 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
@@ -108,7 +108,9 @@ function compactExistingToolResultsInPlace(params: {
   }
 
   let reduced = 0;
-  for (let i = 0; i < messages.length; i++) {
+  // Compact newest-first so the cached prefix stays intact: rewriting messages[k]
+  // for small k invalidates the provider prompt cache from that point onward.
+  for (let i = messages.length - 1; i >= 0; i--) {
     const msg = messages[i];
     if (!isToolResultMessage(msg)) {
       continue;
@@ -179,7 +181,8 @@ function enforceToolResultContextBudgetInPlace(params: {
     return;
   }
 
-  // Compact oldest tool outputs first until the context is back under budget.
+  // Compact newest tool outputs first to preserve the cached prefix; stop once
+  // the context is back under budget.
   compactExistingToolResultsInPlace({
     messages,
     charsNeeded: currentChars - contextBudgetChars,