fix(agents): preserve latest read output during compaction

This commit is contained in:
Vincent Koc 2026-04-06 02:25:17 +01:00
parent 92fa7ad42a
commit 4fdcacdb2c
5 changed files with 45 additions and 39 deletions

View File

@ -130,7 +130,7 @@ Docs: https://docs.openclaw.ai
- Discord: keep REST, webhook, and monitor traffic on the configured proxy, preserve component-only media sends, honor `@everyone` and `@here` mention gates, keep ACK reactions on the active account, and split voice connect/playback timeouts so auto-join is more reliable. (#57465, #60361, #60345) Thanks @geekhuashan.
- WhatsApp: restore `channels.whatsapp.blockStreaming` and reset watchdog timeouts after reconnect so quiet chats stop falling into reconnect loops. (#60007, #60069) Thanks @MonkeyLeeT and @mcaxtr.
- Memory: keep `memory-core` builtin embedding registration on the already-registered path so selecting `memory-core` no longer recurses through plugin discovery and crashes during startup. (#61402) Thanks @ngutman.
- Agents/tool results: keep larger `read` outputs visible on big-window models by raising the live tool-result ceiling instead of compacting normal file reads right after the first section. Thanks @vincentkoc.
- Agents/tool results: preserve the latest `read` output during tool-result context compaction so fresh file reads stop getting replaced by compacted stubs when older tool output can absorb the overflow budget. Thanks @vincentkoc.
- Memory/QMD: prefer modern `qmd collection add --glob`, accept newer single-line JSON hit metadata while keeping legacy line fields, refresh QMD docs/doctor install guidance and model-override guidance, and keep older QMD releases working. Thanks @vincentkoc.
- MS Teams: download inline DM images via Graph API and preserve channel reply threading in proactive fallback. (#52212, #55198) Thanks @Ted-developer and @hyojin.
- MS Teams: replace the deprecated Teams SDK HttpPlugin stub with `httpServerAdapter` so recurring gateway deprecation warnings stop firing and the Express 5 compatibility workaround stays on the supported SDK path. (#60939) Thanks @coolramukaka-sys.

View File

@ -110,6 +110,13 @@ function expectReadableToolSlice(text: string, prefix: string) {
).toBe(true);
}
/**
 * Asserts that a tool-result text is acceptable after compaction: either the
 * full pre-emptive compaction placeholder, or a readable compacted slice that
 * still starts with the expected prefix.
 */
function expectCompactedOrPlaceholder(text: string, prefix: string) {
  const isPlaceholder = text === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER;
  if (!isPlaceholder) {
    expectReadableCompaction(text, prefix);
  }
}
describe("installToolResultContextGuard", () => {
it("returns a cloned guarded context so original tool output stays visible", async () => {
const agent = makeGuardableAgent();
@ -124,7 +131,7 @@ describe("installToolResultContextGuard", () => {
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000));
});
it("keeps readable slices of overflowing tool results before using a placeholder", async () => {
it("keeps at least one readable older slice before falling back to a placeholder", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
@ -149,11 +156,13 @@ describe("installToolResultContextGuard", () => {
const third = getToolResultText(transformed[3]);
expectReadableCompaction(first, "a");
expectReadableCompaction(second, "b");
expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expectReadableCompaction(third, "c");
expect(
second === "b".repeat(800) || second === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
).toBe(true);
});
it("survives repeated large tool results by compacting the newest output each turn", async () => {
it("keeps the newest large tool result visible when an older one can absorb overflow", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
@ -175,11 +184,10 @@ describe("installToolResultContextGuard", () => {
.filter((msg) => msg.role === "toolResult")
.map((msg) => getToolResultText(msg as AgentMessage));
// Large outputs are capped per-tool before aggregate compaction kicks in.
expect(toolResultTexts[0]?.length).toBe(50_000);
expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expectReadableCompaction(toolResultTexts[3] ?? "", "4");
expect(toolResultTexts[3]).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expectReadableCompaction(toolResultTexts[1] ?? "", "2");
expectReadableCompaction(toolResultTexts[2] ?? "", "3");
expectReadableToolSlice(toolResultTexts[3] ?? "", "4");
});
it("truncates an individually oversized tool result with a context-limit notice", async () => {
@ -202,7 +210,7 @@ describe("installToolResultContextGuard", () => {
expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
});
it("keeps compacting newest-first until overflow clears, reaching older tool results when needed", async () => {
it("falls back to compacting the newest tool result when older ones are insufficient", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
@ -220,8 +228,8 @@ describe("installToolResultContextGuard", () => {
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
expectReadableCompaction(getToolResultText(transformed[1]), "x");
expect(getToolResultText(transformed[2])).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expectCompactedOrPlaceholder(getToolResultText(transformed[1]), "x");
expectCompactedOrPlaceholder(getToolResultText(transformed[2]), "y");
});
it("wraps an existing transformContext and guards the transformed output", async () => {

View File

@ -145,7 +145,7 @@ function compactToPlaceholderInPlace(params: {
}
let reduced = 0;
for (let i = messages.length - 1; i >= 0; i--) {
for (const i of resolveToolResultCompactionOrder(messages)) {
const msg = messages[i];
if (!isToolResultMessage(msg)) {
continue;
@ -215,11 +215,10 @@ function compactExistingToolResultsInPlace(params: {
}
let reduced = 0;
// Compact newest-first so more of the cached prefix survives: rewriting
// messages[k] for small k invalidates the provider prompt cache from that point onward.
// Keep a truncated slice of newer tool output before falling back to a
// full placeholder so recent, user-visible results remain readable when possible.
for (let i = messages.length - 1; i >= 0; i--) {
// Keep the most recent tool result visible as long as older tool outputs can
// absorb the overflow. Among older tool results, compact newest-first so we
// still preserve as much of the cached prefix as possible.
for (const i of resolveToolResultCompactionOrder(messages)) {
const msg = messages[i];
if (!isToolResultMessage(msg)) {
continue;
@ -264,6 +263,21 @@ function compactExistingToolResultsInPlace(params: {
return reduced;
}
/**
 * Builds the order in which tool-result messages should be compacted.
 *
 * Older tool results come first, iterated newest-to-oldest among themselves;
 * the single most recent tool result is deferred to the very end so it is
 * only compacted as a last resort when older outputs cannot absorb the
 * overflow.
 */
function resolveToolResultCompactionOrder(messages: AgentMessage[]): number[] {
  const toolResultIndexes = messages
    .map((msg, index) => (isToolResultMessage(msg) ? index : -1))
    .filter((index) => index !== -1);
  if (toolResultIndexes.length <= 1) {
    return toolResultIndexes;
  }
  // All but the newest, reversed in place on a fresh copy, then the newest last.
  const order = toolResultIndexes.slice(0, -1);
  order.reverse();
  order.push(toolResultIndexes[toolResultIndexes.length - 1]);
  return order;
}
function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
return messages.map(
(msg) => ({ ...(msg as unknown as Record<string, unknown>) }) as unknown as AgentMessage,
@ -334,7 +348,7 @@ function enforceToolResultContextBudgetInPlace(params: {
return;
}
// Compact newest tool outputs first so more of the cached prefix survives;
// Prefer compacting older tool outputs before sacrificing the newest one;
// stop once the context is back under budget.
compactExistingToolResultsInPlace({
messages,

View File

@ -199,7 +199,7 @@ describe("calculateMaxToolResultChars", () => {
});
it("exports the live cap through both constant names", () => {
expect(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS).toBe(120_000);
expect(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS).toBe(40_000);
expect(HARD_MAX_TOOL_RESULT_CHARS).toBe(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS);
});
@ -212,20 +212,6 @@ describe("calculateMaxToolResultChars", () => {
const result = calculateMaxToolResultChars(128_000);
expect(result).toBe(DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS);
});
it("keeps moderately large reads intact on 128K contexts", () => {
const messages: AgentMessage[] = [
makeUserMessage("hello"),
makeAssistantMessage("reading changelog"),
makeToolResult("x".repeat(60_000)),
];
const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
messages,
128_000,
);
expect(truncatedCount).toBe(0);
expect(result).toEqual(messages);
});
});
describe("isOversizedToolResult", () => {

View File

@ -17,12 +17,10 @@ const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
* Default hard cap for a single live tool result text block.
*
* Pi already truncates tool results aggressively when serializing old history
* for compaction summaries. For the live request path we keep a larger slice so
* the model can still act on recent tool output, especially large read results
* on modern 128K+ context models, while still keeping a bounded request-local
* ceiling that cannot dominate the next turn.
* for compaction summaries. For the live request path we still keep a bounded
* request-local ceiling so oversized tool output cannot dominate the next turn.
*/
export const DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS = 120_000;
export const DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS = 40_000;
/**
* Backwards-compatible alias for older call sites/tests.