mirror of https://github.com/openclaw/openclaw.git
fix: preserve anthropic thinking replay (#58916)
* test: add anthropic thinking replay regressions * fix: preserve anthropic thinking blocks on replay * fix: preserve anthropic thinking replay (#58916) * fix: move anthropic replay changelog entry (#58916)
This commit is contained in:
parent
00a49fe8b4
commit
c65e152b39
|
|
@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Chat/error replies: stop leaking raw provider/runtime failures into external chat channels, return a friendly retry message instead, and add a specific `/new` hint for Bedrock toolResult/toolUse session mismatches. (#58831) Thanks @ImLukeF.
|
||||
- Memory/session indexing: keep full reindexes from skipping session transcripts when sync is triggered by `session-start` or `watch`, so restart-driven reindexes preserve session memory. (#39732) Thanks @upupc
|
||||
- Telegram/retries: keep non-idempotent sends on the strict safe-send path, retry wrapped pre-connect failures, and preserve `429` / `retry_after` backoff for safe delivery retries. (#51895) Thanks @chinar-amrutkar
|
||||
- Agents/Anthropic: preserve thinking blocks and signatures across replay, cache-control patching, and context pruning so compacted Anthropic sessions continue working instead of failing on later turns. (#58916) Thanks @obviyus
|
||||
|
||||
## 2026.3.31
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { stripThoughtSignatures } from "./bootstrap.js";
|
||||
|
||||
describe("stripThoughtSignatures", () => {
|
||||
it("preserves thinkingSignature while still stripping invalid thought signatures", () => {
|
||||
const thinkingBlock = {
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "keep_me",
|
||||
thoughtSignature: "msg_123",
|
||||
};
|
||||
const redactedBlock = {
|
||||
type: "redacted_thinking",
|
||||
redacted_thinking: "...",
|
||||
thinkingSignature: "keep_me_too",
|
||||
thoughtSignature: "msg_456",
|
||||
};
|
||||
const textBlock = {
|
||||
type: "text",
|
||||
text: "visible",
|
||||
thoughtSignature: "msg_789",
|
||||
};
|
||||
|
||||
const result = stripThoughtSignatures([thinkingBlock, redactedBlock, textBlock], {
|
||||
includeCamelCase: true,
|
||||
});
|
||||
|
||||
expect(result[0]).toEqual({
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "keep_me",
|
||||
});
|
||||
expect(result[1]).toEqual({
|
||||
type: "redacted_thinking",
|
||||
redacted_thinking: "...",
|
||||
thinkingSignature: "keep_me_too",
|
||||
});
|
||||
expect(result[2]).toEqual({
|
||||
type: "text",
|
||||
text: "visible",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -769,17 +769,24 @@ describe("sanitizeSessionHistory", () => {
|
|||
).toBe(false);
|
||||
});
|
||||
|
||||
it("drops assistant thinking blocks for github-copilot models", async () => {
|
||||
it("preserves latest assistant thinking blocks for github-copilot models", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = makeThinkingAndTextAssistantMessages("reasoning_text");
|
||||
|
||||
const result = await sanitizeGithubCopilotHistory({ messages });
|
||||
const assistant = getAssistantMessage(result);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
|
||||
expect(assistant.content).toEqual([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "reasoning_text",
|
||||
},
|
||||
{ type: "text", text: "hi" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => {
|
||||
it("preserves latest assistant turn when all content is thinking blocks (github-copilot)", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
|
|
@ -796,13 +803,18 @@ describe("sanitizeSessionHistory", () => {
|
|||
|
||||
const result = await sanitizeGithubCopilotHistory({ messages });
|
||||
|
||||
// Assistant turn should be preserved (not dropped) to maintain turn alternation
|
||||
expect(result).toHaveLength(3);
|
||||
const assistant = getAssistantMessage(result);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
|
||||
expect(assistant.content).toEqual([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "some reasoning",
|
||||
thinkingSignature: "reasoning_text",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => {
|
||||
it("preserves thinking blocks alongside tool_use blocks in latest assistant message (github-copilot)", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
|
|
@ -820,12 +832,12 @@ describe("sanitizeSessionHistory", () => {
|
|||
|
||||
const result = await sanitizeGithubCopilotHistory({ messages });
|
||||
const types = getAssistantContentTypes(result);
|
||||
expect(types).toContain("thinking");
|
||||
expect(types).toContain("toolCall");
|
||||
expect(types).toContain("text");
|
||||
expect(types).not.toContain("thinking");
|
||||
});
|
||||
|
||||
it("drops assistant thinking blocks for anthropic replay", async () => {
|
||||
it("preserves latest assistant thinking blocks for anthropic replay", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = makeThinkingAndTextAssistantMessages();
|
||||
|
|
@ -833,10 +845,17 @@ describe("sanitizeSessionHistory", () => {
|
|||
const result = await sanitizeAnthropicHistory({ messages });
|
||||
|
||||
const assistant = getAssistantMessage(result);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
|
||||
expect(assistant.content).toEqual([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "some_sig",
|
||||
},
|
||||
{ type: "text", text: "hi" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("drops assistant thinking blocks for amazon-bedrock replay", async () => {
|
||||
it("preserves latest assistant thinking blocks for amazon-bedrock replay", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = makeThinkingAndTextAssistantMessages();
|
||||
|
|
@ -848,7 +867,14 @@ describe("sanitizeSessionHistory", () => {
|
|||
});
|
||||
|
||||
const assistant = getAssistantMessage(result);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
|
||||
expect(assistant.content).toEqual([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "some_sig",
|
||||
},
|
||||
{ type: "text", text: "hi" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("does not drop thinking blocks for non-claude copilot models", async () => {
|
||||
|
|
|
|||
|
|
@ -89,4 +89,51 @@ describe("extra-params: OpenRouter Anthropic cache_control", () => {
|
|||
|
||||
expect(payload.messages[0].content).toBe("Hello");
|
||||
});
|
||||
|
||||
it("does not inject cache_control into thinking blocks", () => {
|
||||
const payload = {
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: [
|
||||
{ type: "text", text: "Part 1" },
|
||||
{ type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6");
|
||||
|
||||
expect(payload.messages[0].content).toEqual([
|
||||
{ type: "text", text: "Part 1" },
|
||||
{ type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("removes pre-existing cache_control from assistant thinking blocks", () => {
|
||||
const payload = {
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "sig_1",
|
||||
cache_control: { type: "ephemeral" },
|
||||
},
|
||||
{ type: "text", text: "visible" },
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
runOpenRouterPayload(payload, "anthropic/claude-opus-4-6");
|
||||
|
||||
expect(payload.messages[0].content).toEqual([
|
||||
{ type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
|
||||
{ type: "text", text: "visible" },
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -71,17 +71,32 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde
|
|||
const messages = payloadObj.messages;
|
||||
if (Array.isArray(messages)) {
|
||||
for (const msg of messages as Array<{ role?: string; content?: unknown }>) {
|
||||
if (msg.role !== "system" && msg.role !== "developer") {
|
||||
if (msg.role === "system" || msg.role === "developer") {
|
||||
if (typeof msg.content === "string") {
|
||||
msg.content = [
|
||||
{ type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
|
||||
];
|
||||
} else if (Array.isArray(msg.content) && msg.content.length > 0) {
|
||||
const last = msg.content[msg.content.length - 1];
|
||||
if (last && typeof last === "object") {
|
||||
const record = last as Record<string, unknown>;
|
||||
if (record.type !== "thinking" && record.type !== "redacted_thinking") {
|
||||
record.cache_control = { type: "ephemeral" };
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (typeof msg.content === "string") {
|
||||
msg.content = [
|
||||
{ type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
|
||||
];
|
||||
} else if (Array.isArray(msg.content) && msg.content.length > 0) {
|
||||
const last = msg.content[msg.content.length - 1];
|
||||
if (last && typeof last === "object") {
|
||||
(last as Record<string, unknown>).cache_control = { type: "ephemeral" };
|
||||
|
||||
if (msg.role === "assistant" && Array.isArray(msg.content)) {
|
||||
for (const block of msg.content) {
|
||||
if (!block || typeof block !== "object") {
|
||||
continue;
|
||||
}
|
||||
const record = block as Record<string, unknown>;
|
||||
if (record.type === "thinking" || record.type === "redacted_thinking") {
|
||||
delete record.cache_control;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,19 +45,53 @@ describe("dropThinkingBlocks", () => {
|
|||
expect(result).toBe(messages);
|
||||
});
|
||||
|
||||
it("drops thinking blocks while preserving non-thinking assistant content", () => {
|
||||
it("preserves thinking blocks when the assistant message is the latest assistant turn", () => {
|
||||
const { assistant, messages, result } = dropSingleAssistantContent([
|
||||
{ type: "thinking", thinking: "internal" },
|
||||
{ type: "text", text: "final" },
|
||||
]);
|
||||
expect(result).not.toBe(messages);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "final" }]);
|
||||
expect(result).toBe(messages);
|
||||
expect(assistant.content).toEqual([
|
||||
{ type: "thinking", thinking: "internal" },
|
||||
{ type: "text", text: "final" },
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps assistant turn structure when all content blocks were thinking", () => {
|
||||
it("preserves a latest assistant turn even when all content blocks are thinking", () => {
|
||||
const { assistant } = dropSingleAssistantContent([
|
||||
{ type: "thinking", thinking: "internal-only" },
|
||||
]);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "" }]);
|
||||
expect(assistant.content).toEqual([{ type: "thinking", thinking: "internal-only" }]);
|
||||
});
|
||||
|
||||
it("preserves thinking blocks in the latest assistant message", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({ role: "user", content: "first" }),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "thinking", thinking: "old" },
|
||||
{ type: "text", text: "old text" },
|
||||
],
|
||||
}),
|
||||
castAgentMessage({ role: "user", content: "second" }),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
|
||||
{ type: "text", text: "latest text" },
|
||||
],
|
||||
}),
|
||||
];
|
||||
|
||||
const result = dropThinkingBlocks(messages);
|
||||
const firstAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
|
||||
expect(firstAssistant.content).toEqual([{ type: "text", text: "old text" }]);
|
||||
expect(latestAssistant.content).toEqual([
|
||||
{ type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" },
|
||||
{ type: "text", text: "latest text" },
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -12,28 +12,54 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is
|
|||
);
|
||||
}
|
||||
|
||||
function isThinkingBlock(block: AssistantContentBlock): boolean {
|
||||
return (
|
||||
!!block &&
|
||||
typeof block === "object" &&
|
||||
((block as { type?: unknown }).type === "thinking" ||
|
||||
(block as { type?: unknown }).type === "redacted_thinking")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip all `type: "thinking"` content blocks from assistant messages.
|
||||
* Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from
|
||||
* all assistant messages except the latest one.
|
||||
*
|
||||
* If an assistant message becomes empty after stripping, it is replaced with
|
||||
* a synthetic `{ type: "text", text: "" }` block to preserve turn structure
|
||||
* (some providers require strict user/assistant alternation).
|
||||
* Thinking blocks in the latest assistant turn are preserved verbatim so
|
||||
* providers that require replay signatures can continue the conversation.
|
||||
*
|
||||
* If a non-latest assistant message becomes empty after stripping, it is
|
||||
* replaced with a synthetic `{ type: "text", text: "" }` block to preserve
|
||||
* turn structure (some providers require strict user/assistant alternation).
|
||||
*
|
||||
* Returns the original array reference when nothing was changed (callers can
|
||||
* use reference equality to skip downstream work).
|
||||
*/
|
||||
export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
|
||||
let latestAssistantIndex = -1;
|
||||
for (let i = messages.length - 1; i >= 0; i -= 1) {
|
||||
if (isAssistantMessageWithContent(messages[i])) {
|
||||
latestAssistantIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let touched = false;
|
||||
const out: AgentMessage[] = [];
|
||||
for (const msg of messages) {
|
||||
for (let i = 0; i < messages.length; i += 1) {
|
||||
const msg = messages[i];
|
||||
if (!isAssistantMessageWithContent(msg)) {
|
||||
out.push(msg);
|
||||
continue;
|
||||
}
|
||||
if (i === latestAssistantIndex) {
|
||||
out.push(msg);
|
||||
continue;
|
||||
}
|
||||
const nextContent: AssistantContentBlock[] = [];
|
||||
let changed = false;
|
||||
for (const block of msg.content) {
|
||||
if (block && typeof block === "object" && (block as { type?: unknown }).type === "thinking") {
|
||||
if (isThinkingBlock(block)) {
|
||||
touched = true;
|
||||
changed = true;
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -123,6 +123,77 @@ describe("pruneContextMessages", () => {
|
|||
expect(result).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("counts thinkingSignature bytes when estimating assistant message size", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("hello"),
|
||||
makeToolResult([{ type: "text", text: "X".repeat(2_000) }]),
|
||||
makeAssistant([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "[redacted]",
|
||||
thinkingSignature: "S".repeat(40_000),
|
||||
redacted: true,
|
||||
} as unknown as AssistantContentBlock,
|
||||
{ type: "text", text: "done" },
|
||||
]),
|
||||
];
|
||||
|
||||
const result = pruneContextMessages({
|
||||
messages,
|
||||
settings: {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
keepLastAssistants: 1,
|
||||
softTrimRatio: 0.5,
|
||||
softTrim: { maxChars: 200, headChars: 100, tailChars: 50 },
|
||||
hardClear: { ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, enabled: false },
|
||||
},
|
||||
ctx: { model: { contextWindow: 5_000 } } as unknown as ExtensionContext,
|
||||
isToolPrunable: () => true,
|
||||
});
|
||||
|
||||
const toolResult = result.find((message) => message.role === "toolResult") as Extract<
|
||||
AgentMessage,
|
||||
{ role: "toolResult" }
|
||||
>;
|
||||
const textBlock = toolResult.content[0] as { type: "text"; text: string };
|
||||
expect(textBlock.text).toContain("[Tool result trimmed:");
|
||||
});
|
||||
|
||||
it("counts redacted_thinking data bytes when estimating assistant message size", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("hello"),
|
||||
makeToolResult([{ type: "text", text: "X".repeat(2_000) }]),
|
||||
makeAssistant([
|
||||
{
|
||||
type: "redacted_thinking",
|
||||
data: "D".repeat(40_000),
|
||||
thinkingSignature: "sig",
|
||||
} as unknown as AssistantContentBlock,
|
||||
{ type: "text", text: "done" },
|
||||
]),
|
||||
];
|
||||
|
||||
const result = pruneContextMessages({
|
||||
messages,
|
||||
settings: {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
keepLastAssistants: 1,
|
||||
softTrimRatio: 0.5,
|
||||
softTrim: { maxChars: 200, headChars: 100, tailChars: 50 },
|
||||
hardClear: { ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, enabled: false },
|
||||
},
|
||||
ctx: { model: { contextWindow: 5_000 } } as unknown as ExtensionContext,
|
||||
isToolPrunable: () => true,
|
||||
});
|
||||
|
||||
const toolResult = result.find((message) => message.role === "toolResult") as Extract<
|
||||
AgentMessage,
|
||||
{ role: "toolResult" }
|
||||
>;
|
||||
const textBlock = toolResult.content[0] as { type: "text"; text: string };
|
||||
expect(textBlock.text).toContain("[Tool result trimmed:");
|
||||
});
|
||||
|
||||
it("soft-trims image-containing tool results by replacing image blocks with placeholders", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("summarize this"),
|
||||
|
|
|
|||
|
|
@ -146,8 +146,20 @@ function estimateMessageChars(message: AgentMessage): number {
|
|||
if (b.type === "text" && typeof b.text === "string") {
|
||||
chars += estimateWeightedTextChars(b.text);
|
||||
}
|
||||
if (b.type === "thinking" && typeof b.thinking === "string") {
|
||||
chars += estimateWeightedTextChars(b.thinking);
|
||||
const blockType = (b as { type?: unknown }).type;
|
||||
if (blockType === "thinking" || blockType === "redacted_thinking") {
|
||||
const thinking = (b as { thinking?: unknown }).thinking;
|
||||
if (typeof thinking === "string") {
|
||||
chars += estimateWeightedTextChars(thinking);
|
||||
}
|
||||
const data = (b as { data?: unknown }).data;
|
||||
if (blockType === "redacted_thinking" && typeof data === "string") {
|
||||
chars += estimateWeightedTextChars(data);
|
||||
}
|
||||
const signature = (b as { thinkingSignature?: unknown }).thinkingSignature;
|
||||
if (typeof signature === "string") {
|
||||
chars += estimateWeightedTextChars(signature);
|
||||
}
|
||||
}
|
||||
if (b.type === "toolCall") {
|
||||
try {
|
||||
|
|
|
|||
Loading…
Reference in New Issue