diff --git a/CHANGELOG.md b/CHANGELOG.md index be20765ec6a..fe47bfa4257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai - Chat/error replies: stop leaking raw provider/runtime failures into external chat channels, return a friendly retry message instead, and add a specific `/new` hint for Bedrock toolResult/toolUse session mismatches. (#58831) Thanks @ImLukeF. - Memory/session indexing: keep full reindexes from skipping session transcripts when sync is triggered by `session-start` or `watch`, so restart-driven reindexes preserve session memory (#39732) thanks @upupc - Telegram/retries: keep non-idempotent sends on the strict safe-send path, retry wrapped pre-connect failures, and preserve `429` / `retry_after` backoff for safe delivery retries. (#51895) Thanks @chinar-amrutkar +- Agents/Anthropic: preserve thinking blocks and signatures across replay, cache-control patching, and context pruning so compacted Anthropic sessions continue working instead of failing on later turns. 
(#58916) Thanks @obviyus ## 2026.3.31 diff --git a/src/agents/pi-embedded-helpers/bootstrap.test.ts b/src/agents/pi-embedded-helpers/bootstrap.test.ts new file mode 100644 index 00000000000..61a9ca48cfb --- /dev/null +++ b/src/agents/pi-embedded-helpers/bootstrap.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, it } from "vitest"; +import { stripThoughtSignatures } from "./bootstrap.js"; + +describe("stripThoughtSignatures", () => { + it("preserves thinkingSignature while still stripping invalid thought signatures", () => { + const thinkingBlock = { + type: "thinking", + thinking: "internal", + thinkingSignature: "keep_me", + thoughtSignature: "msg_123", + }; + const redactedBlock = { + type: "redacted_thinking", + redacted_thinking: "...", + thinkingSignature: "keep_me_too", + thoughtSignature: "msg_456", + }; + const textBlock = { + type: "text", + text: "visible", + thoughtSignature: "msg_789", + }; + + const result = stripThoughtSignatures([thinkingBlock, redactedBlock, textBlock], { + includeCamelCase: true, + }); + + expect(result[0]).toEqual({ + type: "thinking", + thinking: "internal", + thinkingSignature: "keep_me", + }); + expect(result[1]).toEqual({ + type: "redacted_thinking", + redacted_thinking: "...", + thinkingSignature: "keep_me_too", + }); + expect(result[2]).toEqual({ + type: "text", + text: "visible", + }); + }); +}); diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 0e42d3df91c..e7c869b98e9 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -769,17 +769,24 @@ describe("sanitizeSessionHistory", () => { ).toBe(false); }); - it("drops assistant thinking blocks for github-copilot models", async () => { + it("preserves latest assistant thinking blocks for github-copilot models", async () => { setNonGoogleModelApi(); const messages = 
makeThinkingAndTextAssistantMessages("reasoning_text"); const result = await sanitizeGithubCopilotHistory({ messages }); const assistant = getAssistantMessage(result); - expect(assistant.content).toEqual([{ type: "text", text: "hi" }]); + expect(assistant.content).toEqual([ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "reasoning_text", + }, + { type: "text", text: "hi" }, + ]); }); - it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => { + it("preserves latest assistant turn when all content is thinking blocks (github-copilot)", async () => { setNonGoogleModelApi(); const messages: AgentMessage[] = [ @@ -796,13 +803,18 @@ describe("sanitizeSessionHistory", () => { const result = await sanitizeGithubCopilotHistory({ messages }); - // Assistant turn should be preserved (not dropped) to maintain turn alternation expect(result).toHaveLength(3); const assistant = getAssistantMessage(result); - expect(assistant.content).toEqual([{ type: "text", text: "" }]); + expect(assistant.content).toEqual([ + { + type: "thinking", + thinking: "some reasoning", + thinkingSignature: "reasoning_text", + }, + ]); }); - it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => { + it("preserves thinking blocks alongside tool_use blocks in latest assistant message (github-copilot)", async () => { setNonGoogleModelApi(); const messages: AgentMessage[] = [ @@ -820,12 +832,12 @@ describe("sanitizeSessionHistory", () => { const result = await sanitizeGithubCopilotHistory({ messages }); const types = getAssistantContentTypes(result); + expect(types).toContain("thinking"); expect(types).toContain("toolCall"); expect(types).toContain("text"); - expect(types).not.toContain("thinking"); }); - it("drops assistant thinking blocks for anthropic replay", async () => { + it("preserves latest assistant thinking blocks for anthropic replay", async () => { setNonGoogleModelApi(); const messages = 
makeThinkingAndTextAssistantMessages(); @@ -833,10 +845,17 @@ describe("sanitizeSessionHistory", () => { const result = await sanitizeAnthropicHistory({ messages }); const assistant = getAssistantMessage(result); - expect(assistant.content).toEqual([{ type: "text", text: "hi" }]); + expect(assistant.content).toEqual([ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "some_sig", + }, + { type: "text", text: "hi" }, + ]); }); - it("drops assistant thinking blocks for amazon-bedrock replay", async () => { + it("preserves latest assistant thinking blocks for amazon-bedrock replay", async () => { setNonGoogleModelApi(); const messages = makeThinkingAndTextAssistantMessages(); @@ -848,7 +867,14 @@ describe("sanitizeSessionHistory", () => { }); const assistant = getAssistantMessage(result); - expect(assistant.content).toEqual([{ type: "text", text: "hi" }]); + expect(assistant.content).toEqual([ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "some_sig", + }, + { type: "text", text: "hi" }, + ]); }); it("does not drop thinking blocks for non-claude copilot models", async () => { diff --git a/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts b/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts index 08010bb0b20..716ee888f4c 100644 --- a/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts +++ b/src/agents/pi-embedded-runner/extra-params.openrouter-cache-control.test.ts @@ -89,4 +89,51 @@ describe("extra-params: OpenRouter Anthropic cache_control", () => { expect(payload.messages[0].content).toBe("Hello"); }); + + it("does not inject cache_control into thinking blocks", () => { + const payload = { + messages: [ + { + role: "system", + content: [ + { type: "text", text: "Part 1" }, + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + ], + }, + ], + }; + + runOpenRouterPayload(payload, "anthropic/claude-opus-4-6"); + + 
expect(payload.messages[0].content).toEqual([ + { type: "text", text: "Part 1" }, + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + ]); + }); + + it("removes pre-existing cache_control from assistant thinking blocks", () => { + const payload = { + messages: [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "sig_1", + cache_control: { type: "ephemeral" }, + }, + { type: "text", text: "visible" }, + ], + }, + ], + }; + + runOpenRouterPayload(payload, "anthropic/claude-opus-4-6"); + + expect(payload.messages[0].content).toEqual([ + { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" }, + { type: "text", text: "visible" }, + ]); + }); }); diff --git a/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts b/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts index 70202ec05a1..8ea6691ce58 100644 --- a/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/proxy-stream-wrappers.ts @@ -71,17 +71,32 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde const messages = payloadObj.messages; if (Array.isArray(messages)) { for (const msg of messages as Array<{ role?: string; content?: unknown }>) { - if (msg.role !== "system" && msg.role !== "developer") { + if (msg.role === "system" || msg.role === "developer") { + if (typeof msg.content === "string") { + msg.content = [ + { type: "text", text: msg.content, cache_control: { type: "ephemeral" } }, + ]; + } else if (Array.isArray(msg.content) && msg.content.length > 0) { + const last = msg.content[msg.content.length - 1]; + if (last && typeof last === "object") { + const record = last as Record<string, unknown>; + if (record.type !== "thinking" && record.type !== "redacted_thinking") { + record.cache_control = { type: "ephemeral" }; + } + } + } continue; } - if (typeof msg.content === "string") { - msg.content = [ - { type: "text", text: msg.content, cache_control: { type: 
"ephemeral" } }, - ]; - } else if (Array.isArray(msg.content) && msg.content.length > 0) { - const last = msg.content[msg.content.length - 1]; - if (last && typeof last === "object") { - (last as Record).cache_control = { type: "ephemeral" }; + + if (msg.role === "assistant" && Array.isArray(msg.content)) { + for (const block of msg.content) { + if (!block || typeof block !== "object") { + continue; + } + const record = block as Record; + if (record.type === "thinking" || record.type === "redacted_thinking") { + delete record.cache_control; + } } } } diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts index e3d0a8291b6..09eace9c68f 100644 --- a/src/agents/pi-embedded-runner/thinking.test.ts +++ b/src/agents/pi-embedded-runner/thinking.test.ts @@ -45,19 +45,53 @@ describe("dropThinkingBlocks", () => { expect(result).toBe(messages); }); - it("drops thinking blocks while preserving non-thinking assistant content", () => { + it("preserves thinking blocks when the assistant message is the latest assistant turn", () => { const { assistant, messages, result } = dropSingleAssistantContent([ { type: "thinking", thinking: "internal" }, { type: "text", text: "final" }, ]); - expect(result).not.toBe(messages); - expect(assistant.content).toEqual([{ type: "text", text: "final" }]); + expect(result).toBe(messages); + expect(assistant.content).toEqual([ + { type: "thinking", thinking: "internal" }, + { type: "text", text: "final" }, + ]); }); - it("keeps assistant turn structure when all content blocks were thinking", () => { + it("preserves a latest assistant turn even when all content blocks are thinking", () => { const { assistant } = dropSingleAssistantContent([ { type: "thinking", thinking: "internal-only" }, ]); - expect(assistant.content).toEqual([{ type: "text", text: "" }]); + expect(assistant.content).toEqual([{ type: "thinking", thinking: "internal-only" }]); + }); + + it("preserves thinking blocks in the latest 
assistant message", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "first" }), + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "old" }, + { type: "text", text: "old text" }, + ], + }), + castAgentMessage({ role: "user", content: "second" }), + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" }, + { type: "text", text: "latest text" }, + ], + }), + ]; + + const result = dropThinkingBlocks(messages); + const firstAssistant = result[1] as Extract<AgentMessage, { role: "assistant" }>; + const latestAssistant = result[3] as Extract<AgentMessage, { role: "assistant" }>; + + expect(firstAssistant.content).toEqual([{ type: "text", text: "old text" }]); + expect(latestAssistant.content).toEqual([ + { type: "thinking", thinking: "latest", thinkingSignature: "sig_latest" }, + { type: "text", text: "latest text" }, + ]); }); }); diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts index f503fd3f164..98238a53e2a 100644 --- a/src/agents/pi-embedded-runner/thinking.ts +++ b/src/agents/pi-embedded-runner/thinking.ts @@ -12,28 +12,54 @@ export function isAssistantMessageWithContent(message: AgentMessage): message is ); } +function isThinkingBlock(block: AssistantContentBlock): boolean { + return ( + !!block && + typeof block === "object" && + ((block as { type?: unknown }).type === "thinking" || + (block as { type?: unknown }).type === "redacted_thinking") + ); +} + /** - * Strip all `type: "thinking"` content blocks from assistant messages. + * Strip `type: "thinking"` and `type: "redacted_thinking"` content blocks from + * all assistant messages except the latest one. * - * If an assistant message becomes empty after stripping, it is replaced with - * a synthetic `{ type: "text", text: "" }` block to preserve turn structure - * (some providers require strict user/assistant alternation). 
+ * Thinking blocks in the latest assistant turn are preserved verbatim so + * providers that require replay signatures can continue the conversation. + * + * If a non-latest assistant message becomes empty after stripping, it is + * replaced with a synthetic `{ type: "text", text: "" }` block to preserve + * turn structure (some providers require strict user/assistant alternation). * * Returns the original array reference when nothing was changed (callers can * use reference equality to skip downstream work). */ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] { + let latestAssistantIndex = -1; + for (let i = messages.length - 1; i >= 0; i -= 1) { + if (isAssistantMessageWithContent(messages[i])) { + latestAssistantIndex = i; + break; + } + } + let touched = false; const out: AgentMessage[] = []; - for (const msg of messages) { + for (let i = 0; i < messages.length; i += 1) { + const msg = messages[i]; if (!isAssistantMessageWithContent(msg)) { out.push(msg); continue; } + if (i === latestAssistantIndex) { + out.push(msg); + continue; + } const nextContent: AssistantContentBlock[] = []; let changed = false; for (const block of msg.content) { - if (block && typeof block === "object" && (block as { type?: unknown }).type === "thinking") { + if (isThinkingBlock(block)) { touched = true; changed = true; continue; diff --git a/src/agents/pi-hooks/context-pruning/pruner.test.ts b/src/agents/pi-hooks/context-pruning/pruner.test.ts index a847bff0e8c..f122fc8e973 100644 --- a/src/agents/pi-hooks/context-pruning/pruner.test.ts +++ b/src/agents/pi-hooks/context-pruning/pruner.test.ts @@ -123,6 +123,77 @@ describe("pruneContextMessages", () => { expect(result).toHaveLength(2); }); + it("counts thinkingSignature bytes when estimating assistant message size", () => { + const messages: AgentMessage[] = [ + makeUser("hello"), + makeToolResult([{ type: "text", text: "X".repeat(2_000) }]), + makeAssistant([ + { + type: "thinking", + thinking: 
"[redacted]", + thinkingSignature: "S".repeat(40_000), + redacted: true, + } as unknown as AssistantContentBlock, + { type: "text", text: "done" }, + ]), + ]; + + const result = pruneContextMessages({ + messages, + settings: { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 1, + softTrimRatio: 0.5, + softTrim: { maxChars: 200, headChars: 100, tailChars: 50 }, + hardClear: { ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, enabled: false }, + }, + ctx: { model: { contextWindow: 5_000 } } as unknown as ExtensionContext, + isToolPrunable: () => true, + }); + + const toolResult = result.find((message) => message.role === "toolResult") as Extract< + AgentMessage, + { role: "toolResult" } + >; + const textBlock = toolResult.content[0] as { type: "text"; text: string }; + expect(textBlock.text).toContain("[Tool result trimmed:"); + }); + + it("counts redacted_thinking data bytes when estimating assistant message size", () => { + const messages: AgentMessage[] = [ + makeUser("hello"), + makeToolResult([{ type: "text", text: "X".repeat(2_000) }]), + makeAssistant([ + { + type: "redacted_thinking", + data: "D".repeat(40_000), + thinkingSignature: "sig", + } as unknown as AssistantContentBlock, + { type: "text", text: "done" }, + ]), + ]; + + const result = pruneContextMessages({ + messages, + settings: { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 1, + softTrimRatio: 0.5, + softTrim: { maxChars: 200, headChars: 100, tailChars: 50 }, + hardClear: { ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, enabled: false }, + }, + ctx: { model: { contextWindow: 5_000 } } as unknown as ExtensionContext, + isToolPrunable: () => true, + }); + + const toolResult = result.find((message) => message.role === "toolResult") as Extract< + AgentMessage, + { role: "toolResult" } + >; + const textBlock = toolResult.content[0] as { type: "text"; text: string }; + expect(textBlock.text).toContain("[Tool result trimmed:"); + }); + it("soft-trims image-containing tool results 
by replacing image blocks with placeholders", () => { const messages: AgentMessage[] = [ makeUser("summarize this"), diff --git a/src/agents/pi-hooks/context-pruning/pruner.ts b/src/agents/pi-hooks/context-pruning/pruner.ts index 55a9da89b21..06017e92ff1 100644 --- a/src/agents/pi-hooks/context-pruning/pruner.ts +++ b/src/agents/pi-hooks/context-pruning/pruner.ts @@ -146,8 +146,20 @@ function estimateMessageChars(message: AgentMessage): number { if (b.type === "text" && typeof b.text === "string") { chars += estimateWeightedTextChars(b.text); } - if (b.type === "thinking" && typeof b.thinking === "string") { - chars += estimateWeightedTextChars(b.thinking); + const blockType = (b as { type?: unknown }).type; + if (blockType === "thinking" || blockType === "redacted_thinking") { + const thinking = (b as { thinking?: unknown }).thinking; + if (typeof thinking === "string") { + chars += estimateWeightedTextChars(thinking); + } + const data = (b as { data?: unknown }).data; + if (blockType === "redacted_thinking" && typeof data === "string") { + chars += estimateWeightedTextChars(data); + } + const signature = (b as { thinkingSignature?: unknown }).thinkingSignature; + if (typeof signature === "string") { + chars += estimateWeightedTextChars(signature); + } } if (b.type === "toolCall") { try {