From 4e63dc0b1c84fd7c0dbffb57cf71a4928cec9ad3 Mon Sep 17 00:00:00 2001 From: ImLukeF <92253590+ImLukeF@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:33:58 +1100 Subject: [PATCH] fix: hide raw provider errors from chat replies --- .../reply/agent-runner-execution.test.ts | 125 +++++++++++++++++- .../reply/agent-runner-execution.ts | 21 ++- 2 files changed, 143 insertions(+), 3 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts index 6cccbc0a365..688ec45fe1b 100644 --- a/src/auto-reply/reply/agent-runner-execution.test.ts +++ b/src/auto-reply/reply/agent-runner-execution.test.ts @@ -9,6 +9,7 @@ import type { TypingSignaler } from "./typing-mode.js"; const state = vi.hoisted(() => ({ runEmbeddedPiAgentMock: vi.fn(), runWithModelFallbackMock: vi.fn(), + isInternalMessageChannelMock: vi.fn((_: unknown) => false), })); vi.mock("../../agents/pi-embedded.js", () => ({ @@ -74,7 +75,7 @@ vi.mock("../../runtime.js", () => ({ vi.mock("../../utils/message-channel.js", () => ({ isMarkdownCapableMessageChannel: () => true, resolveMessageChannel: () => "whatsapp", - isInternalMessageChannel: () => false, + isInternalMessageChannel: (value: unknown) => state.isInternalMessageChannelMock(value), })); vi.mock("../heartbeat.js", () => ({ @@ -167,6 +168,8 @@ describe("runAgentTurnWithFallback", () => { beforeEach(() => { state.runEmbeddedPiAgentMock.mockReset(); state.runWithModelFallbackMock.mockReset(); + state.isInternalMessageChannelMock.mockReset(); + state.isInternalMessageChannelMock.mockReturnValue(false); state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => ({ result: await params.run("anthropic", "claude"), provider: "anthropic", @@ -270,11 +273,129 @@ describe("runAgentTurnWithFallback", () => { expect(result.kind).toBe("final"); if (result.kind === "final") { - expect(result.payload.text).toContain("Agent failed before reply"); + 
expect(result.payload.text).toContain("Something went wrong while processing your request"); expect(result.payload.text).not.toContain("Rate-limited"); } }); + it("returns a friendly generic error on external chat channels", async () => { + state.runEmbeddedPiAgentMock.mockRejectedValueOnce( + new Error("INVALID_ARGUMENT: some other failure"), + ); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback({ + commandBody: "hello", + followupRun: createFollowupRun(), + sessionCtx: { + Provider: "whatsapp", + MessageSid: "msg", + } as unknown as TemplateContext, + opts: {}, + typingSignals: createMockTypingSignaler(), + blockReplyPipeline: null, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + applyReplyToMode: (payload) => payload, + shouldEmitToolResult: () => true, + shouldEmitToolOutput: () => false, + pendingToolTasks: new Set(), + resetSessionAfterCompactionFailure: async () => false, + resetSessionAfterRoleOrderingConflict: async () => false, + isHeartbeat: false, + sessionKey: "main", + getActiveSessionEntry: () => undefined, + resolvedVerboseLevel: "off", + }); + + expect(result.kind).toBe("final"); + if (result.kind === "final") { + expect(result.payload.text).toBe( + "⚠️ Something went wrong while processing your request. 
Please try again, or use /new to start a fresh session.", + ); + } + }); + + it("returns a session reset hint for Bedrock tool mismatch errors on external chat channels", async () => { + state.runEmbeddedPiAgentMock.mockRejectedValueOnce( + new Error( + "The number of toolResult blocks at messages.186.content exceeds the number of toolUse blocks of previous turn.", + ), + ); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback({ + commandBody: "hello", + followupRun: createFollowupRun(), + sessionCtx: { + Provider: "whatsapp", + MessageSid: "msg", + } as unknown as TemplateContext, + opts: {}, + typingSignals: createMockTypingSignaler(), + blockReplyPipeline: null, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + applyReplyToMode: (payload) => payload, + shouldEmitToolResult: () => true, + shouldEmitToolOutput: () => false, + pendingToolTasks: new Set(), + resetSessionAfterCompactionFailure: async () => false, + resetSessionAfterRoleOrderingConflict: async () => false, + isHeartbeat: false, + sessionKey: "main", + getActiveSessionEntry: () => undefined, + resolvedVerboseLevel: "off", + }); + + expect(result.kind).toBe("final"); + if (result.kind === "final") { + expect(result.payload.text).toBe( + "⚠️ Session history got out of sync. 
Please try again, or use /new to start a fresh session.", + ); + } + }); + + it("keeps raw generic errors on internal control surfaces", async () => { + state.isInternalMessageChannelMock.mockReturnValue(true); + state.runEmbeddedPiAgentMock.mockRejectedValueOnce( + new Error("INVALID_ARGUMENT: some other failure"), + ); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback({ + commandBody: "hello", + followupRun: createFollowupRun(), + sessionCtx: { + Provider: "chat", + Surface: "chat", + MessageSid: "msg", + } as unknown as TemplateContext, + opts: {}, + typingSignals: createMockTypingSignaler(), + blockReplyPipeline: null, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + applyReplyToMode: (payload) => payload, + shouldEmitToolResult: () => true, + shouldEmitToolOutput: () => false, + pendingToolTasks: new Set(), + resetSessionAfterCompactionFailure: async () => false, + resetSessionAfterRoleOrderingConflict: async () => false, + isHeartbeat: false, + sessionKey: "main", + getActiveSessionEntry: () => undefined, + resolvedVerboseLevel: "off", + }); + + expect(result.kind).toBe("final"); + if (result.kind === "final") { + expect(result.payload.text).toContain("Agent failed before reply"); + expect(result.payload.text).toContain("INVALID_ARGUMENT: some other failure"); + expect(result.payload.text).toContain("Logs: openclaw logs --follow"); + } + }); + it("restarts the active prompt when a live model switch is requested", async () => { let fallbackInvocation = 0; state.runWithModelFallbackMock.mockImplementation( diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 3a0f09c1a5e..455ce16ff67 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -119,6 +119,23 @@ function isPureTransientRateLimitSummary(err: unknown): boolean { ); } +function 
isToolResultTurnMismatchError(message: string): boolean { /* Case-insensitive check: true only when the message contains all four marker substrings below; the tests in this patch exercise it with a "number of toolResult blocks ... exceeds the number of toolUse blocks of previous turn" error. */ + const lower = message.toLowerCase(); + return ( + lower.includes("toolresult") && + lower.includes("tooluse") && + lower.includes("exceeds the number") && + lower.includes("previous turn") + ); +} +/** Picks the canned failure reply used when the raw error text is not shown: the "Session history got out of sync" text when isToolResultTurnMismatchError(message) is true, otherwise the generic "Something went wrong" text. The input message is only inspected and never copied into the returned string. */ +function buildExternalRunFailureText(message: string): string { + if (isToolResultTurnMismatchError(message)) { + return "⚠️ Session history got out of sync. Please try again, or use /new to start a fresh session."; + } + return "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session."; +} + export async function runAgentTurnWithFallback(params: { commandBody: string; followupRun: FollowupRun; @@ -769,7 +786,9 @@ export async function runAgentTurnWithFallback(params: { ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model." : isRoleOrderingError ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session." - : `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`; + : shouldSurfaceToControlUi /* when truthy the reply keeps the raw error text and the logs hint; otherwise it falls through to the canned text. NOTE(review): the definition of shouldSurfaceToControlUi is outside this hunk; presumably tied to isInternalMessageChannel, confirm. */ + ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow` + : buildExternalRunFailureText(message); return { kind: "final",