From b1c982bb2d8d56a45891c00158afa4dde95827ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8A=A9=E7=88=AA?= Date: Fri, 27 Mar 2026 09:51:39 -0400 Subject: [PATCH] fix(agents): fail over and sanitize Codex server_error payloads (#42892) Merged via squash. Prepared head SHA: 6db9a5f02da20f17d9f63354fc0a100483156f5c Co-authored-by: xaeon2026 <264572156+xaeon2026@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf --- CHANGELOG.md | 1 + ...d-helpers.formatassistanterrortext.test.ts | 8 ++- ...dded-helpers.isbillingerrormessage.test.ts | 5 ++ ...ded-helpers.sanitizeuserfacingtext.test.ts | 40 +++++++++++ src/agents/pi-embedded-helpers/errors.ts | 47 ++++++++++++- .../pi-embedded-helpers/failover-matches.ts | 17 +++++ .../run.codex-server-error-fallback.test.ts | 70 +++++++++++++++++++ .../run.overflow-compaction.harness.ts | 64 +++++++++++++---- src/shared/assistant-error-format.ts | 5 +- src/tui/tui-formatters.test.ts | 2 +- 10 files changed, 241 insertions(+), 18 deletions(-) create mode 100644 src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 40ef8ac411d..9034bad4d54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ Docs: https://docs.openclaw.ai - TUI/chat log: keep system messages as single logical entries and prune overflow at whole-message boundaries so wrapped system spacing stays intact. (#55732) Thanks @shakkernerd. - TUI/activation: validate `/activation` arguments in the TUI and reject invalid values instead of silently coercing them to `mention`. (#55733) Thanks @shakkernerd. - Agents/model switching: apply `/model` changes to active embedded runs at the next safe retry boundary, so overloaded or retrying turns switch to the newly selected model instead of staying pinned to the old provider. +- Agents/Codex fallback: classify Codex `server_error` payloads as failoverable, sanitize `Codex error:` payloads before they reach chat, preserve context-overflow guidance for prefixed `invalid_request_error` payloads, and omit provider `request_id` values from user-facing UI copy. (#42892) Thanks @xaeon2026. ## 2026.3.24 diff --git a/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts b/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts index cef0c6a4d35..c65774697b7 100644 --- a/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts +++ b/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts @@ -82,6 +82,12 @@ describe("formatAssistantErrorText", () => { ); expect(formatAssistantErrorText(msg)).toBe("LLM error server_error: Something exploded"); }); + it("sanitizes Codex error-prefixed JSON payloads", () => { + const msg = makeAssistantError( + 'Codex error: {"type":"error","error":{"message":"Something exploded","type":"server_error"},"sequence_number":2}', + ); + expect(formatAssistantErrorText(msg)).toBe("LLM error server_error: Something exploded"); + }); it("returns a friendly billing message for credit balance errors", () => { const msg = makeAssistantError("Your credit balance is too low to access the Anthropic API."); const result = formatAssistantErrorText(msg); @@ -209,7 +215,7 @@ describe("formatRawAssistantErrorForUi", () => { expect(text).toContain("HTTP 429"); expect(text).toContain("rate_limit_error"); expect(text).toContain("Rate limited."); - expect(text).toContain("req_123"); + expect(text).not.toContain("req_123"); }); it("renders a generic unknown error message when raw is empty", () => { diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 30190c818c8..b5058ce40c4 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -794,6 +794,11 @@ describe("classifyFailoverReason", () => { "521 Web server is downCloudflare", ), ).toBe("timeout"); + expect( + classifyFailoverReason( + 'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}', + ), + ).toBe("timeout"); expect(classifyFailoverReason("string should match pattern")).toBe("format"); expect(classifyFailoverReason("bad request")).toBeNull(); expect( diff --git a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts index 82fe67c47f4..850bed8d535 100644 --- a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts @@ -82,6 +82,46 @@ describe("sanitizeUserFacingText", () => { ); }); + it("does not rewrite unprefixed raw API payloads without explicit errorContext", () => { + const raw = '{"type":"error","error":{"type":"server_error","message":"Something exploded"}}'; + expect(sanitizeUserFacingText(raw)).toBe(raw); + }); + + it("sanitizes Codex error-prefixed API payloads", () => { + const raw = + 'Codex error: {"type":"error","error":{"type":"server_error","message":"Something exploded"},"sequence_number":2}'; + expect(sanitizeUserFacingText(raw, { errorContext: true })).toBe( + "LLM error server_error: Something exploded", + ); + }); + + it("sanitizes Codex error-prefixed API payloads without explicit errorContext", () => { + const raw = + 'Codex error: {"type":"error","error":{"type":"server_error","message":"Something exploded"},"sequence_number":2}'; + expect(sanitizeUserFacingText(raw)).toBe("LLM error server_error: Something exploded"); + }); + + it("keeps regular JSON examples intact without explicit errorContext", () => { + const raw = '{"error":{"type":"validation_error","message":"showing an example payload"}}'; + expect(sanitizeUserFacingText(raw)).toBe(raw); + }); + + it("preserves specialized context overflow guidance for raw API payloads", () => { + const raw = + '{"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}'; + expect(sanitizeUserFacingText(raw, { errorContext: true })).toContain( + "Context overflow: prompt too large for the model.", + ); + }); + + it("preserves specialized context overflow guidance for Codex-prefixed API payloads", () => { + const raw = + 'Codex error: {"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}'; + expect(sanitizeUserFacingText(raw, { errorContext: true })).toContain( + "Context overflow: prompt too large for the model.", + ); + }); + it("returns a friendly message for rate limit errors in Error: prefixed payloads", () => { expect(sanitizeUserFacingText("Error: 429 Rate limit exceeded", { errorContext: true })).toBe( "⚠️ API rate limit reached. Please try again later.", diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 0167e7f8d0f..3300eccfddc 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -23,6 +23,7 @@ import { isOverloadedErrorMessage, isPeriodicUsageLimitErrorMessage, isRateLimitErrorMessage, + isServerErrorMessage, isTimeoutErrorMessage, matchesFormatErrorPattern, } from "./failover-matches.js"; @@ -34,6 +35,7 @@ export { isBillingErrorMessage, isOverloadedErrorMessage, isRateLimitErrorMessage, + isServerErrorMessage, isTimeoutErrorMessage, } from "./failover-matches.js"; @@ -320,7 +322,7 @@ export function extractObservedOverflowTokenCount(errorMessage?: string): number const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi; const ERROR_PREFIX_RE = - /^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|openai\s*error|anthropic\s*error|gateway\s*error|request failed|failed|exception)(?:\s+\d{3})?[:\s-]+/i; + /^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|openai\s*error|anthropic\s*error|gateway\s*error|codex\s*error|request failed|failed|exception)(?:\s+\d{3})?[:\s-]+/i; const CONTEXT_OVERFLOW_ERROR_HEAD_RE = /^(?:context overflow:|request_too_large\b|request size exceeds\b|request exceeds the maximum size\b|context length exceeded\b|maximum context length\b|prompt is too long\b|exceeds model context window\b)/i; const TRANSIENT_HTTP_ERROR_CODES = new Set([499, 500, 502, 503, 504, 521, 522, 523, 524, 529]); @@ -589,6 +591,40 @@ export function isRawApiErrorPayload(raw?: string): boolean { return getApiErrorPayloadFingerprint(raw) !== null; } +function isLikelyProviderErrorType(type?: string): boolean { + const normalized = type?.trim().toLowerCase(); + if (!normalized) { + return false; + } + return normalized.endsWith("_error"); +} + +const NON_ERROR_PROVIDER_PAYLOAD_MAX_LENGTH = 16_384; +const NON_ERROR_PROVIDER_PAYLOAD_PREFIX_RE = /^codex\s*error(?:\s+\d{3})?[:\s-]+/i; + +function shouldRewriteRawPayloadWithoutErrorContext(raw: string): boolean { + if (raw.length > NON_ERROR_PROVIDER_PAYLOAD_MAX_LENGTH) { + return false; + } + if (!NON_ERROR_PROVIDER_PAYLOAD_PREFIX_RE.test(raw)) { + return false; + } + const info = parseApiErrorInfo(raw); + if (!info) { + return false; + } + if (isLikelyProviderErrorType(info.type)) { + return true; + } + if (info.httpCode) { + const parsedCode = Number(info.httpCode); + if (Number.isFinite(parsedCode) && parsedCode >= 400) { + return true; + } + } + return false; +} + export function formatAssistantErrorText( msg: AssistantMessage, opts?: { cfg?: OpenClawConfig; sessionKey?: string; provider?: string; model?: string }, @@ -695,6 +731,12 @@ export function sanitizeUserFacingText(text: string, opts?: { errorContext?: boo return ""; } + // Provider error payloads should not leak directly into user-visible text even + // when a stream chunk was not explicitly flagged as an error. + if (!errorContext && shouldRewriteRawPayloadWithoutErrorContext(trimmed)) { + return formatRawAssistantErrorForUi(trimmed); + } + // Only apply error-pattern rewrites when the caller knows this text is an error payload. // Otherwise we risk swallowing legitimate assistant text that merely *mentions* these errors. if (errorContext) { @@ -965,6 +1007,9 @@ export function classifyFailoverReason(raw: string): FailoverReason | null { if (isAuthErrorMessage(raw)) { return "auth"; } + if (isServerErrorMessage(raw)) { + return "timeout"; + } if (isJsonApiInternalServerError(raw)) { return "timeout"; } diff --git a/src/agents/pi-embedded-helpers/failover-matches.ts b/src/agents/pi-embedded-helpers/failover-matches.ts index 1770a58a989..405a31a81e8 100644 --- a/src/agents/pi-embedded-helpers/failover-matches.ts +++ b/src/agents/pi-embedded-helpers/failover-matches.ts @@ -25,6 +25,19 @@ const ERROR_PATTERNS = { /service[_ ]unavailable.*(?:overload|capacity|high[_ ]demand)|(?:overload|capacity|high[_ ]demand).*service[_ ]unavailable/i, "high demand", ], + serverError: [ + "an error occurred while processing", + "internal server error", + "internal_error", + "server_error", + "service temporarily unavailable", + "service_unavailable", + "bad gateway", + "gateway timeout", + "upstream error", + "upstream connect error", + "connection reset", + ], timeout: [ "timeout", "timed out", @@ -169,3 +182,7 @@ export function isAuthErrorMessage(raw: string): boolean { export function isOverloadedErrorMessage(raw: string): boolean { return matchesErrorPatterns(raw, ERROR_PATTERNS.overloaded); } + +export function isServerErrorMessage(raw: string): boolean { + return matchesErrorPatterns(raw, ERROR_PATTERNS.serverError); +} diff --git a/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts b/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts new file mode 100644 index 00000000000..7c4e0d58670 --- /dev/null +++ b/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts @@ -0,0 +1,70 @@ +import { beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js"; +import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; +import { + loadRunOverflowCompactionHarness, + MockedFailoverError, + mockedClassifyFailoverReason, + mockedFormatAssistantErrorText, + mockedGlobalHookRunner, + mockedIsFailoverAssistantError, + mockedRunEmbeddedAttempt, + overflowBaseRunParams, + resetRunOverflowCompactionHarnessMocks, +} from "./run.overflow-compaction.harness.js"; +import type { EmbeddedRunAttemptResult } from "./run/types.js"; + +let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; + +describe("runEmbeddedPiAgent Codex server_error fallback handoff", () => { + beforeAll(async () => { + ({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness()); + }); + + beforeEach(() => { + resetRunOverflowCompactionHarnessMocks(); + mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); + }); + + it("throws FailoverError for Codex server_error when model fallbacks are configured", async () => { + const rawCodexError = + 'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}'; + + mockedClassifyFailoverReason.mockReturnValue("timeout"); + mockedIsFailoverAssistantError.mockReturnValue(true); + mockedFormatAssistantErrorText.mockReturnValue( + "LLM error server_error: An error occurred while processing your request.", + ); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: { + stopReason: "error", + errorMessage: rawCodexError, + provider: "openai-codex", + model: "gpt-5.4", + } as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + + const promise = runEmbeddedPiAgent({ + ...overflowBaseRunParams, + runId: "run-codex-server-error-fallback", + config: makeModelFallbackCfg({ + agents: { + defaults: { + model: { + primary: "openai-codex/gpt-5.4", + fallbacks: ["anthropic/claude-opus-4-6"], + }, + }, + }, + }), + }); + + await expect(promise).rejects.toBeInstanceOf(MockedFailoverError); + await expect(promise).rejects.toThrow( + "LLM error server_error: An error occurred while processing your request.", + ); + }); +}); diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts index ac12d7d89c7..001fe728330 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts @@ -6,6 +6,7 @@ import type { PluginHookBeforeModelResolveResult, PluginHookBeforePromptBuildResult, } from "../../plugins/types.js"; +import type { FailoverReason } from "../pi-embedded-helpers/types.js"; import type { EmbeddedRunAttemptResult } from "./run/types.js"; type MockCompactionResult = @@ -96,6 +97,13 @@ type MockTruncateOversizedToolResultsResult = { reason?: string; }; +export class MockedFailoverError extends Error { + constructor(message: string) { + super(message); + this.name = "FailoverError"; + } +} + export const mockedCoerceToFailoverError = vi.fn(); export const mockedDescribeFailoverError = vi.fn( (err: unknown): MockFailoverErrorDescription => ({ @@ -121,12 +129,20 @@ export const mockedLog: { isEnabled: vi.fn(() => false), }; -export const mockedClassifyFailoverReason = vi.fn(() => null); +export const mockedFormatBillingErrorMessage = vi.fn(() => ""); +export const mockedClassifyFailoverReason = vi.fn<(raw: string) => FailoverReason | null>( + () => null, +); export const mockedExtractObservedOverflowTokenCount = vi.fn((msg?: string) => { const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i); return match?.[1] ? Number(match[1].replaceAll(",", "")) : undefined; }); +export const mockedFormatAssistantErrorText = vi.fn(() => ""); +export const mockedIsAuthAssistantError = vi.fn(() => false); +export const mockedIsBillingAssistantError = vi.fn(() => false); export const mockedIsCompactionFailureError = vi.fn(() => false); +export const mockedIsFailoverAssistantError = vi.fn(() => false); +export const mockedIsFailoverErrorMessage = vi.fn(() => false); export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => { const lower = (msg ?? "").toLowerCase(); return ( @@ -135,6 +151,10 @@ export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => { lower.includes("prompt is too long") ); }); +export const mockedParseImageSizeError = vi.fn(() => null); +export const mockedParseImageDimensionError = vi.fn(() => null); +export const mockedIsRateLimitAssistantError = vi.fn(() => false); +export const mockedIsTimeoutErrorMessage = vi.fn(() => false); export const mockedPickFallbackThinkingLevel = vi.fn<(params?: unknown) => ThinkLevel | null>( () => null, ); @@ -228,6 +248,14 @@ export function resetRunOverflowCompactionHarnessMocks(): void { mockedClassifyFailoverReason.mockReset(); mockedClassifyFailoverReason.mockReturnValue(null); + mockedFormatBillingErrorMessage.mockReset(); + mockedFormatBillingErrorMessage.mockReturnValue(""); + mockedFormatAssistantErrorText.mockReset(); + mockedFormatAssistantErrorText.mockReturnValue(""); + mockedIsAuthAssistantError.mockReset(); + mockedIsAuthAssistantError.mockReturnValue(false); + mockedIsBillingAssistantError.mockReset(); + mockedIsBillingAssistantError.mockReturnValue(false); mockedExtractObservedOverflowTokenCount.mockReset(); mockedExtractObservedOverflowTokenCount.mockImplementation((msg?: string) => { const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i); @@ -235,6 +263,10 @@ export function resetRunOverflowCompactionHarnessMocks(): void { }); mockedIsCompactionFailureError.mockReset(); mockedIsCompactionFailureError.mockReturnValue(false); + mockedIsFailoverAssistantError.mockReset(); + mockedIsFailoverAssistantError.mockReturnValue(false); + mockedIsFailoverErrorMessage.mockReset(); + mockedIsFailoverErrorMessage.mockReturnValue(false); mockedIsLikelyContextOverflowError.mockReset(); mockedIsLikelyContextOverflowError.mockImplementation((msg?: string) => { const lower = (msg ?? "").toLowerCase(); @@ -244,6 +276,14 @@ export function resetRunOverflowCompactionHarnessMocks(): void { lower.includes("prompt is too long") ); }); + mockedParseImageSizeError.mockReset(); + mockedParseImageSizeError.mockReturnValue(null); + mockedParseImageDimensionError.mockReset(); + mockedParseImageDimensionError.mockReturnValue(null); + mockedIsRateLimitAssistantError.mockReset(); + mockedIsRateLimitAssistantError.mockReturnValue(false); + mockedIsTimeoutErrorMessage.mockReset(); + mockedIsTimeoutErrorMessage.mockReturnValue(false); mockedPickFallbackThinkingLevel.mockReset(); mockedPickFallbackThinkingLevel.mockReturnValue(null); mockedEvaluateContextWindowGuard.mockReset(); @@ -330,20 +370,20 @@ export async function loadRunOverflowCompactionHarness(): Promise<{ })); vi.doMock("../pi-embedded-helpers.js", () => ({ - formatBillingErrorMessage: vi.fn(() => ""), + formatBillingErrorMessage: mockedFormatBillingErrorMessage, classifyFailoverReason: mockedClassifyFailoverReason, extractObservedOverflowTokenCount: mockedExtractObservedOverflowTokenCount, - formatAssistantErrorText: vi.fn(() => ""), - isAuthAssistantError: vi.fn(() => false), - isBillingAssistantError: vi.fn(() => false), + formatAssistantErrorText: mockedFormatAssistantErrorText, + isAuthAssistantError: mockedIsAuthAssistantError, + isBillingAssistantError: mockedIsBillingAssistantError, isCompactionFailureError: mockedIsCompactionFailureError, isLikelyContextOverflowError: mockedIsLikelyContextOverflowError, - isFailoverAssistantError: vi.fn(() => false), - isFailoverErrorMessage: vi.fn(() => false), - parseImageSizeError: vi.fn(() => null), - parseImageDimensionError: vi.fn(() => null), - isRateLimitAssistantError: vi.fn(() => false), - isTimeoutErrorMessage: vi.fn(() => false), + isFailoverAssistantError: mockedIsFailoverAssistantError, + isFailoverErrorMessage: mockedIsFailoverErrorMessage, + parseImageSizeError: mockedParseImageSizeError, + parseImageDimensionError: mockedParseImageDimensionError, + isRateLimitAssistantError: mockedIsRateLimitAssistantError, + isTimeoutErrorMessage: mockedIsTimeoutErrorMessage, pickFallbackThinkingLevel: mockedPickFallbackThinkingLevel, })); @@ -408,7 +448,7 @@ export async function loadRunOverflowCompactionHarness(): Promise<{ })); vi.doMock("../failover-error.js", () => ({ - FailoverError: class extends Error {}, + FailoverError: MockedFailoverError, coerceToFailoverError: mockedCoerceToFailoverError, describeFailoverError: mockedDescribeFailoverError, resolveFailoverStatus: mockedResolveFailoverStatus, diff --git a/src/shared/assistant-error-format.ts b/src/shared/assistant-error-format.ts index b07d5b2ac53..35bd82e326d 100644 --- a/src/shared/assistant-error-format.ts +++ b/src/shared/assistant-error-format.ts @@ -1,5 +1,5 @@ const ERROR_PAYLOAD_PREFIX_RE = - /^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)(?:\s+\d{3})?[:\s-]+/i; + /^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error|codex\s*error)(?:\s+\d{3})?[:\s-]+/i; const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i; const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i; const HTML_ERROR_PREFIX_RE = /^\s*(?: 600 ? `${trimmed.slice(0, 600)}…` : trimmed; diff --git a/src/tui/tui-formatters.test.ts b/src/tui/tui-formatters.test.ts index 3ceb0c56570..791666ea4e4 100644 --- a/src/tui/tui-formatters.test.ts +++ b/src/tui/tui-formatters.test.ts @@ -19,7 +19,7 @@ describe("extractTextFromMessage", () => { expect(text).toContain("HTTP 429"); expect(text).toContain("rate_limit_error"); - expect(text).toContain("req_123"); + expect(text).toContain("This request would exceed your account's rate limit."); }); it("falls back to a generic message when errorMessage is missing", () => {