fix(agents): fail over and sanitize Codex server_error payloads (#42892)

Merged via squash.

Prepared head SHA: 6db9a5f02d
Co-authored-by: xaeon2026 <264572156+xaeon2026@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
助爪 2026-03-27 09:51:39 -04:00 committed by GitHub
parent 546a1aad98
commit b1c982bb2d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 241 additions and 18 deletions

View File

@ -84,6 +84,7 @@ Docs: https://docs.openclaw.ai
- TUI/chat log: keep system messages as single logical entries and prune overflow at whole-message boundaries so wrapped system spacing stays intact. (#55732) Thanks @shakkernerd.
- TUI/activation: validate `/activation` arguments in the TUI and reject invalid values instead of silently coercing them to `mention`. (#55733) Thanks @shakkernerd.
- Agents/model switching: apply `/model` changes to active embedded runs at the next safe retry boundary, so overloaded or retrying turns switch to the newly selected model instead of staying pinned to the old provider.
- Agents/Codex fallback: classify Codex `server_error` payloads as failoverable, sanitize `Codex error:` payloads before they reach chat, preserve context-overflow guidance for prefixed `invalid_request_error` payloads, and omit provider `request_id` values from user-facing UI copy. (#42892) Thanks @xaeon2026.
## 2026.3.24

View File

@ -82,6 +82,12 @@ describe("formatAssistantErrorText", () => {
);
expect(formatAssistantErrorText(msg)).toBe("LLM error server_error: Something exploded");
});
// Codex wraps raw API error JSON behind a "Codex error:" prefix; the formatter
// must strip that prefix, parse the payload, and surface only type + message.
it("sanitizes Codex error-prefixed JSON payloads", () => {
const msg = makeAssistantError(
'Codex error: {"type":"error","error":{"message":"Something exploded","type":"server_error"},"sequence_number":2}',
);
expect(formatAssistantErrorText(msg)).toBe("LLM error server_error: Something exploded");
});
it("returns a friendly billing message for credit balance errors", () => {
const msg = makeAssistantError("Your credit balance is too low to access the Anthropic API.");
const result = formatAssistantErrorText(msg);
@ -209,7 +215,7 @@ describe("formatRawAssistantErrorForUi", () => {
expect(text).toContain("HTTP 429");
expect(text).toContain("rate_limit_error");
expect(text).toContain("Rate limited.");
expect(text).toContain("req_123");
expect(text).not.toContain("req_123");
});
it("renders a generic unknown error message when raw is empty", () => {

View File

@ -794,6 +794,11 @@ describe("classifyFailoverReason", () => {
"521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
),
).toBe("timeout");
expect(
classifyFailoverReason(
'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}',
),
).toBe("timeout");
expect(classifyFailoverReason("string should match pattern")).toBe("format");
expect(classifyFailoverReason("bad request")).toBeNull();
expect(

View File

@ -82,6 +82,46 @@ describe("sanitizeUserFacingText", () => {
);
});
// Without the "Codex error:" prefix and without errorContext, a raw payload is
// treated as ordinary assistant text and passed through untouched.
it("does not rewrite unprefixed raw API payloads without explicit errorContext", () => {
const raw = '{"type":"error","error":{"type":"server_error","message":"Something exploded"}}';
expect(sanitizeUserFacingText(raw)).toBe(raw);
});
// Prefixed payload + explicit errorContext: rewritten to the compact UI form.
it("sanitizes Codex error-prefixed API payloads", () => {
const raw =
'Codex error: {"type":"error","error":{"type":"server_error","message":"Something exploded"},"sequence_number":2}';
expect(sanitizeUserFacingText(raw, { errorContext: true })).toBe(
"LLM error server_error: Something exploded",
);
});
// The "Codex error:" prefix alone is enough to trigger the rewrite, even when
// the caller did not flag the chunk as an error.
it("sanitizes Codex error-prefixed API payloads without explicit errorContext", () => {
const raw =
'Codex error: {"type":"error","error":{"type":"server_error","message":"Something exploded"},"sequence_number":2}';
expect(sanitizeUserFacingText(raw)).toBe("LLM error server_error: Something exploded");
});
// Guard against over-eager rewriting: JSON that merely *looks* error-shaped
// (no Codex prefix, no errorContext) must survive verbatim.
it("keeps regular JSON examples intact without explicit errorContext", () => {
const raw = '{"error":{"type":"validation_error","message":"showing an example payload"}}';
expect(sanitizeUserFacingText(raw)).toBe(raw);
});
// invalid_request_error payloads that indicate context overflow keep the
// specialized guidance rather than the generic "LLM error" rendering.
it("preserves specialized context overflow guidance for raw API payloads", () => {
const raw =
'{"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}';
expect(sanitizeUserFacingText(raw, { errorContext: true })).toContain(
"Context overflow: prompt too large for the model.",
);
});
// Same overflow guidance must survive when the payload is Codex-prefixed.
it("preserves specialized context overflow guidance for Codex-prefixed API payloads", () => {
const raw =
'Codex error: {"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}';
expect(sanitizeUserFacingText(raw, { errorContext: true })).toContain(
"Context overflow: prompt too large for the model.",
);
});
it("returns a friendly message for rate limit errors in Error: prefixed payloads", () => {
expect(sanitizeUserFacingText("Error: 429 Rate limit exceeded", { errorContext: true })).toBe(
"⚠️ API rate limit reached. Please try again later.",

View File

@ -23,6 +23,7 @@ import {
isOverloadedErrorMessage,
isPeriodicUsageLimitErrorMessage,
isRateLimitErrorMessage,
isServerErrorMessage,
isTimeoutErrorMessage,
matchesFormatErrorPattern,
} from "./failover-matches.js";
@ -34,6 +35,7 @@ export {
isBillingErrorMessage,
isOverloadedErrorMessage,
isRateLimitErrorMessage,
isServerErrorMessage,
isTimeoutErrorMessage,
} from "./failover-matches.js";
@ -320,7 +322,7 @@ export function extractObservedOverflowTokenCount(errorMessage?: string): number
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi;
const ERROR_PREFIX_RE =
/^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|openai\s*error|anthropic\s*error|gateway\s*error|request failed|failed|exception)(?:\s+\d{3})?[:\s-]+/i;
/^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|openai\s*error|anthropic\s*error|gateway\s*error|codex\s*error|request failed|failed|exception)(?:\s+\d{3})?[:\s-]+/i;
const CONTEXT_OVERFLOW_ERROR_HEAD_RE =
/^(?:context overflow:|request_too_large\b|request size exceeds\b|request exceeds the maximum size\b|context length exceeded\b|maximum context length\b|prompt is too long\b|exceeds model context window\b)/i;
const TRANSIENT_HTTP_ERROR_CODES = new Set([499, 500, 502, 503, 504, 521, 522, 523, 524, 529]);
@ -589,6 +591,40 @@ export function isRawApiErrorPayload(raw?: string): boolean {
return getApiErrorPayloadFingerprint(raw) !== null;
}
/**
 * Heuristic: treat a provider-reported error `type` as a genuine error code
 * when it ends with "_error" (e.g. "server_error", "invalid_request_error").
 * Input is trimmed and lower-cased first; a missing or blank type never
 * qualifies.
 */
function isLikelyProviderErrorType(type?: string): boolean {
  const code = (type ?? "").trim().toLowerCase();
  return code.length > 0 && code.endsWith("_error");
}
// Upper bound on the payload size we are willing to inspect; larger chunks are
// passed through untouched to avoid scanning huge text.
const NON_ERROR_PROVIDER_PAYLOAD_MAX_LENGTH = 16_384;
// Matches a leading "Codex error" marker, optionally followed by a 3-digit
// HTTP status, then ":"/whitespace/"-" separators.
const NON_ERROR_PROVIDER_PAYLOAD_PREFIX_RE = /^codex\s*error(?:\s+\d{3})?[:\s-]+/i;

/**
 * Decides whether a chunk that was NOT explicitly flagged as an error should
 * still be rewritten as a provider error payload before reaching the user.
 * Only Codex-prefixed payloads of bounded size qualify, and only when the
 * parsed payload carries an "_error"-style type or an HTTP status of 400+.
 */
function shouldRewriteRawPayloadWithoutErrorContext(raw: string): boolean {
  const withinLimit = raw.length <= NON_ERROR_PROVIDER_PAYLOAD_MAX_LENGTH;
  if (!withinLimit || !NON_ERROR_PROVIDER_PAYLOAD_PREFIX_RE.test(raw)) {
    return false;
  }
  const info = parseApiErrorInfo(raw);
  if (!info) {
    return false;
  }
  if (isLikelyProviderErrorType(info.type)) {
    return true;
  }
  if (!info.httpCode) {
    return false;
  }
  const status = Number(info.httpCode);
  return Number.isFinite(status) && status >= 400;
}
export function formatAssistantErrorText(
msg: AssistantMessage,
opts?: { cfg?: OpenClawConfig; sessionKey?: string; provider?: string; model?: string },
@ -695,6 +731,12 @@ export function sanitizeUserFacingText(text: string, opts?: { errorContext?: boo
return "";
}
// Provider error payloads should not leak directly into user-visible text even
// when a stream chunk was not explicitly flagged as an error.
if (!errorContext && shouldRewriteRawPayloadWithoutErrorContext(trimmed)) {
return formatRawAssistantErrorForUi(trimmed);
}
// Only apply error-pattern rewrites when the caller knows this text is an error payload.
// Otherwise we risk swallowing legitimate assistant text that merely *mentions* these errors.
if (errorContext) {
@ -965,6 +1007,9 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
if (isAuthErrorMessage(raw)) {
return "auth";
}
if (isServerErrorMessage(raw)) {
return "timeout";
}
if (isJsonApiInternalServerError(raw)) {
return "timeout";
}

View File

@ -25,6 +25,19 @@ const ERROR_PATTERNS = {
/service[_ ]unavailable.*(?:overload|capacity|high[_ ]demand)|(?:overload|capacity|high[_ ]demand).*service[_ ]unavailable/i,
"high demand",
],
serverError: [
"an error occurred while processing",
"internal server error",
"internal_error",
"server_error",
"service temporarily unavailable",
"service_unavailable",
"bad gateway",
"gateway timeout",
"upstream error",
"upstream connect error",
"connection reset",
],
timeout: [
"timeout",
"timed out",
@ -169,3 +182,7 @@ export function isAuthErrorMessage(raw: string): boolean {
export function isOverloadedErrorMessage(raw: string): boolean {
return matchesErrorPatterns(raw, ERROR_PATTERNS.overloaded);
}
/**
 * True when `raw` matches the generic server-side failure patterns in
 * ERROR_PATTERNS.serverError (e.g. "internal server error", "bad gateway",
 * "server_error"). Used to classify such messages as failoverable.
 */
export function isServerErrorMessage(raw: string): boolean {
return matchesErrorPatterns(raw, ERROR_PATTERNS.serverError);
}

View File

@ -0,0 +1,70 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
MockedFailoverError,
mockedClassifyFailoverReason,
mockedFormatAssistantErrorText,
mockedGlobalHookRunner,
mockedIsFailoverAssistantError,
mockedRunEmbeddedAttempt,
overflowBaseRunParams,
resetRunOverflowCompactionHarnessMocks,
} from "./run.overflow-compaction.harness.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
// Resolved lazily in beforeAll because the harness installs vi.doMock stubs
// that must be registered before ./run.js is imported.
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
describe("runEmbeddedPiAgent Codex server_error fallback handoff", () => {
beforeAll(async () => {
({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness());
});
beforeEach(() => {
// Reset all shared harness mocks, then disable hooks so the run path stays
// focused on the failover branch under test.
resetRunOverflowCompactionHarnessMocks();
mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
});
it("throws FailoverError for Codex server_error when model fallbacks are configured", async () => {
const rawCodexError =
'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}';
// Stub the classification pipeline: the raw Codex payload is treated as a
// failoverable "timeout" and formatted into the sanitized UI string.
mockedClassifyFailoverReason.mockReturnValue("timeout");
mockedIsFailoverAssistantError.mockReturnValue(true);
mockedFormatAssistantErrorText.mockReturnValue(
"LLM error server_error: An error occurred while processing your request.",
);
// First attempt ends in an assistant error carrying the raw Codex payload.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: {
stopReason: "error",
errorMessage: rawCodexError,
provider: "openai-codex",
model: "gpt-5.4",
} as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
// Configure a fallback model so the run has somewhere to fail over to.
const promise = runEmbeddedPiAgent({
...overflowBaseRunParams,
runId: "run-codex-server-error-fallback",
config: makeModelFallbackCfg({
agents: {
defaults: {
model: {
primary: "openai-codex/gpt-5.4",
fallbacks: ["anthropic/claude-opus-4-6"],
},
},
},
}),
});
// The run must surface a FailoverError carrying the sanitized message, not
// the raw Codex payload.
await expect(promise).rejects.toBeInstanceOf(MockedFailoverError);
await expect(promise).rejects.toThrow(
"LLM error server_error: An error occurred while processing your request.",
);
});
});

View File

@ -6,6 +6,7 @@ import type {
PluginHookBeforeModelResolveResult,
PluginHookBeforePromptBuildResult,
} from "../../plugins/types.js";
import type { FailoverReason } from "../pi-embedded-helpers/types.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
type MockCompactionResult =
@ -96,6 +97,13 @@ type MockTruncateOversizedToolResultsResult = {
reason?: string;
};
/**
 * Test double standing in for the real FailoverError. It carries the runtime
 * `name` "FailoverError" so name- and instanceof-based assertions in tests
 * match what the mocked ../failover-error.js module exports.
 */
export class MockedFailoverError extends Error {
  // Class-field initializer runs after super(), replacing Error's default name.
  name = "FailoverError";

  constructor(message: string) {
    super(message);
  }
}
export const mockedCoerceToFailoverError = vi.fn<MockCoerceToFailoverError>();
export const mockedDescribeFailoverError = vi.fn<MockDescribeFailoverError>(
(err: unknown): MockFailoverErrorDescription => ({
@ -121,12 +129,20 @@ export const mockedLog: {
isEnabled: vi.fn(() => false),
};
export const mockedClassifyFailoverReason = vi.fn(() => null);
export const mockedFormatBillingErrorMessage = vi.fn(() => "");
export const mockedClassifyFailoverReason = vi.fn<(raw: string) => FailoverReason | null>(
() => null,
);
export const mockedExtractObservedOverflowTokenCount = vi.fn((msg?: string) => {
const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i);
return match?.[1] ? Number(match[1].replaceAll(",", "")) : undefined;
});
export const mockedFormatAssistantErrorText = vi.fn(() => "");
export const mockedIsAuthAssistantError = vi.fn(() => false);
export const mockedIsBillingAssistantError = vi.fn(() => false);
export const mockedIsCompactionFailureError = vi.fn(() => false);
export const mockedIsFailoverAssistantError = vi.fn(() => false);
export const mockedIsFailoverErrorMessage = vi.fn(() => false);
export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
const lower = (msg ?? "").toLowerCase();
return (
@ -135,6 +151,10 @@ export const mockedIsLikelyContextOverflowError = vi.fn((msg?: string) => {
lower.includes("prompt is too long")
);
});
export const mockedParseImageSizeError = vi.fn(() => null);
export const mockedParseImageDimensionError = vi.fn(() => null);
export const mockedIsRateLimitAssistantError = vi.fn(() => false);
export const mockedIsTimeoutErrorMessage = vi.fn(() => false);
export const mockedPickFallbackThinkingLevel = vi.fn<(params?: unknown) => ThinkLevel | null>(
() => null,
);
@ -228,6 +248,14 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
mockedClassifyFailoverReason.mockReset();
mockedClassifyFailoverReason.mockReturnValue(null);
mockedFormatBillingErrorMessage.mockReset();
mockedFormatBillingErrorMessage.mockReturnValue("");
mockedFormatAssistantErrorText.mockReset();
mockedFormatAssistantErrorText.mockReturnValue("");
mockedIsAuthAssistantError.mockReset();
mockedIsAuthAssistantError.mockReturnValue(false);
mockedIsBillingAssistantError.mockReset();
mockedIsBillingAssistantError.mockReturnValue(false);
mockedExtractObservedOverflowTokenCount.mockReset();
mockedExtractObservedOverflowTokenCount.mockImplementation((msg?: string) => {
const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i);
@ -235,6 +263,10 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
});
mockedIsCompactionFailureError.mockReset();
mockedIsCompactionFailureError.mockReturnValue(false);
mockedIsFailoverAssistantError.mockReset();
mockedIsFailoverAssistantError.mockReturnValue(false);
mockedIsFailoverErrorMessage.mockReset();
mockedIsFailoverErrorMessage.mockReturnValue(false);
mockedIsLikelyContextOverflowError.mockReset();
mockedIsLikelyContextOverflowError.mockImplementation((msg?: string) => {
const lower = (msg ?? "").toLowerCase();
@ -244,6 +276,14 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
lower.includes("prompt is too long")
);
});
mockedParseImageSizeError.mockReset();
mockedParseImageSizeError.mockReturnValue(null);
mockedParseImageDimensionError.mockReset();
mockedParseImageDimensionError.mockReturnValue(null);
mockedIsRateLimitAssistantError.mockReset();
mockedIsRateLimitAssistantError.mockReturnValue(false);
mockedIsTimeoutErrorMessage.mockReset();
mockedIsTimeoutErrorMessage.mockReturnValue(false);
mockedPickFallbackThinkingLevel.mockReset();
mockedPickFallbackThinkingLevel.mockReturnValue(null);
mockedEvaluateContextWindowGuard.mockReset();
@ -330,20 +370,20 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
}));
vi.doMock("../pi-embedded-helpers.js", () => ({
formatBillingErrorMessage: vi.fn(() => ""),
formatBillingErrorMessage: mockedFormatBillingErrorMessage,
classifyFailoverReason: mockedClassifyFailoverReason,
extractObservedOverflowTokenCount: mockedExtractObservedOverflowTokenCount,
formatAssistantErrorText: vi.fn(() => ""),
isAuthAssistantError: vi.fn(() => false),
isBillingAssistantError: vi.fn(() => false),
formatAssistantErrorText: mockedFormatAssistantErrorText,
isAuthAssistantError: mockedIsAuthAssistantError,
isBillingAssistantError: mockedIsBillingAssistantError,
isCompactionFailureError: mockedIsCompactionFailureError,
isLikelyContextOverflowError: mockedIsLikelyContextOverflowError,
isFailoverAssistantError: vi.fn(() => false),
isFailoverErrorMessage: vi.fn(() => false),
parseImageSizeError: vi.fn(() => null),
parseImageDimensionError: vi.fn(() => null),
isRateLimitAssistantError: vi.fn(() => false),
isTimeoutErrorMessage: vi.fn(() => false),
isFailoverAssistantError: mockedIsFailoverAssistantError,
isFailoverErrorMessage: mockedIsFailoverErrorMessage,
parseImageSizeError: mockedParseImageSizeError,
parseImageDimensionError: mockedParseImageDimensionError,
isRateLimitAssistantError: mockedIsRateLimitAssistantError,
isTimeoutErrorMessage: mockedIsTimeoutErrorMessage,
pickFallbackThinkingLevel: mockedPickFallbackThinkingLevel,
}));
@ -408,7 +448,7 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
}));
vi.doMock("../failover-error.js", () => ({
FailoverError: class extends Error {},
FailoverError: MockedFailoverError,
coerceToFailoverError: mockedCoerceToFailoverError,
describeFailoverError: mockedDescribeFailoverError,
resolveFailoverStatus: mockedResolveFailoverStatus,

View File

@ -1,5 +1,5 @@
const ERROR_PAYLOAD_PREFIX_RE =
/^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)(?:\s+\d{3})?[:\s-]+/i;
/^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error|codex\s*error)(?:\s+\d{3})?[:\s-]+/i;
const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i;
const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i;
const HTML_ERROR_PREFIX_RE = /^\s*(?:<!doctype\s+html\b|<html\b)/i;
@ -180,8 +180,7 @@ export function formatRawAssistantErrorForUi(raw?: string): string {
if (info?.message) {
const prefix = info.httpCode ? `HTTP ${info.httpCode}` : "LLM error";
const type = info.type ? ` ${info.type}` : "";
const requestId = info.requestId ? ` (request_id: ${info.requestId})` : "";
return `${prefix}${type}: ${info.message}${requestId}`;
return `${prefix}${type}: ${info.message}`;
}
return trimmed.length > 600 ? `${trimmed.slice(0, 600)}` : trimmed;

View File

@ -19,7 +19,7 @@ describe("extractTextFromMessage", () => {
expect(text).toContain("HTTP 429");
expect(text).toContain("rate_limit_error");
expect(text).toContain("req_123");
expect(text).toContain("This request would exceed your account's rate limit.");
});
it("falls back to a generic message when errorMessage is missing", () => {