diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 8c49df40acb..e367461ea31 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -72,9 +72,16 @@ function getStatusCode(err: unknown): number | undefined { if (!err || typeof err !== "object") { return undefined; } + // Dig into nested `err.error` shapes (e.g. Google Vertex abort wrappers) + const nestedError = + "error" in err && err.error && typeof err.error === "object" + ? (err.error as { status?: unknown; code?: unknown }) + : undefined; const candidate = (err as { status?: unknown; statusCode?: unknown }).status ?? - (err as { statusCode?: unknown }).statusCode; + (err as { statusCode?: unknown }).statusCode ?? + nestedError?.code ?? + nestedError?.status; if (typeof candidate === "number") { return candidate; } @@ -88,7 +95,11 @@ function getErrorCode(err: unknown): string | undefined { if (!err || typeof err !== "object") { return undefined; } - const candidate = (err as { code?: unknown }).code; + const nestedError = + "error" in err && err.error && typeof err.error === "object" + ? (err.error as { code?: unknown; status?: unknown }) + : undefined; + const candidate = (err as { code?: unknown }).code ?? nestedError?.status ?? nestedError?.code; if (typeof candidate !== "string") { return undefined; } @@ -114,10 +125,53 @@ function getErrorMessage(err: unknown): string { if (typeof message === "string") { return message; } + // Extract message from nested `err.error.message` (e.g. Google Vertex wrappers) + const nestedMessage = + "error" in err && + err.error && + typeof err.error === "object" && + typeof (err.error as { message?: unknown }).message === "string" + ? ((err.error as { message: string }).message ?? "") + : ""; + if (nestedMessage) { + return nestedMessage; + } } return ""; } +function getErrorCause(err: unknown): unknown { + if (!err || typeof err !== "object" || !("cause" in err)) { + return undefined; + } + return (err as { cause?: unknown }).cause; +} + +/** Classify rate-limit / overloaded from symbolic error codes like RESOURCE_EXHAUSTED. */ +function classifyFailoverReasonFromSymbolicCode(raw: string | undefined): FailoverReason | null { + const normalized = raw?.trim().toUpperCase(); + if (!normalized) { + return null; + } + switch (normalized) { + case "RESOURCE_EXHAUSTED": + case "RATE_LIMIT": + case "RATE_LIMITED": + case "RATE_LIMIT_EXCEEDED": + case "TOO_MANY_REQUESTS": + case "THROTTLED": + case "THROTTLING": + case "THROTTLINGEXCEPTION": + case "THROTTLING_EXCEPTION": + return "rate_limit"; + case "OVERLOADED": + case "OVERLOADED_ERROR": + return "overloaded"; + default: + return null; + } +} + function hasTimeoutHint(err: unknown): boolean { if (!err) { return false; @@ -160,6 +214,12 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n return statusReason; } + // Check symbolic error codes (e.g. RESOURCE_EXHAUSTED from Google APIs) + const symbolicCodeReason = classifyFailoverReasonFromSymbolicCode(getErrorCode(err)); + if (symbolicCodeReason) { + return symbolicCodeReason; + } + const code = (getErrorCode(err) ?? "").toUpperCase(); if ( [ @@ -181,6 +241,14 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n if (isTimeoutError(err)) { return "timeout"; } + // Walk into error cause chain (e.g. AbortError wrapping a rate-limit cause) + const cause = getErrorCause(err); + if (cause && cause !== err) { + const causeReason = resolveFailoverReasonFromError(cause); + if (causeReason) { + return causeReason; + } + } if (!message) { return null; } diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index 3969416cd38..4795bdb4c65 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -331,6 +331,76 @@ describe("runWithModelFallback – probe logic", () => { }); }); + it("keeps walking remaining fallbacks after an abort-wrapped RESOURCE_EXHAUSTED probe failure", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "google/gemini-3-flash-preview", + fallbacks: ["anthropic/claude-haiku-3-5", "deepseek/deepseek-chat"], + }, + }, + }, + } as Partial); + + mockedResolveAuthProfileOrder.mockImplementation(({ provider }: { provider: string }) => { + if (provider === "google") { + return ["google-profile-1"]; + } + if (provider === "anthropic") { + return ["anthropic-profile-1"]; + } + if (provider === "deepseek") { + return ["deepseek-profile-1"]; + } + return []; + }); + mockedIsProfileInCooldown.mockImplementation((_store, profileId: string) => + profileId.startsWith("google"), + ); + mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 1000); + mockedResolveProfilesUnavailableReason.mockReturnValue("rate_limit"); + + // Simulate Google Vertex abort-wrapped RESOURCE_EXHAUSTED (the shape that was + // previously swallowed by shouldRethrowAbort before the fallback loop could continue) + const primaryAbort = Object.assign(new Error("request aborted"), { + name: "AbortError", + cause: { + error: { + code: 429, + message: "Resource has been exhausted (e.g. check quota).", + status: "RESOURCE_EXHAUSTED", + }, + }, + }); + const run = vi + .fn() + .mockRejectedValueOnce(primaryAbort) + .mockRejectedValueOnce( + Object.assign(new Error("fallback still rate limited"), { status: 429 }), + ) + .mockRejectedValueOnce( + Object.assign(new Error("final fallback still rate limited"), { status: 429 }), + ); + + await expect( + runWithModelFallback({ + cfg, + provider: "google", + model: "gemini-3-flash-preview", + run, + }), + ).rejects.toThrow(/All models failed \(3\)/); + + // All three candidates must be attempted — the abort must not short-circuit + expect(run).toHaveBeenCalledTimes(3); + expect(run).toHaveBeenNthCalledWith(1, "google", "gemini-3-flash-preview", { + allowTransientCooldownProbe: true, + }); + expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5"); + expect(run).toHaveBeenNthCalledWith(3, "deepseek", "deepseek-chat"); + }); + it("throttles probe when called within 30s interval", async () => { const cfg = makeCfg(); // Cooldown just about to expire (within probe margin) diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index d14ede7658b..5fd6e533a1a 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -140,10 +140,16 @@ async function runFallbackCandidate(params: { result, }; } catch (err) { - if (shouldRethrowAbort(err)) { + // Normalize abort-wrapped rate-limit errors (e.g. Google Vertex RESOURCE_EXHAUSTED) + // so they become FailoverErrors and continue the fallback loop instead of aborting. + const normalizedFailover = coerceToFailoverError(err, { + provider: params.provider, + model: params.model, + }); + if (shouldRethrowAbort(err) && !normalizedFailover) { throw err; } - return { ok: false, error: err }; + return { ok: false, error: normalizedFailover ?? err }; } } diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 1839a9df1bb..4ca6c0ea226 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -28,7 +28,12 @@ import { resolveContextWindowInfo, } from "../context-window-guard.js"; import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js"; -import { FailoverError, resolveFailoverStatus } from "../failover-error.js"; +import { + coerceToFailoverError, + describeFailoverError, + FailoverError, + resolveFailoverStatus, +} from "../failover-error.js"; import { applyLocalNoAuthHeaderOverride, ensureAuthProfileStore, @@ -1217,7 +1222,17 @@ export async function runEmbeddedPiAgent( } if (promptError && !aborted) { - const errorText = describeUnknownError(promptError); + // Normalize wrapped errors (e.g. abort-wrapped RESOURCE_EXHAUSTED) into + // FailoverError so rate-limit classification works even for nested shapes. + const normalizedPromptFailover = coerceToFailoverError(promptError, { + provider: activeErrorContext.provider, + model: activeErrorContext.model, + profileId: lastProfileId, + }); + const promptErrorDetails = normalizedPromptFailover + ? describeFailoverError(normalizedPromptFailover) + : describeFailoverError(promptError); + const errorText = promptErrorDetails.message || describeUnknownError(promptError); if (await maybeRefreshCopilotForAuthError(errorText, copilotAuthRetry)) { authRetryPending = true; continue; @@ -1281,14 +1296,16 @@ export async function runEmbeddedPiAgent( }, }; } - const promptFailoverReason = classifyFailoverReason(errorText); + const promptFailoverReason = + promptErrorDetails.reason ?? classifyFailoverReason(errorText); const promptProfileFailureReason = resolveAuthProfileFailureReason(promptFailoverReason); await maybeMarkAuthProfileFailure({ profileId: lastProfileId, reason: promptProfileFailureReason, }); - const promptFailoverFailure = isFailoverErrorMessage(errorText); + const promptFailoverFailure = + promptFailoverReason !== null || isFailoverErrorMessage(errorText); // Capture the failing profile before auth-profile rotation mutates `lastProfileId`. const failedPromptProfileId = lastProfileId; const logPromptFailoverDecision = createFailoverDecisionLogger({ @@ -1330,13 +1347,16 @@ export async function runEmbeddedPiAgent( const status = resolveFailoverStatus(promptFailoverReason ?? "unknown"); logPromptFailoverDecision("fallback_model", { status }); await maybeBackoffBeforeOverloadFailover(promptFailoverReason); - throw new FailoverError(errorText, { - reason: promptFailoverReason ?? "unknown", - provider, - model: modelId, - profileId: lastProfileId, - status, - }); + throw ( + normalizedPromptFailover ?? + new FailoverError(errorText, { + reason: promptFailoverReason ?? "unknown", + provider, + model: modelId, + profileId: lastProfileId, + status: resolveFailoverStatus(promptFailoverReason ?? "unknown"), + }) + ); } if (promptFailoverFailure || promptFailoverReason) { logPromptFailoverDecision("surface_error");