diff --git a/CHANGELOG.md b/CHANGELOG.md index c00259b9ad7..3e1f53f26ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,6 +105,7 @@ Docs: https://docs.openclaw.ai - iOS/Live Activities: mark the `ActivityKit` import in `LiveActivityManager.swift` as `@preconcurrency` so Xcode 26.4 / Swift 6 builds stop failing on strict concurrency checks. (#57180) Thanks @ngutman. - LINE/ACP: add current-conversation binding and inbound binding-routing parity so `/acp spawn ... --thread here`, configured ACP bindings, and active conversation-bound ACP sessions work on LINE like the other conversation channels. - LINE/markdown: preserve underscores inside Latin, Cyrillic, and CJK words when stripping markdown, while still removing standalone `_italic_` markers on the shared text-runtime path used by LINE and TTS. (#47465) Thanks @jackjin1997. +- Agents/failover: make overloaded same-provider retry count and retry delay configurable via `auth.cooldowns`, default to one retry with no delay, and document the model-fallback behavior. 
## 2026.3.31-beta.1 diff --git a/docs/.generated/config-baseline.json b/docs/.generated/config-baseline.json index 28755b819b4..1c7e2d5eb64 100644 --- a/docs/.generated/config-baseline.json +++ b/docs/.generated/config-baseline.json @@ -7855,6 +7855,39 @@ "help": "Failure window (hours) for backoff counters (default: 24).", "hasChildren": false }, + { + "path": "auth.cooldowns.overloadedBackoffMs", + "kind": "core", + "type": "integer", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "access", + "auth", + "reliability", + "storage" + ], + "label": "Overloaded Backoff (ms)", + "help": "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).", + "hasChildren": false + }, + { + "path": "auth.cooldowns.overloadedProfileRotations", + "kind": "core", + "type": "integer", + "required": false, + "deprecated": false, + "sensitive": false, + "tags": [ + "access", + "auth", + "storage" + ], + "label": "Overloaded Profile Rotations", + "help": "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).", + "hasChildren": false + }, { "path": "auth.order", "kind": "core", diff --git a/docs/.generated/config-baseline.jsonl b/docs/.generated/config-baseline.jsonl index d61e58934a0..8ca67f22976 100644 --- a/docs/.generated/config-baseline.jsonl +++ b/docs/.generated/config-baseline.jsonl @@ -1,4 +1,4 @@ -{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5718} +{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5720} {"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true} 
{"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true} {"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} @@ -697,6 +697,8 @@ {"recordType":"path","path":"auth.cooldowns.billingBackoffHoursByProvider.*","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} {"recordType":"path","path":"auth.cooldowns.billingMaxHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","performance"],"label":"Billing Backoff Cap (hours)","help":"Cap (hours) for billing backoff (default: 24).","hasChildren":false} {"recordType":"path","path":"auth.cooldowns.failureWindowHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Failover Window (hours)","help":"Failure window (hours) for backoff counters (default: 24).","hasChildren":false} +{"recordType":"path","path":"auth.cooldowns.overloadedBackoffMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","reliability","storage"],"label":"Overloaded Backoff (ms)","help":"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).","hasChildren":false} +{"recordType":"path","path":"auth.cooldowns.overloadedProfileRotations","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","storage"],"label":"Overloaded Profile Rotations","help":"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to 
model fallback (default: 1).","hasChildren":false} {"recordType":"path","path":"auth.order","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Auth Profile Order","help":"Ordered auth profile IDs per provider (used for automatic failover).","hasChildren":true} {"recordType":"path","path":"auth.order.*","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true} {"recordType":"path","path":"auth.order.*.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false} diff --git a/docs/concepts/model-failover.md b/docs/concepts/model-failover.md index 80592bcc2c9..b99803b10e6 100644 --- a/docs/concepts/model-failover.md +++ b/docs/concepts/model-failover.md @@ -129,6 +129,8 @@ Defaults: - Billing backoff starts at **5 hours**, doubles per billing failure, and caps at **24 hours**. - Backoff counters reset if the profile hasn’t failed for **24 hours** (configurable). +- Overloaded retries allow **1 same-provider profile rotation** before model fallback. +- Overloaded retries use **0 ms backoff** by default. ## Model fallback @@ -136,6 +138,11 @@ If all profiles for a provider fail, OpenClaw moves to the next model in `agents.defaults.model.fallbacks`. This applies to auth failures, rate limits, and timeouts that exhausted profile rotation (other errors do not advance fallback). +Overloaded errors are handled more aggressively than billing cooldowns. By default, +OpenClaw allows one same-provider auth-profile retry, then switches to the next +configured model fallback without waiting. Tune this with +`auth.cooldowns.overloadedProfileRotations` and `auth.cooldowns.overloadedBackoffMs`. + When a run starts with a model override (hooks or CLI), fallbacks still end at `agents.defaults.model.primary` after trying any configured fallbacks. 
@@ -146,6 +153,7 @@ See [Gateway configuration](/gateway/configuration) for: - `auth.profiles` / `auth.order` - `auth.cooldowns.billingBackoffHours` / `auth.cooldowns.billingBackoffHoursByProvider` - `auth.cooldowns.billingMaxHours` / `auth.cooldowns.failureWindowHours` +- `auth.cooldowns.overloadedProfileRotations` / `auth.cooldowns.overloadedBackoffMs` - `agents.defaults.model.primary` / `agents.defaults.model.fallbacks` - `agents.defaults.imageModel` routing diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index 24f086ecc3b..46ba51d6534 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -3029,6 +3029,8 @@ Notes: billingBackoffHoursByProvider: { anthropic: 3, openai: 8 }, billingMaxHours: 24, failureWindowHours: 24, + overloadedProfileRotations: 1, + overloadedBackoffMs: 0, }, }, } @@ -3038,6 +3040,8 @@ Notes: - `billingBackoffHoursByProvider`: optional per-provider overrides for billing backoff hours. - `billingMaxHours`: cap in hours for billing backoff exponential growth (default: `24`). - `failureWindowHours`: rolling window in hours used for backoff counters (default: `24`). +- `overloadedProfileRotations`: maximum same-provider auth-profile rotations for overloaded errors before switching to model fallback (default: `1`). +- `overloadedBackoffMs`: fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: `0`). --- diff --git a/src/agents/model-fallback.run-embedded.e2e.test.ts b/src/agents/model-fallback.run-embedded.e2e.test.ts index 41d5d8ef5a7..4232ab75cf4 100644 --- a/src/agents/model-fallback.run-embedded.e2e.test.ts +++ b/src/agents/model-fallback.run-embedded.e2e.test.ts @@ -202,8 +202,9 @@ async function runEmbeddedFallback(params: { sessionKey: string; runId: string; abortSignal?: AbortSignal; + config?: OpenClawConfig; }) { - const cfg = makeConfig(); + const cfg = params.config ??
makeConfig(); return await runWithModelFallback({ cfg, provider: "openai", @@ -321,8 +322,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { expect(typeof usageStats["groq:p1"]?.lastUsed).toBe("number"); expectOpenAiThenGroqAttemptOrder(); - expect(computeBackoffMock).toHaveBeenCalledTimes(1); - expect(sleepWithAbortMock).toHaveBeenCalledTimes(1); + expect(computeBackoffMock).not.toHaveBeenCalled(); + expect(sleepWithAbortMock).not.toHaveBeenCalled(); }); }); @@ -358,8 +359,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { expect(usageStats["groq:p1"]?.disabledUntil).toBeUndefined(); expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2); - expect(computeBackoffMock).toHaveBeenCalledTimes(2); - expect(sleepWithAbortMock).toHaveBeenCalledTimes(2); + expect(computeBackoffMock).not.toHaveBeenCalled(); + expect(sleepWithAbortMock).not.toHaveBeenCalled(); }); }); @@ -421,8 +422,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { const usageStats = await readUsageStats(agentDir); expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number"); expect(usageStats["openai:p1"]?.failureCounts).toMatchObject({ overloaded: 2 }); - expect(computeBackoffMock).toHaveBeenCalledTimes(1); - expect(sleepWithAbortMock).toHaveBeenCalledTimes(1); + expect(computeBackoffMock).not.toHaveBeenCalled(); + expect(sleepWithAbortMock).not.toHaveBeenCalled(); }); }); @@ -466,6 +467,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { sessionKey: "agent:test:overloaded-backoff-abort", runId: "run:overloaded-backoff-abort", abortSignal: controller.signal, + config: { + ...makeConfig(), + auth: { cooldowns: { overloadedBackoffMs: 321 } }, + }, }), ).rejects.toMatchObject({ name: "AbortError", @@ -483,7 +488,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { it("caps overloaded profile rotations and escalates to cross-provider fallback 
(#58348)", async () => { // When a provider has multiple auth profiles and all return overloaded_error, // the runner should not exhaust all profiles before falling back. It should - // cap profile rotations at MAX_OVERLOAD_PROFILE_ROTATIONS (1) and escalate + // cap profile rotations at overloadedProfileRotations=1 and escalate // to cross-provider fallback immediately. await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { // Write auth store with multiple profiles for openai @@ -549,7 +554,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { expect(result.model).toBe("mock-2"); expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok"); - // With MAX_OVERLOAD_PROFILE_ROTATIONS=1, we expect: + // With overloadedProfileRotations=1, we expect: // - 1 initial openai attempt (p1) // - 1 rotation to p2 (capped) // - escalation to groq (1 attempt) @@ -564,4 +569,73 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => { expect(groqAttempts.length).toBe(1); }); }); + + it("respects overloadedProfileRotations=0 and falls back immediately", async () => { + await withAgentWorkspace(async ({ agentDir, workspaceDir }) => { + await fs.writeFile( + path.join(agentDir, "auth-profiles.json"), + JSON.stringify({ + version: 1, + profiles: { + "openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" }, + "openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" }, + "groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" }, + }, + usageStats: { + "openai:p1": { lastUsed: 1 }, + "openai:p2": { lastUsed: 2 }, + "groq:p1": { lastUsed: 3 }, + }, + }), + ); + + runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => { + const attemptParams = params as { provider: string }; + if (attemptParams.provider === "openai") { + return makeEmbeddedRunnerAttempt({ + assistantTexts: [], + lastAssistant: buildEmbeddedRunnerAssistant({ + provider: "openai", + model: "mock-1", 
+ stopReason: "error", + errorMessage: OVERLOADED_ERROR_PAYLOAD, + }), + }); + } + if (attemptParams.provider === "groq") { + return makeEmbeddedRunnerAttempt({ + assistantTexts: ["fallback ok"], + lastAssistant: buildEmbeddedRunnerAssistant({ + provider: "groq", + model: "mock-2", + stopReason: "stop", + content: [{ type: "text", text: "fallback ok" }], + }), + }); + } + throw new Error(`Unexpected provider ${attemptParams.provider}`); + }); + + const result = await runEmbeddedFallback({ + agentDir, + workspaceDir, + sessionKey: "agent:test:overloaded-no-rotation", + runId: "run:overloaded-no-rotation", + config: { + ...makeConfig(), + auth: { cooldowns: { overloadedProfileRotations: 0 } }, + }, + }); + + expect(result.provider).toBe("groq"); + const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter( + (call) => (call[0] as { provider?: string })?.provider === "openai", + ); + const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter( + (call) => (call[0] as { provider?: string })?.provider === "groq", + ); + expect(openaiAttempts.length).toBe(1); + expect(groqAttempts.length).toBe(1); + }); + }); }); diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 573e5f73de1..58b91f1a1d7 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -58,22 +58,27 @@ const installRunEmbeddedMocks = () => { vi.doMock("./pi-embedded-runner/run/attempt.js", () => ({ runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params), })); - vi.doMock("../plugins/provider-runtime.js", () => ({ - prepareProviderRuntimeAuth: async (params: { - provider: string; - context: { apiKey: string }; - }) => { - if (params.provider !== "github-copilot") { - return undefined; - } - const token = await 
resolveCopilotApiTokenMock(params.context.apiKey); - return { - apiKey: token.token, - baseUrl: token.baseUrl, - expiresAt: token.expiresAt, - }; - }, - })); + vi.doMock("../plugins/provider-runtime.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + prepareProviderRuntimeAuth: async (params: { + provider: string; + context: { apiKey: string }; + }) => { + if (params.provider !== "github-copilot") { + return undefined; + } + const token = await resolveCopilotApiTokenMock(params.context.apiKey); + return { + apiKey: token.token, + baseUrl: token.baseUrl, + expiresAt: token.expiresAt, + }; + }, + resolveProviderCapabilitiesWithPlugin: vi.fn(() => undefined), + }; + }); vi.doMock("../infra/backoff.js", () => ({ computeBackoff: ( policy: { initialMs: number; maxMs: number; factor: number; jitter: number }, @@ -188,8 +193,26 @@ const makeAttempt = (overrides: Partial): EmbeddedRunA ...overrides, }); -const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawConfig => +const makeConfig = (opts?: { + fallbacks?: string[]; + apiKey?: string; + overloadedBackoffMs?: number; + overloadedProfileRotations?: number; +}): OpenClawConfig => ({ + auth: + opts?.overloadedBackoffMs != null || opts?.overloadedProfileRotations != null + ? { + cooldowns: { + ...(opts?.overloadedBackoffMs != null + ? { overloadedBackoffMs: opts.overloadedBackoffMs } + : {}), + ...(opts?.overloadedProfileRotations != null + ? 
{ overloadedProfileRotations: opts.overloadedProfileRotations } + : {}), + }, + } + : undefined, agents: { defaults: { model: { @@ -379,6 +402,7 @@ async function runAutoPinnedOpenAiTurn(params: { sessionKey: string; runId: string; authProfileId?: string; + config?: OpenClawConfig; }) { await runEmbeddedPiAgentInline({ sessionId: "session:test", @@ -386,7 +410,7 @@ async function runAutoPinnedOpenAiTurn(params: { sessionFile: path.join(params.workspaceDir, "session.jsonl"), workspaceDir: params.workspaceDir, agentDir: params.agentDir, - config: makeConfig(), + config: params.config ?? makeConfig(), prompt: "hello", provider: "openai", model: "mock-1", @@ -423,6 +447,7 @@ async function runAutoPinnedRotationCase(params: { errorMessage: string; sessionKey: string; runId: string; + config?: OpenClawConfig; }) { runEmbeddedAttemptMock.mockReset(); return withAgentWorkspace(async ({ agentDir, workspaceDir }) => { @@ -433,6 +458,7 @@ async function runAutoPinnedRotationCase(params: { workspaceDir, sessionKey: params.sessionKey, runId: params.runId, + config: params.config, }); expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2); @@ -445,6 +471,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: { errorMessage: string; sessionKey: string; runId: string; + config?: OpenClawConfig; }) { runEmbeddedAttemptMock.mockReset(); return withAgentWorkspace(async ({ agentDir, workspaceDir }) => { @@ -455,6 +482,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: { workspaceDir, sessionKey: params.sessionKey, runId: params.runId, + config: params.config, }); expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2); @@ -786,18 +814,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { }); expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number"); - expect(computeBackoffMock).toHaveBeenCalledTimes(1); - expect(computeBackoffMock).toHaveBeenCalledWith( - 
expect.objectContaining({ - initialMs: 250, - maxMs: 1500, - factor: 2, - jitter: 0.2, - }), - 1, - ); - expect(sleepWithAbortMock).toHaveBeenCalledTimes(1); - expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined); + expect(computeBackoffMock).not.toHaveBeenCalled(); + expect(sleepWithAbortMock).not.toHaveBeenCalled(); }); it("logs structured failover decision metadata for overloaded assistant rotation", async () => { @@ -863,16 +881,19 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { }); expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number"); - expect(computeBackoffMock).toHaveBeenCalledTimes(1); - expect(computeBackoffMock).toHaveBeenCalledWith( - expect.objectContaining({ - initialMs: 250, - maxMs: 1500, - factor: 2, - jitter: 0.2, - }), - 1, - ); + expect(computeBackoffMock).not.toHaveBeenCalled(); + expect(sleepWithAbortMock).not.toHaveBeenCalled(); + }); + + it("uses configured overload backoff before rotating profiles", async () => { + const { usageStats } = await runAutoPinnedRotationCase({ + errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', + sessionKey: "agent:test:overloaded-configured-backoff", + runId: "run:overloaded-configured-backoff", + config: makeConfig({ overloadedBackoffMs: 321 }), + }); + expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); + expect(computeBackoffMock).not.toHaveBeenCalled(); expect(sleepWithAbortMock).toHaveBeenCalledTimes(1); expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 4d13eff49ac..debd19b5740 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -5,7 +5,7 @@ import { ensureContextEnginesInitialized, resolveContextEngine, } from "../../context-engine/index.js"; -import { computeBackoff, sleepWithAbort } from 
"../../infra/backoff.js"; +import { sleepWithAbort } from "../../infra/backoff.js"; import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; import { enqueueCommandInLane } from "../../process/command-queue.js"; import { sanitizeForLog } from "../../terminal/ansi.js"; @@ -76,10 +76,10 @@ import { buildErrorAgentMeta, buildUsageAgentMetaFields, createCompactionDiagId, - MAX_OVERLOAD_PROFILE_ROTATIONS, - OVERLOAD_FAILOVER_BACKOFF_POLICY, resolveActiveErrorContext, resolveMaxRunRetryIterations, + resolveOverloadFailoverBackoffMs, + resolveOverloadProfileRotationLimit, type RuntimeAuthState, scrubAnthropicRefusalMagic, } from "./run/helpers.js"; @@ -317,9 +317,10 @@ export async function runEmbeddedPiAgent( let lastRunPromptUsage: ReturnType | undefined; let autoCompactionCount = 0; let runLoopIterations = 0; - let overloadFailoverAttempts = 0; let overloadProfileRotations = 0; let timeoutCompactionAttempts = 0; + const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config); + const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config); const maybeMarkAuthProfileFailure = async (failure: { profileId?: string; reason?: AuthProfileFailureReason | null; @@ -352,16 +353,14 @@ export async function runEmbeddedPiAgent( return failoverReason; }; const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => { - if (reason !== "overloaded") { + if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) { return; } - overloadFailoverAttempts += 1; - const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts); log.warn( - `overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`, + `overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`, ); try { - await sleepWithAbort(delayMs, params.abortSignal); + await sleepWithAbort(overloadFailoverBackoffMs, 
params.abortSignal); } catch (err) { if (params.abortSignal?.aborted) { const abortErr = new Error("Operation aborted", { cause: err }); @@ -1199,15 +1198,15 @@ export async function runEmbeddedPiAgent( } } - // For overloaded errors, check the rotation cap *before* calling - // advanceAuthProfile() to avoid a wasted auth-profile setup cycle. - // advanceAuthProfile() runs applyApiKeyInfo() which initialises the - // next profile — costly work that is pointless when we already know - // we will escalate to cross-provider fallback. + // For overloaded errors, check the configured rotation cap *before* + // calling advanceAuthProfile() to avoid a wasted auth-profile setup + // cycle. advanceAuthProfile() runs applyApiKeyInfo() which + // initializes the next profile — costly work that is pointless when + // we already know we will escalate to cross-provider fallback. // See: https://github.com/openclaw/openclaw/issues/58348 if (assistantFailoverReason === "overloaded") { overloadProfileRotations += 1; - if (overloadProfileRotations > MAX_OVERLOAD_PROFILE_ROTATIONS && fallbackConfigured) { + if (overloadProfileRotations > overloadProfileRotationLimit && fallbackConfigured) { const status = resolveFailoverStatus("overloaded"); log.warn( `overload profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`, diff --git a/src/agents/pi-embedded-runner/run/helpers.ts b/src/agents/pi-embedded-runner/run/helpers.ts index 79d42db2388..a61b0254d13 100644 --- a/src/agents/pi-embedded-runner/run/helpers.ts +++ b/src/agents/pi-embedded-runner/run/helpers.ts @@ -1,4 +1,4 @@ -import { type BackoffPolicy } from "../../../infra/backoff.js"; +import type { OpenClawConfig } from "../../../config/config.js"; import { generateSecureToken } from "../../../infra/secure-random.js"; import { derivePromptTokens, normalizeUsage } from "../../usage.js"; import type { EmbeddedPiAgentMeta } from 
"../types.js"; @@ -25,22 +25,16 @@ export const RUNTIME_AUTH_REFRESH_MARGIN_MS = 5 * 60 * 1000; export const RUNTIME_AUTH_REFRESH_RETRY_MS = 60 * 1000; export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000; -// Keep overload pacing noticeable enough to avoid tight retry bursts, but short -// enough that fallback still feels responsive within a single turn. -export const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = { - initialMs: 250, - maxMs: 1_500, - factor: 2, - jitter: 0.2, -}; +export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0; +export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1; -// Maximum number of auth-profile rotations to attempt for overloaded errors -// before escalating to cross-provider fallback. Overloaded is a provider-level -// capacity issue — rotating auth profiles on the same provider is unlikely to -// help and wastes time with backoff delays. A cap of 1 allows one probe attempt -// (in case the overload was transient) before giving up on the provider. -// See: https://github.com/openclaw/openclaw/issues/58348 -export const MAX_OVERLOAD_PROFILE_ROTATIONS = 1; +export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number { + return cfg?.auth?.cooldowns?.overloadedBackoffMs ?? DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS; +} + +export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): number { + return cfg?.auth?.cooldowns?.overloadedProfileRotations ?? 
DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS; +} const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL"; const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)"; diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index ae2d649c8a7..35372243a54 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -789,6 +789,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = { type: "number", exclusiveMinimum: 0, }, + overloadedProfileRotations: { + type: "integer", + minimum: 0, + maximum: 9007199254740991, + }, + overloadedBackoffMs: { + type: "integer", + minimum: 0, + maximum: 9007199254740991, + }, }, additionalProperties: false, }, @@ -13645,6 +13655,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = { help: "Failure window (hours) for backoff counters (default: 24).", tags: ["auth", "access"], }, + "auth.cooldowns.overloadedProfileRotations": { + label: "Overloaded Profile Rotations", + help: "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).", + tags: ["auth", "access", "storage"], + }, + "auth.cooldowns.overloadedBackoffMs": { + label: "Overloaded Backoff (ms)", + help: "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).", + tags: ["auth", "access", "reliability", "storage"], + }, "agents.defaults.models": { label: "Models", help: "Configured model catalog (keys are full provider/model IDs).", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 9aff7687a96..05f45e280b3 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -801,6 +801,10 @@ export const FIELD_HELP: Record = { "Optional per-provider overrides for billing backoff (hours).", "auth.cooldowns.billingMaxHours": "Cap (hours) for billing backoff (default: 24).", "auth.cooldowns.failureWindowHours": "Failure window 
(hours) for backoff counters (default: 24).", + "auth.cooldowns.overloadedProfileRotations": + "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).", + "auth.cooldowns.overloadedBackoffMs": + "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).", "agents.defaults.workspace": "Default workspace path exposed to agent runtime tools for filesystem context and repo-aware behavior. Set this explicitly when running from wrappers so path resolution stays deterministic.", "agents.defaults.bootstrapMaxChars": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 94a09ce87d1..4ccce2268a1 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -471,6 +471,8 @@ export const FIELD_LABELS: Record = { "auth.cooldowns.billingBackoffHoursByProvider": "Billing Backoff Overrides", "auth.cooldowns.billingMaxHours": "Billing Backoff Cap (hours)", "auth.cooldowns.failureWindowHours": "Failover Window (hours)", + "auth.cooldowns.overloadedProfileRotations": "Overloaded Profile Rotations", + "auth.cooldowns.overloadedBackoffMs": "Overloaded Backoff (ms)", "agents.defaults.models": "Models", "agents.defaults.model.primary": "Primary Model", "agents.defaults.model.fallbacks": "Model Fallbacks", diff --git a/src/config/types.auth.ts b/src/config/types.auth.ts index 80b2111ae91..010295c86b3 100644 --- a/src/config/types.auth.ts +++ b/src/config/types.auth.ts @@ -26,5 +26,15 @@ export type AuthConfig = { * this window, counters reset. Default: 24. */ failureWindowHours?: number; + /** + * Maximum same-provider auth-profile rotations to allow for overloaded + * errors before escalating to cross-provider model fallback. Default: 1. + */ + overloadedProfileRotations?: number; + /** + * Fixed delay in milliseconds before retrying an overloaded provider/profile rotation. + * Default: 0.
+ */ + overloadedBackoffMs?: number; }; }; diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 73c4bfe8076..b27a98765fb 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -450,6 +450,8 @@ export const OpenClawSchema = z billingBackoffHoursByProvider: z.record(z.string(), z.number().positive()).optional(), billingMaxHours: z.number().positive().optional(), failureWindowHours: z.number().positive().optional(), + overloadedProfileRotations: z.number().int().nonnegative().optional(), + overloadedBackoffMs: z.number().int().nonnegative().optional(), }) .strict() .optional(),