fix: make overload failover configurable

This commit is contained in:
Peter Steinberger 2026-03-31 21:33:35 +01:00
parent 2a60e34f2a
commit 418fa12dfa
No known key found for this signature in database
14 changed files with 255 additions and 81 deletions

View File

@ -105,6 +105,7 @@ Docs: https://docs.openclaw.ai
- iOS/Live Activities: mark the `ActivityKit` import in `LiveActivityManager.swift` as `@preconcurrency` so Xcode 26.4 / Swift 6 builds stop failing on strict concurrency checks. (#57180) Thanks @ngutman.
- LINE/ACP: add current-conversation binding and inbound binding-routing parity so `/acp spawn ... --thread here`, configured ACP bindings, and active conversation-bound ACP sessions work on LINE like the other conversation channels.
- LINE/markdown: preserve underscores inside Latin, Cyrillic, and CJK words when stripping markdown, while still removing standalone `_italic_` markers on the shared text-runtime path used by LINE and TTS. (#47465) Thanks @jackjin1997.
- Agents/failover: make overloaded same-provider retry count and retry delay configurable via `auth.cooldowns`, default to one retry with no delay, and document the model-fallback behavior.
## 2026.3.31-beta.1

View File

@ -7855,6 +7855,39 @@
"help": "Failure window (hours) for backoff counters (default: 24).",
"hasChildren": false
},
{
"path": "auth.cooldowns.overloadedBackoffMs",
"kind": "core",
"type": "integer",
"required": false,
"deprecated": false,
"sensitive": false,
"tags": [
"access",
"auth",
"reliability",
"storage"
],
"label": "Overloaded Backoff (ms)",
"help": "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
"hasChildren": false
},
{
"path": "auth.cooldowns.overloadedProfileRotations",
"kind": "core",
"type": "integer",
"required": false,
"deprecated": false,
"sensitive": false,
"tags": [
"access",
"auth",
"storage"
],
"label": "Overloaded Profile Rotations",
"help": "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
"hasChildren": false
},
{
"path": "auth.order",
"kind": "core",

View File

@ -1,4 +1,4 @@
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5718}
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5720}
{"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true}
{"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true}
{"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
@ -697,6 +697,8 @@
{"recordType":"path","path":"auth.cooldowns.billingBackoffHoursByProvider.*","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
{"recordType":"path","path":"auth.cooldowns.billingMaxHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","performance"],"label":"Billing Backoff Cap (hours)","help":"Cap (hours) for billing backoff (default: 24).","hasChildren":false}
{"recordType":"path","path":"auth.cooldowns.failureWindowHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Failover Window (hours)","help":"Failure window (hours) for backoff counters (default: 24).","hasChildren":false}
{"recordType":"path","path":"auth.cooldowns.overloadedBackoffMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","reliability","storage"],"label":"Overloaded Backoff (ms)","help":"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).","hasChildren":false}
{"recordType":"path","path":"auth.cooldowns.overloadedProfileRotations","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","storage"],"label":"Overloaded Profile Rotations","help":"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).","hasChildren":false}
{"recordType":"path","path":"auth.order","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Auth Profile Order","help":"Ordered auth profile IDs per provider (used for automatic failover).","hasChildren":true}
{"recordType":"path","path":"auth.order.*","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
{"recordType":"path","path":"auth.order.*.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}

View File

@ -129,6 +129,8 @@ Defaults:
- Billing backoff starts at **5 hours**, doubles per billing failure, and caps at **24 hours**.
- Backoff counters reset if the profile hasn't failed for **24 hours** (configurable).
- Overloaded retries allow **1 same-provider profile rotation** before model fallback.
- Overloaded retries use **0 ms backoff** by default.
## Model fallback
@ -136,6 +138,11 @@ If all profiles for a provider fail, OpenClaw moves to the next model in
`agents.defaults.model.fallbacks`. This applies to auth failures, rate limits, and
timeouts that exhausted profile rotation (other errors do not advance fallback).
Overloaded errors are handled more aggressively than billing cooldowns. By default,
OpenClaw allows one same-provider auth-profile retry, then switches to the next
configured model fallback without waiting. Tune this with
`auth.cooldowns.overloadedProfileRotations` and `auth.cooldowns.overloadedBackoffMs`.
When a run starts with a model override (hooks or CLI), fallbacks still end at
`agents.defaults.model.primary` after trying any configured fallbacks.
@ -146,6 +153,7 @@ See [Gateway configuration](/gateway/configuration) for:
- `auth.profiles` / `auth.order`
- `auth.cooldowns.billingBackoffHours` / `auth.cooldowns.billingBackoffHoursByProvider`
- `auth.cooldowns.billingMaxHours` / `auth.cooldowns.failureWindowHours`
- `auth.cooldowns.overloadedProfileRotations` / `auth.cooldowns.overloadedBackoffMs`
- `agents.defaults.model.primary` / `agents.defaults.model.fallbacks`
- `agents.defaults.imageModel` routing

View File

@ -3029,6 +3029,8 @@ Notes:
billingBackoffHoursByProvider: { anthropic: 3, openai: 8 },
billingMaxHours: 24,
failureWindowHours: 24,
overloadedProfileRotations: 1,
overloadedBackoffMs: 0,
},
},
}
@ -3038,6 +3040,8 @@ Notes:
- `billingBackoffHoursByProvider`: optional per-provider overrides for billing backoff hours.
- `billingMaxHours`: cap in hours for billing backoff exponential growth (default: `24`).
- `failureWindowHours`: rolling window in hours used for backoff counters (default: `24`).
- `overloadedProfileRotations`: maximum same-provider auth-profile rotations for overloaded errors before switching to model fallback (default: `1`).
- `overloadedBackoffMs`: fixed delay before retrying an overloaded provider/profile rotation (default: `0`).
---

View File

@ -202,8 +202,9 @@ async function runEmbeddedFallback(params: {
sessionKey: string;
runId: string;
abortSignal?: AbortSignal;
config?: OpenClawConfig;
}) {
const cfg = makeConfig();
const cfg = params.config ?? makeConfig();
return await runWithModelFallback({
cfg,
provider: "openai",
@ -321,8 +322,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(typeof usageStats["groq:p1"]?.lastUsed).toBe("number");
expectOpenAiThenGroqAttemptOrder();
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
});
@ -358,8 +359,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(usageStats["groq:p1"]?.disabledUntil).toBeUndefined();
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
expect(computeBackoffMock).toHaveBeenCalledTimes(2);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(2);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
});
@ -421,8 +422,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
const usageStats = await readUsageStats(agentDir);
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
expect(usageStats["openai:p1"]?.failureCounts).toMatchObject({ overloaded: 2 });
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
});
@ -466,6 +467,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
sessionKey: "agent:test:overloaded-backoff-abort",
runId: "run:overloaded-backoff-abort",
abortSignal: controller.signal,
config: {
...makeConfig(),
auth: { cooldowns: { overloadedBackoffMs: 321 } },
},
}),
).rejects.toMatchObject({
name: "AbortError",
@ -483,7 +488,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
it("caps overloaded profile rotations and escalates to cross-provider fallback (#58348)", async () => {
// When a provider has multiple auth profiles and all return overloaded_error,
// the runner should not exhaust all profiles before falling back. It should
// cap profile rotations at MAX_OVERLOAD_PROFILE_ROTATIONS (1) and escalate
// cap profile rotations at overloadedProfileRotations=1 and escalate
// to cross-provider fallback immediately.
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
// Write auth store with multiple profiles for openai
@ -549,7 +554,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(result.model).toBe("mock-2");
expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
// With MAX_OVERLOAD_PROFILE_ROTATIONS=1, we expect:
// With overloadedProfileRotations=1, we expect:
// - 1 initial openai attempt (p1)
// - 1 rotation to p2 (capped)
// - escalation to groq (1 attempt)
@ -564,4 +569,73 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(groqAttempts.length).toBe(1);
});
});
it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
  // Two openai profiles are available, so a same-provider rotation would be
  // possible; with the rotation limit set to 0 the run is expected to reach
  // groq after a single openai attempt.
  const authStore = {
    version: 1,
    profiles: {
      "openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
      "openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
      "groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
    },
    usageStats: {
      "openai:p1": { lastUsed: 1 },
      "openai:p2": { lastUsed: 2 },
      "groq:p1": { lastUsed: 3 },
    },
  };

  // Counts how many recorded attempt calls targeted the given provider.
  const countAttemptsFor = (providerId: string): number =>
    runEmbeddedAttemptMock.mock.calls.filter(
      (call) => (call[0] as { provider?: string })?.provider === providerId,
    ).length;

  await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
    const authStorePath = path.join(agentDir, "auth-profiles.json");
    await fs.writeFile(authStorePath, JSON.stringify(authStore));

    // Every openai attempt reports an overloaded error; groq answers normally.
    runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
      const { provider: requestedProvider } = params as { provider: string };
      switch (requestedProvider) {
        case "openai":
          return makeEmbeddedRunnerAttempt({
            assistantTexts: [],
            lastAssistant: buildEmbeddedRunnerAssistant({
              provider: "openai",
              model: "mock-1",
              stopReason: "error",
              errorMessage: OVERLOADED_ERROR_PAYLOAD,
            }),
          });
        case "groq":
          return makeEmbeddedRunnerAttempt({
            assistantTexts: ["fallback ok"],
            lastAssistant: buildEmbeddedRunnerAssistant({
              provider: "groq",
              model: "mock-2",
              stopReason: "stop",
              content: [{ type: "text", text: "fallback ok" }],
            }),
          });
        default:
          throw new Error(`Unexpected provider ${requestedProvider}`);
      }
    });

    const outcome = await runEmbeddedFallback({
      agentDir,
      workspaceDir,
      sessionKey: "agent:test:overloaded-no-rotation",
      runId: "run:overloaded-no-rotation",
      config: {
        ...makeConfig(),
        auth: { cooldowns: { overloadedProfileRotations: 0 } },
      },
    });

    expect(outcome.provider).toBe("groq");

    // Exactly one attempt per provider: no second openai profile was tried.
    const openaiAttemptCount = countAttemptsFor("openai");
    const groqAttemptCount = countAttemptsFor("groq");
    expect(openaiAttemptCount).toBe(1);
    expect(groqAttemptCount).toBe(1);
  });
});
});

View File

@ -58,22 +58,27 @@ const installRunEmbeddedMocks = () => {
vi.doMock("./pi-embedded-runner/run/attempt.js", () => ({
runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params),
}));
vi.doMock("../plugins/provider-runtime.js", () => ({
prepareProviderRuntimeAuth: async (params: {
provider: string;
context: { apiKey: string };
}) => {
if (params.provider !== "github-copilot") {
return undefined;
}
const token = await resolveCopilotApiTokenMock(params.context.apiKey);
return {
apiKey: token.token,
baseUrl: token.baseUrl,
expiresAt: token.expiresAt,
};
},
}));
vi.doMock("../plugins/provider-runtime.js", async (importOriginal) => {
  // Partial mock: keep every real export and override only the two entries below.
  const realModule = await importOriginal<typeof import("../plugins/provider-runtime.js")>();

  // Only github-copilot gets runtime auth here; the token comes from the
  // Copilot token mock. Every other provider resolves to undefined.
  const prepareRuntimeAuthStub = async (params: {
    provider: string;
    context: { apiKey: string };
  }) => {
    if (params.provider === "github-copilot") {
      const token = await resolveCopilotApiTokenMock(params.context.apiKey);
      return {
        apiKey: token.token,
        baseUrl: token.baseUrl,
        expiresAt: token.expiresAt,
      };
    }
    return undefined;
  };

  return {
    ...realModule,
    prepareProviderRuntimeAuth: prepareRuntimeAuthStub,
    // Stubbed to always return undefined.
    resolveProviderCapabilitiesWithPlugin: vi.fn(() => undefined),
  };
});
vi.doMock("../infra/backoff.js", () => ({
computeBackoff: (
policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
@ -188,8 +193,26 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
...overrides,
});
const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawConfig =>
const makeConfig = (opts?: {
fallbacks?: string[];
apiKey?: string;
overloadedBackoffMs?: number;
overloadedProfileRotations?: number;
}): OpenClawConfig =>
({
auth:
opts?.overloadedBackoffMs != null || opts?.overloadedProfileRotations != null
? {
cooldowns: {
...(opts?.overloadedBackoffMs != null
? { overloadedBackoffMs: opts.overloadedBackoffMs }
: {}),
...(opts?.overloadedProfileRotations != null
? { overloadedProfileRotations: opts.overloadedProfileRotations }
: {}),
},
}
: undefined,
agents: {
defaults: {
model: {
@ -379,6 +402,7 @@ async function runAutoPinnedOpenAiTurn(params: {
sessionKey: string;
runId: string;
authProfileId?: string;
config?: OpenClawConfig;
}) {
await runEmbeddedPiAgentInline({
sessionId: "session:test",
@ -386,7 +410,7 @@ async function runAutoPinnedOpenAiTurn(params: {
sessionFile: path.join(params.workspaceDir, "session.jsonl"),
workspaceDir: params.workspaceDir,
agentDir: params.agentDir,
config: makeConfig(),
config: params.config ?? makeConfig(),
prompt: "hello",
provider: "openai",
model: "mock-1",
@ -423,6 +447,7 @@ async function runAutoPinnedRotationCase(params: {
errorMessage: string;
sessionKey: string;
runId: string;
config?: OpenClawConfig;
}) {
runEmbeddedAttemptMock.mockReset();
return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@ -433,6 +458,7 @@ async function runAutoPinnedRotationCase(params: {
workspaceDir,
sessionKey: params.sessionKey,
runId: params.runId,
config: params.config,
});
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@ -445,6 +471,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
errorMessage: string;
sessionKey: string;
runId: string;
config?: OpenClawConfig;
}) {
runEmbeddedAttemptMock.mockReset();
return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@ -455,6 +482,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
workspaceDir,
sessionKey: params.sessionKey,
runId: params.runId,
config: params.config,
});
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@ -786,18 +814,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
});
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).toHaveBeenCalledWith(
expect.objectContaining({
initialMs: 250,
maxMs: 1500,
factor: 2,
jitter: 0.2,
}),
1,
);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
it("logs structured failover decision metadata for overloaded assistant rotation", async () => {
@ -863,16 +881,19 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
});
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).toHaveBeenCalledWith(
expect.objectContaining({
initialMs: 250,
maxMs: 1500,
factor: 2,
jitter: 0.2,
}),
1,
);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
it("uses configured overload backoff before rotating profiles", async () => {
  const overloadedPayload =
    '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}';
  const configuredDelayMs = 321;

  const { usageStats } = await runAutoPinnedRotationCase({
    errorMessage: overloadedPayload,
    sessionKey: "agent:test:overloaded-configured-backoff",
    runId: "run:overloaded-configured-backoff",
    config: makeConfig({ overloadedBackoffMs: configuredDelayMs }),
  });

  // The second profile was used, i.e. rotation happened.
  const rotatedProfileStats = usageStats["openai:p2"];
  expect(typeof rotatedProfileStats?.lastUsed).toBe("number");

  // The configured delay is passed to sleepWithAbort as-is, exactly once;
  // computeBackoff is never consulted.
  expect(computeBackoffMock).not.toHaveBeenCalled();
  expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
  expect(sleepWithAbortMock).toHaveBeenCalledWith(configuredDelayMs, undefined);
});

View File

@ -5,7 +5,7 @@ import {
ensureContextEnginesInitialized,
resolveContextEngine,
} from "../../context-engine/index.js";
import { computeBackoff, sleepWithAbort } from "../../infra/backoff.js";
import { sleepWithAbort } from "../../infra/backoff.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { sanitizeForLog } from "../../terminal/ansi.js";
@ -76,10 +76,10 @@ import {
buildErrorAgentMeta,
buildUsageAgentMetaFields,
createCompactionDiagId,
MAX_OVERLOAD_PROFILE_ROTATIONS,
OVERLOAD_FAILOVER_BACKOFF_POLICY,
resolveActiveErrorContext,
resolveMaxRunRetryIterations,
resolveOverloadFailoverBackoffMs,
resolveOverloadProfileRotationLimit,
type RuntimeAuthState,
scrubAnthropicRefusalMagic,
} from "./run/helpers.js";
@ -317,9 +317,10 @@ export async function runEmbeddedPiAgent(
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
let autoCompactionCount = 0;
let runLoopIterations = 0;
let overloadFailoverAttempts = 0;
let overloadProfileRotations = 0;
let timeoutCompactionAttempts = 0;
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
const maybeMarkAuthProfileFailure = async (failure: {
profileId?: string;
reason?: AuthProfileFailureReason | null;
@ -352,16 +353,14 @@ export async function runEmbeddedPiAgent(
return failoverReason;
};
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
if (reason !== "overloaded") {
if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) {
return;
}
overloadFailoverAttempts += 1;
const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
log.warn(
`overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
`overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`,
);
try {
await sleepWithAbort(delayMs, params.abortSignal);
await sleepWithAbort(overloadFailoverBackoffMs, params.abortSignal);
} catch (err) {
if (params.abortSignal?.aborted) {
const abortErr = new Error("Operation aborted", { cause: err });
@ -1199,15 +1198,15 @@ export async function runEmbeddedPiAgent(
}
}
// For overloaded errors, check the rotation cap *before* calling
// advanceAuthProfile() to avoid a wasted auth-profile setup cycle.
// advanceAuthProfile() runs applyApiKeyInfo() which initialises the
// next profile — costly work that is pointless when we already know
// we will escalate to cross-provider fallback.
// For overloaded errors, check the configured rotation cap *before*
// calling advanceAuthProfile() to avoid a wasted auth-profile setup
// cycle. advanceAuthProfile() runs applyApiKeyInfo() which
// initializes the next profile — costly work that is pointless when
// we already know we will escalate to cross-provider fallback.
// See: https://github.com/openclaw/openclaw/issues/58348
if (assistantFailoverReason === "overloaded") {
overloadProfileRotations += 1;
if (overloadProfileRotations > MAX_OVERLOAD_PROFILE_ROTATIONS && fallbackConfigured) {
if (overloadProfileRotations > overloadProfileRotationLimit && fallbackConfigured) {
const status = resolveFailoverStatus("overloaded");
log.warn(
`overload profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`,

View File

@ -1,4 +1,4 @@
import { type BackoffPolicy } from "../../../infra/backoff.js";
import type { OpenClawConfig } from "../../../config/config.js";
import { generateSecureToken } from "../../../infra/secure-random.js";
import { derivePromptTokens, normalizeUsage } from "../../usage.js";
import type { EmbeddedPiAgentMeta } from "../types.js";
@ -25,22 +25,16 @@ export const RUNTIME_AUTH_REFRESH_MARGIN_MS = 5 * 60 * 1000;
export const RUNTIME_AUTH_REFRESH_RETRY_MS = 60 * 1000;
export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000;
// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
// enough that fallback still feels responsive within a single turn.
export const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
initialMs: 250,
maxMs: 1_500,
factor: 2,
jitter: 0.2,
};
export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0;
export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
// Maximum number of auth-profile rotations to attempt for overloaded errors
// before escalating to cross-provider fallback. Overloaded is a provider-level
// capacity issue — rotating auth profiles on the same provider is unlikely to
// help and wastes time with backoff delays. A cap of 1 allows one probe attempt
// (in case the overload was transient) before giving up on the provider.
// See: https://github.com/openclaw/openclaw/issues/58348
export const MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
/**
 * Resolves the fixed delay, in milliseconds, to wait before an overloaded
 * failover retry.
 *
 * Reads `auth.cooldowns.overloadedBackoffMs` from the config. When the config
 * itself or any level of that path is null or undefined, the result is
 * `DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS`. An explicit `0` is kept as-is.
 *
 * @param cfg - Optional OpenClaw config to read the override from.
 * @returns The configured delay, or the default when none is set.
 */
export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number {
  const configuredDelayMs = cfg?.auth?.cooldowns?.overloadedBackoffMs;
  if (configuredDelayMs == null) {
    return DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS;
  }
  return configuredDelayMs;
}
/**
 * Resolves how many same-provider auth-profile rotations are allowed for
 * overloaded errors before escalating to cross-provider model fallback.
 *
 * Reads `auth.cooldowns.overloadedProfileRotations` from the config. When the
 * config itself or any level of that path is null or undefined, the result is
 * `DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS`. An explicit `0` is kept as-is.
 *
 * Rationale for keeping the default small: overloaded is a provider-level
 * capacity issue, so rotating auth profiles on the same provider is unlikely
 * to help. One rotation allows a single probe in case the overload was
 * transient. See: https://github.com/openclaw/openclaw/issues/58348
 *
 * @param cfg - Optional OpenClaw config to read the override from.
 * @returns The configured rotation limit, or the default when none is set.
 */
export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): number {
  const configuredLimit = cfg?.auth?.cooldowns?.overloadedProfileRotations;
  if (configuredLimit == null) {
    return DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS;
  }
  return configuredLimit;
}
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";

View File

@ -789,6 +789,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
type: "number",
exclusiveMinimum: 0,
},
overloadedProfileRotations: {
type: "integer",
minimum: 0,
maximum: 9007199254740991,
},
overloadedBackoffMs: {
type: "integer",
minimum: 0,
maximum: 9007199254740991,
},
},
additionalProperties: false,
},
@ -13645,6 +13655,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
help: "Failure window (hours) for backoff counters (default: 24).",
tags: ["auth", "access"],
},
"auth.cooldowns.overloadedProfileRotations": {
label: "Overloaded Profile Rotations",
help: "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
tags: ["auth", "access", "storage"],
},
"auth.cooldowns.overloadedBackoffMs": {
label: "Overloaded Backoff (ms)",
help: "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
tags: ["auth", "access", "reliability", "storage"],
},
"agents.defaults.models": {
label: "Models",
help: "Configured model catalog (keys are full provider/model IDs).",

View File

@ -801,6 +801,10 @@ export const FIELD_HELP: Record<string, string> = {
"Optional per-provider overrides for billing backoff (hours).",
"auth.cooldowns.billingMaxHours": "Cap (hours) for billing backoff (default: 24).",
"auth.cooldowns.failureWindowHours": "Failure window (hours) for backoff counters (default: 24).",
"auth.cooldowns.overloadedProfileRotations":
"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
"auth.cooldowns.overloadedBackoffMs":
"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
"agents.defaults.workspace":
"Default workspace path exposed to agent runtime tools for filesystem context and repo-aware behavior. Set this explicitly when running from wrappers so path resolution stays deterministic.",
"agents.defaults.bootstrapMaxChars":

View File

@ -471,6 +471,8 @@ export const FIELD_LABELS: Record<string, string> = {
"auth.cooldowns.billingBackoffHoursByProvider": "Billing Backoff Overrides",
"auth.cooldowns.billingMaxHours": "Billing Backoff Cap (hours)",
"auth.cooldowns.failureWindowHours": "Failover Window (hours)",
"auth.cooldowns.overloadedProfileRotations": "Overloaded Profile Rotations",
"auth.cooldowns.overloadedBackoffMs": "Overloaded Backoff (ms)",
"agents.defaults.models": "Models",
"agents.defaults.model.primary": "Primary Model",
"agents.defaults.model.fallbacks": "Model Fallbacks",

View File

@ -26,5 +26,15 @@ export type AuthConfig = {
* this window, counters reset. Default: 24.
*/
failureWindowHours?: number;
/**
* Maximum same-provider auth-profile rotations to allow for overloaded
* errors before escalating to cross-provider model fallback. Default: 1.
*/
overloadedProfileRotations?: number;
/**
* Fixed delay before retrying an overloaded provider/profile rotation.
* Default: 0.
*/
overloadedBackoffMs?: number;
};
};

View File

@ -450,6 +450,8 @@ export const OpenClawSchema = z
billingBackoffHoursByProvider: z.record(z.string(), z.number().positive()).optional(),
billingMaxHours: z.number().positive().optional(),
failureWindowHours: z.number().positive().optional(),
overloadedProfileRotations: z.number().int().nonnegative().optional(),
overloadedBackoffMs: z.number().int().nonnegative().optional(),
})
.strict()
.optional(),