mirror of https://github.com/openclaw/openclaw.git
fix: make overload failover configurable
This commit is contained in:
parent
2a60e34f2a
commit
418fa12dfa
|
|
@ -105,6 +105,7 @@ Docs: https://docs.openclaw.ai
|
|||
- iOS/Live Activities: mark the `ActivityKit` import in `LiveActivityManager.swift` as `@preconcurrency` so Xcode 26.4 / Swift 6 builds stop failing on strict concurrency checks. (#57180) Thanks @ngutman.
|
||||
- LINE/ACP: add current-conversation binding and inbound binding-routing parity so `/acp spawn ... --thread here`, configured ACP bindings, and active conversation-bound ACP sessions work on LINE like the other conversation channels.
|
||||
- LINE/markdown: preserve underscores inside Latin, Cyrillic, and CJK words when stripping markdown, while still removing standalone `_italic_` markers on the shared text-runtime path used by LINE and TTS. (#47465) Thanks @jackjin1997.
|
||||
- Agents/failover: make overloaded same-provider retry count and retry delay configurable via `auth.cooldowns`, default to one retry with no delay, and document the model-fallback behavior.
|
||||
|
||||
## 2026.3.31-beta.1
|
||||
|
||||
|
|
|
|||
|
|
@ -7855,6 +7855,39 @@
|
|||
"help": "Failure window (hours) for backoff counters (default: 24).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.overloadedBackoffMs",
|
||||
"kind": "core",
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"reliability",
|
||||
"storage"
|
||||
],
|
||||
"label": "Overloaded Backoff (ms)",
|
||||
"help": "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.overloadedProfileRotations",
|
||||
"kind": "core",
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"storage"
|
||||
],
|
||||
"label": "Overloaded Profile Rotations",
|
||||
"help": "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.order",
|
||||
"kind": "core",
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5718}
|
||||
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5720}
|
||||
{"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true}
|
||||
{"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true}
|
||||
{"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
|
|
@ -697,6 +697,8 @@
|
|||
{"recordType":"path","path":"auth.cooldowns.billingBackoffHoursByProvider.*","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.billingMaxHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","performance"],"label":"Billing Backoff Cap (hours)","help":"Cap (hours) for billing backoff (default: 24).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.failureWindowHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Failover Window (hours)","help":"Failure window (hours) for backoff counters (default: 24).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.overloadedBackoffMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","reliability","storage"],"label":"Overloaded Backoff (ms)","help":"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.overloadedProfileRotations","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","storage"],"label":"Overloaded Profile Rotations","help":"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.order","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Auth Profile Order","help":"Ordered auth profile IDs per provider (used for automatic failover).","hasChildren":true}
|
||||
{"recordType":"path","path":"auth.order.*","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
|
||||
{"recordType":"path","path":"auth.order.*.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
|
|
|
|||
|
|
@ -129,6 +129,8 @@ Defaults:
|
|||
|
||||
- Billing backoff starts at **5 hours**, doubles per billing failure, and caps at **24 hours**.
|
||||
- Backoff counters reset if the profile hasn’t failed for **24 hours** (configurable).
|
||||
- Overloaded retries allow **1 same-provider profile rotation** before model fallback.
|
||||
- Overloaded retries use **0 ms backoff** by default.
|
||||
|
||||
## Model fallback
|
||||
|
||||
|
|
@ -136,6 +138,11 @@ If all profiles for a provider fail, OpenClaw moves to the next model in
|
|||
`agents.defaults.model.fallbacks`. This applies to auth failures, rate limits, and
|
||||
timeouts that exhausted profile rotation (other errors do not advance fallback).
|
||||
|
||||
Overloaded errors are handled more aggressively than billing failures. By default,
|
||||
OpenClaw allows one same-provider auth-profile retry, then switches to the next
|
||||
configured model fallback without waiting. Tune this with
|
||||
`auth.cooldowns.overloadedProfileRotations` and `auth.cooldowns.overloadedBackoffMs`.
|
||||
|
||||
When a run starts with a model override (hooks or CLI), fallbacks still end at
|
||||
`agents.defaults.model.primary` after trying any configured fallbacks.
|
||||
|
||||
|
|
@ -146,6 +153,7 @@ See [Gateway configuration](/gateway/configuration) for:
|
|||
- `auth.profiles` / `auth.order`
|
||||
- `auth.cooldowns.billingBackoffHours` / `auth.cooldowns.billingBackoffHoursByProvider`
|
||||
- `auth.cooldowns.billingMaxHours` / `auth.cooldowns.failureWindowHours`
|
||||
- `auth.cooldowns.overloadedProfileRotations` / `auth.cooldowns.overloadedBackoffMs`
|
||||
- `agents.defaults.model.primary` / `agents.defaults.model.fallbacks`
|
||||
- `agents.defaults.imageModel` routing
|
||||
|
||||
|
|
|
|||
|
|
@ -3029,6 +3029,8 @@ Notes:
|
|||
billingBackoffHoursByProvider: { anthropic: 3, openai: 8 },
|
||||
billingMaxHours: 24,
|
||||
failureWindowHours: 24,
|
||||
overloadedProfileRotations: 1,
|
||||
overloadedBackoffMs: 0,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
@ -3038,6 +3040,8 @@ Notes:
|
|||
- `billingBackoffHoursByProvider`: optional per-provider overrides for billing backoff hours.
|
||||
- `billingMaxHours`: cap in hours for billing backoff exponential growth (default: `24`).
|
||||
- `failureWindowHours`: rolling window in hours used for backoff counters (default: `24`).
|
||||
- `overloadedProfileRotations`: maximum same-provider auth-profile rotations for overloaded errors before switching to model fallback (default: `1`).
|
||||
- `overloadedBackoffMs`: fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: `0`).
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -202,8 +202,9 @@ async function runEmbeddedFallback(params: {
|
|||
sessionKey: string;
|
||||
runId: string;
|
||||
abortSignal?: AbortSignal;
|
||||
config?: OpenClawConfig;
|
||||
}) {
|
||||
const cfg = makeConfig();
|
||||
const cfg = params.config ?? makeConfig();
|
||||
return await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
|
|
@ -321,8 +322,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
expect(typeof usageStats["groq:p1"]?.lastUsed).toBe("number");
|
||||
|
||||
expectOpenAiThenGroqAttemptOrder();
|
||||
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
|
||||
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
|
||||
expect(computeBackoffMock).not.toHaveBeenCalled();
|
||||
expect(sleepWithAbortMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -358,8 +359,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
expect(usageStats["groq:p1"]?.disabledUntil).toBeUndefined();
|
||||
|
||||
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
|
||||
expect(computeBackoffMock).toHaveBeenCalledTimes(2);
|
||||
expect(sleepWithAbortMock).toHaveBeenCalledTimes(2);
|
||||
expect(computeBackoffMock).not.toHaveBeenCalled();
|
||||
expect(sleepWithAbortMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -421,8 +422,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
const usageStats = await readUsageStats(agentDir);
|
||||
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
|
||||
expect(usageStats["openai:p1"]?.failureCounts).toMatchObject({ overloaded: 2 });
|
||||
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
|
||||
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
|
||||
expect(computeBackoffMock).not.toHaveBeenCalled();
|
||||
expect(sleepWithAbortMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -466,6 +467,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
sessionKey: "agent:test:overloaded-backoff-abort",
|
||||
runId: "run:overloaded-backoff-abort",
|
||||
abortSignal: controller.signal,
|
||||
config: {
|
||||
...makeConfig(),
|
||||
auth: { cooldowns: { overloadedBackoffMs: 321 } },
|
||||
},
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
name: "AbortError",
|
||||
|
|
@ -483,7 +488,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
it("caps overloaded profile rotations and escalates to cross-provider fallback (#58348)", async () => {
|
||||
// When a provider has multiple auth profiles and all return overloaded_error,
|
||||
// the runner should not exhaust all profiles before falling back. It should
|
||||
// cap profile rotations at MAX_OVERLOAD_PROFILE_ROTATIONS (1) and escalate
|
||||
// cap profile rotations at overloadedProfileRotations=1 and escalate
|
||||
// to cross-provider fallback immediately.
|
||||
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
// Write auth store with multiple profiles for openai
|
||||
|
|
@ -549,7 +554,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
expect(result.model).toBe("mock-2");
|
||||
expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
|
||||
|
||||
// With MAX_OVERLOAD_PROFILE_ROTATIONS=1, we expect:
|
||||
// With overloadedProfileRotations=1, we expect:
|
||||
// - 1 initial openai attempt (p1)
|
||||
// - 1 rotation to p2 (capped)
|
||||
// - escalation to groq (1 attempt)
|
||||
|
|
@ -564,4 +569,73 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
expect(groqAttempts.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
// Scenario: openai has two auth profiles and every openai attempt reports an
// overloaded error, while groq succeeds. With overloadedProfileRotations set
// to 0 the run is expected to end on groq after exactly one openai attempt
// and one groq attempt.
it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
  // Counts recorded attempt calls whose first argument targets `providerId`.
  const countAttemptsFor = (providerId: string): number =>
    runEmbeddedAttemptMock.mock.calls.filter(
      (call) => (call[0] as { provider?: string })?.provider === providerId,
    ).length;

  await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
    // Auth store fixture: two openai profiles plus one groq profile.
    const authStore = {
      version: 1,
      profiles: {
        "openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
        "openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
        "groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
      },
      usageStats: {
        "openai:p1": { lastUsed: 1 },
        "openai:p2": { lastUsed: 2 },
        "groq:p1": { lastUsed: 3 },
      },
    };
    const authStorePath = path.join(agentDir, "auth-profiles.json");
    await fs.writeFile(authStorePath, JSON.stringify(authStore));

    runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
      const { provider: requestedProvider } = params as { provider: string };
      switch (requestedProvider) {
        case "openai":
          // Every openai attempt fails with the overloaded payload.
          return makeEmbeddedRunnerAttempt({
            assistantTexts: [],
            lastAssistant: buildEmbeddedRunnerAssistant({
              provider: "openai",
              model: "mock-1",
              stopReason: "error",
              errorMessage: OVERLOADED_ERROR_PAYLOAD,
            }),
          });
        case "groq":
          // The fallback provider answers successfully.
          return makeEmbeddedRunnerAttempt({
            assistantTexts: ["fallback ok"],
            lastAssistant: buildEmbeddedRunnerAssistant({
              provider: "groq",
              model: "mock-2",
              stopReason: "stop",
              content: [{ type: "text", text: "fallback ok" }],
            }),
          });
        default:
          throw new Error(`Unexpected provider ${requestedProvider}`);
      }
    });

    const noRotationConfig = {
      ...makeConfig(),
      auth: { cooldowns: { overloadedProfileRotations: 0 } },
    };
    const result = await runEmbeddedFallback({
      agentDir,
      workspaceDir,
      sessionKey: "agent:test:overloaded-no-rotation",
      runId: "run:overloaded-no-rotation",
      config: noRotationConfig,
    });

    expect(result.provider).toBe("groq");
    // Snapshot both counts before asserting, as the original did.
    const openaiAttemptCount = countAttemptsFor("openai");
    const groqAttemptCount = countAttemptsFor("groq");
    expect(openaiAttemptCount).toBe(1);
    expect(groqAttemptCount).toBe(1);
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -58,22 +58,27 @@ const installRunEmbeddedMocks = () => {
|
|||
vi.doMock("./pi-embedded-runner/run/attempt.js", () => ({
|
||||
runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params),
|
||||
}));
|
||||
vi.doMock("../plugins/provider-runtime.js", () => ({
|
||||
prepareProviderRuntimeAuth: async (params: {
|
||||
provider: string;
|
||||
context: { apiKey: string };
|
||||
}) => {
|
||||
if (params.provider !== "github-copilot") {
|
||||
return undefined;
|
||||
}
|
||||
const token = await resolveCopilotApiTokenMock(params.context.apiKey);
|
||||
return {
|
||||
apiKey: token.token,
|
||||
baseUrl: token.baseUrl,
|
||||
expiresAt: token.expiresAt,
|
||||
};
|
||||
},
|
||||
}));
|
||||
vi.doMock("../plugins/provider-runtime.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("../plugins/provider-runtime.js")>();
|
||||
return {
|
||||
...actual,
|
||||
prepareProviderRuntimeAuth: async (params: {
|
||||
provider: string;
|
||||
context: { apiKey: string };
|
||||
}) => {
|
||||
if (params.provider !== "github-copilot") {
|
||||
return undefined;
|
||||
}
|
||||
const token = await resolveCopilotApiTokenMock(params.context.apiKey);
|
||||
return {
|
||||
apiKey: token.token,
|
||||
baseUrl: token.baseUrl,
|
||||
expiresAt: token.expiresAt,
|
||||
};
|
||||
},
|
||||
resolveProviderCapabilitiesWithPlugin: vi.fn(() => undefined),
|
||||
};
|
||||
});
|
||||
vi.doMock("../infra/backoff.js", () => ({
|
||||
computeBackoff: (
|
||||
policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
|
||||
|
|
@ -188,8 +193,26 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
|
|||
...overrides,
|
||||
});
|
||||
|
||||
const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawConfig =>
|
||||
const makeConfig = (opts?: {
|
||||
fallbacks?: string[];
|
||||
apiKey?: string;
|
||||
overloadedBackoffMs?: number;
|
||||
overloadedProfileRotations?: number;
|
||||
}): OpenClawConfig =>
|
||||
({
|
||||
auth:
|
||||
opts?.overloadedBackoffMs != null || opts?.overloadedProfileRotations != null
|
||||
? {
|
||||
cooldowns: {
|
||||
...(opts?.overloadedBackoffMs != null
|
||||
? { overloadedBackoffMs: opts.overloadedBackoffMs }
|
||||
: {}),
|
||||
...(opts?.overloadedProfileRotations != null
|
||||
? { overloadedProfileRotations: opts.overloadedProfileRotations }
|
||||
: {}),
|
||||
},
|
||||
}
|
||||
: undefined,
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
|
|
@ -379,6 +402,7 @@ async function runAutoPinnedOpenAiTurn(params: {
|
|||
sessionKey: string;
|
||||
runId: string;
|
||||
authProfileId?: string;
|
||||
config?: OpenClawConfig;
|
||||
}) {
|
||||
await runEmbeddedPiAgentInline({
|
||||
sessionId: "session:test",
|
||||
|
|
@ -386,7 +410,7 @@ async function runAutoPinnedOpenAiTurn(params: {
|
|||
sessionFile: path.join(params.workspaceDir, "session.jsonl"),
|
||||
workspaceDir: params.workspaceDir,
|
||||
agentDir: params.agentDir,
|
||||
config: makeConfig(),
|
||||
config: params.config ?? makeConfig(),
|
||||
prompt: "hello",
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
|
|
@ -423,6 +447,7 @@ async function runAutoPinnedRotationCase(params: {
|
|||
errorMessage: string;
|
||||
sessionKey: string;
|
||||
runId: string;
|
||||
config?: OpenClawConfig;
|
||||
}) {
|
||||
runEmbeddedAttemptMock.mockReset();
|
||||
return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
|
|
@ -433,6 +458,7 @@ async function runAutoPinnedRotationCase(params: {
|
|||
workspaceDir,
|
||||
sessionKey: params.sessionKey,
|
||||
runId: params.runId,
|
||||
config: params.config,
|
||||
});
|
||||
|
||||
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
|
||||
|
|
@ -445,6 +471,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
|
|||
errorMessage: string;
|
||||
sessionKey: string;
|
||||
runId: string;
|
||||
config?: OpenClawConfig;
|
||||
}) {
|
||||
runEmbeddedAttemptMock.mockReset();
|
||||
return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
|
|
@ -455,6 +482,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
|
|||
workspaceDir,
|
||||
sessionKey: params.sessionKey,
|
||||
runId: params.runId,
|
||||
config: params.config,
|
||||
});
|
||||
|
||||
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
|
||||
|
|
@ -786,18 +814,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
|||
});
|
||||
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
|
||||
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
|
||||
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
|
||||
expect(computeBackoffMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
initialMs: 250,
|
||||
maxMs: 1500,
|
||||
factor: 2,
|
||||
jitter: 0.2,
|
||||
}),
|
||||
1,
|
||||
);
|
||||
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
|
||||
expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
|
||||
expect(computeBackoffMock).not.toHaveBeenCalled();
|
||||
expect(sleepWithAbortMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("logs structured failover decision metadata for overloaded assistant rotation", async () => {
|
||||
|
|
@ -863,16 +881,19 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
|||
});
|
||||
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
|
||||
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
|
||||
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
|
||||
expect(computeBackoffMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
initialMs: 250,
|
||||
maxMs: 1500,
|
||||
factor: 2,
|
||||
jitter: 0.2,
|
||||
}),
|
||||
1,
|
||||
);
|
||||
expect(computeBackoffMock).not.toHaveBeenCalled();
|
||||
expect(sleepWithAbortMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// When `overloadedBackoffMs` is configured, the rotation path is expected to
// sleep once for exactly that fixed delay and never consult computeBackoff.
it("uses configured overload backoff before rotating profiles", async () => {
  const configuredBackoffMs = 321;

  const rotationOutcome = await runAutoPinnedRotationCase({
    errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
    sessionKey: "agent:test:overloaded-configured-backoff",
    runId: "run:overloaded-configured-backoff",
    config: makeConfig({ overloadedBackoffMs: configuredBackoffMs }),
  });

  // The second openai profile must have been used after the overload.
  const rotatedProfileStats = rotationOutcome.usageStats["openai:p2"];
  expect(typeof rotatedProfileStats?.lastUsed).toBe("number");

  // Fixed delay only: no computed backoff, one sleep with no abort signal.
  expect(computeBackoffMock).not.toHaveBeenCalled();
  expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
  expect(sleepWithAbortMock).toHaveBeenCalledWith(configuredBackoffMs, undefined);
});
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import {
|
|||
ensureContextEnginesInitialized,
|
||||
resolveContextEngine,
|
||||
} from "../../context-engine/index.js";
|
||||
import { computeBackoff, sleepWithAbort } from "../../infra/backoff.js";
|
||||
import { sleepWithAbort } from "../../infra/backoff.js";
|
||||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||
import { enqueueCommandInLane } from "../../process/command-queue.js";
|
||||
import { sanitizeForLog } from "../../terminal/ansi.js";
|
||||
|
|
@ -76,10 +76,10 @@ import {
|
|||
buildErrorAgentMeta,
|
||||
buildUsageAgentMetaFields,
|
||||
createCompactionDiagId,
|
||||
MAX_OVERLOAD_PROFILE_ROTATIONS,
|
||||
OVERLOAD_FAILOVER_BACKOFF_POLICY,
|
||||
resolveActiveErrorContext,
|
||||
resolveMaxRunRetryIterations,
|
||||
resolveOverloadFailoverBackoffMs,
|
||||
resolveOverloadProfileRotationLimit,
|
||||
type RuntimeAuthState,
|
||||
scrubAnthropicRefusalMagic,
|
||||
} from "./run/helpers.js";
|
||||
|
|
@ -317,9 +317,10 @@ export async function runEmbeddedPiAgent(
|
|||
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||||
let autoCompactionCount = 0;
|
||||
let runLoopIterations = 0;
|
||||
let overloadFailoverAttempts = 0;
|
||||
let overloadProfileRotations = 0;
|
||||
let timeoutCompactionAttempts = 0;
|
||||
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
|
||||
const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
|
||||
const maybeMarkAuthProfileFailure = async (failure: {
|
||||
profileId?: string;
|
||||
reason?: AuthProfileFailureReason | null;
|
||||
|
|
@ -352,16 +353,14 @@ export async function runEmbeddedPiAgent(
|
|||
return failoverReason;
|
||||
};
|
||||
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
|
||||
if (reason !== "overloaded") {
|
||||
if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) {
|
||||
return;
|
||||
}
|
||||
overloadFailoverAttempts += 1;
|
||||
const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
|
||||
log.warn(
|
||||
`overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
|
||||
`overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`,
|
||||
);
|
||||
try {
|
||||
await sleepWithAbort(delayMs, params.abortSignal);
|
||||
await sleepWithAbort(overloadFailoverBackoffMs, params.abortSignal);
|
||||
} catch (err) {
|
||||
if (params.abortSignal?.aborted) {
|
||||
const abortErr = new Error("Operation aborted", { cause: err });
|
||||
|
|
@ -1199,15 +1198,15 @@ export async function runEmbeddedPiAgent(
|
|||
}
|
||||
}
|
||||
|
||||
// For overloaded errors, check the rotation cap *before* calling
|
||||
// advanceAuthProfile() to avoid a wasted auth-profile setup cycle.
|
||||
// advanceAuthProfile() runs applyApiKeyInfo() which initialises the
|
||||
// next profile — costly work that is pointless when we already know
|
||||
// we will escalate to cross-provider fallback.
|
||||
// For overloaded errors, check the configured rotation cap *before*
|
||||
// calling advanceAuthProfile() to avoid a wasted auth-profile setup
|
||||
// cycle. advanceAuthProfile() runs applyApiKeyInfo() which
|
||||
// initializes the next profile — costly work that is pointless when
|
||||
// we already know we will escalate to cross-provider fallback.
|
||||
// See: https://github.com/openclaw/openclaw/issues/58348
|
||||
if (assistantFailoverReason === "overloaded") {
|
||||
overloadProfileRotations += 1;
|
||||
if (overloadProfileRotations > MAX_OVERLOAD_PROFILE_ROTATIONS && fallbackConfigured) {
|
||||
if (overloadProfileRotations > overloadProfileRotationLimit && fallbackConfigured) {
|
||||
const status = resolveFailoverStatus("overloaded");
|
||||
log.warn(
|
||||
`overload profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { type BackoffPolicy } from "../../../infra/backoff.js";
|
||||
import type { OpenClawConfig } from "../../../config/config.js";
|
||||
import { generateSecureToken } from "../../../infra/secure-random.js";
|
||||
import { derivePromptTokens, normalizeUsage } from "../../usage.js";
|
||||
import type { EmbeddedPiAgentMeta } from "../types.js";
|
||||
|
|
@ -25,22 +25,16 @@ export const RUNTIME_AUTH_REFRESH_MARGIN_MS = 5 * 60 * 1000;
|
|||
export const RUNTIME_AUTH_REFRESH_RETRY_MS = 60 * 1000;
|
||||
export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000;
|
||||
|
||||
// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
|
||||
// enough that fallback still feels responsive within a single turn.
|
||||
export const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
|
||||
initialMs: 250,
|
||||
maxMs: 1_500,
|
||||
factor: 2,
|
||||
jitter: 0.2,
|
||||
};
|
||||
export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0;
|
||||
export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
|
||||
|
||||
// Maximum number of auth-profile rotations to attempt for overloaded errors
|
||||
// before escalating to cross-provider fallback. Overloaded is a provider-level
|
||||
// capacity issue — rotating auth profiles on the same provider is unlikely to
|
||||
// help and wastes time with backoff delays. A cap of 1 allows one probe attempt
|
||||
// (in case the overload was transient) before giving up on the provider.
|
||||
// See: https://github.com/openclaw/openclaw/issues/58348
|
||||
export const MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
|
||||
/**
 * Resolves the fixed delay, in milliseconds, applied before an overload
 * failover.
 *
 * Reads `auth.cooldowns.overloadedBackoffMs` from the supplied config and
 * falls back to `DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS` when the config, or any
 * segment of that path, is null or undefined. An explicit `0` is preserved.
 *
 * @param cfg - Optional OpenClaw config to read the override from.
 * @returns The configured delay, or the default when none is set.
 */
export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number {
  const configuredBackoffMs = cfg?.auth?.cooldowns?.overloadedBackoffMs;
  if (configuredBackoffMs == null) {
    return DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS;
  }
  return configuredBackoffMs;
}
|
||||
|
||||
/**
 * Resolves how many same-provider auth-profile rotations are allowed for
 * overloaded errors.
 *
 * Reads `auth.cooldowns.overloadedProfileRotations` from the supplied config
 * and falls back to `DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS` when the config,
 * or any segment of that path, is null or undefined. An explicit `0` is
 * preserved.
 *
 * @param cfg - Optional OpenClaw config to read the override from.
 * @returns The configured rotation limit, or the default when none is set.
 */
export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): number {
  const configuredLimit = cfg?.auth?.cooldowns?.overloadedProfileRotations;
  return configuredLimit == null ? DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS : configuredLimit;
}
|
||||
|
||||
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
|
||||
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
|
||||
|
|
|
|||
|
|
@ -789,6 +789,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
|
|||
type: "number",
|
||||
exclusiveMinimum: 0,
|
||||
},
|
||||
overloadedProfileRotations: {
|
||||
type: "integer",
|
||||
minimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
overloadedBackoffMs: {
|
||||
type: "integer",
|
||||
minimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
|
|
@ -13645,6 +13655,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
|
|||
help: "Failure window (hours) for backoff counters (default: 24).",
|
||||
tags: ["auth", "access"],
|
||||
},
|
||||
"auth.cooldowns.overloadedProfileRotations": {
|
||||
label: "Overloaded Profile Rotations",
|
||||
help: "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
|
||||
tags: ["auth", "access", "storage"],
|
||||
},
|
||||
"auth.cooldowns.overloadedBackoffMs": {
|
||||
label: "Overloaded Backoff (ms)",
|
||||
help: "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
|
||||
tags: ["auth", "access", "reliability", "storage"],
|
||||
},
|
||||
"agents.defaults.models": {
|
||||
label: "Models",
|
||||
help: "Configured model catalog (keys are full provider/model IDs).",
|
||||
|
|
|
|||
|
|
@ -801,6 +801,10 @@ export const FIELD_HELP: Record<string, string> = {
|
|||
"Optional per-provider overrides for billing backoff (hours).",
|
||||
"auth.cooldowns.billingMaxHours": "Cap (hours) for billing backoff (default: 24).",
|
||||
"auth.cooldowns.failureWindowHours": "Failure window (hours) for backoff counters (default: 24).",
|
||||
"auth.cooldowns.overloadedProfileRotations":
|
||||
"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
|
||||
"auth.cooldowns.overloadedBackoffMs":
|
||||
"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
|
||||
"agents.defaults.workspace":
|
||||
"Default workspace path exposed to agent runtime tools for filesystem context and repo-aware behavior. Set this explicitly when running from wrappers so path resolution stays deterministic.",
|
||||
"agents.defaults.bootstrapMaxChars":
|
||||
|
|
|
|||
|
|
@ -471,6 +471,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
|||
"auth.cooldowns.billingBackoffHoursByProvider": "Billing Backoff Overrides",
|
||||
"auth.cooldowns.billingMaxHours": "Billing Backoff Cap (hours)",
|
||||
"auth.cooldowns.failureWindowHours": "Failover Window (hours)",
|
||||
"auth.cooldowns.overloadedProfileRotations": "Overloaded Profile Rotations",
|
||||
"auth.cooldowns.overloadedBackoffMs": "Overloaded Backoff (ms)",
|
||||
"agents.defaults.models": "Models",
|
||||
"agents.defaults.model.primary": "Primary Model",
|
||||
"agents.defaults.model.fallbacks": "Model Fallbacks",
|
||||
|
|
|
|||
|
|
@ -26,5 +26,15 @@ export type AuthConfig = {
|
|||
* this window, counters reset. Default: 24.
|
||||
*/
|
||||
failureWindowHours?: number;
|
||||
/**
|
||||
* Maximum same-provider auth-profile rotations to allow for overloaded
|
||||
* errors before escalating to cross-provider model fallback. Default: 1.
|
||||
*/
|
||||
overloadedProfileRotations?: number;
|
||||
/**
|
||||
* Fixed delay before retrying an overloaded provider/profile rotation.
|
||||
* Default: 0.
|
||||
*/
|
||||
overloadedBackoffMs?: number;
|
||||
};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -450,6 +450,8 @@ export const OpenClawSchema = z
|
|||
billingBackoffHoursByProvider: z.record(z.string(), z.number().positive()).optional(),
|
||||
billingMaxHours: z.number().positive().optional(),
|
||||
failureWindowHours: z.number().positive().optional(),
|
||||
overloadedProfileRotations: z.number().int().nonnegative().optional(),
|
||||
overloadedBackoffMs: z.number().int().nonnegative().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
|
|
|
|||
Loading…
Reference in New Issue