fix: make overload failover configurable

2026-03-31 21:33:35 +01:00 · 2026-03-31 21:33:35 +01:00 · 418fa12dfa
parent 2a60e34f2a
commit 418fa12dfa
14 changed files with 255 additions and 81 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -105,6 +105,7 @@ Docs: https://docs.openclaw.ai
 - iOS/Live Activities: mark the `ActivityKit` import in `LiveActivityManager.swift` as `@preconcurrency` so Xcode 26.4 / Swift 6 builds stop failing on strict concurrency checks. (#57180) Thanks @ngutman.
 - LINE/ACP: add current-conversation binding and inbound binding-routing parity so `/acp spawn ... --thread here`, configured ACP bindings, and active conversation-bound ACP sessions work on LINE like the other conversation channels.
 - LINE/markdown: preserve underscores inside Latin, Cyrillic, and CJK words when stripping markdown, while still removing standalone `_italic_` markers on the shared text-runtime path used by LINE and TTS. (#47465) Thanks @jackjin1997.
+- Agents/failover: make the same-provider retry count and retry delay for overloaded errors configurable via `auth.cooldowns.overloadedProfileRotations` and `auth.cooldowns.overloadedBackoffMs`, default to one retry with no delay, and document the model-fallback behavior.

 ## 2026.3.31-beta.1

--- a/docs/.generated/config-baseline.json
+++ b/docs/.generated/config-baseline.json
@ -7855,6 +7855,39 @@
      "help": "Failure window (hours) for backoff counters (default: 24).",
      "hasChildren": false
    },
+    {
+      "path": "auth.cooldowns.overloadedBackoffMs",
+      "kind": "core",
+      "type": "integer",
+      "required": false,
+      "deprecated": false,
+      "sensitive": false,
+      "tags": [
+        "access",
+        "auth",
+        "reliability",
+        "storage"
+      ],
+      "label": "Overloaded Backoff (ms)",
+      "help": "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
+      "hasChildren": false
+    },
+    {
+      "path": "auth.cooldowns.overloadedProfileRotations",
+      "kind": "core",
+      "type": "integer",
+      "required": false,
+      "deprecated": false,
+      "sensitive": false,
+      "tags": [
+        "access",
+        "auth",
+        "storage"
+      ],
+      "label": "Overloaded Profile Rotations",
+      "help": "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
+      "hasChildren": false
+    },
    {
      "path": "auth.order",
      "kind": "core",
--- a/docs/.generated/config-baseline.jsonl
+++ b/docs/.generated/config-baseline.jsonl
@ -1,4 +1,4 @@
-{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5718}
+{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5720}
 {"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true}
 {"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true}
 {"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
@ -697,6 +697,8 @@
 {"recordType":"path","path":"auth.cooldowns.billingBackoffHoursByProvider.*","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
 {"recordType":"path","path":"auth.cooldowns.billingMaxHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","performance"],"label":"Billing Backoff Cap (hours)","help":"Cap (hours) for billing backoff (default: 24).","hasChildren":false}
 {"recordType":"path","path":"auth.cooldowns.failureWindowHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Failover Window (hours)","help":"Failure window (hours) for backoff counters (default: 24).","hasChildren":false}
+{"recordType":"path","path":"auth.cooldowns.overloadedBackoffMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","reliability","storage"],"label":"Overloaded Backoff (ms)","help":"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).","hasChildren":false}
+{"recordType":"path","path":"auth.cooldowns.overloadedProfileRotations","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","storage"],"label":"Overloaded Profile Rotations","help":"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).","hasChildren":false}
 {"recordType":"path","path":"auth.order","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Auth Profile Order","help":"Ordered auth profile IDs per provider (used for automatic failover).","hasChildren":true}
 {"recordType":"path","path":"auth.order.*","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
 {"recordType":"path","path":"auth.order.*.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
--- a/docs/concepts/model-failover.md
+++ b/docs/concepts/model-failover.md
@ -129,6 +129,8 @@ Defaults:

 - Billing backoff starts at **5 hours**, doubles per billing failure, and caps at **24 hours**.
 - Backoff counters reset if the profile hasn’t failed for **24 hours** (configurable).
+- Overloaded retries allow **1 same-provider profile rotation** before model fallback.
+- Overloaded retries use **0 ms backoff** by default.

 ## Model fallback

@ -136,6 +138,11 @@ If all profiles for a provider fail, OpenClaw moves to the next model in
 `agents.defaults.model.fallbacks`. This applies to auth failures, rate limits, and
 timeouts that exhausted profile rotation (other errors do not advance fallback).

+Overloaded errors fail over much faster than billing failures, which back off for hours. By default,
+OpenClaw allows one same-provider auth-profile rotation, then switches to the next
+configured model fallback without waiting. Tune the rotation cap and the retry delay with
+`auth.cooldowns.overloadedProfileRotations` and `auth.cooldowns.overloadedBackoffMs`.
+
 When a run starts with a model override (hooks or CLI), fallbacks still end at
 `agents.defaults.model.primary` after trying any configured fallbacks.

@ -146,6 +153,7 @@ See [Gateway configuration](/gateway/configuration) for:
 - `auth.profiles` / `auth.order`
 - `auth.cooldowns.billingBackoffHours` / `auth.cooldowns.billingBackoffHoursByProvider`
 - `auth.cooldowns.billingMaxHours` / `auth.cooldowns.failureWindowHours`
+- `auth.cooldowns.overloadedProfileRotations` / `auth.cooldowns.overloadedBackoffMs`
 - `agents.defaults.model.primary` / `agents.defaults.model.fallbacks`
 - `agents.defaults.imageModel` routing

--- a/docs/gateway/configuration-reference.md
+++ b/docs/gateway/configuration-reference.md
@ -3029,6 +3029,8 @@ Notes:
      billingBackoffHoursByProvider: { anthropic: 3, openai: 8 },
      billingMaxHours: 24,
      failureWindowHours: 24,
+      overloadedProfileRotations: 1,
+      overloadedBackoffMs: 0,
    },
  },
 }
@ -3038,6 +3040,8 @@ Notes:
 - `billingBackoffHoursByProvider`: optional per-provider overrides for billing backoff hours.
 - `billingMaxHours`: cap in hours for billing backoff exponential growth (default: `24`).
 - `failureWindowHours`: rolling window in hours used for backoff counters (default: `24`).
+- `overloadedProfileRotations`: maximum same-provider auth-profile rotations for overloaded errors before switching to model fallback (default: `1`).
+- `overloadedBackoffMs`: fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: `0`, no delay).

 ---

--- a/src/agents/model-fallback.run-embedded.e2e.test.ts
+++ b/src/agents/model-fallback.run-embedded.e2e.test.ts
@ -202,8 +202,9 @@ async function runEmbeddedFallback(params: {
  sessionKey: string;
  runId: string;
  abortSignal?: AbortSignal;
+  config?: OpenClawConfig;
 }) {
-  const cfg = makeConfig();
+  const cfg = params.config ?? makeConfig();
  return await runWithModelFallback({
    cfg,
    provider: "openai",
@ -321,8 +322,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(typeof usageStats["groq:p1"]?.lastUsed).toBe("number");

      expectOpenAiThenGroqAttemptOrder();
-      expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-      expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
+      expect(computeBackoffMock).not.toHaveBeenCalled();
+      expect(sleepWithAbortMock).not.toHaveBeenCalled();
    });
  });

@ -358,8 +359,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(usageStats["groq:p1"]?.disabledUntil).toBeUndefined();

      expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
-      expect(computeBackoffMock).toHaveBeenCalledTimes(2);
-      expect(sleepWithAbortMock).toHaveBeenCalledTimes(2);
+      expect(computeBackoffMock).not.toHaveBeenCalled();
+      expect(sleepWithAbortMock).not.toHaveBeenCalled();
    });
  });

@ -421,8 +422,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      const usageStats = await readUsageStats(agentDir);
      expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
      expect(usageStats["openai:p1"]?.failureCounts).toMatchObject({ overloaded: 2 });
-      expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-      expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
+      expect(computeBackoffMock).not.toHaveBeenCalled();
+      expect(sleepWithAbortMock).not.toHaveBeenCalled();
    });
  });

@ -466,6 +467,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
          sessionKey: "agent:test:overloaded-backoff-abort",
          runId: "run:overloaded-backoff-abort",
          abortSignal: controller.signal,
+          config: {
+            ...makeConfig(),
+            auth: { cooldowns: { overloadedBackoffMs: 321 } },
+          },
        }),
      ).rejects.toMatchObject({
        name: "AbortError",
@ -483,7 +488,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
  it("caps overloaded profile rotations and escalates to cross-provider fallback (#58348)", async () => {
    // When a provider has multiple auth profiles and all return overloaded_error,
    // the runner should not exhaust all profiles before falling back. It should
-    // cap profile rotations at MAX_OVERLOAD_PROFILE_ROTATIONS (1) and escalate
+    // cap profile rotations at overloadedProfileRotations=1 and escalate
    // to cross-provider fallback immediately.
    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
      // Write auth store with multiple profiles for openai
@ -549,7 +554,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(result.model).toBe("mock-2");
      expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");

-      // With MAX_OVERLOAD_PROFILE_ROTATIONS=1, we expect:
+      // With overloadedProfileRotations=1, we expect:
      // - 1 initial openai attempt (p1)
      // - 1 rotation to p2 (capped)
      // - escalation to groq (1 attempt)
@ -564,4 +569,73 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(groqAttempts.length).toBe(1);
    });
  });
+
+  it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
+    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
+      await fs.writeFile(
+        path.join(agentDir, "auth-profiles.json"),
+        JSON.stringify({
+          version: 1,
+          profiles: {
+            "openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
+            "openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
+            "groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
+          },
+          usageStats: {
+            "openai:p1": { lastUsed: 1 },
+            "openai:p2": { lastUsed: 2 },
+            "groq:p1": { lastUsed: 3 },
+          },
+        }),
+      );
+
+      runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
+        const attemptParams = params as { provider: string };
+        if (attemptParams.provider === "openai") {
+          return makeEmbeddedRunnerAttempt({
+            assistantTexts: [],
+            lastAssistant: buildEmbeddedRunnerAssistant({
+              provider: "openai",
+              model: "mock-1",
+              stopReason: "error",
+              errorMessage: OVERLOADED_ERROR_PAYLOAD,
+            }),
+          });
+        }
+        if (attemptParams.provider === "groq") {
+          return makeEmbeddedRunnerAttempt({
+            assistantTexts: ["fallback ok"],
+            lastAssistant: buildEmbeddedRunnerAssistant({
+              provider: "groq",
+              model: "mock-2",
+              stopReason: "stop",
+              content: [{ type: "text", text: "fallback ok" }],
+            }),
+          });
+        }
+        throw new Error(`Unexpected provider ${attemptParams.provider}`);
+      });
+
+      const result = await runEmbeddedFallback({
+        agentDir,
+        workspaceDir,
+        sessionKey: "agent:test:overloaded-no-rotation",
+        runId: "run:overloaded-no-rotation",
+        config: {
+          ...makeConfig(),
+          auth: { cooldowns: { overloadedProfileRotations: 0 } },
+        },
+      });
+
+      expect(result.provider).toBe("groq");
+      const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
+        (call) => (call[0] as { provider?: string })?.provider === "openai",
+      );
+      const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
+        (call) => (call[0] as { provider?: string })?.provider === "groq",
+      );
+      expect(openaiAttempts.length).toBe(1);
+      expect(groqAttempts.length).toBe(1);
+    });
+  });
 });
--- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
+++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
@ -58,22 +58,27 @@ const installRunEmbeddedMocks = () => {
  vi.doMock("./pi-embedded-runner/run/attempt.js", () => ({
    runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params),
  }));
-  vi.doMock("../plugins/provider-runtime.js", () => ({
-    prepareProviderRuntimeAuth: async (params: {
-      provider: string;
-      context: { apiKey: string };
-    }) => {
-      if (params.provider !== "github-copilot") {
-        return undefined;
-      }
-      const token = await resolveCopilotApiTokenMock(params.context.apiKey);
-      return {
-        apiKey: token.token,
-        baseUrl: token.baseUrl,
-        expiresAt: token.expiresAt,
-      };
-    },
-  }));
+  vi.doMock("../plugins/provider-runtime.js", async (importOriginal) => {
+    const actual = await importOriginal<typeof import("../plugins/provider-runtime.js")>();
+    return {
+      ...actual,
+      prepareProviderRuntimeAuth: async (params: {
+        provider: string;
+        context: { apiKey: string };
+      }) => {
+        if (params.provider !== "github-copilot") {
+          return undefined;
+        }
+        const token = await resolveCopilotApiTokenMock(params.context.apiKey);
+        return {
+          apiKey: token.token,
+          baseUrl: token.baseUrl,
+          expiresAt: token.expiresAt,
+        };
+      },
+      resolveProviderCapabilitiesWithPlugin: vi.fn(() => undefined),
+    };
+  });
  vi.doMock("../infra/backoff.js", () => ({
    computeBackoff: (
      policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
@ -188,8 +193,26 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
  ...overrides,
 });

-const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawConfig =>
+const makeConfig = (opts?: {
+  fallbacks?: string[];
+  apiKey?: string;
+  overloadedBackoffMs?: number;
+  overloadedProfileRotations?: number;
+}): OpenClawConfig =>
  ({
+    auth:
+      opts?.overloadedBackoffMs != null || opts?.overloadedProfileRotations != null
+        ? {
+            cooldowns: {
+              ...(opts?.overloadedBackoffMs != null
+                ? { overloadedBackoffMs: opts.overloadedBackoffMs }
+                : {}),
+              ...(opts?.overloadedProfileRotations != null
+                ? { overloadedProfileRotations: opts.overloadedProfileRotations }
+                : {}),
+            },
+          }
+        : undefined,
    agents: {
      defaults: {
        model: {
@ -379,6 +402,7 @@ async function runAutoPinnedOpenAiTurn(params: {
  sessionKey: string;
  runId: string;
  authProfileId?: string;
+  config?: OpenClawConfig;
 }) {
  await runEmbeddedPiAgentInline({
    sessionId: "session:test",
@ -386,7 +410,7 @@ async function runAutoPinnedOpenAiTurn(params: {
    sessionFile: path.join(params.workspaceDir, "session.jsonl"),
    workspaceDir: params.workspaceDir,
    agentDir: params.agentDir,
-    config: makeConfig(),
+    config: params.config ?? makeConfig(),
    prompt: "hello",
    provider: "openai",
    model: "mock-1",
@ -423,6 +447,7 @@ async function runAutoPinnedRotationCase(params: {
  errorMessage: string;
  sessionKey: string;
  runId: string;
+  config?: OpenClawConfig;
 }) {
  runEmbeddedAttemptMock.mockReset();
  return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@ -433,6 +458,7 @@ async function runAutoPinnedRotationCase(params: {
      workspaceDir,
      sessionKey: params.sessionKey,
      runId: params.runId,
+      config: params.config,
    });

    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@ -445,6 +471,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
  errorMessage: string;
  sessionKey: string;
  runId: string;
+  config?: OpenClawConfig;
 }) {
  runEmbeddedAttemptMock.mockReset();
  return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@ -455,6 +482,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
      workspaceDir,
      sessionKey: params.sessionKey,
      runId: params.runId,
+      config: params.config,
    });

    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@ -786,18 +814,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    });
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
    expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
-    expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-    expect(computeBackoffMock).toHaveBeenCalledWith(
-      expect.objectContaining({
-        initialMs: 250,
-        maxMs: 1500,
-        factor: 2,
-        jitter: 0.2,
-      }),
-      1,
-    );
-    expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
-    expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
+    expect(computeBackoffMock).not.toHaveBeenCalled();
+    expect(sleepWithAbortMock).not.toHaveBeenCalled();
  });

  it("logs structured failover decision metadata for overloaded assistant rotation", async () => {
@ -863,16 +881,19 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    });
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
    expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
-    expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-    expect(computeBackoffMock).toHaveBeenCalledWith(
-      expect.objectContaining({
-        initialMs: 250,
-        maxMs: 1500,
-        factor: 2,
-        jitter: 0.2,
-      }),
-      1,
-    );
+    expect(computeBackoffMock).not.toHaveBeenCalled();
+    expect(sleepWithAbortMock).not.toHaveBeenCalled();
+  });
+
+  it("uses configured overload backoff before rotating profiles", async () => {
+    const { usageStats } = await runAutoPinnedRotationCase({
+      errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
+      sessionKey: "agent:test:overloaded-configured-backoff",
+      runId: "run:overloaded-configured-backoff",
+      config: makeConfig({ overloadedBackoffMs: 321 }),
+    });
+    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
+    expect(computeBackoffMock).not.toHaveBeenCalled();
    expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
    expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
  });
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@ -5,7 +5,7 @@ import {
  ensureContextEnginesInitialized,
  resolveContextEngine,
 } from "../../context-engine/index.js";
-import { computeBackoff, sleepWithAbort } from "../../infra/backoff.js";
+import { sleepWithAbort } from "../../infra/backoff.js";
 import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
 import { enqueueCommandInLane } from "../../process/command-queue.js";
 import { sanitizeForLog } from "../../terminal/ansi.js";
@ -76,10 +76,10 @@ import {
  buildErrorAgentMeta,
  buildUsageAgentMetaFields,
  createCompactionDiagId,
-  MAX_OVERLOAD_PROFILE_ROTATIONS,
-  OVERLOAD_FAILOVER_BACKOFF_POLICY,
  resolveActiveErrorContext,
  resolveMaxRunRetryIterations,
+  resolveOverloadFailoverBackoffMs,
+  resolveOverloadProfileRotationLimit,
  type RuntimeAuthState,
  scrubAnthropicRefusalMagic,
 } from "./run/helpers.js";
@ -317,9 +317,10 @@ export async function runEmbeddedPiAgent(
      let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
      let autoCompactionCount = 0;
      let runLoopIterations = 0;
-      let overloadFailoverAttempts = 0;
      let overloadProfileRotations = 0;
      let timeoutCompactionAttempts = 0;
+      const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
+      const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
      const maybeMarkAuthProfileFailure = async (failure: {
        profileId?: string;
        reason?: AuthProfileFailureReason | null;
@ -352,16 +353,14 @@ export async function runEmbeddedPiAgent(
        return failoverReason;
      };
      const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
-        if (reason !== "overloaded") {
+        if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) {
          return;
        }
-        overloadFailoverAttempts += 1;
-        const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
        log.warn(
-          `overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
+          `overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`,
        );
        try {
-          await sleepWithAbort(delayMs, params.abortSignal);
+          await sleepWithAbort(overloadFailoverBackoffMs, params.abortSignal);
        } catch (err) {
          if (params.abortSignal?.aborted) {
            const abortErr = new Error("Operation aborted", { cause: err });
@ -1199,15 +1198,15 @@ export async function runEmbeddedPiAgent(
              }
            }

-            // For overloaded errors, check the rotation cap *before* calling
-            // advanceAuthProfile() to avoid a wasted auth-profile setup cycle.
-            // advanceAuthProfile() runs applyApiKeyInfo() which initialises the
-            // next profile — costly work that is pointless when we already know
-            // we will escalate to cross-provider fallback.
+            // For overloaded errors, check the configured rotation cap *before*
+            // calling advanceAuthProfile() to avoid a wasted auth-profile setup
+            // cycle. advanceAuthProfile() runs applyApiKeyInfo() which
+            // initializes the next profile — costly work that is pointless when
+            // we already know we will escalate to cross-provider fallback.
            // See: https://github.com/openclaw/openclaw/issues/58348
            if (assistantFailoverReason === "overloaded") {
              overloadProfileRotations += 1;
-              if (overloadProfileRotations > MAX_OVERLOAD_PROFILE_ROTATIONS && fallbackConfigured) {
+              if (overloadProfileRotations > overloadProfileRotationLimit && fallbackConfigured) {
                const status = resolveFailoverStatus("overloaded");
                log.warn(
                  `overload profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`,
--- a/src/agents/pi-embedded-runner/run/helpers.ts
+++ b/src/agents/pi-embedded-runner/run/helpers.ts
@ -1,4 +1,4 @@
-import { type BackoffPolicy } from "../../../infra/backoff.js";
+import type { OpenClawConfig } from "../../../config/config.js";
 import { generateSecureToken } from "../../../infra/secure-random.js";
 import { derivePromptTokens, normalizeUsage } from "../../usage.js";
 import type { EmbeddedPiAgentMeta } from "../types.js";
@ -25,22 +25,16 @@ export const RUNTIME_AUTH_REFRESH_MARGIN_MS = 5 * 60 * 1000;
 export const RUNTIME_AUTH_REFRESH_RETRY_MS = 60 * 1000;
 export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000;

-// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
-// enough that fallback still feels responsive within a single turn.
-export const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
-  initialMs: 250,
-  maxMs: 1_500,
-  factor: 2,
-  jitter: 0.2,
-};
+export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0;
+export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1;

-// Maximum number of auth-profile rotations to attempt for overloaded errors
-// before escalating to cross-provider fallback. Overloaded is a provider-level
-// capacity issue — rotating auth profiles on the same provider is unlikely to
-// help and wastes time with backoff delays. A cap of 1 allows one probe attempt
-// (in case the overload was transient) before giving up on the provider.
-// See: https://github.com/openclaw/openclaw/issues/58348
-export const MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
+export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number {
+  return cfg?.auth?.cooldowns?.overloadedBackoffMs ?? DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS;
+}
+
+export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): number {
+  return cfg?.auth?.cooldowns?.overloadedProfileRotations ?? DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS;
+}

 const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
 const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
--- a/src/config/schema.base.generated.ts
+++ b/src/config/schema.base.generated.ts
@ -789,6 +789,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
                type: "number",
                exclusiveMinimum: 0,
              },
+              overloadedProfileRotations: {
+                type: "integer",
+                minimum: 0,
+                maximum: 9007199254740991,
+              },
+              overloadedBackoffMs: {
+                type: "integer",
+                minimum: 0,
+                maximum: 9007199254740991,
+              },
            },
            additionalProperties: false,
          },
@ -13645,6 +13655,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
      help: "Failure window (hours) for backoff counters (default: 24).",
      tags: ["auth", "access"],
    },
+    "auth.cooldowns.overloadedProfileRotations": {
+      label: "Overloaded Profile Rotations",
+      help: "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
+      tags: ["auth", "access", "storage"],
+    },
+    "auth.cooldowns.overloadedBackoffMs": {
+      label: "Overloaded Backoff (ms)",
+      help: "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
+      tags: ["auth", "access", "reliability", "storage"],
+    },
    "agents.defaults.models": {
      label: "Models",
      help: "Configured model catalog (keys are full provider/model IDs).",
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@ -801,6 +801,10 @@ export const FIELD_HELP: Record<string, string> = {
    "Optional per-provider overrides for billing backoff (hours).",
  "auth.cooldowns.billingMaxHours": "Cap (hours) for billing backoff (default: 24).",
  "auth.cooldowns.failureWindowHours": "Failure window (hours) for backoff counters (default: 24).",
+  "auth.cooldowns.overloadedProfileRotations":
+    "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
+  "auth.cooldowns.overloadedBackoffMs":
+    "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
  "agents.defaults.workspace":
    "Default workspace path exposed to agent runtime tools for filesystem context and repo-aware behavior. Set this explicitly when running from wrappers so path resolution stays deterministic.",
  "agents.defaults.bootstrapMaxChars":
--- a/src/config/schema.labels.ts
+++ b/src/config/schema.labels.ts
@ -471,6 +471,8 @@ export const FIELD_LABELS: Record<string, string> = {
  "auth.cooldowns.billingBackoffHoursByProvider": "Billing Backoff Overrides",
  "auth.cooldowns.billingMaxHours": "Billing Backoff Cap (hours)",
  "auth.cooldowns.failureWindowHours": "Failover Window (hours)",
+  "auth.cooldowns.overloadedProfileRotations": "Overloaded Profile Rotations",
+  "auth.cooldowns.overloadedBackoffMs": "Overloaded Backoff (ms)",
  "agents.defaults.models": "Models",
  "agents.defaults.model.primary": "Primary Model",
  "agents.defaults.model.fallbacks": "Model Fallbacks",
--- a/src/config/types.auth.ts
+++ b/src/config/types.auth.ts
@ -26,5 +26,15 @@ export type AuthConfig = {
     * this window, counters reset. Default: 24.
     */
    failureWindowHours?: number;
+    /**
+     * Maximum same-provider auth-profile rotations to allow for overloaded
+     * errors before escalating to cross-provider model fallback. Default: 1.
+     */
+    overloadedProfileRotations?: number;
+    /**
+     * Fixed delay in milliseconds before retrying an overloaded
+     * provider/profile rotation. Default: 0 (no delay).
+     */
+    overloadedBackoffMs?: number;
  };
 };
--- a/src/config/zod-schema.ts
+++ b/src/config/zod-schema.ts
@ -450,6 +450,8 @@ export const OpenClawSchema = z
            billingBackoffHoursByProvider: z.record(z.string(), z.number().positive()).optional(),
            billingMaxHours: z.number().positive().optional(),
            failureWindowHours: z.number().positive().optional(),
+            overloadedProfileRotations: z.number().int().nonnegative().optional(),
+            overloadedBackoffMs: z.number().int().nonnegative().optional(),
          })
          .strict()
          .optional(),