diff --git a/CHANGELOG.md b/CHANGELOG.md index 155bc867062..42014edaad0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai - CLI/memory teardown: close cached memory search/index managers in the one-shot CLI shutdown path so watcher-backed memory caches no longer keep completed CLI runs alive after output finishes. (#40389) thanks @Julbarth. - Tools/web search: treat Brave `llm-context` grounding snippets as plain strings so `web_search` no longer returns empty snippet arrays in LLM Context mode. (#41387) thanks @zheliu2. - Telegram/exec approvals: reject `/approve` commands aimed at other bots, keep deterministic approval prompts visible when tool-result delivery fails, and stop resolved exact IDs from matching other pending approvals by prefix. (#37233) Thanks @huntharo. +- Agents/fallback cooldown probing: cap cooldown-bypass probing to one attempt per provider per fallback run so multi-model same-provider cooldown chains can continue to cross-provider fallbacks instead of repeatedly stalling on duplicate cooldown probes. (#41492) Thanks @cgdusek. - Control UI/Sessions: restore single-column session table collapse on narrow viewport or container widths by moving the responsive table override next to the base grid rule and enabling inline-size container queries. (#12175) Thanks @benjipeng. - Telegram/final preview delivery: split active preview lifecycle from cleanup retention so missing archived preview edits avoid duplicate fallback sends without clearing the live preview or blocking later in-place finalization. (#41662) thanks @hougangdev. - Cron/state errors: record `lastErrorReason` in cron job state and keep the gateway schema aligned with the full failover-reason set, including regression coverage for protocol conformance. (#14382) thanks @futuremind2026. 
diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index e4c84028e95..2f866d0df05 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -1318,6 +1318,46 @@ describe("runWithModelFallback", () => { }); // Rate limit allows attempt expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works }); + + it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => { + const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit"); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: [ + "anthropic/claude-sonnet-4-5", + "anthropic/claude-haiku-3-5", + "groq/llama-3.3-70b-versatile", + ], + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error("Still rate limited")) // First same-provider probe fails + .mockResolvedValueOnce("groq success"); // Next provider succeeds + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); + + expect(result.result).toBe("groq success"); + // Primary is skipped, first same-provider fallback is probed, second same-provider fallback + // is skipped (probe already attempted), then cross-provider fallback runs. 
+ expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", { + allowTransientCooldownProbe: true, + }); + expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); + }); }); }); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 373e10c936f..0e3ca5c758d 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -521,6 +521,7 @@ export async function runWithModelFallback(params: { : null; const attempts: FallbackAttempt[] = []; let lastError: unknown; + const cooldownProbeUsedProviders = new Set<string>(); const hasFallbackCandidates = candidates.length > 1; @@ -588,7 +589,37 @@ export async function runWithModelFallback(params: { decision.reason === "overloaded" || decision.reason === "billing" ) { + // Probe at most once per provider per fallback run when all profiles + // are in cooldown. Re-probing every same-provider candidate can stall + // cross-provider fallback on providers with long internal retries. + if (cooldownProbeUsedProviders.has(candidate.provider)) { + const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error, + reason: decision.reason, + }); + logModelFallbackDecision({ + decision: "skip_candidate", + runId: params.runId, + requestedProvider: params.provider, + requestedModel: params.model, + candidate, + attempt: i + 1, + total: candidates.length, + reason: decision.reason, + error, + nextCandidate: candidates[i + 1], + isPrimary, + requestedModelMatched: requestedModel, + fallbackConfigured: hasFallbackCandidates, + profileCount: profileIds.length, + }); + continue; + } runOptions = { allowTransientCooldownProbe: true }; + cooldownProbeUsedProviders.add(candidate.provider); } attemptedDuringCooldown = true; logModelFallbackDecision({