agents: limit duplicate cooldown probes per provider

This commit is contained in:
Charles Dusek 2026-03-09 22:55:14 -05:00 committed by Altay
parent bda63c3c7f
commit 95eb9518ef
No known key found for this signature in database
3 changed files with 72 additions and 0 deletions

View File

@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai
- CLI/memory teardown: close cached memory search/index managers in the one-shot CLI shutdown path so watcher-backed memory caches no longer keep completed CLI runs alive after output finishes. (#40389) Thanks @Julbarth.
- Tools/web search: treat Brave `llm-context` grounding snippets as plain strings so `web_search` no longer returns empty snippet arrays in LLM Context mode. (#41387) Thanks @zheliu2.
- Telegram/exec approvals: reject `/approve` commands aimed at other bots, keep deterministic approval prompts visible when tool-result delivery fails, and stop resolved exact IDs from matching other pending approvals by prefix. (#37233) Thanks @huntharo.
- Agents/fallback cooldown probing: cap cooldown-bypass probing to one attempt per provider per fallback run so multi-model same-provider cooldown chains can continue to cross-provider fallbacks instead of repeatedly stalling on duplicate cooldown probes. (#41492) Thanks @cgdusek.
- Control UI/Sessions: restore single-column session table collapse on narrow viewport or container widths by moving the responsive table override next to the base grid rule and enabling inline-size container queries. (#12175) Thanks @benjipeng.
- Telegram/final preview delivery: split active preview lifecycle from cleanup retention so missing archived preview edits avoid duplicate fallback sends without clearing the live preview or blocking later in-place finalization. (#41662) Thanks @hougangdev.
- Cron/state errors: record `lastErrorReason` in cron job state and keep the gateway schema aligned with the full failover-reason set, including regression coverage for protocol conformance. (#14382) Thanks @futuremind2026.

View File

@ -1318,6 +1318,46 @@ describe("runWithModelFallback", () => {
}); // Rate limit allows attempt
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
});
it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => {
  // Seed an auth store in which the anthropic provider is already cooling down.
  const { dir: agentDir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
  const config = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "anthropic/claude-opus-4-6",
          fallbacks: [
            "anthropic/claude-sonnet-4-5",
            "anthropic/claude-haiku-3-5",
            "groq/llama-3.3-70b-versatile",
          ],
        },
      },
    },
  });
  // The single permitted same-provider cooldown probe fails; the first
  // cross-provider candidate then succeeds.
  const runFn = vi
    .fn()
    .mockRejectedValueOnce(new Error("Still rate limited"))
    .mockResolvedValueOnce("groq success");
  const outcome = await runWithModelFallback({
    cfg: config,
    provider: "anthropic",
    model: "claude-opus-4-6",
    run: runFn,
    agentDir,
  });
  expect(outcome.result).toBe("groq success");
  // Exactly two attempts are expected: the primary is skipped outright, the
  // first same-provider fallback consumes the one cooldown probe, the second
  // same-provider fallback is skipped because the probe was already spent, and
  // finally the cross-provider model runs.
  expect(runFn).toHaveBeenCalledTimes(2);
  expect(runFn).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
    allowTransientCooldownProbe: true,
  });
  expect(runFn).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
});
});
});

View File

@ -521,6 +521,7 @@ export async function runWithModelFallback<T>(params: {
: null;
const attempts: FallbackAttempt[] = [];
let lastError: unknown;
const cooldownProbeUsedProviders = new Set<string>();
const hasFallbackCandidates = candidates.length > 1;
@ -588,7 +589,37 @@ export async function runWithModelFallback<T>(params: {
decision.reason === "overloaded" ||
decision.reason === "billing"
) {
// Probe at most once per provider per fallback run when all profiles
// are cooldowned. Re-probing every same-provider candidate can stall
// cross-provider fallback on providers with long internal retries.
if (cooldownProbeUsedProviders.has(candidate.provider)) {
const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
attempts.push({
provider: candidate.provider,
model: candidate.model,
error,
reason: decision.reason,
});
logModelFallbackDecision({
decision: "skip_candidate",
runId: params.runId,
requestedProvider: params.provider,
requestedModel: params.model,
candidate,
attempt: i + 1,
total: candidates.length,
reason: decision.reason,
error,
nextCandidate: candidates[i + 1],
isPrimary,
requestedModelMatched: requestedModel,
fallbackConfigured: hasFallbackCandidates,
profileCount: profileIds.length,
});
continue;
}
runOptions = { allowTransientCooldownProbe: true };
cooldownProbeUsedProviders.add(candidate.provider);
}
attemptedDuringCooldown = true;
logModelFallbackDecision({