mirror of https://github.com/openclaw/openclaw.git
agents: limit duplicate cooldown probes per provider
This commit is contained in:
parent
bda63c3c7f
commit
95eb9518ef
|
|
@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai
|
|||
- CLI/memory teardown: close cached memory search/index managers in the one-shot CLI shutdown path so watcher-backed memory caches no longer keep completed CLI runs alive after output finishes. (#40389) Thanks @Julbarth.
|
||||
- Tools/web search: treat Brave `llm-context` grounding snippets as plain strings so `web_search` no longer returns empty snippet arrays in LLM Context mode. (#41387) Thanks @zheliu2.
|
||||
- Telegram/exec approvals: reject `/approve` commands aimed at other bots, keep deterministic approval prompts visible when tool-result delivery fails, and stop resolved exact IDs from matching other pending approvals by prefix. (#37233) Thanks @huntharo.
|
||||
- Agents/fallback cooldown probing: cap cooldown-bypass probing to one attempt per provider per fallback run so multi-model same-provider cooldown chains can continue to cross-provider fallbacks instead of repeatedly stalling on duplicate cooldown probes. (#41492) Thanks @cgdusek.
|
||||
- Control UI/Sessions: restore single-column session table collapse on narrow viewport or container widths by moving the responsive table override next to the base grid rule and enabling inline-size container queries. (#12175) Thanks @benjipeng.
|
||||
- Telegram/final preview delivery: split active preview lifecycle from cleanup retention so missing archived preview edits avoid duplicate fallback sends without clearing the live preview or blocking later in-place finalization. (#41662) Thanks @hougangdev.
|
||||
- Cron/state errors: record `lastErrorReason` in cron job state and keep the gateway schema aligned with the full failover-reason set, including regression coverage for protocol conformance. (#14382) Thanks @futuremind2026.
|
||||
|
|
|
|||
|
|
@ -1318,6 +1318,46 @@ describe("runWithModelFallback", () => {
|
|||
}); // Rate limit allows attempt
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
|
||||
});
|
||||
|
||||
it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: [
|
||||
"anthropic/claude-sonnet-4-5",
|
||||
"anthropic/claude-haiku-3-5",
|
||||
"groq/llama-3.3-70b-versatile",
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("Still rate limited")) // First same-provider probe fails
|
||||
.mockResolvedValueOnce("groq success"); // Next provider succeeds
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("groq success");
|
||||
// Primary is skipped, first same-provider fallback is probed, second same-provider fallback
|
||||
// is skipped (probe already attempted), then cross-provider fallback runs.
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -521,6 +521,7 @@ export async function runWithModelFallback<T>(params: {
|
|||
: null;
|
||||
const attempts: FallbackAttempt[] = [];
|
||||
let lastError: unknown;
|
||||
const cooldownProbeUsedProviders = new Set<string>();
|
||||
|
||||
const hasFallbackCandidates = candidates.length > 1;
|
||||
|
||||
|
|
@ -588,7 +589,37 @@ export async function runWithModelFallback<T>(params: {
|
|||
decision.reason === "overloaded" ||
|
||||
decision.reason === "billing"
|
||||
) {
|
||||
// Probe at most once per provider per fallback run when all profiles
|
||||
// are cooldowned. Re-probing every same-provider candidate can stall
|
||||
// cross-provider fallback on providers with long internal retries.
|
||||
if (cooldownProbeUsedProviders.has(candidate.provider)) {
|
||||
const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error,
|
||||
reason: decision.reason,
|
||||
});
|
||||
logModelFallbackDecision({
|
||||
decision: "skip_candidate",
|
||||
runId: params.runId,
|
||||
requestedProvider: params.provider,
|
||||
requestedModel: params.model,
|
||||
candidate,
|
||||
attempt: i + 1,
|
||||
total: candidates.length,
|
||||
reason: decision.reason,
|
||||
error,
|
||||
nextCandidate: candidates[i + 1],
|
||||
isPrimary,
|
||||
requestedModelMatched: requestedModel,
|
||||
fallbackConfigured: hasFallbackCandidates,
|
||||
profileCount: profileIds.length,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
runOptions = { allowTransientCooldownProbe: true };
|
||||
cooldownProbeUsedProviders.add(candidate.provider);
|
||||
}
|
||||
attemptedDuringCooldown = true;
|
||||
logModelFallbackDecision({
|
||||
|
|
|
|||
Loading…
Reference in New Issue