fix(agents): avoid duplicate same-provider cooldown probes in fallback runs (#41711)

Merged via squash.

Prepared head SHA: 8be8967bcb
Co-authored-by: cgdusek <38732970+cgdusek@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
Charles Dusek 2026-03-10 07:26:47 -05:00 committed by GitHub
parent bda63c3c7f
commit 048e25c2b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 129 additions and 0 deletions

View File

@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
- Mattermost/plugin send actions: normalize direct `replyTo` fallback handling so threaded plugin sends trim blank IDs and reuse the correct reply target again. (#41176) Thanks @hnykda.
- MS Teams/allowlist resolution: use the General channel conversation ID as the resolved team key (with Graph GUID fallback) so Bot Framework runtime `channelData.team.id` matching works for team and team/channel allowlist entries. (#41838) Thanks @BradGroux.
- Mattermost/Markdown formatting: preserve first-line indentation when stripping bot mentions so nested list items and indented code blocks keep their structure, and render Mattermost tables natively by default instead of fenced-code fallback. (#18655) Thanks @echo931.
- Agents/fallback cooldown probing: cap cooldown-bypass probing to one attempt per provider per fallback run so multi-model same-provider cooldown chains can continue to cross-provider fallbacks instead of repeatedly stalling on duplicate cooldown probes. (#41711) Thanks @cgdusek.
## 2026.3.8

View File

@@ -1318,6 +1318,86 @@ describe("runWithModelFallback", () => {
}); // Rate limit allows attempt
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
});
it("limits cooldown probes to one per provider before moving to cross-provider fallback", async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: [
"anthropic/claude-sonnet-4-5",
"anthropic/claude-haiku-3-5",
"groq/llama-3.3-70b-versatile",
],
},
},
},
});
const run = vi
.fn()
.mockRejectedValueOnce(new Error("Still rate limited")) // First same-provider probe fails
.mockResolvedValueOnce("groq success"); // Next provider succeeds
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
expect(result.result).toBe("groq success");
// Primary is skipped, first same-provider fallback is probed, second same-provider fallback
// is skipped (probe already attempted), then cross-provider fallback runs.
expect(run).toHaveBeenCalledTimes(2);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
});
it("does not consume transient probe slot when first same-provider probe fails with model_not_found", async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: [
"anthropic/claude-sonnet-4-5",
"anthropic/claude-haiku-3-5",
"groq/llama-3.3-70b-versatile",
],
},
},
},
});
const run = vi
.fn()
.mockRejectedValueOnce(new Error("Model not found: anthropic/claude-sonnet-4-5"))
.mockResolvedValueOnce("haiku success");
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
expect(result.result).toBe("haiku success");
expect(run).toHaveBeenCalledTimes(2);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", {
allowTransientCooldownProbe: true,
});
});
});
});

View File

@@ -521,6 +521,7 @@ export async function runWithModelFallback<T>(params: {
: null;
const attempts: FallbackAttempt[] = [];
let lastError: unknown;
const cooldownProbeUsedProviders = new Set<string>();
const hasFallbackCandidates = candidates.length > 1;
@@ -531,6 +532,7 @@ export async function runWithModelFallback<T>(params: {
params.provider === candidate.provider && params.model === candidate.model;
let runOptions: ModelFallbackRunOptions | undefined;
let attemptedDuringCooldown = false;
let transientProbeProviderForAttempt: string | null = null;
if (authStore) {
const profileIds = resolveAuthProfileOrder({
cfg: params.cfg,
@@ -588,7 +590,41 @@
decision.reason === "overloaded" ||
decision.reason === "billing"
) {
// Probe at most once per provider per fallback run when all profiles
// are cooldowned. Re-probing every same-provider candidate can stall
// cross-provider fallback on providers with long internal retries.
const isTransientCooldownReason =
decision.reason === "rate_limit" || decision.reason === "overloaded";
if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) {
const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
attempts.push({
provider: candidate.provider,
model: candidate.model,
error,
reason: decision.reason,
});
logModelFallbackDecision({
decision: "skip_candidate",
runId: params.runId,
requestedProvider: params.provider,
requestedModel: params.model,
candidate,
attempt: i + 1,
total: candidates.length,
reason: decision.reason,
error,
nextCandidate: candidates[i + 1],
isPrimary,
requestedModelMatched: requestedModel,
fallbackConfigured: hasFallbackCandidates,
profileCount: profileIds.length,
});
continue;
}
runOptions = { allowTransientCooldownProbe: true };
if (isTransientCooldownReason) {
transientProbeProviderForAttempt = candidate.provider;
}
}
attemptedDuringCooldown = true;
logModelFallbackDecision({
@@ -643,6 +679,18 @@
}
const err = attemptRun.error;
{
if (transientProbeProviderForAttempt) {
const probeFailureReason = describeFailoverError(err).reason;
const shouldPreserveTransientProbeSlot =
probeFailureReason === "model_not_found" ||
probeFailureReason === "format" ||
probeFailureReason === "auth" ||
probeFailureReason === "auth_permanent" ||
probeFailureReason === "session_expired";
if (!shouldPreserveTransientProbeSlot) {
cooldownProbeUsedProviders.add(transientProbeProviderForAttempt);
}
}
// Context overflow errors should be handled by the inner runner's
// compaction/retry logic, not by model fallback. If one escapes as a
// throw, rethrow it immediately rather than trying a different model