diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 2f866d0df05..8bc1a6ecb47 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -1358,6 +1358,46 @@ describe("runWithModelFallback", () => { }); expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); }); + + it("does not consume transient probe slot when first same-provider probe fails with model_not_found", async () => { /* Scenario: the auth store is built by makeAuthStoreWithCooldown for provider anthropic with reason rate_limit, and the config lists two anthropic fallbacks followed by one groq fallback. The mocked run rejects once with a Model not found error and then resolves. */ + const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit"); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: [ + "anthropic/claude-sonnet-4-5", + "anthropic/claude-haiku-3-5", + "groq/llama-3.3-70b-versatile", + ], + }, + }, + }, + }); + + const run = vi + .fn() + .mockRejectedValueOnce(new Error("Model not found: anthropic/claude-sonnet-4-5")) + .mockResolvedValueOnce("haiku success"); + + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); + + expect(result.result).toBe("haiku success"); + expect(run).toHaveBeenCalledTimes(2); /* Exactly two run calls are expected: claude-sonnet-4-5 first, then claude-haiku-3-5, each with allowTransientCooldownProbe set to true; the groq fallback must not be reached. NOTE(review): run is never expected for the primary claude-opus-4-6 - presumably it is skipped because of the cooldown; confirm against runWithModelFallback. */ + expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", { + allowTransientCooldownProbe: true, + }); + expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", { + allowTransientCooldownProbe: true, + }); + }); }); }); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index c701217ad8a..cda7771d329 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -532,6 +532,7 @@ export async function runWithModelFallback(params: { params.provider === candidate.provider && params.model === candidate.model; let runOptions: ModelFallbackRunOptions | undefined; let attemptedDuringCooldown = false; + let transientProbeProviderForAttempt: string | null = null; /* Starts as null for each attempt; later assigned candidate.provider when the attempt runs under a transient cooldown reason, and read after a failed attempt to decide whether that provider is added to cooldownProbeUsedProviders. */ if (authStore) { const profileIds = resolveAuthProfileOrder({ cfg: params.cfg, @@ -622,7 +623,7 
@@ export async function runWithModelFallback(params: { } runOptions = { allowTransientCooldownProbe: true }; if (isTransientCooldownReason) { - cooldownProbeUsedProviders.add(candidate.provider); + transientProbeProviderForAttempt = candidate.provider; /* Only remember the provider here instead of adding it to cooldownProbeUsedProviders right away; the add is deferred until the error of this attempt has been classified. */ } } attemptedDuringCooldown = true; @@ -678,6 +679,18 @@ export async function runWithModelFallback(params: { } const err = attemptRun.error; { + if (transientProbeProviderForAttempt) { /* This attempt ran as a transient cooldown probe. Its provider is added to cooldownProbeUsedProviders unless the reason from describeFailoverError is model_not_found, format, auth, auth_permanent or session_expired. NOTE(review): a reason outside that list (including a missing one) consumes the slot - confirm that is intended. */ + const probeFailureReason = describeFailoverError(err).reason; + const shouldPreserveTransientProbeSlot = + probeFailureReason === "model_not_found" || + probeFailureReason === "format" || + probeFailureReason === "auth" || + probeFailureReason === "auth_permanent" || + probeFailureReason === "session_expired"; + if (!shouldPreserveTransientProbeSlot) { + cooldownProbeUsedProviders.add(transientProbeProviderForAttempt); + } + } // Context overflow errors should be handled by the inner runner's // compaction/retry logic, not by model fallback. If one escapes as a // throw, rethrow it immediately rather than trying a different model