agents: preserve transient probe slot on model-not-found probes

This commit is contained in:
Charles Dusek 2026-03-09 23:28:59 -05:00 committed by Altay
parent 47645d9dc6
commit 11e2ece516
No known key found for this signature in database
2 changed files with 54 additions and 1 deletions

View File

@ -1358,6 +1358,46 @@ describe("runWithModelFallback", () => {
});
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
});
it("does not consume transient probe slot when first same-provider probe fails with model_not_found", async () => {
  // Auth store in which the "anthropic" provider is cooling down for a
  // "rate_limit" reason.
  const { dir: agentDir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");

  // Two same-provider fallbacks come before the cross-provider one, so a second
  // anthropic probe is only possible if the first one did not use up the slot.
  const model = {
    primary: "anthropic/claude-opus-4-6",
    fallbacks: [
      "anthropic/claude-sonnet-4-5",
      "anthropic/claude-haiku-3-5",
      "groq/llama-3.3-70b-versatile",
    ],
  };
  const cfg = makeCfg({ agents: { defaults: { model } } });

  // First invocation rejects with a model-not-found style error; the second
  // invocation resolves.
  const run = vi.fn();
  run.mockRejectedValueOnce(new Error("Model not found: anthropic/claude-sonnet-4-5"));
  run.mockResolvedValueOnce("haiku success");

  const outcome = await runWithModelFallback({
    cfg,
    provider: "anthropic",
    model: "claude-opus-4-6",
    run,
    agentDir,
  });

  // Exactly two attempts were made, both against anthropic and both flagged as
  // transient cooldown probes; the groq fallback was never reached.
  const probeOptions = { allowTransientCooldownProbe: true };
  expect(outcome.result).toBe("haiku success");
  expect(run).toHaveBeenCalledTimes(2);
  expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", probeOptions);
  expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", probeOptions);
});
});
});

View File

@ -532,6 +532,7 @@ export async function runWithModelFallback<T>(params: {
params.provider === candidate.provider && params.model === candidate.model;
let runOptions: ModelFallbackRunOptions | undefined;
let attemptedDuringCooldown = false;
let transientProbeProviderForAttempt: string | null = null;
if (authStore) {
const profileIds = resolveAuthProfileOrder({
cfg: params.cfg,
@ -622,7 +623,7 @@ export async function runWithModelFallback<T>(params: {
}
runOptions = { allowTransientCooldownProbe: true };
if (isTransientCooldownReason) {
cooldownProbeUsedProviders.add(candidate.provider);
transientProbeProviderForAttempt = candidate.provider;
}
}
attemptedDuringCooldown = true;
@ -678,6 +679,18 @@ export async function runWithModelFallback<T>(params: {
}
const err = attemptRun.error;
{
// Deferred probe-slot accounting: transientProbeProviderForAttempt holds the
// candidate's provider when this attempt was made under a transient cooldown
// reason, so whether the probe counts as "used" is decided here, once the
// failure reason of the attempt is known.
if (transientProbeProviderForAttempt) {
const probeFailureReason = describeFailoverError(err).reason;
// Failure reasons for which the provider's probe slot is left available.
// NOTE(review): presumably these failures say nothing about whether the
// provider's transient cooldown has cleared — confirm.
const shouldPreserveTransientProbeSlot =
probeFailureReason === "model_not_found" ||
probeFailureReason === "format" ||
probeFailureReason === "auth" ||
probeFailureReason === "auth_permanent" ||
probeFailureReason === "session_expired";
if (!shouldPreserveTransientProbeSlot) {
// Any other failure reason records the provider in
// cooldownProbeUsedProviders, i.e. its probe has been spent.
cooldownProbeUsedProviders.add(transientProbeProviderForAttempt);
}
}
// Context overflow errors should be handled by the inner runner's
// compaction/retry logic, not by model fallback. If one escapes as a
// throw, rethrow it immediately rather than trying a different model