fix(agents): harden rate-limit fallback handoff

Co-authored-by: TechFath3r <thetechfath3r@gmail.com>
This commit is contained in:
Peter Steinberger 2026-04-03 18:26:39 +09:00
parent f5c3b409ea
commit 87b7bb1d14
No known key found for this signature in database
4 changed files with 146 additions and 6 deletions

View File

@ -310,6 +310,35 @@ function mockPrimaryErrorThenFallbackSuccess(errorMessage: string) {
});
}
function mockPrimaryRunLoopRateLimitThenFallbackSuccess(errorMessage: string) {
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
const attemptParams = params as { provider: string };
if (attemptParams.provider === "openai") {
return makeEmbeddedRunnerAttempt({
assistantTexts: [],
lastAssistant: buildEmbeddedRunnerAssistant({
provider: "openai",
model: "mock-1",
stopReason: "length",
errorMessage,
}),
});
}
if (attemptParams.provider === "groq") {
return makeEmbeddedRunnerAttempt({
assistantTexts: ["fallback ok"],
lastAssistant: buildEmbeddedRunnerAssistant({
provider: "groq",
model: "mock-2",
stopReason: "stop",
content: [{ type: "text", text: "fallback ok" }],
}),
});
}
throw new Error(`Unexpected provider ${attemptParams.provider}`);
});
}
function expectOpenAiThenGroqAttemptOrder(params?: { expectOpenAiAuthProfileId?: string }) {
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
const firstCall = runEmbeddedAttemptMock.mock.calls[0]?.[0] as
@ -697,6 +726,39 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
});
});
it("falls back on classified rate limits even when stopReason is not error", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await writeMultiProfileAuthStore(agentDir);
mockPrimaryRunLoopRateLimitThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
const result = await runEmbeddedFallback({
agentDir,
workspaceDir,
sessionKey: "agent:test:rate-limit-retry-limit-fallback",
runId: "run:rate-limit-retry-limit-fallback",
config: {
...makeConfig(),
auth: { cooldowns: { rateLimitedProfileRotations: 999 } },
},
});
expect(result.provider).toBe("groq");
expect(result.model).toBe("mock-2");
expect(result.attempts[0]?.reason).toBe("rate_limit");
expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "openai",
);
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "groq",
);
expect(openaiAttempts.length).toBe(3);
expect(groqAttempts.length).toBe(1);
});
});
it("respects rateLimitedProfileRotations=0 and falls back immediately", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await writeMultiProfileAuthStore(agentDir);

View File

@ -0,0 +1,51 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
mockedClassifyFailoverReason,
mockedGlobalHookRunner,
mockedRunEmbeddedAttempt,
overflowBaseRunParams,
resetRunOverflowCompactionHarnessMocks,
} from "./run.overflow-compaction.harness.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
describe("runEmbeddedPiAgent incomplete-turn safety", () => {
beforeAll(async () => {
({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness());
});
beforeEach(() => {
resetRunOverflowCompactionHarnessMocks();
mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
});
it("warns before retrying when an incomplete turn already sent a message", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
toolMetas: [],
didSendViaMessagingTool: true,
lastAssistant: {
stopReason: "toolUse",
errorMessage: "internal retry interrupted tool execution",
provider: "openai",
model: "mock-1",
content: [],
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
runId: "run-incomplete-turn-messaging-warning",
});
expect(mockedClassifyFailoverReason).toHaveBeenCalledTimes(1);
expect(result.payloads?.[0]?.isError).toBe(true);
expect(result.payloads?.[0]?.text).toContain("verify before retrying");
});
});

View File

@ -334,6 +334,9 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
vi.doMock("../../plugins/provider-runtime.js", () => ({
prepareProviderRuntimeAuth: mockedPrepareProviderRuntimeAuth,
resolveProviderCapabilitiesWithPlugin: vi.fn(() => ({})),
prepareProviderExtraParams: vi.fn(async () => ({})),
wrapProviderStreamFn: vi.fn((_cfg: unknown, _model: unknown, fn: unknown) => fn),
}));
vi.doMock("../auth-profiles.js", () => ({

View File

@ -306,6 +306,7 @@ export async function runEmbeddedPiAgent(
let autoCompactionCount = 0;
let runLoopIterations = 0;
let overloadProfileRotations = 0;
let lastRetryFailoverReason: FailoverReason | null = null;
let rateLimitProfileRotations = 0;
let timeoutCompactionAttempts = 0;
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
@ -448,6 +449,22 @@ export async function runEmbeddedPiAgent(
`provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
`maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
);
if (
fallbackConfigured &&
lastRetryFailoverReason &&
lastRetryFailoverReason !== "timeout" &&
lastRetryFailoverReason !== "model_not_found" &&
lastRetryFailoverReason !== "format" &&
lastRetryFailoverReason !== "session_expired"
) {
throw new FailoverError(message, {
reason: lastRetryFailoverReason,
provider,
model: modelId,
profileId: lastProfileId,
status: resolveFailoverStatus(lastRetryFailoverReason),
});
}
return {
payloads: [
{
@ -1071,6 +1088,7 @@ export async function runEmbeddedPiAgent(
promptFailoverReason !== "timeout" &&
(await advanceAuthProfile())
) {
lastRetryFailoverReason = promptFailoverReason ?? lastRetryFailoverReason;
logPromptFailoverDecision("rotate_profile");
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
continue;
@ -1179,7 +1197,8 @@ export async function runEmbeddedPiAgent(
// Rotate on timeout to try another account/model path in this turn,
// but exclude post-prompt compaction timeouts (model succeeded; no profile issue).
const shouldRotate =
(!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);
(!aborted && (failoverFailure || assistantFailoverReason !== null)) ||
(timedOut && !timedOutDuringCompaction);
if (shouldRotate) {
if (lastProfileId) {
@ -1244,6 +1263,8 @@ export async function runEmbeddedPiAgent(
const rotated = await advanceAuthProfile();
if (rotated) {
lastRetryFailoverReason =
assistantFailoverReason ?? (timedOut ? "timeout" : null) ?? lastRetryFailoverReason;
logAssistantFailoverDecision("rotate_profile");
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
continue;
@ -1389,19 +1410,22 @@ export async function runEmbeddedPiAgent(
// Mark the failing profile for cooldown so multi-profile setups
// rotate away from the exhausted credential on the next turn.
if (lastProfileId) {
const failoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: resolveAuthProfileFailureReason(failoverReason),
reason: resolveAuthProfileFailureReason(assistantFailoverReason),
});
}
// Warn about potential side-effects when mutating tools executed
// before the turn was interrupted, so users don't blindly retry.
// Warn about potential side-effects when the interrupted turn may
// already have mutated state or sent outbound actions.
const hadMutatingTools = attempt.toolMetas.some((t) =>
isLikelyMutatingToolName(t.toolName),
);
const errorText = hadMutatingTools
const hadPotentialSideEffects =
hadMutatingTools ||
attempt.didSendViaMessagingTool ||
(attempt.successfulCronAdds ?? 0) > 0;
const errorText = hadPotentialSideEffects
? "⚠️ Agent couldn't generate a response. Note: some tool actions may have already been executed — please verify before retrying."
: "⚠️ Agent couldn't generate a response. Please try again.";