fix(agents): harden rate-limit fallback handoff

Co-authored-by: TechFath3r <thetechfath3r@gmail.com>
2026-04-03 18:26:39 +09:00 · 2026-04-03 18:26:39 +09:00 · 87b7bb1d14
parent f5c3b409ea
commit 87b7bb1d14
4 changed files with 146 additions and 6 deletions
--- a/src/agents/model-fallback.run-embedded.e2e.test.ts
+++ b/src/agents/model-fallback.run-embedded.e2e.test.ts
@ -310,6 +310,35 @@ function mockPrimaryErrorThenFallbackSuccess(errorMessage: string) {
  });
 }

+function mockPrimaryRunLoopRateLimitThenFallbackSuccess(errorMessage: string) {
+  runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
+    const attemptParams = params as { provider: string };
+    if (attemptParams.provider === "openai") {
+      return makeEmbeddedRunnerAttempt({
+        assistantTexts: [],
+        lastAssistant: buildEmbeddedRunnerAssistant({
+          provider: "openai",
+          model: "mock-1",
+          stopReason: "length",
+          errorMessage,
+        }),
+      });
+    }
+    if (attemptParams.provider === "groq") {
+      return makeEmbeddedRunnerAttempt({
+        assistantTexts: ["fallback ok"],
+        lastAssistant: buildEmbeddedRunnerAssistant({
+          provider: "groq",
+          model: "mock-2",
+          stopReason: "stop",
+          content: [{ type: "text", text: "fallback ok" }],
+        }),
+      });
+    }
+    throw new Error(`Unexpected provider ${attemptParams.provider}`);
+  });
+}
+
 function expectOpenAiThenGroqAttemptOrder(params?: { expectOpenAiAuthProfileId?: string }) {
  expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
  const firstCall = runEmbeddedAttemptMock.mock.calls[0]?.[0] as
@ -697,6 +726,39 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
    });
  });

+  it("falls back on classified rate limits even when stopReason is not error", async () => {
+    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
+      await writeMultiProfileAuthStore(agentDir);
+
+      mockPrimaryRunLoopRateLimitThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
+
+      const result = await runEmbeddedFallback({
+        agentDir,
+        workspaceDir,
+        sessionKey: "agent:test:rate-limit-retry-limit-fallback",
+        runId: "run:rate-limit-retry-limit-fallback",
+        config: {
+          ...makeConfig(),
+          auth: { cooldowns: { rateLimitedProfileRotations: 999 } },
+        },
+      });
+
+      expect(result.provider).toBe("groq");
+      expect(result.model).toBe("mock-2");
+      expect(result.attempts[0]?.reason).toBe("rate_limit");
+      expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
+
+      const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
+        (call) => (call[0] as { provider?: string })?.provider === "openai",
+      );
+      const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
+        (call) => (call[0] as { provider?: string })?.provider === "groq",
+      );
+      expect(openaiAttempts.length).toBe(3);
+      expect(groqAttempts.length).toBe(1);
+    });
+  });
+
  it("respects rateLimitedProfileRotations=0 and falls back immediately", async () => {
    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
      await writeMultiProfileAuthStore(agentDir);
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@ -0,0 +1,51 @@
+import { beforeAll, beforeEach, describe, expect, it } from "vitest";
+import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
+import {
+  loadRunOverflowCompactionHarness,
+  mockedClassifyFailoverReason,
+  mockedGlobalHookRunner,
+  mockedRunEmbeddedAttempt,
+  overflowBaseRunParams,
+  resetRunOverflowCompactionHarnessMocks,
+} from "./run.overflow-compaction.harness.js";
+import type { EmbeddedRunAttemptResult } from "./run/types.js";
+
+let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
+
+describe("runEmbeddedPiAgent incomplete-turn safety", () => {
+  beforeAll(async () => {
+    ({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness());
+  });
+
+  beforeEach(() => {
+    resetRunOverflowCompactionHarnessMocks();
+    mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
+  });
+
+  it("warns before retrying when an incomplete turn already sent a message", async () => {
+    mockedClassifyFailoverReason.mockReturnValue(null);
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        assistantTexts: [],
+        toolMetas: [],
+        didSendViaMessagingTool: true,
+        lastAssistant: {
+          stopReason: "toolUse",
+          errorMessage: "internal retry interrupted tool execution",
+          provider: "openai",
+          model: "mock-1",
+          content: [],
+        } as unknown as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent({
+      ...overflowBaseRunParams,
+      runId: "run-incomplete-turn-messaging-warning",
+    });
+
+    expect(mockedClassifyFailoverReason).toHaveBeenCalledTimes(1);
+    expect(result.payloads?.[0]?.isError).toBe(true);
+    expect(result.payloads?.[0]?.text).toContain("verify before retrying");
+  });
+});
--- a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts
+++ b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts
@ -334,6 +334,9 @@ export async function loadRunOverflowCompactionHarness(): Promise<{

  vi.doMock("../../plugins/provider-runtime.js", () => ({
    prepareProviderRuntimeAuth: mockedPrepareProviderRuntimeAuth,
+    resolveProviderCapabilitiesWithPlugin: vi.fn(() => ({})),
+    prepareProviderExtraParams: vi.fn(async () => ({})),
+    wrapProviderStreamFn: vi.fn((_cfg: unknown, _model: unknown, fn: unknown) => fn),
  }));

  vi.doMock("../auth-profiles.js", () => ({
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@ -306,6 +306,7 @@ export async function runEmbeddedPiAgent(
      let autoCompactionCount = 0;
      let runLoopIterations = 0;
      let overloadProfileRotations = 0;
+      let lastRetryFailoverReason: FailoverReason | null = null;
      let rateLimitProfileRotations = 0;
      let timeoutCompactionAttempts = 0;
      const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
@ -448,6 +449,22 @@ export async function runEmbeddedPiAgent(
                `provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
                `maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
            );
+            if (
+              fallbackConfigured &&
+              lastRetryFailoverReason &&
+              lastRetryFailoverReason !== "timeout" &&
+              lastRetryFailoverReason !== "model_not_found" &&
+              lastRetryFailoverReason !== "format" &&
+              lastRetryFailoverReason !== "session_expired"
+            ) {
+              throw new FailoverError(message, {
+                reason: lastRetryFailoverReason,
+                provider,
+                model: modelId,
+                profileId: lastProfileId,
+                status: resolveFailoverStatus(lastRetryFailoverReason),
+              });
+            }
            return {
              payloads: [
                {
@ -1071,6 +1088,7 @@ export async function runEmbeddedPiAgent(
              promptFailoverReason !== "timeout" &&
              (await advanceAuthProfile())
            ) {
+              lastRetryFailoverReason = promptFailoverReason ?? lastRetryFailoverReason;
              logPromptFailoverDecision("rotate_profile");
              await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
              continue;
@ -1179,7 +1197,8 @@ export async function runEmbeddedPiAgent(
          // Rotate on timeout to try another account/model path in this turn,
          // but exclude post-prompt compaction timeouts (model succeeded; no profile issue).
          const shouldRotate =
-            (!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);
+            (!aborted && (failoverFailure || assistantFailoverReason !== null)) ||
+            (timedOut && !timedOutDuringCompaction);

          if (shouldRotate) {
            if (lastProfileId) {
@ -1244,6 +1263,8 @@ export async function runEmbeddedPiAgent(

            const rotated = await advanceAuthProfile();
            if (rotated) {
+              lastRetryFailoverReason =
+                assistantFailoverReason ?? (timedOut ? "timeout" : null) ?? lastRetryFailoverReason;
              logAssistantFailoverDecision("rotate_profile");
              await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
              continue;
@ -1389,19 +1410,22 @@ export async function runEmbeddedPiAgent(
              // Mark the failing profile for cooldown so multi-profile setups
              // rotate away from the exhausted credential on the next turn.
              if (lastProfileId) {
-                const failoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
                await maybeMarkAuthProfileFailure({
                  profileId: lastProfileId,
-                  reason: resolveAuthProfileFailureReason(failoverReason),
+                  reason: resolveAuthProfileFailureReason(assistantFailoverReason),
                });
              }

-              // Warn about potential side-effects when mutating tools executed
-              // before the turn was interrupted, so users don't blindly retry.
+              // Warn about potential side-effects when the interrupted turn may
+              // already have mutated state or sent outbound actions.
              const hadMutatingTools = attempt.toolMetas.some((t) =>
                isLikelyMutatingToolName(t.toolName),
              );
-              const errorText = hadMutatingTools
+              const hadPotentialSideEffects =
+                hadMutatingTools ||
+                attempt.didSendViaMessagingTool ||
+                (attempt.successfulCronAdds ?? 0) > 0;
+              const errorText = hadPotentialSideEffects
                ? "⚠️ Agent couldn't generate a response. Note: some tool actions may have already been executed — please verify before retrying."
                : "⚠️ Agent couldn't generate a response. Please try again.";