From 0b3d31c0ce8ea444e2fbcf7747522d5577e0a895 Mon Sep 17 00:00:00 2001
From: ryanngit <74137224+ryanngit@users.noreply.github.com>
Date: Tue, 31 Mar 2026 18:09:25 -0700
Subject: [PATCH] feat(auth): WHAM-aware Codex cooldown for multi-profile
 setups (#58625)

Instead of exponential backoff guesses on Codex 429, probe the WHAM
usage API to determine real availability and write accurate cooldowns.

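- Burst/concurrency contention (limit not reached): 15s circuit-break
- Genuine rate limit: half of the real reset time, capped at 2h (team
  rolling window) or 4h (personal and team weekly windows)
- Expired token (WHAM HTTP 401): 12h cooldown
- Dead account (WHAM HTTP 403): 24h cooldown
- Other WHAM HTTP errors: 5m cooldown
- Probe failure (network error, timeout, unusable payload): 30s fail-open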

This prevents cascade lockouts when multiple agents share a pool of
Codex profiles, and avoids wasted retries on genuinely exhausted
profiles.

Closes #26329, relates to #1815, #1522, #23996, #54060

Co-authored-by: ryanngit <ryanngit@users.noreply.github.com>
---
 src/agents/auth-profiles/usage.test.ts  | 197 ++++++++++++++++++++++
 src/agents/auth-profiles/usage.ts       | 210 +++++++++++++++++++++++-
 src/infra/provider-usage.fetch.codex.ts |   3 +
 3 files changed, 406 insertions(+), 4 deletions(-)
diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts
index 1dd20d03e81..6d88f45f969 100644
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -15,6 +15,7 @@ const storeMocks = vi.hoisted(() => ({
   saveAuthProfileStore: vi.fn(),
   updateAuthProfileStoreWithLock: vi.fn().mockResolvedValue(null),
 }));
+const fetchMock = vi.hoisted(() => vi.fn());
 
 vi.mock("./store.js", async (importOriginal) => {
   const original = await importOriginal<typeof import("./store.js")>();
@@ -27,6 +28,8 @@ vi.mock("./store.js", async (importOriginal) => {
 
 beforeEach(() => {
   vi.clearAllMocks();
+  fetchMock.mockReset();
+  vi.stubGlobal("fetch", fetchMock);
   storeMocks.updateAuthProfileStoreWithLock.mockResolvedValue(null);
   authProfileUsageTesting.setDepsForTest({
     saveAuthProfileStore: storeMocks.saveAuthProfileStore,
@@ -40,6 +43,14 @@ function makeStore(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore
     profiles: {
       "anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-test" },
       "openai:default": { type: "api_key", provider: "openai", key: "sk-test-2" },
+      "openai-codex:default": {
+        type: "oauth",
+        provider: "openai-codex",
+        access: "codex-access-token",
+        refresh: "codex-refresh-token",
+        expires: 4_102_444_800_000,
+        accountId: "acct_test_123",
+      },
       "openrouter:default": { type: "api_key", provider: "openrouter", key: "sk-or-test" },
       "kilocode:default": { type: "api_key", provider: "kilocode", key: "sk-kc-test" },
     },
@@ -738,6 +749,192 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
   }
 });
 
+describe("markAuthProfileFailure — WHAM-aware Codex cooldowns", () => {
+  /**
+   * Queues one JSON `fetch` response with the given status; body defaults to `{}`.
+   */
+  function mockWhamResponse(status: number, body?: unknown): void {
+    const payload = body === undefined ? "{}" : JSON.stringify(body);
+    const init = { status, headers: { "content-type": "application/json" } };
+    fetchMock.mockResolvedValueOnce(new Response(payload, init));
+  }
+
+  /**
+   * Marks a failure on `openai-codex:default` with fake timers set to `now`.
+   * With `useLock`, the locked-update mock runs the updater on a clone of `store`.
+   */
+  async function markCodexFailureAt(params: {
+    store: ReturnType<typeof makeStore>;
+    now: number;
+    reason?: "rate_limit" | "unknown";
+    useLock?: boolean;
+  }): Promise<void> {
+    const { store, now, reason = "rate_limit", useLock } = params;
+    vi.useFakeTimers();
+    vi.setSystemTime(now);
+    if (useLock) {
+      storeMocks.updateAuthProfileStoreWithLock.mockImplementationOnce(
+        async (lockParams: { updater: (store: AuthProfileStore) => boolean }) => {
+          const cloned = structuredClone(store);
+          return lockParams.updater(cloned) ? cloned : null;
+        },
+      );
+    }
+    try {
+      await markAuthProfileFailure({ store, profileId: "openai-codex:default", reason });
+    } finally {
+      vi.useRealTimers();
+    }
+  }
+
+  it.each([
+    {
+      label: "burst contention",
+      response: {
+        rate_limit: {
+          limit_reached: false,
+          primary_window: { used_percent: 45, reset_after_seconds: 9_000 },
+        },
+      },
+      expectedMs: 15_000,
+    },
+    {
+      label: "personal rolling window",
+      response: {
+        rate_limit: {
+          limit_reached: true,
+          primary_window: { used_percent: 100, reset_after_seconds: 7_200 },
+        },
+      },
+      expectedMs: 3_600_000,
+    },
+    {
+      label: "team rolling window",
+      response: {
+        rate_limit: {
+          limit_reached: true,
+          primary_window: { used_percent: 100, reset_after_seconds: 7_200 },
+          secondary_window: { used_percent: 85, reset_after_seconds: 201_600 },
+        },
+      },
+      expectedMs: 3_600_000,
+    },
+    {
+      label: "team weekly window",
+      response: {
+        rate_limit: {
+          limit_reached: true,
+          primary_window: { used_percent: 90, reset_after_seconds: 7_200 },
+          secondary_window: { used_percent: 100, reset_after_seconds: 28_800 },
+        },
+      },
+      expectedMs: 14_400_000,
+    },
+  ])("maps $label to the expected cooldown", async ({ response, expectedMs }) => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(200, response);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    expect(fetchMock).toHaveBeenCalledWith(
+      "https://chatgpt.com/backend-api/wham/usage",
+      expect.objectContaining({
+        method: "GET",
+        headers: expect.objectContaining({
+          Authorization: "Bearer codex-access-token",
+          "ChatGPT-Account-Id": "acct_test_123",
+        }),
+      }),
+    );
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + expectedMs);
+  });
+
+  it("maps HTTP 401 to a 12h cooldown", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(401);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 43_200_000);
+  });
+
+  it("maps HTTP 403 to a 24h cooldown", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(403);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 86_400_000);
+  });
+
+  it("maps other HTTP errors to a 5m cooldown", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(500);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 300_000);
+  });
+
+  it("preserves a longer existing cooldown via max semantics", async () => {
+    const now = 1_700_000_000_000;
+    const existingCooldownUntil = now + 6 * 60 * 60 * 1000;
+    const store = makeStore({
+      "openai-codex:default": {
+        cooldownUntil: existingCooldownUntil,
+        cooldownReason: "rate_limit",
+        errorCount: 2,
+        lastFailureAt: now - 1_000,
+      },
+    });
+    mockWhamResponse(200, {
+      rate_limit: {
+        limit_reached: false,
+        primary_window: { used_percent: 25, reset_after_seconds: 300 },
+      },
+    });
+
+    await markCodexFailureAt({ store, now, useLock: true });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(existingCooldownUntil);
+  });
+
+  it("falls back to a 30s cooldown when the WHAM probe fails", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    fetchMock.mockRejectedValueOnce(new Error("network unavailable"));
+
+    await markCodexFailureAt({ store, now, reason: "unknown" });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 30_000);
+  });
+
+  it("leaves non-codex providers on the normal stepped backoff path", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+
+    vi.useFakeTimers();
+    vi.setSystemTime(now);
+    try {
+      await markAuthProfileFailure({
+        store,
+        profileId: "anthropic:default",
+        reason: "rate_limit",
+      });
+    } finally {
+      vi.useRealTimers();
+    }
+
+    expect(fetchMock).not.toHaveBeenCalled();
+    expect(store.usageStats?.["anthropic:default"]?.cooldownUntil).toBe(now + 30_000);
+  });
+});
+
 describe("markAuthProfileFailure — per-model cooldown metadata", () => {
   function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
     const store = makeStore(usageStats);
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 1418b3bae90..e77cf91dc05 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -39,11 +39,192 @@ const FAILURE_REASON_ORDER = new Map<AuthProfileFailureReason, number>(
   FAILURE_REASON_PRIORITY.map((reason, index) => [reason, index]),
 );
 
+const WHAM_USAGE_URL = "https://chatgpt.com/backend-api/wham/usage"; // endpoint probed for usage
+const WHAM_TIMEOUT_MS = 3_000; // the probe request is aborted after this long
+const WHAM_BURST_COOLDOWN_MS = 15_000; // limit_reached === false
+const WHAM_PROBE_FAILURE_COOLDOWN_MS = 30_000; // probe threw or payload was unusable
+const WHAM_HTTP_ERROR_COOLDOWN_MS = 5 * 60 * 1000; // non-OK status other than 401/403
+const WHAM_TOKEN_EXPIRED_COOLDOWN_MS = 12 * 60 * 60 * 1000; // HTTP 401
+const WHAM_DEAD_ACCOUNT_COOLDOWN_MS = 24 * 60 * 60 * 1000; // HTTP 403
+const WHAM_TEAM_ROLLING_MAX_COOLDOWN_MS = 2 * 60 * 60 * 1000; // cap: primary exhausted, secondary present
+const WHAM_PERSONAL_MAX_COOLDOWN_MS = 4 * 60 * 60 * 1000; // cap: no secondary window reported
+const WHAM_TEAM_WEEKLY_MAX_COOLDOWN_MS = 4 * 60 * 60 * 1000; // cap: secondary window exhausted
+
+type WhamUsageWindow = {
+  limit_window_seconds?: number;
+  used_percent?: number; // a finite value >= 100 is treated as exhausted
+  reset_at?: number; // absolute reset time; scaled by 1000 and compared with the caller's `now`
+  reset_after_seconds?: number; // relative reset delay; preferred over reset_at when positive
+};
+
+type WhamUsageResponse = {
+  rate_limit?: {
+    limit_reached?: boolean; // an explicit false is handled as burst contention
+    primary_window?: WhamUsageWindow;
+    secondary_window?: WhamUsageWindow; // when absent, the personal rolling path is used
+  };
+};
+
+type WhamCooldownProbeResult = {
+  cooldownMs: number; // added to the failure time to form cooldownUntil
+  reason: string; // wham_* label naming the case that matched
+};
+
 function isAuthCooldownBypassedForProvider(provider: string | undefined): boolean {
   const normalized = normalizeProviderId(provider ?? "");
   return normalized === "openrouter" || normalized === "kilocode";
 }
 
+/** WHAM is probed only for openai-codex profiles failing with `rate_limit` or `unknown`. */
+function shouldProbeWhamForFailure(
+  provider: string | undefined,
+  reason: AuthProfileFailureReason,
+): boolean {
+  const isProbeableReason = reason === "rate_limit" || reason === "unknown";
+  const isCodexProvider = normalizeProviderId(provider ?? "") === "openai-codex";
+  return isCodexProvider && isProbeableReason;
+}
+
+/**
+ * Milliseconds until a WHAM usage window resets, or `null` when unknown.
+ * Prefers the relative `reset_after_seconds`; otherwise derives the delay from
+ * `reset_at`, which is scaled by 1000 and compared against `now` (epoch ms).
+ */
+function resolveWhamResetMs(window: WhamUsageWindow | undefined, now: number): number | null {
+  const isPositiveFinite = (value: unknown): value is number =>
+    typeof value === "number" && Number.isFinite(value) && value > 0;
+  if (!window) {
+    return null;
+  }
+  if (isPositiveFinite(window.reset_after_seconds)) {
+    return window.reset_after_seconds * 1000;
+  }
+  // A reset_at at or before `now` is stale. Report it as unknown (like a
+  // non-positive reset_after_seconds) instead of 0, so callers fall back to
+  // their probe-failure cooldown rather than writing a zero-length one.
+  const remainingMs = isPositiveFinite(window.reset_at) ? window.reset_at * 1000 - now : 0;
+  return remainingMs > 0 ? remainingMs : null;
+}
+
+/** True when the window reports a finite `used_percent` of at least 100. */
+function isWhamWindowExhausted(window: WhamUsageWindow | undefined): boolean {
+  const usedPercent = window?.used_percent;
+  if (typeof usedPercent !== "number" || !Number.isFinite(usedPercent)) {
+    return false;
+  }
+  return usedPercent >= 100;
+}
+
+/**
+ * Overlays a WHAM-derived cooldown on freshly computed usage stats.
+ * `cooldownUntil` becomes the later of the existing cooldown (only if it is
+ * still in the future) and `now + whamResult.cooldownMs`; every other field
+ * is copied from `computed`.
+ */
+function applyWhamCooldownResult(params: {
+  existing: ProfileUsageStats;
+  computed: ProfileUsageStats;
+  now: number;
+  whamResult: WhamCooldownProbeResult;
+}): ProfileUsageStats {
+  const { existing, computed, now, whamResult } = params;
+  const priorUntil = existing.cooldownUntil;
+  const activePriorUntil =
+    typeof priorUntil === "number" && Number.isFinite(priorUntil) && priorUntil > now
+      ? priorUntil
+      : 0;
+  const whamUntil = now + whamResult.cooldownMs;
+  return { ...computed, cooldownUntil: Math.max(activePriorUntil, whamUntil) };
+}
+
+/**
+ * Probes the WHAM usage endpoint to size the cooldown for one auth profile.
+ * Resolves to `null`, without a request, unless the profile is an OAuth profile
+ * with an access token. Non-OK statuses map to fixed cooldowns (401, 403, other);
+ * `limit_reached === false` maps to the burst cooldown; otherwise the result is
+ * half of the relevant window's reset time, capped per window kind. Unusable
+ * payloads and anything thrown inside the probe yield the probe-failure cooldown.
+ */
+export async function probeWhamForCooldown(
+  store: AuthProfileStore,
+  profileId: string,
+): Promise<WhamCooldownProbeResult | null> {
+  const profile = store.profiles[profileId];
+  if (profile?.type !== "oauth" || !profile.access) {
+    return null;
+  }
+
+  const probeFailed = (): WhamCooldownProbeResult => ({
+    cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS,
+    reason: "wham_probe_failed",
+  });
+  // Half of the remaining reset time, bounded by the cap for that window kind.
+  const halveReset = (
+    resetMs: number | null,
+    maxMs: number,
+    reason: string,
+  ): WhamCooldownProbeResult =>
+    resetMs === null
+      ? probeFailed()
+      : { cooldownMs: Math.min(Math.floor(resetMs / 2), maxMs), reason };
+
+  const abortController = new AbortController();
+  const abortTimer = setTimeout(() => abortController.abort(), WHAM_TIMEOUT_MS);
+  try {
+    const requestHeaders: Record<string, string> = {
+      Authorization: `Bearer ${profile.access}`,
+      Accept: "application/json",
+      "User-Agent": "CodexBar",
+    };
+    if (profile.accountId) {
+      requestHeaders["ChatGPT-Account-Id"] = profile.accountId;
+    }
+    const response = await fetch(WHAM_USAGE_URL, {
+      method: "GET",
+      headers: requestHeaders,
+      signal: abortController.signal,
+    });
+    if (!response.ok) {
+      switch (response.status) {
+        case 401:
+          return { cooldownMs: WHAM_TOKEN_EXPIRED_COOLDOWN_MS, reason: "wham_token_expired" };
+        case 403:
+          return { cooldownMs: WHAM_DEAD_ACCOUNT_COOLDOWN_MS, reason: "wham_account_dead" };
+        default:
+          return { cooldownMs: WHAM_HTTP_ERROR_COOLDOWN_MS, reason: "wham_http_error" };
+      }
+    }
+
+    const usage = (await response.json()) as WhamUsageResponse;
+    const rateLimit = usage.rate_limit;
+    if (!rateLimit) {
+      return probeFailed();
+    }
+    if (rateLimit.limit_reached === false) {
+      return { cooldownMs: WHAM_BURST_COOLDOWN_MS, reason: "wham_burst_contention" };
+    }
+
+    const now = Date.now();
+    const { primary_window: primary, secondary_window: secondary } = rateLimit;
+    const primaryResetMs = resolveWhamResetMs(primary, now);
+    const secondaryResetMs = resolveWhamResetMs(secondary, now);
+    if (!secondary) {
+      return halveReset(primaryResetMs, WHAM_PERSONAL_MAX_COOLDOWN_MS, "wham_personal_rolling");
+    }
+    if (isWhamWindowExhausted(secondary)) {
+      return halveReset(secondaryResetMs, WHAM_TEAM_WEEKLY_MAX_COOLDOWN_MS, "wham_team_weekly");
+    }
+    if (isWhamWindowExhausted(primary)) {
+      return halveReset(primaryResetMs, WHAM_TEAM_ROLLING_MAX_COOLDOWN_MS, "wham_team_rolling");
+    }
+    return probeFailed();
+  } catch {
+    return probeFailed();
+  } finally {
+    clearTimeout(abortTimer);
+  }
+}
+
 export function resolveProfileUnusableUntil(
   stats: Pick<ProfileUsageStats, "cooldownUntil" | "disabledUntil">,
 ): number | null {
@@ -567,6 +748,11 @@ export async function markAuthProfileFailure(params: {
   if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
     return;
   }
+
+  const whamResult = shouldProbeWhamForFailure(profile.provider, reason)
+    ? await probeWhamForCooldown(store, profileId)
+    : null;
+
   let nextStats: ProfileUsageStats | undefined;
   let previousStats: ProfileUsageStats | undefined;
   let updateTime = 0;
@@ -593,8 +779,16 @@ export async function markAuthProfileFailure(params: {
         cfgResolved,
         modelId,
       });
-      nextStats = computed;
-      updateUsageStatsEntry(freshStore, profileId, () => computed);
+      nextStats =
+        whamResult && shouldProbeWhamForFailure(profile.provider, reason)
+          ? applyWhamCooldownResult({
+              existing: previousStats ?? {},
+              computed,
+              now,
+              whamResult,
+            })
+          : computed;
+      updateUsageStatsEntry(freshStore, profileId, () => nextStats ?? computed);
       return true;
     },
   });
@@ -632,8 +826,16 @@ export async function markAuthProfileFailure(params: {
     cfgResolved,
     modelId,
   });
-  nextStats = computed;
-  updateUsageStatsEntry(store, profileId, () => computed);
+  nextStats =
+    whamResult && shouldProbeWhamForFailure(store.profiles[profileId]?.provider, reason)
+      ? applyWhamCooldownResult({
+          existing: previousStats ?? {},
+          computed,
+          now,
+          whamResult,
+        })
+      : computed;
+  updateUsageStatsEntry(store, profileId, () => nextStats ?? computed);
   authProfileUsageDeps.saveAuthProfileStore(store, agentDir);
   logAuthProfileFailureStateChange({
     runId,
diff --git a/src/infra/provider-usage.fetch.codex.ts b/src/infra/provider-usage.fetch.codex.ts
index 0f37417dd18..b7ad369a671 100644
--- a/src/infra/provider-usage.fetch.codex.ts
+++ b/src/infra/provider-usage.fetch.codex.ts
@@ -4,15 +4,18 @@ import type { ProviderUsageSnapshot, UsageWindow } from "./provider-usage.types.
 
 type CodexUsageResponse = {
   rate_limit?: {
+    limit_reached?: boolean;
     primary_window?: {
       limit_window_seconds?: number;
       used_percent?: number;
       reset_at?: number;
+      reset_after_seconds?: number;
     };
     secondary_window?: {
       limit_window_seconds?: number;
       used_percent?: number;
       reset_at?: number;
+      reset_after_seconds?: number;
     };
   };
   plan_type?: string;