From 0b3d31c0ce8ea444e2fbcf7747522d5577e0a895 Mon Sep 17 00:00:00 2001
From: ryanngit <74137224+ryanngit@users.noreply.github.com>
Date: Tue, 31 Mar 2026 18:09:25 -0700
Subject: [PATCH] feat(auth): WHAM-aware Codex cooldown for multi-profile setups (#58625)

Instead of exponential backoff guesses on Codex 429, probe the WHAM usage
API to determine real availability and write accurate cooldowns.

- Burst/concurrency contention: 15s circuit-break
- Genuine rate limit: proportional to real reset time (capped 2-4h)
- Expired token: 12h cooldown
- Dead account: 24h cooldown
- Other WHAM HTTP errors: 5m cooldown
- Probe failure: 30s fail-open

This prevents cascade lockouts when multiple agents share a pool of
Codex profiles, and avoids wasted retries on genuinely exhausted
profiles.

Closes #26329, relates to #1815, #1522, #23996, #54060

Co-authored-by: ryanngit
---
 src/agents/auth-profiles/usage.test.ts  | 212 ++++++++++++++++++++++
 src/agents/auth-profiles/usage.ts       | 237 +++++++++++++++++++++++-
 src/infra/provider-usage.fetch.codex.ts |   3 +
 3 files changed, 448 insertions(+), 4 deletions(-)

diff --git a/src/agents/auth-profiles/usage.test.ts b/src/agents/auth-profiles/usage.test.ts
index 1dd20d03e81..6d88f45f969 100644
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -15,6 +15,7 @@ const storeMocks = vi.hoisted(() => ({
   saveAuthProfileStore: vi.fn(),
   updateAuthProfileStoreWithLock: vi.fn().mockResolvedValue(null),
 }));
+const fetchMock = vi.hoisted(() => vi.fn());
 
 vi.mock("./store.js", async (importOriginal) => {
   const original = await importOriginal();
@@ -27,6 +28,8 @@ vi.mock("./store.js", async (importOriginal) => {
 
 beforeEach(() => {
   vi.clearAllMocks();
+  fetchMock.mockReset();
+  vi.stubGlobal("fetch", fetchMock);
   storeMocks.updateAuthProfileStoreWithLock.mockResolvedValue(null);
   authProfileUsageTesting.setDepsForTest({
     saveAuthProfileStore: storeMocks.saveAuthProfileStore,
@@ -40,6 +43,14 @@ function makeStore(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore
     profiles: {
       "anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-test" },
       "openai:default": { type: "api_key", provider: "openai", key: "sk-test-2" },
+      "openai-codex:default": {
+        type: "oauth",
+        provider: "openai-codex",
+        access: "codex-access-token",
+        refresh: "codex-refresh-token",
+        expires: 4_102_444_800_000,
+        accountId: "acct_test_123",
+      },
       "openrouter:default": { type: "api_key", provider: "openrouter", key: "sk-or-test" },
       "kilocode:default": { type: "api_key", provider: "kilocode", key: "sk-kc-test" },
     },
@@ -738,6 +749,207 @@ describe("markAuthProfileFailure — active windows do not extend on retry", () 
   }
 });
 
+describe("markAuthProfileFailure — WHAM-aware Codex cooldowns", () => {
+  function mockWhamResponse(status: number, body?: unknown): void {
+    fetchMock.mockResolvedValueOnce(
+      new Response(body === undefined ? "{}" : JSON.stringify(body), {
+        status,
+        headers: { "content-type": "application/json" },
+      }),
+    );
+  }
+
+  async function markCodexFailureAt(params: {
+    store: ReturnType<typeof makeStore>;
+    now: number;
+    reason?: "rate_limit" | "unknown";
+    useLock?: boolean;
+  }): Promise<void> {
+    vi.useFakeTimers();
+    vi.setSystemTime(params.now);
+    if (params.useLock) {
+      storeMocks.updateAuthProfileStoreWithLock.mockImplementationOnce(
+        async (lockParams: { updater: (store: AuthProfileStore) => boolean }) => {
+          const freshStore = structuredClone(params.store);
+          const changed = lockParams.updater(freshStore);
+          return changed ? freshStore : null;
+        },
+      );
+    }
+    try {
+      await markAuthProfileFailure({
+        store: params.store,
+        profileId: "openai-codex:default",
+        reason: params.reason ?? "rate_limit",
+      });
+    } finally {
+      vi.useRealTimers();
+    }
+  }
+
+  it.each([
+    {
+      label: "burst contention",
+      response: {
+        rate_limit: {
+          limit_reached: false,
+          primary_window: { used_percent: 45, reset_after_seconds: 9_000 },
+        },
+      },
+      expectedMs: 15_000,
+    },
+    {
+      label: "personal rolling window",
+      response: {
+        rate_limit: {
+          limit_reached: true,
+          primary_window: { used_percent: 100, reset_after_seconds: 7_200 },
+        },
+      },
+      expectedMs: 3_600_000,
+    },
+    {
+      label: "team rolling window",
+      response: {
+        rate_limit: {
+          limit_reached: true,
+          primary_window: { used_percent: 100, reset_after_seconds: 7_200 },
+          secondary_window: { used_percent: 85, reset_after_seconds: 201_600 },
+        },
+      },
+      expectedMs: 3_600_000,
+    },
+    {
+      label: "team weekly window",
+      response: {
+        rate_limit: {
+          limit_reached: true,
+          primary_window: { used_percent: 90, reset_after_seconds: 7_200 },
+          secondary_window: { used_percent: 100, reset_after_seconds: 28_800 },
+        },
+      },
+      expectedMs: 14_400_000,
+    },
+  ])("maps $label to the expected cooldown", async ({ response, expectedMs }) => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(200, response);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    expect(fetchMock).toHaveBeenCalledWith(
+      "https://chatgpt.com/backend-api/wham/usage",
+      expect.objectContaining({
+        method: "GET",
+        headers: expect.objectContaining({
+          Authorization: "Bearer codex-access-token",
+          "ChatGPT-Account-Id": "acct_test_123",
+        }),
+      }),
+    );
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + expectedMs);
+  });
+
+  it("maps HTTP 401 to a 12h cooldown", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(401);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 43_200_000);
+  });
+
+  it("maps HTTP 403 to a 24h cooldown", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(403);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 86_400_000);
+  });
+
+  it("maps other HTTP errors to a 5m cooldown", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(500);
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 300_000);
+  });
+
+  it("preserves a longer existing cooldown via max semantics", async () => {
+    const now = 1_700_000_000_000;
+    const existingCooldownUntil = now + 6 * 60 * 60 * 1000;
+    const store = makeStore({
+      "openai-codex:default": {
+        cooldownUntil: existingCooldownUntil,
+        cooldownReason: "rate_limit",
+        errorCount: 2,
+        lastFailureAt: now - 1_000,
+      },
+    });
+    mockWhamResponse(200, {
+      rate_limit: {
+        limit_reached: false,
+        primary_window: { used_percent: 25, reset_after_seconds: 300 },
+      },
+    });
+
+    await markCodexFailureAt({ store, now, useLock: true });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(existingCooldownUntil);
+  });
+
+  it("falls back to a 30s cooldown when the WHAM probe fails", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    fetchMock.mockRejectedValueOnce(new Error("network unavailable"));
+
+    await markCodexFailureAt({ store, now, reason: "unknown" });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 30_000);
+  });
+
+  it("treats a reset_at already in the past as a failed probe", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+    mockWhamResponse(200, {
+      rate_limit: {
+        limit_reached: true,
+        primary_window: { used_percent: 100, reset_at: 1_699_999_000 },
+      },
+    });
+
+    await markCodexFailureAt({ store, now });
+
+    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 30_000);
+  });
+
+  it("leaves non-codex providers on the normal stepped backoff path", async () => {
+    const now = 1_700_000_000_000;
+    const store = makeStore({});
+
+    vi.useFakeTimers();
+    vi.setSystemTime(now);
+    try {
+      await markAuthProfileFailure({
+        store,
+        profileId: "anthropic:default",
+        reason: "rate_limit",
+      });
+    } finally {
+      vi.useRealTimers();
+    }
+
+    expect(fetchMock).not.toHaveBeenCalled();
+    expect(store.usageStats?.["anthropic:default"]?.cooldownUntil).toBe(now + 30_000);
+  });
+});
+
 describe("markAuthProfileFailure — per-model cooldown metadata", () => {
   function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
     const store = makeStore(usageStats);
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 1418b3bae90..e77cf91dc05 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -39,11 +39,219 @@ const FAILURE_REASON_ORDER = new Map(
   FAILURE_REASON_PRIORITY.map((reason, index) => [reason, index]),
 );
 
+// WHAM usage probe: endpoint, request timeout, and the cooldown applied for each probe outcome.
+const WHAM_USAGE_URL = "https://chatgpt.com/backend-api/wham/usage";
+const WHAM_TIMEOUT_MS = 3_000; // probe fetch is aborted after this long
+const WHAM_BURST_COOLDOWN_MS = 15_000; // wham_burst_contention
+const WHAM_PROBE_FAILURE_COOLDOWN_MS = 30_000; // wham_probe_failed
+const WHAM_HTTP_ERROR_COOLDOWN_MS = 5 * 60 * 1000; // wham_http_error
+const WHAM_TOKEN_EXPIRED_COOLDOWN_MS = 12 * 60 * 60 * 1000; // wham_token_expired (HTTP 401)
+const WHAM_DEAD_ACCOUNT_COOLDOWN_MS = 24 * 60 * 60 * 1000; // wham_account_dead (HTTP 403)
+const WHAM_TEAM_ROLLING_MAX_COOLDOWN_MS = 2 * 60 * 60 * 1000; // cap for wham_team_rolling
+const WHAM_PERSONAL_MAX_COOLDOWN_MS = 4 * 60 * 60 * 1000; // cap for wham_personal_rolling
+const WHAM_TEAM_WEEKLY_MAX_COOLDOWN_MS = 4 * 60 * 60 * 1000; // cap for wham_team_weekly
+
+/** One rate-limit window as read from the WHAM usage response; every field is optional. */
+type WhamUsageWindow = {
+  limit_window_seconds?: number;
+  used_percent?: number;
+  reset_at?: number;
+  reset_after_seconds?: number;
+};
+
+/** Subset of the WHAM usage payload that the cooldown probe reads. */
+type WhamUsageResponse = {
+  rate_limit?: {
+    limit_reached?: boolean;
+    primary_window?: WhamUsageWindow;
+    secondary_window?: WhamUsageWindow;
+  };
+};
+
+/** Cooldown duration chosen by the probe plus a label for the branch that chose it. */
+type WhamCooldownProbeResult = {
+  cooldownMs: number;
+  reason: string;
+};
+
 function isAuthCooldownBypassedForProvider(provider: string | undefined): boolean {
   const normalized = normalizeProviderId(provider ?? "");
   return normalized === "openrouter" || normalized === "kilocode";
 }
 
+/** True for `openai-codex` profiles whose failure reason is `rate_limit` or `unknown`. */
+function shouldProbeWhamForFailure(
+  provider: string | undefined,
+  reason: AuthProfileFailureReason,
+): boolean {
+  return (
+    normalizeProviderId(provider ?? "") === "openai-codex" &&
+    (reason === "rate_limit" || reason === "unknown")
+  );
+}
+
+/**
+ * Milliseconds until `window` resets, or null when it carries no usable reset time.
+ * Prefers `reset_after_seconds`; otherwise derives it from `reset_at` (treated as epoch seconds).
+ * A `reset_at` that is not after `now` yields null so callers never apply a zero-length cooldown.
+ */
+function resolveWhamResetMs(window: WhamUsageWindow | undefined, now: number): number | null {
+  if (!window) {
+    return null;
+  }
+  if (
+    typeof window.reset_after_seconds === "number" &&
+    Number.isFinite(window.reset_after_seconds) &&
+    window.reset_after_seconds > 0
+  ) {
+    return window.reset_after_seconds * 1000;
+  }
+  if (
+    typeof window.reset_at === "number" &&
+    Number.isFinite(window.reset_at) &&
+    window.reset_at * 1000 > now
+  ) {
+    return window.reset_at * 1000 - now;
+  }
+  return null;
+}
+
+/** True when the window reports a finite `used_percent` of at least 100. */
+function isWhamWindowExhausted(window: WhamUsageWindow | undefined): boolean {
+  return !!(
+    window &&
+    typeof window.used_percent === "number" &&
+    Number.isFinite(window.used_percent) &&
+    window.used_percent >= 100
+  );
+}
+
+/**
+ * Returns `computed` with `cooldownUntil` replaced by the later of the still-active existing
+ * cooldown and `now + whamResult.cooldownMs`, so a probe never shortens a cooldown in effect.
+ */
+function applyWhamCooldownResult(params: {
+  existing: ProfileUsageStats;
+  computed: ProfileUsageStats;
+  now: number;
+  whamResult: WhamCooldownProbeResult;
+}): ProfileUsageStats {
+  const existingCooldownUntil = params.existing.cooldownUntil;
+  const existingActiveCooldownUntil =
+    typeof existingCooldownUntil === "number" &&
+    Number.isFinite(existingCooldownUntil) &&
+    existingCooldownUntil > params.now
+      ? existingCooldownUntil
+      : 0;
+  return {
+    ...params.computed,
+    cooldownUntil: Math.max(
+      existingActiveCooldownUntil,
+      params.now + params.whamResult.cooldownMs,
+    ),
+  };
+}
+
+/**
+ * Queries the WHAM usage endpoint with the profile's OAuth access token (and account id header
+ * when the profile has one) and maps the outcome to a cooldown.
+ *
+ * @returns null when the profile is missing, is not an OAuth profile, or has no access token;
+ *   otherwise a cooldown and reason. Errors from the request or from parsing the response
+ *   resolve to the probe-failure cooldown instead of rejecting.
+ */
+export async function probeWhamForCooldown(
+  store: AuthProfileStore,
+  profileId: string,
+): Promise<WhamCooldownProbeResult | null> {
+  const profile = store.profiles[profileId];
+  if (profile?.type !== "oauth" || !profile.access) {
+    return null;
+  }
+
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), WHAM_TIMEOUT_MS);
+  try {
+    const headers: Record<string, string> = {
+      Authorization: `Bearer ${profile.access}`,
+      Accept: "application/json",
+      "User-Agent": "CodexBar",
+    };
+    if (profile.accountId) {
+      headers["ChatGPT-Account-Id"] = profile.accountId;
+    }
+
+    const res = await fetch(WHAM_USAGE_URL, {
+      method: "GET",
+      headers,
+      signal: controller.signal,
+    });
+
+    if (!res.ok) {
+      if (res.status === 401) {
+        return { cooldownMs: WHAM_TOKEN_EXPIRED_COOLDOWN_MS, reason: "wham_token_expired" };
+      }
+      if (res.status === 403) {
+        return { cooldownMs: WHAM_DEAD_ACCOUNT_COOLDOWN_MS, reason: "wham_account_dead" };
+      }
+      return { cooldownMs: WHAM_HTTP_ERROR_COOLDOWN_MS, reason: "wham_http_error" };
+    }
+
+    const data = (await res.json()) as WhamUsageResponse;
+    if (!data.rate_limit) {
+      return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
+    }
+
+    if (data.rate_limit.limit_reached === false) {
+      return { cooldownMs: WHAM_BURST_COOLDOWN_MS, reason: "wham_burst_contention" };
+    }
+
+    const now = Date.now();
+    const primaryResetMs = resolveWhamResetMs(data.rate_limit.primary_window, now);
+    const secondaryResetMs = resolveWhamResetMs(data.rate_limit.secondary_window, now);
+
+    // No secondary window reported: cool down for half the primary reset time, capped.
+    if (!data.rate_limit.secondary_window) {
+      if (primaryResetMs === null) {
+        return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
+      }
+      return {
+        cooldownMs: Math.min(Math.floor(primaryResetMs / 2), WHAM_PERSONAL_MAX_COOLDOWN_MS),
+        reason: "wham_personal_rolling",
+      };
+    }
+
+    // An exhausted secondary window is checked before the primary window.
+    if (isWhamWindowExhausted(data.rate_limit.secondary_window)) {
+      if (secondaryResetMs === null) {
+        return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
+      }
+      return {
+        cooldownMs: Math.min(Math.floor(secondaryResetMs / 2), WHAM_TEAM_WEEKLY_MAX_COOLDOWN_MS),
+        reason: "wham_team_weekly",
+      };
+    }
+
+    if (isWhamWindowExhausted(data.rate_limit.primary_window)) {
+      if (primaryResetMs === null) {
+        return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
+      }
+      return {
+        cooldownMs: Math.min(Math.floor(primaryResetMs / 2), WHAM_TEAM_ROLLING_MAX_COOLDOWN_MS),
+        reason: "wham_team_rolling",
+      };
+    }
+
+    // limit_reached is not false, yet neither window is exhausted: treat as a failed probe.
+    return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
+  } catch {
+    // Deliberate fail-open: any error thrown above becomes the short probe-failure cooldown.
+    return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
 export function resolveProfileUnusableUntil(
   stats: Pick,
 ): number | null {
@@ -567,6 +775,11 @@ export async function markAuthProfileFailure(params: {
   if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
     return;
   }
+
+  const whamResult = shouldProbeWhamForFailure(profile.provider, reason)
+    ? await probeWhamForCooldown(store, profileId)
+    : null;
+
   let nextStats: ProfileUsageStats | undefined;
   let previousStats: ProfileUsageStats | undefined;
   let updateTime = 0;
@@ -593,8 +806,16 @@ export async function markAuthProfileFailure(params: {
         cfgResolved,
         modelId,
       });
-      nextStats = computed;
-      updateUsageStatsEntry(freshStore, profileId, () => computed);
+      nextStats =
+        whamResult && shouldProbeWhamForFailure(profile.provider, reason)
+          ? applyWhamCooldownResult({
+              existing: previousStats ?? {},
+              computed,
+              now,
+              whamResult,
+            })
+          : computed;
+      updateUsageStatsEntry(freshStore, profileId, () => nextStats ?? computed);
       return true;
     },
   });
@@ -632,8 +853,16 @@ export async function markAuthProfileFailure(params: {
     cfgResolved,
     modelId,
   });
-  nextStats = computed;
-  updateUsageStatsEntry(store, profileId, () => computed);
+  nextStats =
+    whamResult && shouldProbeWhamForFailure(store.profiles[profileId]?.provider, reason)
+      ? applyWhamCooldownResult({
+          existing: previousStats ?? {},
+          computed,
+          now,
+          whamResult,
+        })
+      : computed;
+  updateUsageStatsEntry(store, profileId, () => nextStats ?? computed);
   authProfileUsageDeps.saveAuthProfileStore(store, agentDir);
   logAuthProfileFailureStateChange({
     runId,
diff --git a/src/infra/provider-usage.fetch.codex.ts b/src/infra/provider-usage.fetch.codex.ts
index 0f37417dd18..b7ad369a671 100644
--- a/src/infra/provider-usage.fetch.codex.ts
+++ b/src/infra/provider-usage.fetch.codex.ts
@@ -4,15 +4,18 @@ import type { ProviderUsageSnapshot, UsageWindow } from "./provider-usage.types.
 
 type CodexUsageResponse = {
   rate_limit?: {
+    limit_reached?: boolean;
     primary_window?: {
       limit_window_seconds?: number;
       used_percent?: number;
       reset_at?: number;
+      reset_after_seconds?: number;
     };
     secondary_window?: {
       limit_window_seconds?: number;
       used_percent?: number;
       reset_at?: number;
+      reset_after_seconds?: number;
     };
   };
   plan_type?: string;