feat(auth): WHAM-aware Codex cooldown for multi-profile setups (#58625)

Instead of stepped-backoff guesses on Codex 429, probe the WHAM
usage API to determine real availability and write accurate cooldowns.

- Burst/concurrency contention: 15s circuit-break
- Genuine rate limit: proportional to real reset time (capped 2-4h)
- Expired token: 12h cooldown
- Dead account: 24h cooldown
- Probe failure: 30s fail-open

This prevents cascade lockouts when multiple agents share a pool of
Codex profiles, and avoids wasted retries on genuinely exhausted
profiles.

Closes #26329, relates to #1815, #1522, #23996, #54060

Co-authored-by: ryanngit <ryanngit@users.noreply.github.com>
This commit is contained in:
ryanngit 2026-03-31 18:09:25 -07:00 committed by GitHub
parent 915e15c13d
commit 0b3d31c0ce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 406 additions and 4 deletions

View File

@ -15,6 +15,7 @@ const storeMocks = vi.hoisted(() => ({
saveAuthProfileStore: vi.fn(),
updateAuthProfileStoreWithLock: vi.fn().mockResolvedValue(null),
}));
const fetchMock = vi.hoisted(() => vi.fn());
vi.mock("./store.js", async (importOriginal) => {
const original = await importOriginal<typeof import("./store.js")>();
@ -27,6 +28,8 @@ vi.mock("./store.js", async (importOriginal) => {
beforeEach(() => {
vi.clearAllMocks();
fetchMock.mockReset();
vi.stubGlobal("fetch", fetchMock);
storeMocks.updateAuthProfileStoreWithLock.mockResolvedValue(null);
authProfileUsageTesting.setDepsForTest({
saveAuthProfileStore: storeMocks.saveAuthProfileStore,
@ -40,6 +43,14 @@ function makeStore(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore
profiles: {
"anthropic:default": { type: "api_key", provider: "anthropic", key: "sk-test" },
"openai:default": { type: "api_key", provider: "openai", key: "sk-test-2" },
"openai-codex:default": {
type: "oauth",
provider: "openai-codex",
access: "codex-access-token",
refresh: "codex-refresh-token",
expires: 4_102_444_800_000,
accountId: "acct_test_123",
},
"openrouter:default": { type: "api_key", provider: "openrouter", key: "sk-or-test" },
"kilocode:default": { type: "api_key", provider: "kilocode", key: "sk-kc-test" },
},
@ -738,6 +749,192 @@ describe("markAuthProfileFailure — active windows do not extend on retry", ()
}
});
// WHAM-aware Codex cooldown behavior: failing a Codex profile triggers a probe
// of the WHAM usage API, and the written cooldown reflects the account's real
// state (burst vs. genuine limit vs. dead token) rather than stepped backoff.
describe("markAuthProfileFailure — WHAM-aware Codex cooldowns", () => {
  // Queues exactly one mocked WHAM HTTP response on the stubbed global fetch.
  function mockWhamResponse(status: number, body?: unknown): void {
    fetchMock.mockResolvedValueOnce(
      new Response(body === undefined ? "{}" : JSON.stringify(body), {
        status,
        headers: { "content-type": "application/json" },
      }),
    );
  }
  // Fails the Codex profile at a pinned clock time. With `useLock`, the
  // locked-update path is exercised: the updater runs against a clone of the
  // store, mimicking a lock that hands the updater a freshly loaded store.
  async function markCodexFailureAt(params: {
    store: ReturnType<typeof makeStore>;
    now: number;
    reason?: "rate_limit" | "unknown";
    useLock?: boolean;
  }): Promise<void> {
    vi.useFakeTimers();
    vi.setSystemTime(params.now);
    if (params.useLock) {
      storeMocks.updateAuthProfileStoreWithLock.mockImplementationOnce(
        async (lockParams: { updater: (store: AuthProfileStore) => boolean }) => {
          const freshStore = structuredClone(params.store);
          const changed = lockParams.updater(freshStore);
          return changed ? freshStore : null;
        },
      );
    }
    try {
      await markAuthProfileFailure({
        store: params.store,
        profileId: "openai-codex:default",
        reason: params.reason ?? "rate_limit",
      });
    } finally {
      // Always restore real timers, even if the call under test throws.
      vi.useRealTimers();
    }
  }
  it.each([
    {
      // limit_reached=false on a failure ⇒ transient contention ⇒ 15s break.
      label: "burst contention",
      response: {
        rate_limit: {
          limit_reached: false,
          primary_window: { used_percent: 45, reset_after_seconds: 9_000 },
        },
      },
      expectedMs: 15_000,
    },
    {
      // No secondary window ⇒ personal plan; half the 7200s reset ⇒ 1h.
      label: "personal rolling window",
      response: {
        rate_limit: {
          limit_reached: true,
          primary_window: { used_percent: 100, reset_after_seconds: 7_200 },
        },
      },
      expectedMs: 3_600_000,
    },
    {
      // Secondary present but under 100%; exhausted primary drives the
      // cooldown: half of 7200s ⇒ 1h (under the 2h team-rolling cap).
      label: "team rolling window",
      response: {
        rate_limit: {
          limit_reached: true,
          primary_window: { used_percent: 100, reset_after_seconds: 7_200 },
          secondary_window: { used_percent: 85, reset_after_seconds: 201_600 },
        },
      },
      expectedMs: 3_600_000,
    },
    {
      // Exhausted secondary (weekly) wins over the primary: half of 28800s
      // ⇒ 4h, which coincides with the weekly cap.
      label: "team weekly window",
      response: {
        rate_limit: {
          limit_reached: true,
          primary_window: { used_percent: 90, reset_after_seconds: 7_200 },
          secondary_window: { used_percent: 100, reset_after_seconds: 28_800 },
        },
      },
      expectedMs: 14_400_000,
    },
  ])("maps $label to the expected cooldown", async ({ response, expectedMs }) => {
    const now = 1_700_000_000_000;
    const store = makeStore({});
    mockWhamResponse(200, response);
    await markCodexFailureAt({ store, now });
    // Exactly one probe, carrying the profile's bearer token and account id.
    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(fetchMock).toHaveBeenCalledWith(
      "https://chatgpt.com/backend-api/wham/usage",
      expect.objectContaining({
        method: "GET",
        headers: expect.objectContaining({
          Authorization: "Bearer codex-access-token",
          "ChatGPT-Account-Id": "acct_test_123",
        }),
      }),
    );
    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + expectedMs);
  });
  // 401 from WHAM ⇒ expired access token ⇒ long 12h cooldown.
  it("maps HTTP 401 to a 12h cooldown", async () => {
    const now = 1_700_000_000_000;
    const store = makeStore({});
    mockWhamResponse(401);
    await markCodexFailureAt({ store, now });
    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 43_200_000);
  });
  // 403 from WHAM ⇒ dead/deactivated account ⇒ 24h cooldown.
  it("maps HTTP 403 to a 24h cooldown", async () => {
    const now = 1_700_000_000_000;
    const store = makeStore({});
    mockWhamResponse(403);
    await markCodexFailureAt({ store, now });
    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 86_400_000);
  });
  // Any other HTTP error ⇒ 5m cooldown before re-probing.
  it("maps other HTTP errors to a 5m cooldown", async () => {
    const now = 1_700_000_000_000;
    const store = makeStore({});
    mockWhamResponse(500);
    await markCodexFailureAt({ store, now });
    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 300_000);
  });
  // A WHAM result must never shorten a longer cooldown already in effect.
  // NOTE(review): the assertion reads the original store, but the lock mock
  // only mutates a structuredClone — if the lock path never writes back to
  // params.store, this would pass trivially; confirm against
  // markAuthProfileFailure, or assert on the store returned by the lock mock.
  it("preserves a longer existing cooldown via max semantics", async () => {
    const now = 1_700_000_000_000;
    const existingCooldownUntil = now + 6 * 60 * 60 * 1000;
    const store = makeStore({
      "openai-codex:default": {
        cooldownUntil: existingCooldownUntil,
        cooldownReason: "rate_limit",
        errorCount: 2,
        lastFailureAt: now - 1_000,
      },
    });
    mockWhamResponse(200, {
      rate_limit: {
        limit_reached: false,
        primary_window: { used_percent: 25, reset_after_seconds: 300 },
      },
    });
    await markCodexFailureAt({ store, now, useLock: true });
    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(existingCooldownUntil);
  });
  // Network failure during the probe fails open with a short 30s cooldown.
  it("falls back to a 30s cooldown when the WHAM probe fails", async () => {
    const now = 1_700_000_000_000;
    const store = makeStore({});
    fetchMock.mockRejectedValueOnce(new Error("network unavailable"));
    await markCodexFailureAt({ store, now, reason: "unknown" });
    expect(store.usageStats?.["openai-codex:default"]?.cooldownUntil).toBe(now + 30_000);
  });
  // Non-Codex providers must never trigger a WHAM probe and keep their
  // ordinary stepped backoff (first step: 30s).
  it("leaves non-codex providers on the normal stepped backoff path", async () => {
    const now = 1_700_000_000_000;
    const store = makeStore({});
    vi.useFakeTimers();
    vi.setSystemTime(now);
    try {
      await markAuthProfileFailure({
        store,
        profileId: "anthropic:default",
        reason: "rate_limit",
      });
    } finally {
      vi.useRealTimers();
    }
    expect(fetchMock).not.toHaveBeenCalled();
    expect(store.usageStats?.["anthropic:default"]?.cooldownUntil).toBe(now + 30_000);
  });
});
describe("markAuthProfileFailure — per-model cooldown metadata", () => {
function makeStoreWithCopilot(usageStats: AuthProfileStore["usageStats"]): AuthProfileStore {
const store = makeStore(usageStats);

View File

@ -39,11 +39,192 @@ const FAILURE_REASON_ORDER = new Map<AuthProfileFailureReason, number>(
FAILURE_REASON_PRIORITY.map((reason, index) => [reason, index]),
);
// WHAM usage probe: endpoint, probe timeout, and the cooldown durations
// applied per probe outcome (see probeWhamForCooldown).
const WHAM_USAGE_URL = "https://chatgpt.com/backend-api/wham/usage";
// Abort the probe after this long so failure handling never stalls on WHAM.
const WHAM_TIMEOUT_MS = 3_000;
// limit_reached=false: transient burst/concurrency contention — short break.
const WHAM_BURST_COOLDOWN_MS = 15_000;
// Probe errored or the response was unparseable: fail open, retry soon.
const WHAM_PROBE_FAILURE_COOLDOWN_MS = 30_000;
// WHAM returned a non-auth HTTP error.
const WHAM_HTTP_ERROR_COOLDOWN_MS = 5 * 60 * 1000;
// HTTP 401: access token expired.
const WHAM_TOKEN_EXPIRED_COOLDOWN_MS = 12 * 60 * 60 * 1000;
// HTTP 403: account dead/deactivated.
const WHAM_DEAD_ACCOUNT_COOLDOWN_MS = 24 * 60 * 60 * 1000;
// Caps on the reset-time-proportional cooldowns (half the reported reset,
// clamped to these maxima).
const WHAM_TEAM_ROLLING_MAX_COOLDOWN_MS = 2 * 60 * 60 * 1000;
const WHAM_PERSONAL_MAX_COOLDOWN_MS = 4 * 60 * 60 * 1000;
const WHAM_TEAM_WEEKLY_MAX_COOLDOWN_MS = 4 * 60 * 60 * 1000;
// Subset of the WHAM usage-API response we consume; field names mirror the
// wire format (snake_case). `reset_at` is treated as epoch seconds by
// resolveWhamResetMs (multiplied by 1000); `reset_after_seconds` is relative.
type WhamUsageWindow = {
  limit_window_seconds?: number;
  used_percent?: number;
  reset_at?: number;
  reset_after_seconds?: number;
};
type WhamUsageResponse = {
  rate_limit?: {
    limit_reached?: boolean;
    primary_window?: WhamUsageWindow;
    secondary_window?: WhamUsageWindow;
  };
};
// Outcome of a WHAM probe: how long to cool the profile down, plus a
// machine-readable reason label.
type WhamCooldownProbeResult = {
  cooldownMs: number;
  reason: string;
};
/**
 * Returns true for providers that never receive local auth cooldowns
 * (currently "openrouter" and "kilocode", matched after normalization).
 */
function isAuthCooldownBypassedForProvider(provider: string | undefined): boolean {
  switch (normalizeProviderId(provider ?? "")) {
    case "openrouter":
    case "kilocode":
      return true;
    default:
      return false;
  }
}
/**
 * Whether a profile failure should trigger a WHAM usage probe: only for the
 * "openai-codex" provider, and only for failure reasons that plausibly
 * indicate rate limiting ("rate_limit" or "unknown").
 */
function shouldProbeWhamForFailure(
  provider: string | undefined,
  reason: AuthProfileFailureReason,
): boolean {
  if (normalizeProviderId(provider ?? "") !== "openai-codex") {
    return false;
  }
  return reason === "rate_limit" || reason === "unknown";
}
/**
 * Converts a WHAM usage window's reset information into milliseconds from
 * `now`. Prefers the relative `reset_after_seconds` when it is a positive
 * finite number; otherwise falls back to the absolute `reset_at` (epoch
 * seconds), clamped to zero when already in the past. Returns null when the
 * window is missing or carries no usable reset field.
 */
function resolveWhamResetMs(window: WhamUsageWindow | undefined, now: number): number | null {
  const afterSeconds = window?.reset_after_seconds;
  if (typeof afterSeconds === "number" && Number.isFinite(afterSeconds) && afterSeconds > 0) {
    return afterSeconds * 1000;
  }
  const resetAt = window?.reset_at;
  if (typeof resetAt === "number" && Number.isFinite(resetAt) && resetAt > 0) {
    return Math.max(0, resetAt * 1000 - now);
  }
  return null;
}
/**
 * True when the window reports a finite `used_percent` of 100 or more,
 * i.e. the rate-limit window is genuinely exhausted.
 */
function isWhamWindowExhausted(window: WhamUsageWindow | undefined): boolean {
  const used = window?.used_percent;
  return typeof used === "number" && Number.isFinite(used) && used >= 100;
}
/**
 * Merges a WHAM probe result into the freshly computed usage stats, using
 * max semantics so a probe can only lengthen a cooldown, never shorten one
 * that is already in effect. An expired (or non-finite) existing cooldown is
 * treated as absent.
 */
function applyWhamCooldownResult(params: {
  existing: ProfileUsageStats;
  computed: ProfileUsageStats;
  now: number;
  whamResult: WhamCooldownProbeResult;
}): ProfileUsageStats {
  const { existing, computed, now, whamResult } = params;
  const prior = existing.cooldownUntil;
  const priorStillActive =
    typeof prior === "number" && Number.isFinite(prior) && prior > now;
  const whamCooldownUntil = now + whamResult.cooldownMs;
  return {
    ...computed,
    cooldownUntil: Math.max(priorStillActive ? prior : 0, whamCooldownUntil),
  };
}
/**
 * Probes the WHAM usage API to translate a Codex profile failure into an
 * accurate cooldown instead of a backoff guess.
 *
 * Resolution order after a successful probe (precedence is deliberate):
 * 1. `limit_reached === false` → short burst/contention circuit-break.
 * 2. No secondary window (personal-style plan) → half the primary reset,
 *    capped. (Note: this branch runs whenever limit_reached is not literally
 *    false, even if used_percent is below 100.)
 * 3. Secondary (weekly) window exhausted → half its reset, capped.
 * 4. Primary (rolling) window exhausted → half its reset, capped.
 * Missing reset data, unparseable bodies, thrown fetch errors, and the
 * 3s timeout all fail open with a short probe-failure cooldown.
 *
 * @param store     auth profile store holding the OAuth credentials.
 * @param profileId profile to probe.
 * @returns the cooldown to apply, or null when the profile is not an OAuth
 *   profile with an access token (caller falls back to normal backoff).
 */
export async function probeWhamForCooldown(
  store: AuthProfileStore,
  profileId: string,
): Promise<WhamCooldownProbeResult | null> {
  const profile = store.profiles[profileId];
  // Only OAuth profiles carry the bearer token the WHAM API requires.
  if (profile?.type !== "oauth" || !profile.access) {
    return null;
  }
  // Bound the probe so a slow WHAM endpoint cannot stall failure handling;
  // an abort surfaces as a thrown error and lands in the catch below.
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), WHAM_TIMEOUT_MS);
  try {
    const headers: Record<string, string> = {
      Authorization: `Bearer ${profile.access}`,
      Accept: "application/json",
      "User-Agent": "CodexBar",
    };
    // Multi-account tokens need the account id to scope the usage lookup.
    if (profile.accountId) {
      headers["ChatGPT-Account-Id"] = profile.accountId;
    }
    const res = await fetch(WHAM_USAGE_URL, {
      method: "GET",
      headers,
      signal: controller.signal,
    });
    if (!res.ok) {
      // 401: the access token itself is expired — long cooldown.
      if (res.status === 401) {
        return { cooldownMs: WHAM_TOKEN_EXPIRED_COOLDOWN_MS, reason: "wham_token_expired" };
      }
      // 403: account is dead/deactivated — longest cooldown.
      if (res.status === 403) {
        return { cooldownMs: WHAM_DEAD_ACCOUNT_COOLDOWN_MS, reason: "wham_account_dead" };
      }
      // Any other HTTP error: moderate cooldown before re-probing.
      return { cooldownMs: WHAM_HTTP_ERROR_COOLDOWN_MS, reason: "wham_http_error" };
    }
    const data = (await res.json()) as WhamUsageResponse;
    // A body without rate_limit tells us nothing — fail open.
    if (!data.rate_limit) {
      return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
    }
    // Explicit "limit not reached" means the 429 was burst/concurrency
    // contention, not a genuine quota exhaustion.
    if (data.rate_limit.limit_reached === false) {
      return { cooldownMs: WHAM_BURST_COOLDOWN_MS, reason: "wham_burst_contention" };
    }
    const now = Date.now();
    const primaryResetMs = resolveWhamResetMs(data.rate_limit.primary_window, now);
    const secondaryResetMs = resolveWhamResetMs(data.rate_limit.secondary_window, now);
    // Single-window plans: only the primary rolling window exists.
    if (!data.rate_limit.secondary_window) {
      if (primaryResetMs === null) {
        return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
      }
      // Half the reset so multiple profiles stagger their retries.
      return {
        cooldownMs: Math.min(Math.floor(primaryResetMs / 2), WHAM_PERSONAL_MAX_COOLDOWN_MS),
        reason: "wham_personal_rolling",
      };
    }
    // Two-window plans: an exhausted secondary (weekly) window takes
    // precedence over the primary, since it resets later.
    if (isWhamWindowExhausted(data.rate_limit.secondary_window)) {
      if (secondaryResetMs === null) {
        return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
      }
      return {
        cooldownMs: Math.min(Math.floor(secondaryResetMs / 2), WHAM_TEAM_WEEKLY_MAX_COOLDOWN_MS),
        reason: "wham_team_weekly",
      };
    }
    if (isWhamWindowExhausted(data.rate_limit.primary_window)) {
      if (primaryResetMs === null) {
        return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
      }
      return {
        cooldownMs: Math.min(Math.floor(primaryResetMs / 2), WHAM_TEAM_ROLLING_MAX_COOLDOWN_MS),
        reason: "wham_team_rolling",
      };
    }
    // Two windows present, neither exhausted, limit not explicitly false:
    // inconclusive — treat like a probe failure.
    return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
  } catch {
    // Network error, JSON parse failure, or the abort-timeout: fail open.
    return { cooldownMs: WHAM_PROBE_FAILURE_COOLDOWN_MS, reason: "wham_probe_failed" };
  } finally {
    clearTimeout(timeout);
  }
}
export function resolveProfileUnusableUntil(
stats: Pick<ProfileUsageStats, "cooldownUntil" | "disabledUntil">,
): number | null {
@ -567,6 +748,11 @@ export async function markAuthProfileFailure(params: {
if (!profile || isAuthCooldownBypassedForProvider(profile.provider)) {
return;
}
const whamResult = shouldProbeWhamForFailure(profile.provider, reason)
? await probeWhamForCooldown(store, profileId)
: null;
let nextStats: ProfileUsageStats | undefined;
let previousStats: ProfileUsageStats | undefined;
let updateTime = 0;
@ -593,8 +779,16 @@ export async function markAuthProfileFailure(params: {
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(freshStore, profileId, () => computed);
nextStats =
whamResult && shouldProbeWhamForFailure(profile.provider, reason)
? applyWhamCooldownResult({
existing: previousStats ?? {},
computed,
now,
whamResult,
})
: computed;
updateUsageStatsEntry(freshStore, profileId, () => nextStats ?? computed);
return true;
},
});
@ -632,8 +826,16 @@ export async function markAuthProfileFailure(params: {
cfgResolved,
modelId,
});
nextStats = computed;
updateUsageStatsEntry(store, profileId, () => computed);
nextStats =
whamResult && shouldProbeWhamForFailure(store.profiles[profileId]?.provider, reason)
? applyWhamCooldownResult({
existing: previousStats ?? {},
computed,
now,
whamResult,
})
: computed;
updateUsageStatsEntry(store, profileId, () => nextStats ?? computed);
authProfileUsageDeps.saveAuthProfileStore(store, agentDir);
logAuthProfileFailureStateChange({
runId,

View File

@ -4,15 +4,18 @@ import type { ProviderUsageSnapshot, UsageWindow } from "./provider-usage.types.
type CodexUsageResponse = {
rate_limit?: {
limit_reached?: boolean;
primary_window?: {
limit_window_seconds?: number;
used_percent?: number;
reset_at?: number;
reset_after_seconds?: number;
};
secondary_window?: {
limit_window_seconds?: number;
used_percent?: number;
reset_at?: number;
reset_after_seconds?: number;
};
};
plan_type?: string;