mirror of https://github.com/openclaw/openclaw.git
fix: escalate to model fallback after rate-limit profile rotation cap (#58707)
* fix: escalate to model fallback after rate-limit profile rotation cap Per-model rate limits (e.g. Anthropic Sonnet-only quotas) are not relieved by rotating auth profiles — if all profiles share the same model quota, cycling between them loops forever without falling back to the next model in the configured fallbacks chain. Apply the same rotation-cap pattern introduced for overloaded_error (#58348) to rate_limit errors: - Add `rateLimitedProfileRotations` to auth.cooldowns config (default: 1) - After N profile rotations on a rate_limit error, throw FailoverError to trigger cross-provider model fallback - Add `resolveRateLimitProfileRotationLimit` helper following the same pattern as `resolveOverloadProfileRotationLimit` Fixes #58572 * fix: cap prompt-side rate-limit failover (#58707) (thanks @Forgely3D) * fix: restore latest-main gates for #58707 --------- Co-authored-by: Ember (Forgely3D) <ember@forgely.co> Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
parent
8fce663861
commit
4fa11632b4
|
|
@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
|
|||
- WhatsApp/reactions: add `reactionLevel` guidance for agent reactions. Thanks @mcaxtr.
|
||||
- Feishu/comments: add a dedicated Drive comment-event flow with comment-thread context resolution, in-thread replies, and `feishu_drive` comment actions for document collaboration workflows. (#58497) thanks @wittam-01.
|
||||
- Tasks/chat: add `/tasks` as a chat-native background task board for the current session, with recent task details and agent-local fallback counts when no linked tasks are visible. Related #54226. Thanks @vincentkoc.
|
||||
- Agents/failover: cap prompt-side and assistant-side same-provider auth-profile retries for rate-limit failures before cross-provider model fallback, add the `auth.cooldowns.rateLimitedProfileRotations` knob, and document the new fallback behavior. (#58707) Thanks @Forgely3D.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
|
|
|||
|
|
@ -7907,6 +7907,23 @@
|
|||
"help": "Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.cooldowns.rateLimitedProfileRotations",
|
||||
"kind": "core",
|
||||
"type": "integer",
|
||||
"required": false,
|
||||
"deprecated": false,
|
||||
"sensitive": false,
|
||||
"tags": [
|
||||
"access",
|
||||
"auth",
|
||||
"performance",
|
||||
"storage"
|
||||
],
|
||||
"label": "Rate-Limited Profile Rotations",
|
||||
"help": "Maximum same-provider auth-profile rotations allowed for rate-limit errors before switching to model fallback (default: 1).",
|
||||
"hasChildren": false
|
||||
},
|
||||
{
|
||||
"path": "auth.order",
|
||||
"kind": "core",
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5729}
|
||||
{"generatedBy":"scripts/generate-config-doc-baseline.ts","recordType":"meta","totalPaths":5730}
|
||||
{"recordType":"path","path":"acp","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["advanced"],"label":"ACP","help":"ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.","hasChildren":true}
|
||||
{"recordType":"path","path":"acp.allowedAgents","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":["access"],"label":"ACP Allowed Agents","help":"Allowlist of ACP target agent ids permitted for ACP runtime sessions. Empty means no additional allowlist restriction.","hasChildren":true}
|
||||
{"recordType":"path","path":"acp.allowedAgents.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
|
|
@ -701,6 +701,7 @@
|
|||
{"recordType":"path","path":"auth.cooldowns.failureWindowHours","kind":"core","type":"number","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Failover Window (hours)","help":"Failure window (hours) for backoff counters (default: 24).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.overloadedBackoffMs","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","reliability","storage"],"label":"Overloaded Backoff (ms)","help":"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.overloadedProfileRotations","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","storage"],"label":"Overloaded Profile Rotations","help":"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.cooldowns.rateLimitedProfileRotations","kind":"core","type":"integer","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth","performance","storage"],"label":"Rate-Limited Profile Rotations","help":"Maximum same-provider auth-profile rotations allowed for rate-limit errors before switching to model fallback (default: 1).","hasChildren":false}
|
||||
{"recordType":"path","path":"auth.order","kind":"core","type":"object","required":false,"deprecated":false,"sensitive":false,"tags":["access","auth"],"label":"Auth Profile Order","help":"Ordered auth profile IDs per provider (used for automatic failover).","hasChildren":true}
|
||||
{"recordType":"path","path":"auth.order.*","kind":"core","type":"array","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":true}
|
||||
{"recordType":"path","path":"auth.order.*.*","kind":"core","type":"string","required":false,"deprecated":false,"sensitive":false,"tags":[],"hasChildren":false}
|
||||
|
|
|
|||
|
|
@ -138,10 +138,12 @@ If all profiles for a provider fail, OpenClaw moves to the next model in
|
|||
`agents.defaults.model.fallbacks`. This applies to auth failures, rate limits, and
|
||||
timeouts that exhausted profile rotation (other errors do not advance fallback).
|
||||
|
||||
Overloaded errors are handled more aggressively than billing cooldowns. By default,
|
||||
OpenClaw allows one same-provider auth-profile retry, then switches to the next
|
||||
configured model fallback without waiting. Tune this with
|
||||
`auth.cooldowns.overloadedProfileRotations` and `auth.cooldowns.overloadedBackoffMs`.
|
||||
Overloaded and rate-limit errors are handled more aggressively than billing
|
||||
cooldowns. By default, OpenClaw allows one same-provider auth-profile retry,
|
||||
then switches to the next configured model fallback without waiting. Tune this
|
||||
with `auth.cooldowns.overloadedProfileRotations`,
|
||||
`auth.cooldowns.overloadedBackoffMs`, and
|
||||
`auth.cooldowns.rateLimitedProfileRotations`.
|
||||
|
||||
When a run starts with a model override (hooks or CLI), fallbacks still end at
|
||||
`agents.defaults.model.primary` after trying any configured fallbacks.
|
||||
|
|
@ -154,6 +156,7 @@ See [Gateway configuration](/gateway/configuration) for:
|
|||
- `auth.cooldowns.billingBackoffHours` / `auth.cooldowns.billingBackoffHoursByProvider`
|
||||
- `auth.cooldowns.billingMaxHours` / `auth.cooldowns.failureWindowHours`
|
||||
- `auth.cooldowns.overloadedProfileRotations` / `auth.cooldowns.overloadedBackoffMs`
|
||||
- `auth.cooldowns.rateLimitedProfileRotations`
|
||||
- `agents.defaults.model.primary` / `agents.defaults.model.fallbacks`
|
||||
- `agents.defaults.imageModel` routing
|
||||
|
||||
|
|
|
|||
|
|
@ -3031,6 +3031,7 @@ Notes:
|
|||
failureWindowHours: 24,
|
||||
overloadedProfileRotations: 1,
|
||||
overloadedBackoffMs: 0,
|
||||
rateLimitedProfileRotations: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
@ -3042,6 +3043,7 @@ Notes:
|
|||
- `failureWindowHours`: rolling window in hours used for backoff counters (default: `24`).
|
||||
- `overloadedProfileRotations`: maximum same-provider auth-profile rotations for overloaded errors before switching to model fallback (default: `1`).
|
||||
- `overloadedBackoffMs`: fixed delay before retrying an overloaded provider/profile rotation (default: `0`).
|
||||
- `rateLimitedProfileRotations`: maximum same-provider auth-profile rotations for rate-limit errors before switching to model fallback (default: `1`).
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -90,9 +90,12 @@ export function resolveNpmDistTagMirrorAuth(params?: {
|
|||
nodeAuthToken?: string | null;
|
||||
npmToken?: string | null;
|
||||
}): NpmDistTagMirrorAuth {
|
||||
const nodeAuthToken =
|
||||
params && "nodeAuthToken" in params ? params.nodeAuthToken : process.env.NODE_AUTH_TOKEN;
|
||||
const npmToken = params && "npmToken" in params ? params.npmToken : process.env.NPM_TOKEN;
|
||||
return resolveNpmDistTagMirrorAuthBase({
|
||||
nodeAuthToken: params?.nodeAuthToken ?? process.env.NODE_AUTH_TOKEN,
|
||||
npmToken: params?.npmToken ?? process.env.NPM_TOKEN,
|
||||
nodeAuthToken,
|
||||
npmToken,
|
||||
}) as NpmDistTagMirrorAuth;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ beforeEach(() => {
|
|||
|
||||
const OVERLOADED_ERROR_PAYLOAD =
|
||||
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}';
|
||||
const RATE_LIMIT_ERROR_MESSAGE = "rate limit exceeded";
|
||||
|
||||
function makeConfig(): OpenClawConfig {
|
||||
const apiKeyField = ["api", "Key"].join("");
|
||||
|
|
@ -196,6 +197,27 @@ async function readUsageStats(agentDir: string) {
|
|||
return JSON.parse(raw).usageStats as Record<string, Record<string, unknown> | undefined>;
|
||||
}
|
||||
|
||||
/**
 * Test fixture: writes an `auth-profiles.json` file into `agentDir`.
 *
 * The store contains three `openai` API-key profiles (`openai:p1`..`openai:p3`)
 * and one `groq` profile (`groq:p1`), plus `usageStats` entries whose
 * `lastUsed` values run from 1 to 4 in that same order.
 *
 * @param agentDir Directory in which `auth-profiles.json` is created.
 */
async function writeMultiProfileAuthStore(agentDir: string) {
|
||||
await fs.writeFile(
|
||||
path.join(agentDir, "auth-profiles.json"),
|
||||
JSON.stringify({
|
||||
version: 1,
|
||||
profiles: {
|
||||
"openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
|
||||
"openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
|
||||
"openai:p3": { type: "api_key", provider: "openai", key: "sk-openai-3" },
|
||||
"groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
|
||||
},
|
||||
usageStats: {
|
||||
"openai:p1": { lastUsed: 1 },
|
||||
"openai:p2": { lastUsed: 2 },
|
||||
"openai:p3": { lastUsed: 3 },
|
||||
"groq:p1": { lastUsed: 4 },
|
||||
},
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
async function runEmbeddedFallback(params: {
|
||||
agentDir: string;
|
||||
workspaceDir: string;
|
||||
|
|
@ -236,6 +258,29 @@ function mockPrimaryOverloadedThenFallbackSuccess() {
|
|||
mockPrimaryErrorThenFallbackSuccess(OVERLOADED_ERROR_PAYLOAD);
|
||||
}
|
||||
|
||||
/**
 * Installs a mock implementation on `runEmbeddedAttemptMock` that simulates a
 * prompt-side failure on the primary provider and a success on the fallback:
 *
 * - `openai` attempts resolve to an attempt whose `promptError` is
 *   `new Error(errorMessage)`.
 * - `groq` attempts resolve to an attempt carrying the assistant text
 *   "fallback ok" (model `mock-2`, stop reason `stop`).
 * - Any other provider makes the mock throw, so unexpected routing fails loudly.
 *
 * @param errorMessage Message used for the simulated prompt error.
 */
function mockPrimaryPromptErrorThenFallbackSuccess(errorMessage: string) {
|
||||
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
|
||||
const attemptParams = params as { provider: string };
|
||||
if (attemptParams.provider === "openai") {
|
||||
return makeEmbeddedRunnerAttempt({
|
||||
promptError: new Error(errorMessage),
|
||||
});
|
||||
}
|
||||
if (attemptParams.provider === "groq") {
|
||||
return makeEmbeddedRunnerAttempt({
|
||||
assistantTexts: ["fallback ok"],
|
||||
lastAssistant: buildEmbeddedRunnerAssistant({
|
||||
provider: "groq",
|
||||
model: "mock-2",
|
||||
stopReason: "stop",
|
||||
content: [{ type: "text", text: "fallback ok" }],
|
||||
}),
|
||||
});
|
||||
}
|
||||
throw new Error(`Unexpected provider ${attemptParams.provider}`);
|
||||
});
|
||||
}
|
||||
|
||||
function mockPrimaryErrorThenFallbackSuccess(errorMessage: string) {
|
||||
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
|
||||
const attemptParams = params as { provider: string; modelId: string; authProfileId?: string };
|
||||
|
|
@ -572,22 +617,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
|
||||
it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
|
||||
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
await fs.writeFile(
|
||||
path.join(agentDir, "auth-profiles.json"),
|
||||
JSON.stringify({
|
||||
version: 1,
|
||||
profiles: {
|
||||
"openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
|
||||
"openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
|
||||
"groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
|
||||
},
|
||||
usageStats: {
|
||||
"openai:p1": { lastUsed: 1 },
|
||||
"openai:p2": { lastUsed: 2 },
|
||||
"groq:p1": { lastUsed: 3 },
|
||||
},
|
||||
}),
|
||||
);
|
||||
await writeMultiProfileAuthStore(agentDir);
|
||||
|
||||
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
|
||||
const attemptParams = params as { provider: string };
|
||||
|
|
@ -638,4 +668,117 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
|
|||
expect(groqAttempts.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("caps rate-limit profile rotations and escalates to cross-provider fallback (#58572)", async () => {
|
||||
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
await writeMultiProfileAuthStore(agentDir);
|
||||
|
||||
mockPrimaryErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
|
||||
|
||||
const result = await runEmbeddedFallback({
|
||||
agentDir,
|
||||
workspaceDir,
|
||||
sessionKey: "agent:test:rate-limit-multi-profile-cap",
|
||||
runId: "run:rate-limit-multi-profile-cap",
|
||||
});
|
||||
|
||||
expect(result.provider).toBe("groq");
|
||||
expect(result.model).toBe("mock-2");
|
||||
expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
|
||||
|
||||
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "openai",
|
||||
);
|
||||
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "groq",
|
||||
);
|
||||
expect(openaiAttempts.length).toBe(2);
|
||||
expect(groqAttempts.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("respects rateLimitedProfileRotations=0 and falls back immediately", async () => {
|
||||
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
await writeMultiProfileAuthStore(agentDir);
|
||||
|
||||
mockPrimaryErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
|
||||
|
||||
const result = await runEmbeddedFallback({
|
||||
agentDir,
|
||||
workspaceDir,
|
||||
sessionKey: "agent:test:rate-limit-no-rotation",
|
||||
runId: "run:rate-limit-no-rotation",
|
||||
config: {
|
||||
...makeConfig(),
|
||||
auth: { cooldowns: { rateLimitedProfileRotations: 0 } },
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.provider).toBe("groq");
|
||||
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "openai",
|
||||
);
|
||||
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "groq",
|
||||
);
|
||||
expect(openaiAttempts.length).toBe(1);
|
||||
expect(groqAttempts.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("caps prompt-side rate-limit profile rotations before cross-provider fallback", async () => {
|
||||
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
await writeMultiProfileAuthStore(agentDir);
|
||||
|
||||
mockPrimaryPromptErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
|
||||
|
||||
const result = await runEmbeddedFallback({
|
||||
agentDir,
|
||||
workspaceDir,
|
||||
sessionKey: "agent:test:prompt-rate-limit-multi-profile-cap",
|
||||
runId: "run:prompt-rate-limit-multi-profile-cap",
|
||||
});
|
||||
|
||||
expect(result.provider).toBe("groq");
|
||||
expect(result.model).toBe("mock-2");
|
||||
|
||||
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "openai",
|
||||
);
|
||||
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "groq",
|
||||
);
|
||||
expect(openaiAttempts.length).toBe(2);
|
||||
expect(groqAttempts.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("respects prompt-side rateLimitedProfileRotations=0 and falls back immediately", async () => {
|
||||
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
|
||||
await writeMultiProfileAuthStore(agentDir);
|
||||
|
||||
mockPrimaryPromptErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
|
||||
|
||||
const result = await runEmbeddedFallback({
|
||||
agentDir,
|
||||
workspaceDir,
|
||||
sessionKey: "agent:test:prompt-rate-limit-no-rotation",
|
||||
runId: "run:prompt-rate-limit-no-rotation",
|
||||
config: {
|
||||
...makeConfig(),
|
||||
auth: { cooldowns: { rateLimitedProfileRotations: 0 } },
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.provider).toBe("groq");
|
||||
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "openai",
|
||||
);
|
||||
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
|
||||
(call) => (call[0] as { provider?: string })?.provider === "groq",
|
||||
);
|
||||
expect(openaiAttempts.length).toBe(1);
|
||||
expect(groqAttempts.length).toBe(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ import {
|
|||
resolveMaxRunRetryIterations,
|
||||
resolveOverloadFailoverBackoffMs,
|
||||
resolveOverloadProfileRotationLimit,
|
||||
resolveRateLimitProfileRotationLimit,
|
||||
type RuntimeAuthState,
|
||||
scrubAnthropicRefusalMagic,
|
||||
} from "./run/helpers.js";
|
||||
|
|
@ -304,9 +305,36 @@ export async function runEmbeddedPiAgent(
|
|||
let autoCompactionCount = 0;
|
||||
let runLoopIterations = 0;
|
||||
let overloadProfileRotations = 0;
|
||||
let rateLimitProfileRotations = 0;
|
||||
let timeoutCompactionAttempts = 0;
|
||||
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
|
||||
const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
|
||||
const rateLimitProfileRotationLimit = resolveRateLimitProfileRotationLimit(params.config);
|
||||
/**
 * Counts one rate-limit failure and, once the rotation cap is exceeded,
 * escalates to model fallback by throwing a `FailoverError`.
 *
 * Each call increments `rateLimitProfileRotations`. While the count is still
 * within `rateLimitProfileRotationLimit`, or when `fallbackConfigured` is
 * falsy, the function returns without doing anything else. Otherwise it logs
 * a warning, reports a "fallback_model" decision through the supplied
 * callback, and throws.
 *
 * `provider`, `modelId`, `fallbackConfigured` and `lastProfileId` are read
 * from the enclosing scope, not from the argument.
 *
 * NOTE(review): the parameter is named `params`, which shadows the `params`
 * used by the surrounding code (e.g. `params.config`) inside this closure.
 *
 * @param params.failoverProvider Provider recorded on the thrown error.
 * @param params.failoverModel Model recorded on the thrown error.
 * @param params.logFallbackDecision Callback invoked with "fallback_model"
 *   and the resolved status just before throwing.
 * @throws FailoverError with reason "rate_limit" once the cap is exceeded and
 *   a fallback is configured.
 */
const maybeEscalateRateLimitProfileFallback = (params: {
|
||||
failoverProvider: string;
|
||||
failoverModel: string;
|
||||
logFallbackDecision: (decision: "fallback_model", extra?: { status?: number }) => void;
|
||||
}) => {
|
||||
// Count this failure first so the comparison below sees the updated total.
rateLimitProfileRotations += 1;
|
||||
// Still within the cap, or no model fallback to escalate to: return without throwing.
if (rateLimitProfileRotations <= rateLimitProfileRotationLimit || !fallbackConfigured) {
|
||||
return;
|
||||
}
|
||||
const status = resolveFailoverStatus("rate_limit");
|
||||
log.warn(
|
||||
`rate-limit profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${rateLimitProfileRotations} rotations; escalating to model fallback`,
|
||||
);
|
||||
params.logFallbackDecision("fallback_model", { status });
|
||||
throw new FailoverError(
|
||||
"The AI service is temporarily rate-limited. Please try again in a moment.",
|
||||
{
|
||||
reason: "rate_limit",
|
||||
provider: params.failoverProvider,
|
||||
model: params.failoverModel,
|
||||
profileId: lastProfileId,
|
||||
status,
|
||||
},
|
||||
);
|
||||
};
|
||||
const maybeMarkAuthProfileFailure = async (failure: {
|
||||
profileId?: string;
|
||||
reason?: AuthProfileFailureReason | null;
|
||||
|
|
@ -1022,6 +1050,13 @@ export async function runEmbeddedPiAgent(
|
|||
fallbackConfigured,
|
||||
aborted,
|
||||
});
|
||||
if (promptFailoverReason === "rate_limit") {
|
||||
maybeEscalateRateLimitProfileFallback({
|
||||
failoverProvider: provider,
|
||||
failoverModel: modelId,
|
||||
logFallbackDecision: logPromptFailoverDecision,
|
||||
});
|
||||
}
|
||||
if (
|
||||
promptFailoverFailure &&
|
||||
promptFailoverReason !== "timeout" &&
|
||||
|
|
@ -1185,6 +1220,19 @@ export async function runEmbeddedPiAgent(
|
|||
}
|
||||
}
|
||||
|
||||
// For rate-limit errors, apply the same rotation cap so that
|
||||
// per-model quota exhaustion (e.g. Anthropic Sonnet-only limits)
|
||||
// escalates to cross-provider model fallback instead of spinning
|
||||
// forever across profiles that share the same model quota.
|
||||
// See: https://github.com/openclaw/openclaw/issues/58572
|
||||
if (assistantFailoverReason === "rate_limit") {
|
||||
maybeEscalateRateLimitProfileFallback({
|
||||
failoverProvider: activeErrorContext.provider,
|
||||
failoverModel: activeErrorContext.model,
|
||||
logFallbackDecision: logAssistantFailoverDecision,
|
||||
});
|
||||
}
|
||||
|
||||
const rotated = await advanceAuthProfile();
|
||||
if (rotated) {
|
||||
logAssistantFailoverDecision("rotate_profile");
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000;
|
|||
|
||||
export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0;
|
||||
export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
|
||||
export const DEFAULT_MAX_RATE_LIMIT_PROFILE_ROTATIONS = 1;
|
||||
|
||||
export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number {
|
||||
return cfg?.auth?.cooldowns?.overloadedBackoffMs ?? DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS;
|
||||
|
|
@ -36,6 +37,10 @@ export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): numbe
|
|||
return cfg?.auth?.cooldowns?.overloadedProfileRotations ?? DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS;
|
||||
}
|
||||
|
||||
/**
 * Resolves how many same-provider auth-profile rotations are allowed for
 * rate-limit errors.
 *
 * Returns `cfg.auth.cooldowns.rateLimitedProfileRotations` when it is set,
 * otherwise `DEFAULT_MAX_RATE_LIMIT_PROFILE_ROTATIONS`. Because the fallback
 * uses `??`, an explicit `0` is preserved rather than replaced by the default.
 *
 * @param cfg Optional configuration; any missing level falls through to the default.
 * @returns The configured or default rotation limit.
 */
export function resolveRateLimitProfileRotationLimit(cfg?: OpenClawConfig): number {
|
||||
return cfg?.auth?.cooldowns?.rateLimitedProfileRotations ?? DEFAULT_MAX_RATE_LIMIT_PROFILE_ROTATIONS;
|
||||
}
|
||||
|
||||
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
|
||||
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,10 @@ import {
|
|||
resolveUsageProviderId,
|
||||
} from "../../infra/provider-usage.js";
|
||||
import type { MediaUnderstandingDecision } from "../../media-understanding/types.js";
|
||||
import { listTasksForAgentId, listTasksForSessionKey } from "../../tasks/task-registry.js";
|
||||
import {
|
||||
listTasksForAgentIdForStatus,
|
||||
listTasksForSessionKeyForStatus,
|
||||
} from "../../tasks/task-status-access.js";
|
||||
import {
|
||||
buildTaskStatusSnapshot,
|
||||
formatTaskStatusDetail,
|
||||
|
|
@ -61,7 +64,7 @@ function shouldLoadUsageSummary(params: {
|
|||
}
|
||||
|
||||
function formatSessionTaskLine(sessionKey: string): string | undefined {
|
||||
const snapshot = buildTaskStatusSnapshot(listTasksForSessionKey(sessionKey));
|
||||
const snapshot = buildTaskStatusSnapshot(listTasksForSessionKeyForStatus(sessionKey));
|
||||
const task = snapshot.focus;
|
||||
if (!task) {
|
||||
return undefined;
|
||||
|
|
@ -79,7 +82,7 @@ function formatSessionTaskLine(sessionKey: string): string | undefined {
|
|||
}
|
||||
|
||||
function formatAgentTaskCountsLine(agentId: string): string | undefined {
|
||||
const snapshot = buildTaskStatusSnapshot(listTasksForAgentId(agentId));
|
||||
const snapshot = buildTaskStatusSnapshot(listTasksForAgentIdForStatus(agentId));
|
||||
if (snapshot.totalCount === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,11 @@ import { resolveSessionAgentId } from "../../agents/agent-scope.js";
|
|||
import { logVerbose } from "../../globals.js";
|
||||
import { formatDurationCompact } from "../../infra/format-time/format-duration.ts";
|
||||
import { formatTimeAgo } from "../../infra/format-time/format-relative.ts";
|
||||
import { listTasksForAgentId, listTasksForSessionKey } from "../../tasks/task-registry.js";
|
||||
import type { TaskRecord } from "../../tasks/task-registry.types.js";
|
||||
import {
|
||||
listTasksForAgentIdForStatus,
|
||||
listTasksForSessionKeyForStatus,
|
||||
} from "../../tasks/task-status-access.js";
|
||||
import { buildTaskStatusSnapshot } from "../../tasks/task-status.js";
|
||||
import type { ReplyPayload } from "../types.js";
|
||||
import type { CommandHandler, HandleCommandsParams } from "./commands-types.js";
|
||||
|
|
@ -35,7 +38,7 @@ function formatTaskHeadline(snapshot: ReturnType<typeof buildTaskStatusSnapshot>
|
|||
}
|
||||
|
||||
function formatAgentFallbackLine(agentId: string): string | undefined {
|
||||
const snapshot = buildTaskStatusSnapshot(listTasksForAgentId(agentId));
|
||||
const snapshot = buildTaskStatusSnapshot(listTasksForAgentIdForStatus(agentId));
|
||||
if (snapshot.totalCount === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
|
@ -75,7 +78,9 @@ function formatVisibleTask(task: TaskRecord, index: number): string {
|
|||
}
|
||||
|
||||
export function buildTasksText(params: { sessionKey: string; agentId: string }): string {
|
||||
const sessionSnapshot = buildTaskStatusSnapshot(listTasksForSessionKey(params.sessionKey));
|
||||
const sessionSnapshot = buildTaskStatusSnapshot(
|
||||
listTasksForSessionKeyForStatus(params.sessionKey),
|
||||
);
|
||||
const lines = ["📋 Tasks", formatTaskHeadline(sessionSnapshot)];
|
||||
|
||||
if (sessionSnapshot.totalCount > 0) {
|
||||
|
|
|
|||
|
|
@ -799,6 +799,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
|
|||
minimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
rateLimitedProfileRotations: {
|
||||
type: "integer",
|
||||
minimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
|
|
@ -13672,6 +13677,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
|
|||
help: "Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
|
||||
tags: ["auth", "access", "reliability", "storage"],
|
||||
},
|
||||
"auth.cooldowns.rateLimitedProfileRotations": {
|
||||
label: "Rate-Limited Profile Rotations",
|
||||
help: "Maximum same-provider auth-profile rotations allowed for rate-limit errors before switching to model fallback (default: 1).",
|
||||
tags: ["auth", "access", "performance", "storage"],
|
||||
},
|
||||
"agents.defaults.models": {
|
||||
label: "Models",
|
||||
help: "Configured model catalog (keys are full provider/model IDs).",
|
||||
|
|
|
|||
|
|
@ -805,6 +805,8 @@ export const FIELD_HELP: Record<string, string> = {
|
|||
"Maximum same-provider auth-profile rotations allowed for overloaded errors before switching to model fallback (default: 1).",
|
||||
"auth.cooldowns.overloadedBackoffMs":
|
||||
"Fixed delay in milliseconds before retrying an overloaded provider/profile rotation (default: 0).",
|
||||
"auth.cooldowns.rateLimitedProfileRotations":
|
||||
"Maximum same-provider auth-profile rotations allowed for rate-limit errors before switching to model fallback (default: 1).",
|
||||
"agents.defaults.workspace":
|
||||
"Default workspace path exposed to agent runtime tools for filesystem context and repo-aware behavior. Set this explicitly when running from wrappers so path resolution stays deterministic.",
|
||||
"agents.defaults.bootstrapMaxChars":
|
||||
|
|
|
|||
|
|
@ -473,6 +473,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
|||
"auth.cooldowns.failureWindowHours": "Failover Window (hours)",
|
||||
"auth.cooldowns.overloadedProfileRotations": "Overloaded Profile Rotations",
|
||||
"auth.cooldowns.overloadedBackoffMs": "Overloaded Backoff (ms)",
|
||||
"auth.cooldowns.rateLimitedProfileRotations": "Rate-Limited Profile Rotations",
|
||||
"agents.defaults.models": "Models",
|
||||
"agents.defaults.model.primary": "Primary Model",
|
||||
"agents.defaults.model.fallbacks": "Model Fallbacks",
|
||||
|
|
|
|||
|
|
@ -36,5 +36,10 @@ export type AuthConfig = {
|
|||
* Default: 0.
|
||||
*/
|
||||
overloadedBackoffMs?: number;
|
||||
/**
|
||||
* Maximum same-provider auth-profile rotations to allow for rate-limit
|
||||
* errors before escalating to cross-provider model fallback. Default: 1.
|
||||
*/
|
||||
rateLimitedProfileRotations?: number;
|
||||
};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -452,6 +452,7 @@ export const OpenClawSchema = z
|
|||
failureWindowHours: z.number().positive().optional(),
|
||||
overloadedProfileRotations: z.number().int().nonnegative().optional(),
|
||||
overloadedBackoffMs: z.number().int().nonnegative().optional(),
|
||||
rateLimitedProfileRotations: z.number().int().nonnegative().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { applyJobPatch, createJob } from "./service/jobs.js";
|
||||
import { applyJobPatch, createJob, recomputeNextRuns } from "./service/jobs.js";
|
||||
import type { CronServiceState } from "./service/state.js";
|
||||
import { DEFAULT_TOP_OF_HOUR_STAGGER_MS } from "./stagger.js";
|
||||
import type { CronJob, CronJobPatch } from "./types.js";
|
||||
|
|
@ -538,3 +538,33 @@ describe("createJob delivery defaults", () => {
|
|||
expect(job.delivery).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("recomputeNextRuns", () => {
|
||||
it("backfills missing every anchorMs for legacy loaded jobs", () => {
|
||||
const now = Date.parse("2026-03-01T12:00:00.000Z");
|
||||
const createdAtMs = now - 120_000;
|
||||
const job: CronJob = {
|
||||
id: "legacy-every",
|
||||
name: "legacy-every",
|
||||
enabled: true,
|
||||
createdAtMs,
|
||||
updatedAtMs: createdAtMs,
|
||||
schedule: { kind: "every", everyMs: 60_000 },
|
||||
sessionTarget: "main",
|
||||
wakeMode: "now",
|
||||
payload: { kind: "systemEvent", text: "tick" },
|
||||
state: {},
|
||||
};
|
||||
const state = {
|
||||
...createMockState(now),
|
||||
store: { version: 1 as const, jobs: [job] },
|
||||
} as CronServiceState;
|
||||
|
||||
expect(recomputeNextRuns(state)).toBe(true);
|
||||
expect(job.schedule.kind).toBe("every");
|
||||
if (job.schedule.kind === "every") {
|
||||
expect(job.schedule.anchorMs).toBe(createdAtMs);
|
||||
}
|
||||
expect(job.state.nextRunAtMs).toBe(now);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -238,8 +238,12 @@ export function findJobOrThrow(state: CronServiceState, id: string) {
|
|||
return job;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a cron job should be treated as enabled.
 *
 * Returns `job.enabled` when it has a value; a `null`/`undefined` flag is
 * treated as enabled (`true`). An explicit `false` is returned unchanged.
 *
 * @param job Any object exposing the job's `enabled` flag.
 * @returns `true` unless `enabled` is explicitly `false`.
 */
export function isJobEnabled(job: Pick<CronJob, "enabled">): boolean {
|
||||
return job.enabled ?? true;
|
||||
}
|
||||
|
||||
export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | undefined {
|
||||
if (!job.enabled) {
|
||||
if (!isJobEnabled(job)) {
|
||||
return undefined;
|
||||
}
|
||||
if (job.schedule.kind === "every") {
|
||||
|
|
@ -295,7 +299,7 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und
|
|||
}
|
||||
|
||||
export function computeJobPreviousRunAtMs(job: CronJob, nowMs: number): number | undefined {
|
||||
if (!job.enabled || job.schedule.kind !== "cron") {
|
||||
if (!isJobEnabled(job) || job.schedule.kind !== "cron") {
|
||||
return undefined;
|
||||
}
|
||||
const previous = computeStaggeredCronPreviousRunAtMs(job, nowMs);
|
||||
|
|
@ -359,7 +363,21 @@ function normalizeJobTickState(params: { state: CronServiceState; job: CronJob;
|
|||
changed = true;
|
||||
}
|
||||
|
||||
if (!job.enabled) {
|
||||
if (job.schedule.kind === "every") {
|
||||
const normalizedAnchorMs = resolveEveryAnchorMs({
|
||||
schedule: job.schedule,
|
||||
fallbackAnchorMs: isFiniteTimestamp(job.createdAtMs) ? job.createdAtMs : nowMs,
|
||||
});
|
||||
if (job.schedule.anchorMs !== normalizedAnchorMs) {
|
||||
job.schedule = {
|
||||
...job.schedule,
|
||||
anchorMs: normalizedAnchorMs,
|
||||
};
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isJobEnabled(job)) {
|
||||
if (job.state.nextRunAtMs !== undefined) {
|
||||
job.state.nextRunAtMs = undefined;
|
||||
changed = true;
|
||||
|
|
@ -840,7 +858,9 @@ export function isJobDue(job: CronJob, nowMs: number, opts: { forced: boolean })
|
|||
if (opts.forced) {
|
||||
return true;
|
||||
}
|
||||
return job.enabled && typeof job.state.nextRunAtMs === "number" && nowMs >= job.state.nextRunAtMs;
|
||||
return (
|
||||
isJobEnabled(job) && typeof job.state.nextRunAtMs === "number" && nowMs >= job.state.nextRunAtMs
|
||||
);
|
||||
}
|
||||
|
||||
export function resolveJobPayloadTextForMain(job: CronJob): string | undefined {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import {
|
|||
computeJobNextRunAtMs,
|
||||
createJob,
|
||||
findJobOrThrow,
|
||||
isJobEnabled,
|
||||
isJobDue,
|
||||
nextWakeAtMs,
|
||||
recomputeNextRuns,
|
||||
|
|
@ -162,7 +163,7 @@ export async function list(state: CronServiceState, opts?: { includeDisabled?: b
|
|||
return await locked(state, async () => {
|
||||
await ensureLoadedForRead(state);
|
||||
const includeDisabled = opts?.includeDisabled === true;
|
||||
const jobs = (state.store?.jobs ?? []).filter((j) => includeDisabled || j.enabled);
|
||||
const jobs = (state.store?.jobs ?? []).filter((j) => includeDisabled || isJobEnabled(j));
|
||||
return jobs.toSorted((a, b) => (a.state.nextRunAtMs ?? 0) - (b.state.nextRunAtMs ?? 0));
|
||||
});
|
||||
}
|
||||
|
|
@ -215,10 +216,10 @@ export async function listPage(state: CronServiceState, opts?: CronListPageOptio
|
|||
const sortDir = opts?.sortDir ?? "asc";
|
||||
const source = state.store?.jobs ?? [];
|
||||
const filtered = source.filter((job) => {
|
||||
if (enabledFilter === "enabled" && !job.enabled) {
|
||||
if (enabledFilter === "enabled" && !isJobEnabled(job)) {
|
||||
return false;
|
||||
}
|
||||
if (enabledFilter === "disabled" && job.enabled) {
|
||||
if (enabledFilter === "disabled" && isJobEnabled(job)) {
|
||||
return false;
|
||||
}
|
||||
if (!query) {
|
||||
|
|
@ -307,13 +308,13 @@ export async function update(state: CronServiceState, id: string, patch: CronJob
|
|||
|
||||
job.updatedAtMs = now;
|
||||
if (scheduleChanged || enabledChanged) {
|
||||
if (job.enabled) {
|
||||
if (isJobEnabled(job)) {
|
||||
job.state.nextRunAtMs = computeJobNextRunAtMs(job, now);
|
||||
} else {
|
||||
job.state.nextRunAtMs = undefined;
|
||||
job.state.runningAtMs = undefined;
|
||||
}
|
||||
} else if (job.enabled) {
|
||||
} else if (isJobEnabled(job)) {
|
||||
// Non-schedule edits should not mutate other jobs, but still repair a
|
||||
// missing/corrupt nextRunAtMs for the updated job.
|
||||
const nextRun = job.state.nextRunAtMs;
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import type {
|
|||
import {
|
||||
computeJobPreviousRunAtMs,
|
||||
computeJobNextRunAtMs,
|
||||
isJobEnabled,
|
||||
nextWakeAtMs,
|
||||
recomputeNextRunsForMaintenance,
|
||||
recordScheduleComputeError,
|
||||
|
|
@ -499,7 +500,7 @@ export function applyJobResult(
|
|||
);
|
||||
}
|
||||
}
|
||||
} else if (result.status === "error" && job.enabled) {
|
||||
} else if (result.status === "error" && isJobEnabled(job)) {
|
||||
// Apply exponential backoff for errored jobs to prevent retry storms.
|
||||
const backoff = errorBackoffMs(job.state.consecutiveErrors ?? 1);
|
||||
let normalNext: number | undefined;
|
||||
|
|
@ -527,7 +528,7 @@ export function applyJobResult(
|
|||
},
|
||||
"cron: applying error backoff",
|
||||
);
|
||||
} else if (job.enabled) {
|
||||
} else if (isJobEnabled(job)) {
|
||||
let naturalNext: number | undefined;
|
||||
try {
|
||||
naturalNext =
|
||||
|
|
@ -836,7 +837,7 @@ function isRunnableJob(params: {
|
|||
if (!job.state) {
|
||||
job.state = {};
|
||||
}
|
||||
if (!job.enabled) {
|
||||
if (!isJobEnabled(job)) {
|
||||
return false;
|
||||
}
|
||||
if (params.skipJobIds?.has(job.id)) {
|
||||
|
|
@ -853,7 +854,7 @@ function isRunnableJob(params: {
|
|||
const nextRun = job.state.nextRunAtMs;
|
||||
if (
|
||||
job.state.lastStatus === "error" &&
|
||||
job.enabled &&
|
||||
isJobEnabled(job) &&
|
||||
typeof nextRun === "number" &&
|
||||
typeof lastRun === "number" &&
|
||||
nextRun > lastRun
|
||||
|
|
@ -1079,7 +1080,7 @@ async function applyStartupCatchupOutcomes(
|
|||
let offset = staggerMs;
|
||||
for (const jobId of plan.deferredJobIds) {
|
||||
const job = state.store.jobs.find((entry) => entry.id === jobId);
|
||||
if (!job || !job.enabled) {
|
||||
if (!job || !isJobEnabled(job)) {
|
||||
continue;
|
||||
}
|
||||
job.state.nextRunAtMs = baseNow + offset;
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@ const TASK_ROOT = path.resolve(import.meta.dirname);
|
|||
const SRC_ROOT = path.resolve(TASK_ROOT, "..");
|
||||
|
||||
const ALLOWED_IMPORTERS = new Set([
|
||||
"auto-reply/reply/commands-status.ts",
|
||||
"tasks/runtime-internal.ts",
|
||||
"tasks/task-owner-access.ts",
|
||||
"tasks/task-status-access.ts",
|
||||
]);
|
||||
|
||||
async function listSourceFiles(root: string): Promise<string[]> {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
import { listTasksForAgentId, listTasksForSessionKey } from "./task-registry.js";
|
||||
import type { TaskRecord } from "./task-registry.types.js";
|
||||
|
||||
export function listTasksForSessionKeyForStatus(sessionKey: string): TaskRecord[] {
|
||||
return listTasksForSessionKey(sessionKey);
|
||||
}
|
||||
|
||||
export function listTasksForAgentIdForStatus(agentId: string): TaskRecord[] {
|
||||
return listTasksForAgentId(agentId);
|
||||
}
|
||||
Loading…
Reference in New Issue