From 475defdf823ff83bcf4cafae1b2c0879b34cdf9a Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sun, 29 Mar 2026 16:54:56 -0700 Subject: [PATCH] Anthropic: wire explicit service tier params (#45453) * Anthropic: add explicit service tier wrapper * Runner: wire explicit Anthropic service tiers * Tests: cover explicit Anthropic service tiers * Changelog: note Anthropic service tier follow-up * fix(agents): make Anthropic service tiers override fast mode * fix(config): drop duplicate healed sourceConfig * docs(anthropic): update fast mode service tier guidance * fix(agents): remove dead Anthropic Bedrock exports * fix(agents): avoid cross-provider Anthropic tier warnings * fix(agents): avoid cross-provider OpenAI tier warnings --- CHANGELOG.md | 1 + docs/providers/anthropic.md | 4 +- docs/tools/thinking.md | 4 +- .../pi-embedded-runner-extraparams.test.ts | 226 ++++++++++++++++++ .../anthropic-stream-wrappers.ts | 45 ++++ src/agents/pi-embedded-runner/extra-params.ts | 29 ++- 6 files changed, 299 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5112bc2f40f..5faab182a5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ Docs: https://docs.openclaw.ai - Plugins/CLI: add descriptor-backed lazy plugin CLI registration so Matrix can keep its CLI module lazy-loaded without dropping `openclaw matrix ...` from parse-time command registration. (#57165) Thanks @gumadeiras. - Plugins/CLI: collect root-help plugin descriptors through a dedicated non-activating CLI metadata path so enabled plugins keep validated config semantics without triggering runtime-only plugin registration work, while preserving runtime CLI command registration for legacy channel plugins that still wire commands from full registration. (#57294) thanks @gumadeiras. - Anthropic/OAuth: inject `/fast` `service_tier` hints for direct `sk-ant-oat-*` requests so OAuth-authenticated Anthropic runs stop missing the same overload-routing signal as API-key traffic. Fixes #55758. Thanks @Cypherm and @vincentkoc. +- Anthropic/service tiers: support explicit `serviceTier` model params for direct Anthropic requests and let them override `/fast` defaults when both are set. (#45453) Thanks @vincentkoc. - Docs/anchors: fix broken English docs links and make Mint anchor audits run against the English-source docs tree. (#57039) thanks @velvet-shark. - Cron/announce: preserve all deliverable text payloads for announce mode instead of collapsing to the last chunk, so multi-line cron reports deliver in full to Telegram forum topics. diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index 4772776c447..a7778d78697 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -47,7 +47,7 @@ openclaw onboard --anthropic-api-key "$ANTHROPIC_API_KEY" ## Fast mode (Anthropic API) -OpenClaw's shared `/fast` toggle also supports direct Anthropic API-key traffic. +OpenClaw's shared `/fast` toggle also supports direct public Anthropic traffic, including API-key and OAuth-authenticated requests sent to `api.anthropic.com`. - `/fast on` maps to `service_tier: "auto"` - `/fast off` maps to `service_tier: "standard_only"` @@ -69,8 +69,8 @@ OpenClaw's shared `/fast` toggle also supports direct Anthropic API-key traffic. Important limits: -- This is **API-key only**. Anthropic setup-token / OAuth auth does not honor OpenClaw fast-mode tier injection. - OpenClaw only injects Anthropic service tiers for direct `api.anthropic.com` requests. If you route `anthropic/*` through a proxy or gateway, `/fast` leaves `service_tier` untouched. +- Explicit Anthropic `serviceTier` or `service_tier` model params override the `/fast` default when both are set. - Anthropic reports the effective tier on the response under `usage.service_tier`. On accounts without Priority Tier capacity, `service_tier: "auto"` may still resolve to `standard`. ## Prompt caching (Anthropic API) diff --git a/docs/tools/thinking.md b/docs/tools/thinking.md index a8428dd3f0f..3b99975ca6c 100644 --- a/docs/tools/thinking.md +++ b/docs/tools/thinking.md @@ -56,8 +56,8 @@ title: "Thinking Levels" 5. Fallback: `off` - For `openai/*`, fast mode maps to OpenAI priority processing by sending `service_tier=priority` on supported Responses requests. - For `openai-codex/*`, fast mode sends the same `service_tier=priority` flag on Codex Responses. OpenClaw keeps one shared `/fast` toggle across both auth paths. -- For direct `anthropic/*` API-key requests, fast mode maps to Anthropic service tiers: `/fast on` sets `service_tier=auto`, `/fast off` sets `service_tier=standard_only`. -- Anthropic fast mode is API-key only. OpenClaw skips Anthropic service-tier injection for Claude setup-token / OAuth auth and for non-Anthropic proxy base URLs. +- For direct public `anthropic/*` requests, including OAuth-authenticated traffic sent to `api.anthropic.com`, fast mode maps to Anthropic service tiers: `/fast on` sets `service_tier=auto`, `/fast off` sets `service_tier=standard_only`. +- Explicit Anthropic `serviceTier` / `service_tier` model params override the fast-mode default when both are set. OpenClaw still skips Anthropic service-tier injection for non-Anthropic proxy base URLs. ## Verbose directives (/verbose or /v) diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 4f2e0a6b9ea..12bcd71c8ee 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -2122,6 +2122,179 @@ describe("applyExtraParamsToAgent", () => { expect(payload.service_tier).toBe("standard_only"); }); + it("injects configured Anthropic service_tier into direct Anthropic payloads", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + cfg: { + agents: { + defaults: { + models: { + "anthropic/claude-sonnet-4-5": { + params: { + serviceTier: "standard_only", + }, + }, + }, + }, + }, + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + + it("injects configured Anthropic service_tier into OAuth-authenticated Anthropic payloads", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + cfg: { + agents: { + defaults: { + models: { + "anthropic/claude-sonnet-4-5": { + params: { + serviceTier: "standard_only", + }, + }, + }, + }, + }, + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + options: { + apiKey: "sk-ant-oat-test-token", + }, + payload: {}, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + + it("does not warn for valid Anthropic serviceTier values", () => { + const warnSpy = vi.spyOn(log, "warn").mockImplementation(() => undefined); + try { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + cfg: { + agents: { + defaults: { + models: { + "anthropic/claude-sonnet-4-5": { + params: { + serviceTier: "standard_only", + }, + }, + }, + }, + }, + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + + expect(payload.service_tier).toBe("standard_only"); + expect(warnSpy).not.toHaveBeenCalled(); + } finally { + warnSpy.mockRestore(); + } + }); + + it("accepts snake_case Anthropic service_tier params", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { + service_tier: "standard_only", + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + + it("lets explicit Anthropic service_tier override fast mode defaults", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + cfg: { + agents: { + defaults: { + models: { + "anthropic/claude-sonnet-4-5": { + params: { + fastMode: true, + serviceTier: "standard_only", + }, + }, + }, + }, + }, + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + + it("lets explicit Anthropic service_tier override OAuth fast mode defaults", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + cfg: { + agents: { + defaults: { + models: { + "anthropic/claude-sonnet-4-5": { + params: { + fastMode: true, + serviceTier: "standard_only", + }, + }, + }, + }, + }, + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://api.anthropic.com", + } as unknown as Model<"anthropic-messages">, + options: { + apiKey: "sk-ant-oat-test-token", + }, + payload: {}, + }); + expect(payload.service_tier).toBe("standard_only"); + }); + it("injects Anthropic fast mode service_tier for OAuth auth", () => { const payload = runResponsesPayloadMutationCase({ applyProvider: "anthropic", @@ -2176,6 +2349,24 @@ describe("applyExtraParamsToAgent", () => { expect(payload).not.toHaveProperty("service_tier"); }); + it("does not inject explicit Anthropic service_tier for proxied base URLs", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "anthropic", + applyModelId: "claude-sonnet-4-5", + extraParamsOverride: { + serviceTier: "standard_only", + }, + model: { + api: "anthropic-messages", + provider: "anthropic", + id: "claude-sonnet-4-5", + baseUrl: "https://proxy.example.com/anthropic", + } as unknown as Model<"anthropic-messages">, + payload: {}, + }); + expect(payload).not.toHaveProperty("service_tier"); + }); + it("maps fast mode to priority service_tier for openai-codex responses", () => { const payload = runResponsesPayloadMutationCase({ applyProvider: "openai-codex", @@ -2305,12 +2496,47 @@ describe("applyExtraParamsToAgent", () => { }); expect(payload).not.toHaveProperty("service_tier"); + expect(warnSpy).toHaveBeenCalledTimes(1); expect(warnSpy).toHaveBeenCalledWith("ignoring invalid OpenAI service tier param: invalid"); } finally { warnSpy.mockRestore(); } }); + it("does not warn for valid OpenAI serviceTier values", () => { + const warnSpy = vi.spyOn(log, "warn").mockImplementation(() => undefined); + try { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5.4", + cfg: { + agents: { + defaults: { + models: { + "openai/gpt-5.4": { + params: { + serviceTier: "priority", + }, + }, + }, + }, + }, + }, + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5.4", + baseUrl: "https://api.openai.com/v1", + } as unknown as Model<"openai-responses">, + }); + + expect(payload.service_tier).toBe("priority"); + expect(warnSpy).not.toHaveBeenCalled(); + } finally { + warnSpy.mockRestore(); + } + }); + it("does not force store for OpenAI Responses routed through non-OpenAI base URLs", () => { const payload = runResponsesPayloadMutationCase({ applyProvider: "openai", diff --git a/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts b/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts index b09fe8fc167..d44c283bdca 100644 --- a/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/anthropic-stream-wrappers.ts @@ -89,6 +89,17 @@ function hasOpenAiAnthropicToolPayloadCompatFlag(model: { compat?: unknown }): b ); } +function normalizeAnthropicServiceTier(value: unknown): AnthropicServiceTier | undefined { + if (typeof value !== "string") { + return undefined; + } + const normalized = value.trim().toLowerCase(); + if (normalized === "auto" || normalized === "standard_only") { + return normalized; + } + return undefined; +} + function requiresAnthropicToolPayloadCompatibilityForModel( model: { api?: unknown; @@ -374,8 +385,42 @@ export function createAnthropicFastModeWrapper( }; } +export function createAnthropicServiceTierWrapper( + baseStreamFn: StreamFn | undefined, + serviceTier: AnthropicServiceTier, +): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + return (model, context, options) => { + if ( + model.api !== "anthropic-messages" || + model.provider !== "anthropic" || + !isAnthropicPublicApiBaseUrl(model.baseUrl) + ) { + return underlying(model, context, options); + } + + return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { + if (payloadObj.service_tier === undefined) { + payloadObj.service_tier = serviceTier; + } + }); + }; +} + export function resolveAnthropicFastMode( extraParams: Record | undefined, ): boolean | undefined { return resolveFastModeParam(extraParams); } + +export function resolveAnthropicServiceTier( + extraParams: Record | undefined, +): AnthropicServiceTier | undefined { + const raw = extraParams?.serviceTier ?? extraParams?.service_tier; + const normalized = normalizeAnthropicServiceTier(raw); + if (raw !== undefined && normalized === undefined) { + const rawSummary = typeof raw === "string" ? raw : typeof raw; + log.warn(`ignoring invalid Anthropic service tier param: ${rawSummary}`); + } + return normalized; +} diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 7f474e5fc7c..0894e2dacc5 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -12,8 +12,10 @@ import type { ProviderRuntimeModel } from "../../plugins/types.js"; import { createAnthropicBetaHeadersWrapper, createAnthropicFastModeWrapper, + createAnthropicServiceTierWrapper, createAnthropicToolPayloadCompatibilityWrapper, resolveAnthropicFastMode, + resolveAnthropicServiceTier, resolveAnthropicBetas, resolveCacheRetention, } from "./anthropic-stream-wrappers.js"; @@ -364,6 +366,19 @@ function applyPostPluginStreamWrappers( // upstream model-ID heuristics for Gemini 3.1 variants. ctx.agent.streamFn = createGoogleThinkingPayloadWrapper(ctx.agent.streamFn, ctx.thinkingLevel); + if (ctx.provider === "anthropic") { + const anthropicServiceTier = resolveAnthropicServiceTier(ctx.effectiveExtraParams); + if (anthropicServiceTier) { + log.debug( + `applying Anthropic service_tier=${anthropicServiceTier} for ${ctx.provider}/${ctx.modelId}`, + ); + ctx.agent.streamFn = createAnthropicServiceTierWrapper( + ctx.agent.streamFn, + anthropicServiceTier, + ); + } + } + const anthropicFastMode = resolveAnthropicFastMode(ctx.effectiveExtraParams); if (anthropicFastMode !== undefined) { log.debug( @@ -388,12 +403,14 @@ function applyPostPluginStreamWrappers( ctx.agent.streamFn = createOpenAIFastModeWrapper(ctx.agent.streamFn); } - const openAIServiceTier = resolveOpenAIServiceTier(ctx.effectiveExtraParams); - if (openAIServiceTier) { - log.debug( - `applying OpenAI service_tier=${openAIServiceTier} for ${ctx.provider}/${ctx.modelId}`, - ); - ctx.agent.streamFn = createOpenAIServiceTierWrapper(ctx.agent.streamFn, openAIServiceTier); + if (ctx.provider === "openai" || ctx.provider === "openai-codex") { + const openAIServiceTier = resolveOpenAIServiceTier(ctx.effectiveExtraParams); + if (openAIServiceTier) { + log.debug( + `applying OpenAI service_tier=${openAIServiceTier} for ${ctx.provider}/${ctx.modelId}`, + ); + ctx.agent.streamFn = createOpenAIServiceTierWrapper(ctx.agent.streamFn, openAIServiceTier); + } } // Work around upstream pi-ai hardcoding `store: false` for Responses API.