diff --git a/docs/help/testing.md b/docs/help/testing.md index 21ef02ce140..870e1daaeb7 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -206,7 +206,7 @@ Live tests are split into two layers so we can isolate failures: - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly) - Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke - How to select models: - - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet/Haiku 4.5, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4) + - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4) - `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.2,anthropic/claude-opus-4-6,..."` (comma allowlist) - How to select providers: @@ -237,7 +237,7 @@ Live tests are split into two layers so we can isolate failures: - How to enable: - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly) - How to select models: - - Default: modern allowlist (Opus/Sonnet/Haiku 4.5, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4) + - Default: modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4) - `OPENCLAW_LIVE_GATEWAY_MODELS=all` is an alias for the modern allowlist - Or set `OPENCLAW_LIVE_GATEWAY_MODELS="provider/model"` (or comma list) to narrow - How to select providers (avoid “OpenRouter everything”): diff --git a/scripts/docker/install-sh-e2e/run.sh b/scripts/docker/install-sh-e2e/run.sh index ea6ff833976..d14af33cf19 100755 --- a/scripts/docker/install-sh-e2e/run.sh +++ b/scripts/docker/install-sh-e2e/run.sh @@ -410,14 +410,10 @@ run_profile() { else agent_model="$(set_agent_model "$profile" \ "anthropic/claude-opus-4-6" \ - "claude-opus-4-6" \ - "anthropic/claude-opus-4-5" \ - "claude-opus-4-5")" + "claude-opus-4-6")" image_model="$(set_image_model "$profile" \ "anthropic/claude-opus-4-6" \ - "claude-opus-4-6" \ - "anthropic/claude-opus-4-5" \ - "claude-opus-4-5")" + "claude-opus-4-6")" fi echo "model=$agent_model" echo "imageModel=$image_model" diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts index 8a4f16f6c05..495472a8685 100644 --- a/src/agents/live-model-filter.ts +++ b/src/agents/live-model-filter.ts @@ -7,16 +7,29 @@ export type ModelRef = { }; function isHighSignalClaudeModelId(id: string): boolean { - if (!/\bclaude\b/i.test(id)) { + const normalized = id.replace(/[_.]/g, "-"); + if (!/\bclaude\b/i.test(normalized)) { return true; } - if (/\bhaiku\b/i.test(id)) { + if (/\bhaiku\b/i.test(normalized)) { return false; } - if (/\bclaude-3(?:[-.]5|[-.]7)\b/i.test(id)) { + if (/\bclaude-3(?:[-.]5|[-.]7)\b/i.test(normalized)) { return false; } - return true; + const versionMatch = normalized.match(/\bclaude-[a-z0-9-]*?-(\d+)(?:-(\d+))?(?:\b|[-])/i); + if (!versionMatch) { + return false; + } + const major = Number.parseInt(versionMatch[1] ?? "0", 10); + const minor = Number.parseInt(versionMatch[2] ?? "0", 10); + if (major > 4) { + return true; + } + if (major < 4) { + return false; + } + return minor >= 6; } export function isModernModelRef(ref: ModelRef): boolean { diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index 14093ec474b..c742ae643b7 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -392,18 +392,19 @@ describe("isModernModelRef", () => { describe("isHighSignalLiveModelRef", () => { it("keeps modern higher-signal Claude families", () => { providerRuntimeMocks.resolveProviderModernModelRef.mockImplementation(({ provider, context }) => - provider === "anthropic" && ["claude-sonnet-4-5", "claude-opus-4-5"].includes(context.modelId) + provider === "anthropic" && ["claude-sonnet-4-6", "claude-opus-4-6"].includes(context.modelId) ? true : undefined, ); - expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-sonnet-4-5" })).toBe(true); - expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-5" })).toBe(true); + expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-sonnet-4-6" })).toBe(true); + expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-6" })).toBe(true); }); it("drops low-signal or old Claude variants even when provider marks them modern", () => { providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true); + expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-5" })).toBe(false); expect( isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-haiku-4-5-20251001" }), ).toBe(false); diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 78f07116264..3b636d5a3b7 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -1818,7 +1818,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { const agentDir = resolveOpenClawAgentDir(); const authStorage = discoverAuthStorage(agentDir); const modelRegistry = discoverModels(authStorage, agentDir); - const anthropic = modelRegistry.find("anthropic", "claude-opus-4-5") as Model | null; + const anthropic = modelRegistry.find("anthropic", "claude-opus-4-6") as Model | null; const zai = modelRegistry.find("zai", "glm-4.7") as Model | null; if (!anthropic || !zai) { @@ -1882,7 +1882,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { await withGatewayLiveProbeTimeout( client.request("sessions.patch", { key: sessionKey, - model: "anthropic/claude-opus-4-5", + model: "anthropic/claude-opus-4-6", }), "zai-fallback: sessions-patch-anthropic", ); @@ -1897,7 +1897,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { client, sessionKey, idempotencyKey: `idem-${randomUUID()}-tool`, - modelKey: "anthropic/claude-opus-4-5", + modelKey: "anthropic/claude-opus-4-6", message: `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` + `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`, @@ -1906,7 +1906,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { }); assertNoReasoningTags({ text: toolText, - model: "anthropic/claude-opus-4-5", + model: "anthropic/claude-opus-4-6", phase: "zai-fallback-tool", label: "zai-fallback", });