test: require Claude 4.6 for Anthropic live selection

2026-03-31 16:40:58 +01:00 · 2026-03-31 16:40:58 +01:00 · a842e34f15
parent 8f2e1194b7
commit a842e34f15
5 changed files with 29 additions and 19 deletions
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@ -206,7 +206,7 @@ Live tests are split into two layers so we can isolate failures:
  - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
 - Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
 - How to select models:
-  - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet/Haiku 4.5, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
+  - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
  - `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
  - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.2,anthropic/claude-opus-4-6,..."` (comma allowlist)
 - How to select providers:
@ -237,7 +237,7 @@ Live tests are split into two layers so we can isolate failures:
 - How to enable:
  - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
 - How to select models:
-  - Default: modern allowlist (Opus/Sonnet/Haiku 4.5, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
+  - Default: modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
  - `OPENCLAW_LIVE_GATEWAY_MODELS=all` is an alias for the modern allowlist
  - Or set `OPENCLAW_LIVE_GATEWAY_MODELS="provider/model"` (or comma list) to narrow
 - How to select providers (avoid “OpenRouter everything”):
--- a/scripts/docker/install-sh-e2e/run.sh
+++ b/scripts/docker/install-sh-e2e/run.sh
@ -410,14 +410,10 @@ run_profile() {
  else
    agent_model="$(set_agent_model "$profile" \
      "anthropic/claude-opus-4-6" \
-      "claude-opus-4-6" \
-      "anthropic/claude-opus-4-5" \
-      "claude-opus-4-5")"
+      "claude-opus-4-6")"
    image_model="$(set_image_model "$profile" \
      "anthropic/claude-opus-4-6" \
-      "claude-opus-4-6" \
-      "anthropic/claude-opus-4-5" \
-      "claude-opus-4-5")"
+      "claude-opus-4-6")"
  fi
  echo "model=$agent_model"
  echo "imageModel=$image_model"
--- a/src/agents/live-model-filter.ts
+++ b/src/agents/live-model-filter.ts
@ -7,16 +7,29 @@ export type ModelRef = {
 };

 /**
  * Decides whether a model id is worth exercising in live test runs.
  *
  * Ids that do not mention `claude` are always accepted (this filter only
  * narrows the Claude family). Claude ids are accepted only when they are a
  * non-Haiku model whose parsed version is 4.6 or newer.
  *
  * `_` and `.` are treated as `-` before matching, so `claude_opus_4.6` and
  * `claude-opus-4-6` are handled identically.
  *
  * @param id - Model identifier, with or without a trailing date stamp or
  *   other suffix (e.g. `claude-opus-4-6`, `claude-sonnet-4-20250514`).
  * @returns `true` when the model should be kept, `false` when it should be
  *   dropped from the selection.
  */
 function isHighSignalClaudeModelId(id: string): boolean {
   const minMajor = 4;
   const minMinor = 6;
   const normalized = id.replace(/[_.]/g, "-");
   if (!/\bclaude\b/i.test(normalized)) {
     return true;
   }
   if (/\bhaiku\b/i.test(normalized)) {
     return false;
   }
   // Version components are limited to 1-2 digits and must not be followed by
   // another digit, so date stamps such as `20250514` are never mistaken for a
   // major or minor version. The family segments (e.g. `opus-`) are optional
   // and must start with a letter, which lets `claude-3-opus-...` resolve to
   // major 3 instead of skipping ahead to the date.
   const versionMatch = normalized.match(
     /\bclaude-(?:[a-z][a-z0-9]*-)*?(\d{1,2})(?:-(\d{1,2}))?(?!\d)/i,
   );
   if (!versionMatch) {
     // No recognizable version: be conservative and drop the model.
     return false;
   }
   const major = Number.parseInt(versionMatch[1] ?? "0", 10);
   // A missing minor (e.g. `claude-opus-4-20250514`) counts as `.0`.
   const minor = Number.parseInt(versionMatch[2] ?? "0", 10);
   if (major !== minMajor) {
     return major > minMajor;
   }
   return minor >= minMinor;
 }

 export function isModernModelRef(ref: ModelRef): boolean {
--- a/src/agents/model-compat.test.ts
+++ b/src/agents/model-compat.test.ts
@ -392,18 +392,19 @@ describe("isModernModelRef", () => {
 describe("isHighSignalLiveModelRef", () => {
  it("keeps modern higher-signal Claude families", () => {
    providerRuntimeMocks.resolveProviderModernModelRef.mockImplementation(({ provider, context }) =>
-      provider === "anthropic" && ["claude-sonnet-4-5", "claude-opus-4-5"].includes(context.modelId)
+      provider === "anthropic" && ["claude-sonnet-4-6", "claude-opus-4-6"].includes(context.modelId)
        ? true
        : undefined,
    );

-    expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-sonnet-4-5" })).toBe(true);
-    expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-5" })).toBe(true);
+    expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-sonnet-4-6" })).toBe(true);
+    expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-6" })).toBe(true);
  });

  it("drops low-signal or old Claude variants even when provider marks them modern", () => {
    providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true);

+    expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-5" })).toBe(false);
    expect(
      isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-haiku-4-5-20251001" }),
    ).toBe(false);
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@ -1818,7 +1818,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
    const agentDir = resolveOpenClawAgentDir();
    const authStorage = discoverAuthStorage(agentDir);
    const modelRegistry = discoverModels(authStorage, agentDir);
-    const anthropic = modelRegistry.find("anthropic", "claude-opus-4-5") as Model<Api> | null;
+    const anthropic = modelRegistry.find("anthropic", "claude-opus-4-6") as Model<Api> | null;
    const zai = modelRegistry.find("zai", "glm-4.7") as Model<Api> | null;

    if (!anthropic || !zai) {
@ -1882,7 +1882,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
      await withGatewayLiveProbeTimeout(
        client.request("sessions.patch", {
          key: sessionKey,
-          model: "anthropic/claude-opus-4-5",
+          model: "anthropic/claude-opus-4-6",
        }),
        "zai-fallback: sessions-patch-anthropic",
      );
@ -1897,7 +1897,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
        client,
        sessionKey,
        idempotencyKey: `idem-${randomUUID()}-tool`,
-        modelKey: "anthropic/claude-opus-4-5",
+        modelKey: "anthropic/claude-opus-4-6",
        message:
          `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
          `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
@ -1906,7 +1906,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
      });
      assertNoReasoningTags({
        text: toolText,
-        model: "anthropic/claude-opus-4-5",
+        model: "anthropic/claude-opus-4-6",
        phase: "zai-fallback-tool",
        label: "zai-fallback",
      });