test: require Claude 4.6 for Anthropic live selection

This commit is contained in:
Peter Steinberger 2026-03-31 16:40:58 +01:00
parent 8f2e1194b7
commit a842e34f15
No known key found for this signature in database
5 changed files with 29 additions and 19 deletions

View File

@ -206,7 +206,7 @@ Live tests are split into two layers so we can isolate failures:
- `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
- Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
- How to select models:
- `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet/Haiku 4.5, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
- `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
- `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
- or `OPENCLAW_LIVE_MODELS="openai/gpt-5.2,anthropic/claude-opus-4-6,..."` (comma allowlist)
- How to select providers:
@ -237,7 +237,7 @@ Live tests are split into two layers so we can isolate failures:
- How to enable:
- `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
- How to select models:
- Default: modern allowlist (Opus/Sonnet/Haiku 4.5, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
- Default: modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
- `OPENCLAW_LIVE_GATEWAY_MODELS=all` is an alias for the modern allowlist
- Or set `OPENCLAW_LIVE_GATEWAY_MODELS="provider/model"` (or comma list) to narrow
- How to select providers (avoid “OpenRouter everything”):

View File

@ -410,14 +410,10 @@ run_profile() {
else
agent_model="$(set_agent_model "$profile" \
"anthropic/claude-opus-4-6" \
"claude-opus-4-6" \
"anthropic/claude-opus-4-5" \
"claude-opus-4-5")"
"claude-opus-4-6")"
image_model="$(set_image_model "$profile" \
"anthropic/claude-opus-4-6" \
"claude-opus-4-6" \
"anthropic/claude-opus-4-5" \
"claude-opus-4-5")"
"claude-opus-4-6")"
fi
echo "model=$agent_model"
echo "imageModel=$image_model"

View File

@ -7,16 +7,29 @@ export type ModelRef = {
};
/**
 * Decides whether a model id is worth including in the high-signal live test selection.
 *
 * Non-Claude ids are always accepted; this filter only narrows the Claude family.
 * A Claude id is accepted only when it is not a Haiku variant and its parsed
 * version is at least 4.6.
 *
 * @param id - Model id, e.g. `claude-opus-4-6`, `claude-sonnet-4.6`, or `claude-haiku-4-5-20251001`.
 * @returns `true` when the id is non-Claude or a Claude model at version 4.6 or newer
 *          (excluding Haiku); `false` otherwise, including when no version can be parsed.
 */
function isHighSignalClaudeModelId(id: string): boolean {
  const minimumMajor = 4;
  const minimumMinor = 6;

  // Treat `4.6`, `4_6` and `4-6` alike so a single pattern covers every spelling.
  const normalized = id.replace(/[_.]/g, "-");
  if (!/\bclaude\b/i.test(normalized)) {
    return true;
  }
  if (/\bhaiku\b/i.test(normalized)) {
    return false;
  }

  // Version components are limited to one or two digits that are not followed by
  // another digit, so a trailing date stamp (`-20250514`) is never read as a
  // major or minor version. The major may directly follow `claude-`
  // (`claude-3-opus-...`) or come after family segments (`claude-opus-4-6`).
  // This also rejects `claude-3-5-*` / `claude-3-7-*` ids through the major check.
  const versionMatch = normalized.match(
    /\bclaude-(?:[a-z][a-z0-9]*-)*?(\d{1,2})(?:-(\d{1,2}))?(?![0-9])/i,
  );
  if (!versionMatch) {
    return false;
  }

  const major = Number.parseInt(versionMatch[1] ?? "0", 10);
  // A missing minor component means `<major>.0`.
  const minor = Number.parseInt(versionMatch[2] ?? "0", 10);
  return major > minimumMajor || (major === minimumMajor && minor >= minimumMinor);
}
export function isModernModelRef(ref: ModelRef): boolean {

View File

@ -392,18 +392,19 @@ describe("isModernModelRef", () => {
describe("isHighSignalLiveModelRef", () => {
it("keeps modern higher-signal Claude families", () => {
providerRuntimeMocks.resolveProviderModernModelRef.mockImplementation(({ provider, context }) =>
provider === "anthropic" && ["claude-sonnet-4-5", "claude-opus-4-5"].includes(context.modelId)
provider === "anthropic" && ["claude-sonnet-4-6", "claude-opus-4-6"].includes(context.modelId)
? true
: undefined,
);
expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-sonnet-4-5" })).toBe(true);
expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-5" })).toBe(true);
expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-sonnet-4-6" })).toBe(true);
expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-6" })).toBe(true);
});
it("drops low-signal or old Claude variants even when provider marks them modern", () => {
providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true);
expect(isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-opus-4-5" })).toBe(false);
expect(
isHighSignalLiveModelRef({ provider: "anthropic", id: "claude-haiku-4-5-20251001" }),
).toBe(false);

View File

@ -1818,7 +1818,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
const agentDir = resolveOpenClawAgentDir();
const authStorage = discoverAuthStorage(agentDir);
const modelRegistry = discoverModels(authStorage, agentDir);
const anthropic = modelRegistry.find("anthropic", "claude-opus-4-5") as Model<Api> | null;
const anthropic = modelRegistry.find("anthropic", "claude-opus-4-6") as Model<Api> | null;
const zai = modelRegistry.find("zai", "glm-4.7") as Model<Api> | null;
if (!anthropic || !zai) {
@ -1882,7 +1882,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
await withGatewayLiveProbeTimeout(
client.request("sessions.patch", {
key: sessionKey,
model: "anthropic/claude-opus-4-5",
model: "anthropic/claude-opus-4-6",
}),
"zai-fallback: sessions-patch-anthropic",
);
@ -1897,7 +1897,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
client,
sessionKey,
idempotencyKey: `idem-${randomUUID()}-tool`,
modelKey: "anthropic/claude-opus-4-5",
modelKey: "anthropic/claude-opus-4-6",
message:
`Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
`Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
@ -1906,7 +1906,7 @@ describeLive("gateway live (dev agent, profile keys)", () => {
});
assertNoReasoningTags({
text: toolText,
model: "anthropic/claude-opus-4-5",
model: "anthropic/claude-opus-4-6",
phase: "zai-fallback-tool",
label: "zai-fallback",
});