From 5bafa6edcfa6340ea15cb944291d57d272d6baef Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 3 Apr 2026 15:02:55 +0100 Subject: [PATCH] fix(auto-reply): align fallback model runtime state --- src/auto-reply/reply/agent-runner-utils.ts | 10 +++-- .../agent-runner.misc.runreplyagent.test.ts | 37 +++++++++++++------ 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/auto-reply/reply/agent-runner-utils.ts b/src/auto-reply/reply/agent-runner-utils.ts index f9579e2fcbd..96353311385 100644 --- a/src/auto-reply/reply/agent-runner-utils.ts +++ b/src/auto-reply/reply/agent-runner-utils.ts @@ -92,13 +92,17 @@ export const formatBunFetchSocketError = (message: string) => { ].join("\n"); }; -export const resolveEnforceFinalTag = (run: FollowupRun["run"], provider: string) => +export const resolveEnforceFinalTag = ( + run: FollowupRun["run"], + provider: string, + model = run.model, +) => Boolean( run.enforceFinalTag || isReasoningTagProvider(provider, { config: run.config, workspaceDir: run.workspaceDir, - modelId: run.model, + modelId: model, }), ); @@ -133,7 +137,7 @@ export function buildEmbeddedRunBaseParams(params: { ownerNumbers: params.run.ownerNumbers, inputProvenance: params.run.inputProvenance, senderIsOwner: params.run.senderIsOwner, - enforceFinalTag: resolveEnforceFinalTag(params.run, params.provider), + enforceFinalTag: resolveEnforceFinalTag(params.run, params.provider, params.model), silentExpected: params.run.silentExpected, provider: params.provider, model: params.model, diff --git a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts index 83b803efa56..8966facdb2d 100644 --- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts +++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts @@ -15,6 +15,19 @@ import type { TemplateContext } from "../templating.js"; import type { FollowupRun, QueueSettings } from "./queue.js"; import { createMockTypingController } from "./test-helpers.js"; +function createCliBackendTestConfig() { + return { + agents: { + defaults: { + cliBackends: { + "claude-cli": {}, + "google-gemini-cli": {}, + }, + }, + }, + }; +} + const runEmbeddedPiAgentMock = vi.fn(); const runCliAgentMock = vi.fn(); const runWithModelFallbackMock = vi.fn(); @@ -150,7 +163,7 @@ describe("runReplyAgent onAgentRunStart", () => { config: provider === "claude-cli" ? { agents: { defaults: { cliBackends: { "claude-cli": {} } } } } - : {}, + : createCliBackendTestConfig(), skillsSnapshot: {}, provider, model, @@ -263,7 +276,7 @@ describe("runReplyAgent authProfileId fallback scoping", () => { messageProvider: "telegram", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude-opus", @@ -1166,7 +1179,7 @@ describe("runReplyAgent messaging tool suppression", () => { messageProvider, sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude", @@ -1389,7 +1402,7 @@ describe("runReplyAgent reminder commitment guard", () => { messageProvider: "telegram", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude", @@ -1610,7 +1623,7 @@ describe("runReplyAgent fallback reasoning tags", () => { messageProvider: "whatsapp", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude", @@ -1658,9 +1671,9 @@ describe("runReplyAgent fallback reasoning tags", () => { }); runWithModelFallbackMock.mockImplementationOnce( async ({ run }: RunWithModelFallbackParams) => ({ - result: await run("google-gemini-cli", "gemini-3"), - provider: "google-gemini-cli", - model: "gemini-3", + result: await run("google", "gemini-2.5-pro"), + provider: "google", + model: "gemini-2.5-pro", }), ); @@ -1739,7 +1752,7 @@ describe("runReplyAgent response usage footer", () => { messageProvider: "whatsapp", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude", @@ -1848,7 +1861,7 @@ describe("runReplyAgent transient HTTP retry", () => { messageProvider: "telegram", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude", @@ -1924,7 +1937,7 @@ describe("runReplyAgent billing error classification", () => { messageProvider: "telegram", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude", @@ -1985,7 +1998,7 @@ describe("runReplyAgent mid-turn rate-limit fallback", () => { messageProvider: "telegram", sessionFile: "/tmp/session.jsonl", workspaceDir: "/tmp", - config: {}, + config: createCliBackendTestConfig(), skillsSnapshot: {}, provider: "anthropic", model: "claude",