From 69a0a6c8472438db82b8706acbcbacc76f03dee6 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 24 Mar 2026 10:27:24 -0700 Subject: [PATCH] fix: tighten ACP final fallback semantics (#53692) (thanks @w-sss) --- CHANGELOG.md | 1 + .../reply/dispatch-acp-delivery.test.ts | 20 +++++ src/auto-reply/reply/dispatch-acp.test.ts | 78 ++++++++++++------- src/auto-reply/reply/dispatch-acp.ts | 16 ++-- 4 files changed, 82 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd20a8d3132..ea93ac741ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- ACP/direct chats: always deliver a terminal ACP result when final TTS does not yield audio, even if block text already streamed earlier, and skip redundant empty-text final synthesis. (#53692) Thanks @w-sss. - Doctor/image generation: seed migrated legacy Nano Banana Google provider config with the `/v1beta` API root and an empty model list so `openclaw doctor --fix` completes and the migrated native Google image path keeps hitting the correct endpoint. (#53757) Thanks @mahopan. - Security/skills: validate skill installer metadata against strict regex allowlists per package manager, sanitize skill metadata for terminal output, add URL protocol allowlisting in markdown preview and skill homepage links, warn on non-bundled skill install sources, and remove unsafe `file://` workspace links. (#53471) Thanks @BunsDev. - Feishu/docx block ordering: preserve the document tree order from `docx.document.convert` when inserting blocks, fixing heading/paragraph/list misordering in newly written Feishu documents. (#40524) Thanks @TaoXieSZ. diff --git a/src/auto-reply/reply/dispatch-acp-delivery.test.ts b/src/auto-reply/reply/dispatch-acp-delivery.test.ts index ce02f98289d..b7193a13ea5 100644 --- a/src/auto-reply/reply/dispatch-acp-delivery.test.ts +++ b/src/auto-reply/reply/dispatch-acp-delivery.test.ts @@ -42,6 +42,26 @@ function createCoordinator(onReplyStart?: (...args: unknown[]) => Promise) } describe("createAcpDispatchDeliveryCoordinator", () => { + it("bypasses TTS when skipTts is requested", async () => { + const dispatcher = createDispatcher(); + const coordinator = createAcpDispatchDeliveryCoordinator({ + cfg: createAcpTestConfig(), + ctx: buildTestCtx({ + Provider: "discord", + Surface: "discord", + SessionKey: "agent:codex-acp:session-1", + }), + dispatcher, + inboundAudio: false, + shouldRouteToOriginating: false, + }); + + await coordinator.deliver("final", { text: "hello" }, { skipTts: true }); + + expect(ttsMocks.maybeApplyTtsToPayload).not.toHaveBeenCalled(); + expect(dispatcher.sendFinalReply).toHaveBeenCalledWith({ text: "hello" }); + }); + it("starts reply lifecycle only once when called directly and through deliver", async () => { const onReplyStart = vi.fn(async () => {}); const coordinator = createCoordinator(onReplyStart); diff --git a/src/auto-reply/reply/dispatch-acp.test.ts b/src/auto-reply/reply/dispatch-acp.test.ts index d5e89fbc770..5c53d80940b 100644 --- a/src/auto-reply/reply/dispatch-acp.test.ts +++ b/src/auto-reply/reply/dispatch-acp.test.ts @@ -436,14 +436,12 @@ describe("tryDispatchAcpReply", () => { ); }); - it("delivers accumulated block text as fallback when TTS synthesis returns no media", async () => { + it("delivers final fallback text even when routed block text already existed", async () => { setReadyAcpResolution(); - // Configure TTS mode as "final" but TTS synthesis returns no mediaUrl ttsMocks.resolveTtsConfig.mockReturnValue({ mode: "final" }); - // Mock TTS to return no mediaUrl for this test only (use Once to avoid cross-test leak) - ttsMocks.maybeApplyTtsToPayload.mockResolvedValueOnce( - {} as ReturnType, - ); + ttsMocks.maybeApplyTtsToPayload + .mockResolvedValueOnce({ text: "CODEX_OK" }) + .mockResolvedValueOnce({} as ReturnType); managerMocks.runTurn.mockImplementation( async ({ onEvent }: { onEvent: (event: unknown) => Promise }) => { @@ -459,14 +457,11 @@ describe("tryDispatchAcpReply", () => { shouldRouteToOriginating: true, }); - // Should deliver final text as fallback when TTS produced no media. - // Note: ACP sends block first (during flush on done), then final fallback. - // So routeReply is called twice: 1 for block + 1 for final. - expect(result?.counts.block).toBeGreaterThanOrEqual(1); + expect(result?.counts.block).toBe(1); expect(result?.counts.final).toBe(1); expect(routeMocks.routeReply).toHaveBeenCalledTimes(2); - // Verify final delivery contains the expected text - expect(routeMocks.routeReply).toHaveBeenCalledWith( + expect(routeMocks.routeReply).toHaveBeenNthCalledWith( + 2, expect.objectContaining({ payload: expect.objectContaining({ text: "CODEX_OK", @@ -475,12 +470,15 @@ describe("tryDispatchAcpReply", () => { ); }); - it("does not duplicate delivery when blocks were already routed", async () => { + it("does not add text fallback when final TTS already delivered audio", async () => { setReadyAcpResolution(); - // Configure TTS mode as "none" - should skip TTS for final delivery - ttsMocks.resolveTtsConfig.mockReturnValue({ mode: "none" }); - - // Simulate normal flow where projector routes blocks + ttsMocks.resolveTtsConfig.mockReturnValue({ mode: "final" }); + ttsMocks.maybeApplyTtsToPayload + .mockResolvedValueOnce({ text: "Task completed" }) + .mockResolvedValueOnce({ + mediaUrl: "https://example.com/final.mp3", + audioAsVoice: true, + } as Awaited>); managerMocks.runTurn.mockImplementation( async ({ onEvent }: { onEvent: (event: unknown) => Promise }) => { await onEvent({ type: "text_delta", text: "Task completed", tag: "agent_message_chunk" }); @@ -495,12 +493,18 @@ describe("tryDispatchAcpReply", () => { shouldRouteToOriginating: true, }); - // Should NOT deliver duplicate final text when blocks were already routed - // The block delivery should be sufficient - expect(result?.counts.block).toBeGreaterThanOrEqual(1); - expect(result?.counts.final).toBe(0); - // Verify routeReply was called for block, not for duplicate final - expect(routeMocks.routeReply).toHaveBeenCalledTimes(1); + expect(result?.counts.block).toBe(1); + expect(result?.counts.final).toBe(1); + expect(routeMocks.routeReply).toHaveBeenCalledTimes(2); + expect(routeMocks.routeReply).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + payload: expect.objectContaining({ + mediaUrl: "https://example.com/final.mp3", + audioAsVoice: true, + }), + }), + ); }); it("skips fallback when TTS mode is all (blocks already processed with TTS)", async () => { @@ -522,9 +526,31 @@ describe("tryDispatchAcpReply", () => { shouldRouteToOriginating: true, }); - // Should NOT trigger fallback for ttsMode="all" to avoid duplicate TTS + expect(result?.counts.block).toBe(1); expect(result?.counts.final).toBe(0); - // Note: maybeApplyTtsToPayload is called during block delivery, not in the fallback path - // We just verify that no final delivery occurred + expect(routeMocks.routeReply).toHaveBeenCalledTimes(1); + }); + + it("skips final TTS and fallback when no block text was accumulated", async () => { + setReadyAcpResolution(); + ttsMocks.resolveTtsConfig.mockReturnValue({ mode: "final" }); + + managerMocks.runTurn.mockImplementation( + async ({ onEvent }: { onEvent: (event: unknown) => Promise }) => { + await onEvent({ type: "done" }); + }, + ); + + const { dispatcher } = createDispatcher(); + const result = await runDispatch({ + bodyForAgent: "run acp", + dispatcher, + shouldRouteToOriginating: true, + }); + + expect(result?.counts.block).toBe(0); + expect(result?.counts.final).toBe(0); + expect(routeMocks.routeReply).not.toHaveBeenCalled(); + expect(ttsMocks.maybeApplyTtsToPayload).not.toHaveBeenCalled(); }); }); diff --git a/src/auto-reply/reply/dispatch-acp.ts b/src/auto-reply/reply/dispatch-acp.ts index defabf8a860..47e2d966ef7 100644 --- a/src/auto-reply/reply/dispatch-acp.ts +++ b/src/auto-reply/reply/dispatch-acp.ts @@ -316,11 +316,12 @@ export async function tryDispatchAcpReply(params: { await projector.flush(true); const ttsMode = resolveTtsConfig(params.cfg).mode ?? "final"; const accumulatedBlockText = delivery.getAccumulatedBlockText(); + const hasAccumulatedBlockText = accumulatedBlockText.trim().length > 0; const routedCounts = delivery.getRoutedCounts(); // Attempt final TTS synthesis for ttsMode="final" (independent of delivery status). // This ensures routed ACP flows still get final audio even after block delivery. let ttsSucceeded = false; - if (ttsMode === "final") { + if (ttsMode === "final" && hasAccumulatedBlockText) { try { const ttsSyntheticReply = await maybeApplyTtsToPayload({ payload: { text: accumulatedBlockText }, @@ -349,16 +350,17 @@ export async function tryDispatchAcpReply(params: { // TTS failed, fall through to text fallback } } - // Only attempt text fallback if no delivery has happened yet. - // For routed flows, check routedCounts (block or final) to detect prior successful delivery. - // For non-routed flows, we cannot reliably detect delivery success (blockCount increments - // before send), so we skip the fallback guard to allow recovery when block delivery fails. + // Only attempt text fallback if a terminal delivery has not happened yet. + // Block routing alone is not enough here because some ACP parent surfaces only expose + // terminal replies, so routed block text can still leave the user with no visible result. + // For non-routed flows, we still skip the final-only guard because dispatcher counts are not + // tracked here and we want to recover when earlier block sends fail. // Skip fallback for ttsMode="all" because blocks were already processed with TTS. const shouldSkipTextFallback = ttsMode === "all" || ttsSucceeded || - (params.shouldRouteToOriginating && (routedCounts.block > 0 || routedCounts.final > 0)); - if (!shouldSkipTextFallback && accumulatedBlockText.trim()) { + (params.shouldRouteToOriginating && routedCounts.final > 0); + if (!shouldSkipTextFallback && hasAccumulatedBlockText) { // Fallback to text-only delivery (no TTS). // For routed flows, use delivery.deliver with skipTts to bypass TTS re-entry. // For non-routed flows, use dispatcher directly to bypass TTS.