Merge 229817fdc7 into 392ddb56e2

2026-03-15 22:37:29 +00:00 · 2026-03-15 22:37:29 +00:00 · a1defb236a
parent 392ddb56e2 229817fdc7
commit a1defb236a
2 changed files with 137 additions and 2 deletions
--- a/src/agents/tools/tts-tool.test.ts
+++ b/src/agents/tools/tts-tool.test.ts
@@ -4,6 +4,11 @@ vi.mock("../../auto-reply/tokens.js", () => ({
  SILENT_REPLY_TOKEN: "QUIET_TOKEN",
 }));

+const textToSpeechMock = vi.hoisted(() => vi.fn());
+vi.mock("../../tts/tts.js", () => ({
+  textToSpeech: textToSpeechMock,
+}));
+
 const { createTtsTool } = await import("./tts-tool.js");

 describe("createTtsTool", () => {
@@ -13,4 +18,79 @@ describe("createTtsTool", () => {
    expect(tool.description).toContain("QUIET_TOKEN");
    expect(tool.description).not.toContain("NO_REPLY");
  });
+
+  it("returns metadata only in deliveryMode=return", async () => {
+    textToSpeechMock.mockResolvedValueOnce({
+      success: true,
+      audioPath: "/tmp/openclaw/tts-test/voice.mp3",
+      provider: "openai",
+      voiceCompatible: false,
+    });
+    const tool = createTtsTool();
+
+    const result = await tool.execute("call-1", {
+      text: "hello",
+      deliveryMode: "return",
+    });
+
+    expect(result.content[0]?.type).toBe("text");
+    expect((result.content[0] as { text: string }).text).not.toContain("MEDIA:");
+    expect(result.details).toMatchObject({
+      ok: true,
+      deliveryMode: "return",
+      audioPath: "/tmp/openclaw/tts-test/voice.mp3",
+      mimeType: "audio/mpeg",
+      sent: false,
+    });
+  });
+
+  it("uses audio/ogg mimeType for voice-compatible return output", async () => {
+    textToSpeechMock.mockResolvedValueOnce({
+      success: true,
+      audioPath: "/tmp/openclaw/tts-test/voice.opus",
+      provider: "openai",
+      voiceCompatible: true,
+    });
+    const tool = createTtsTool();
+
+    const result = await tool.execute("call-voice", {
+      text: "hello",
+      channel: "telegram",
+      deliveryMode: "return",
+    });
+
+    expect(result.details).toMatchObject({
+      ok: true,
+      deliveryMode: "return",
+      mimeType: "audio/ogg",
+    });
+  });
+
+  it("includes ok=true in send mode success details", async () => {
+    textToSpeechMock.mockResolvedValueOnce({
+      success: true,
+      audioPath: "/tmp/openclaw/tts-test/voice.mp3",
+      provider: "openai",
+      voiceCompatible: false,
+    });
+    const tool = createTtsTool();
+
+    const result = await tool.execute("call-send", {
+      text: "hello",
+      deliveryMode: "send",
+    });
+
+    expect(result.details).toMatchObject({ ok: true, deliveryMode: "send" });
+  });
+
+  it("returns validation error for invalid deliveryMode", async () => {
+    const tool = createTtsTool();
+    const result = await tool.execute("call-2", {
+      text: "hello",
+      deliveryMode: "invalid-mode",
+    });
+
+    expect((result.details as { error?: { code?: string } }).error?.code).toBe("VALIDATION_ERROR");
+    expect((result.content[0] as { text: string }).text).toContain("deliveryMode must be one of");
+  });
 });
--- a/src/agents/tools/tts-tool.ts
+++ b/src/agents/tools/tts-tool.ts
@@ -12,6 +12,12 @@ const TtsToolSchema = Type.Object({
  channel: Type.Optional(
    Type.String({ description: "Optional channel id to pick output format (e.g. telegram)." }),
  ),
+  deliveryMode: Type.Optional(
+    Type.Union([Type.Literal("send"), Type.Literal("return")], {
+      description:
+        "Delivery mode: 'send' (default) returns MEDIA output for normal delivery; 'return' returns metadata only without MEDIA output.",
+    }),
+  ),
 });

 export function createTtsTool(opts?: {
@@ -27,6 +33,25 @@ export function createTtsTool(opts?: {
      const params = args as Record<string, unknown>;
      const text = readStringParam(params, "text", { required: true });
      const channel = readStringParam(params, "channel");
+      const deliveryModeRaw = readStringParam(params, "deliveryMode");
+      const deliveryMode = deliveryModeRaw == null || deliveryModeRaw === "" ? "send" : deliveryModeRaw;
+      if (deliveryMode !== "send" && deliveryMode !== "return") {
+        return {
+          content: [
+            {
+              type: "text",
+              text: "deliveryMode must be one of: send, return",
+            },
+          ],
+          details: {
+            ok: false,
+            error: {
+              code: "VALIDATION_ERROR",
+              message: "deliveryMode must be one of: send, return",
+            },
+          },
+        };
+      }
      const cfg = opts?.config ?? loadConfig();
      const result = await textToSpeech({
        text,
@@ -35,6 +60,25 @@ export function createTtsTool(opts?: {
      });

      if (result.success && result.audioPath) {
+        if (deliveryMode === "return") {
+          return {
+            content: [
+              {
+                type: "text",
+                text: "TTS audio generated (return mode).",
+              },
+            ],
+            details: {
+              ok: true,
+              deliveryMode: "return",
+              audioPath: result.audioPath,
+              mimeType: result.voiceCompatible ? "audio/ogg" : "audio/mpeg",
+              sent: false,
+              provider: result.provider,
+            },
+          };
+        }
+
        const lines: string[] = [];
        // Tag Telegram Opus output as a voice bubble instead of a file attachment.
        if (result.voiceCompatible) {
@@ -43,7 +87,12 @@ export function createTtsTool(opts?: {
        lines.push(`MEDIA:${result.audioPath}`);
        return {
          content: [{ type: "text", text: lines.join("\n") }],
-          details: { audioPath: result.audioPath, provider: result.provider },
+          details: {
+            ok: true,
+            audioPath: result.audioPath,
+            provider: result.provider,
+            deliveryMode: "send",
+          },
        };
      }

@@ -54,7 +103,13 @@ export function createTtsTool(opts?: {
            text: result.error ?? "TTS conversion failed",
          },
        ],
-        details: { error: result.error },
+        details: {
+          ok: false,
+          error: {
+            code: "TTS_GENERATION_FAILED",
+            message: result.error ?? "TTS conversion failed",
+          },
+        },
      };
    },
  };