import { writeFileSync } from "node:fs"; import { afterEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../src/config/config.js"; import { buildMicrosoftSpeechProvider, isCjkDominant, listMicrosoftVoices, } from "./speech-provider.js"; import * as ttsModule from "./tts.js"; const TEST_CFG = {} as OpenClawConfig; describe("listMicrosoftVoices", () => { const originalFetch = globalThis.fetch; afterEach(() => { globalThis.fetch = originalFetch; vi.restoreAllMocks(); }); it("maps Microsoft voice metadata into speech voice options", async () => { globalThis.fetch = vi.fn().mockResolvedValue( new Response( JSON.stringify([ { ShortName: "en-US-AvaNeural", FriendlyName: "Microsoft Ava Online (Natural) - English (United States)", Locale: "en-US", Gender: "Female", VoiceTag: { ContentCategories: ["General"], VoicePersonalities: ["Friendly", "Positive"], }, }, ]), { status: 200 }, ), ) as unknown as typeof globalThis.fetch; const voices = await listMicrosoftVoices(); expect(voices).toEqual([ { id: "en-US-AvaNeural", name: "Microsoft Ava Online (Natural) - English (United States)", category: "General", description: "Friendly, Positive", locale: "en-US", gender: "Female", personalities: ["Friendly", "Positive"], }, ]); }); it("throws on Microsoft voice list failures", async () => { globalThis.fetch = vi .fn() .mockResolvedValue( new Response("nope", { status: 503 }), ) as unknown as typeof globalThis.fetch; await expect(listMicrosoftVoices()).rejects.toThrow("Microsoft voices API error (503)"); }); }); describe("isCjkDominant", () => { it("returns true for Chinese text", () => { expect(isCjkDominant("你好世界")).toBe(true); }); it("returns true for mixed text with majority CJK", () => { expect(isCjkDominant("你好,这是一个测试 hello")).toBe(true); }); it("returns false for English text", () => { expect(isCjkDominant("Hello, this is a test")).toBe(false); }); it("returns false for empty string", () => { expect(isCjkDominant("")).toBe(false); }); it("returns false for mostly English with a few CJK chars", () => { expect(isCjkDominant("This is a long English sentence with one 字")).toBe(false); }); }); describe("buildMicrosoftSpeechProvider", () => { afterEach(() => { vi.restoreAllMocks(); }); it("switches to a Chinese voice for CJK text when no explicit voice override is set", async () => { const provider = buildMicrosoftSpeechProvider(); const edgeSpy = vi.spyOn(ttsModule, "edgeTTS").mockImplementation(async ({ outputPath }) => { writeFileSync(outputPath, Buffer.from([0xff, 0xfb, 0x90, 0x00])); }); await provider.synthesize({ text: "你好,这是一个测试 hello", cfg: TEST_CFG, providerConfig: { enabled: true, voice: "en-US-MichelleNeural", lang: "en-US", outputFormat: "audio-24khz-48kbitrate-mono-mp3", outputFormatConfigured: true, saveSubtitles: false, }, providerOverrides: {}, timeoutMs: 1000, target: "audio-file", }); expect(edgeSpy).toHaveBeenCalledWith( expect.objectContaining({ config: expect.objectContaining({ voice: "zh-CN-XiaoxiaoNeural", lang: "zh-CN", }), }), ); }); it("preserves an explicitly configured English voice for CJK text", async () => { const provider = buildMicrosoftSpeechProvider(); const edgeSpy = vi.spyOn(ttsModule, "edgeTTS").mockImplementation(async ({ outputPath }) => { writeFileSync(outputPath, Buffer.from([0xff, 0xfb, 0x90, 0x00])); }); await provider.synthesize({ text: "你好,这是一个测试 hello", cfg: TEST_CFG, providerConfig: { enabled: true, voice: "en-US-AvaNeural", lang: "en-US", outputFormat: "audio-24khz-48kbitrate-mono-mp3", outputFormatConfigured: true, saveSubtitles: false, }, providerOverrides: {}, timeoutMs: 1000, target: "audio-file", }); expect(edgeSpy).toHaveBeenCalledWith( expect.objectContaining({ config: expect.objectContaining({ voice: "en-US-AvaNeural", lang: "en-US", }), }), ); }); });