From db0db3abdba33506367341850c023b3c2ba823ac Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 18:33:58 +0100 Subject: [PATCH] test: make talk gateway fixtures provider agnostic --- extensions/elevenlabs/speech-provider.test.ts | 2 +- .../doctor-legacy-config.migrations.test.ts | 4 +- .../config.legacy-config-snapshot.test.ts | 4 +- src/config/talk.normalize.test.ts | 2 +- src/gateway/server-methods/talk.test.ts | 22 +- src/gateway/server.talk-config.test.ts | 283 ++++++++++++------ src/gateway/test-helpers.mocks.ts | 31 +- 7 files changed, 203 insertions(+), 145 deletions(-) diff --git a/extensions/elevenlabs/speech-provider.test.ts b/extensions/elevenlabs/speech-provider.test.ts index 512e060133f..cfce550ccd3 100644 --- a/extensions/elevenlabs/speech-provider.test.ts +++ b/extensions/elevenlabs/speech-provider.test.ts @@ -6,7 +6,7 @@ describe("elevenlabs speech provider", () => { const cases = [ { value: "pMsXgVXv3BLzUgSXRplE", expected: true }, { value: "21m00Tcm4TlvDq8ikWAM", expected: true }, - { value: "EXAVITQu4vr4xnSDxMaL", expected: true }, + { value: "VoiceAlias1234567890", expected: true }, { value: "a1b2c3d4e5", expected: true }, { value: "a".repeat(40), expected: true }, { value: "", expected: false }, diff --git a/src/commands/doctor-legacy-config.migrations.test.ts b/src/commands/doctor-legacy-config.migrations.test.ts index b6200982dfe..dea2067133a 100644 --- a/src/commands/doctor-legacy-config.migrations.test.ts +++ b/src/commands/doctor-legacy-config.migrations.test.ts @@ -736,7 +736,7 @@ describe("normalizeCompatibilityConfigValues", () => { talk: { voiceId: "voice-123", voiceAliases: { - Clawd: "EXAVITQu4vr4xnSDxMaL", + Clawd: "VoiceAlias1234567890", }, modelId: "eleven_v3", outputFormat: "pcm_44100", @@ -751,7 +751,7 @@ describe("normalizeCompatibilityConfigValues", () => { elevenlabs: { voiceId: "voice-123", voiceAliases: { - Clawd: "EXAVITQu4vr4xnSDxMaL", + Clawd: "VoiceAlias1234567890", }, modelId: "eleven_v3", outputFormat: "pcm_44100", diff --git a/src/config/config.legacy-config-snapshot.test.ts b/src/config/config.legacy-config-snapshot.test.ts index ab16446700a..21c8e7f05a9 100644 --- a/src/config/config.legacy-config-snapshot.test.ts +++ b/src/config/config.legacy-config-snapshot.test.ts @@ -8,7 +8,7 @@ describe("talk.voiceAliases", () => { await writeOpenClawConfig(home, { talk: { voiceAliases: { - Clawd: "EXAVITQu4vr4xnSDxMaL", + Clawd: "VoiceAlias1234567890", Roger: "CwhRBWXzGAHq8TQ4Fs17", }, }, @@ -19,7 +19,7 @@ describe("talk.voiceAliases", () => { expect(snap.valid).toBe(true); expect(snap.legacyIssues.some((issue) => issue.path === "talk")).toBe(true); expect(snap.sourceConfig.talk?.providers?.elevenlabs?.voiceAliases).toEqual({ - Clawd: "EXAVITQu4vr4xnSDxMaL", + Clawd: "VoiceAlias1234567890", Roger: "CwhRBWXzGAHq8TQ4Fs17", }); }); diff --git a/src/config/talk.normalize.test.ts b/src/config/talk.normalize.test.ts index 4720f892c61..4e84103b237 100644 --- a/src/config/talk.normalize.test.ts +++ b/src/config/talk.normalize.test.ts @@ -23,7 +23,7 @@ describe("talk normalization", () => { it("keeps core Talk normalization generic and ignores legacy provider-flat fields", () => { const normalized = normalizeTalkSection({ voiceId: "voice-123", - voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" }, // pragma: allowlist secret + voiceAliases: { Clawd: "VoiceAlias1234567890" }, modelId: "eleven_v3", outputFormat: "pcm_44100", apiKey: "secret-key", // pragma: allowlist secret diff --git a/src/gateway/server-methods/talk.test.ts b/src/gateway/server-methods/talk.test.ts index 6d203208518..f96b7c15bf6 100644 --- a/src/gateway/server-methods/talk.test.ts +++ b/src/gateway/server-methods/talk.test.ts @@ -27,11 +27,11 @@ vi.mock("../../tts/tts.js", () => ({ function createTalkConfig(apiKey: unknown): OpenClawConfig { return { talk: { - provider: "elevenlabs", + provider: "acme", providers: { - elevenlabs: { + acme: { apiKey, - voiceId: "voice-default", + voiceId: "stub-default-voice", }, }, }, @@ -44,11 +44,11 @@ describe("talk.speak handler", () => { }); it("uses the active runtime config snapshot instead of the raw config snapshot", async () => { - const runtimeConfig = createTalkConfig("env-elevenlabs-key"); + const runtimeConfig = createTalkConfig("env-acme-key"); const diskConfig = createTalkConfig({ source: "env", provider: "default", - id: "ELEVENLABS_API_KEY", + id: "ACME_SPEECH_API_KEY", }); mocks.loadConfig.mockReturnValue(runtimeConfig); @@ -59,8 +59,8 @@ describe("talk.speak handler", () => { config: diskConfig, }); mocks.getSpeechProvider.mockReturnValue({ - id: "elevenlabs", - label: "ElevenLabs", + id: "acme", + label: "Acme Speech", resolveTalkConfig: ({ talkProviderConfig, }: { @@ -69,11 +69,11 @@ describe("talk.speak handler", () => { }); mocks.synthesizeSpeech.mockImplementation( async ({ cfg }: { cfg: OpenClawConfig; text: string; disableFallback: boolean }) => { - expect(cfg.messages?.tts?.provider).toBe("elevenlabs"); - expect(cfg.messages?.tts?.providers?.elevenlabs?.apiKey).toBe("env-elevenlabs-key"); + expect(cfg.messages?.tts?.provider).toBe("acme"); + expect(cfg.messages?.tts?.providers?.acme?.apiKey).toBe("env-acme-key"); return { success: true, - provider: "elevenlabs", + provider: "acme", audioBuffer: Buffer.from([1, 2, 3]), outputFormat: "mp3", voiceCompatible: false, @@ -103,7 +103,7 @@ describe("talk.speak handler", () => { expect(respond).toHaveBeenCalledWith( true, expect.objectContaining({ - provider: "elevenlabs", + provider: "acme", audioBase64: Buffer.from([1, 2, 3]).toString("base64"), outputFormat: "mp3", mimeType: "audio/mpeg", diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index 858f2deb497..8f65139c5b7 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -56,6 +56,10 @@ const TALK_CONFIG_DEVICE_PATH = path.join( `openclaw-talk-config-device-${process.pid}.json`, ); const TALK_CONFIG_DEVICE = loadOrCreateDeviceIdentity(TALK_CONFIG_DEVICE_PATH); +const GENERIC_TALK_PROVIDER_ID = "acme"; +const GENERIC_TALK_API_ENV = "ACME_SPEECH_API_KEY"; +const DEFAULT_STUB_VOICE_ID = "stub-default-voice"; +const ALIAS_STUB_VOICE_ID = "VoiceAlias1234567890"; async function createFreshOperatorDevice(scopes: string[], nonce: string) { const signedAtMs = Date.now(); @@ -90,18 +94,21 @@ async function connectOperator(ws: GatewaySocket, scopes: string[]) { } async function writeTalkConfig(config: { + provider?: string; apiKey?: string | { source: "env" | "file" | "exec"; provider: string; id: string }; voiceId?: string; silenceTimeoutMs?: number; }) { const { writeConfigFile } = await import("../config/config.js"); + const providerId = config.provider ?? GENERIC_TALK_PROVIDER_ID; await writeConfigFile({ talk: { + provider: providerId, silenceTimeoutMs: config.silenceTimeoutMs, providers: config.apiKey !== undefined || config.voiceId !== undefined ? { - elevenlabs: { + [providerId]: { ...(config.apiKey !== undefined ? { apiKey: config.apiKey } : {}), ...(config.voiceId !== undefined ? { voiceId: config.voiceId } : {}), }, @@ -147,6 +154,22 @@ async function invokeTalkSpeakDirect(params: Record) { return response; } +async function withSpeechProviders( + speechProviders: NonNullable["speechProviders"]>, + run: () => Promise, +): Promise { + const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry(); + setActivePluginRegistry({ + ...createEmptyPluginRegistry(), + speechProviders, + }); + try { + return await run(); + } finally { + setActivePluginRegistry(previousRegistry); + } +} + function expectTalkConfig( talk: TalkConfig | undefined, expected: { @@ -175,8 +198,9 @@ describe("gateway talk.config", () => { const { writeConfigFile } = await import("../config/config.js"); await writeConfigFile({ talk: { + provider: GENERIC_TALK_PROVIDER_ID, providers: { - elevenlabs: { + [GENERIC_TALK_PROVIDER_ID]: { voiceId: "voice-123", apiKey: "secret-key-abc", // pragma: allowlist secret }, @@ -196,7 +220,7 @@ describe("gateway talk.config", () => { const res = await fetchTalkConfig(ws); expect(res.ok).toBe(true); expectTalkConfig(res.payload?.config?.talk, { - provider: "elevenlabs", + provider: GENERIC_TALK_PROVIDER_ID, voiceId: "voice-123", apiKey: "__OPENCLAW_REDACTED__", silenceTimeoutMs: 1500, @@ -239,7 +263,7 @@ describe("gateway talk.config", () => { const res = await fetchTalkConfig(ws, { includeSecrets: true }); expect(res.ok).toBe(true); expectTalkConfig(res.payload?.config?.talk, { - provider: "elevenlabs", + provider: GENERIC_TALK_PROVIDER_ID, apiKey: "secret-key-abc", }); }); @@ -250,11 +274,11 @@ describe("gateway talk.config", () => { apiKey: { source: "env", provider: "default", - id: "ELEVENLABS_API_KEY", + id: GENERIC_TALK_API_ENV, }, }); - await withEnvAsync({ ELEVENLABS_API_KEY: "env-elevenlabs-key" }, async () => { + await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => { await withServer(async (ws) => { await connectOperator(ws, ["operator.read", "operator.write", "operator.talk.secrets"]); const res = await fetchTalkConfig(ws, { includeSecrets: true }); @@ -263,10 +287,10 @@ describe("gateway talk.config", () => { const secretRef = { source: "env", provider: "default", - id: "ELEVENLABS_API_KEY", + id: GENERIC_TALK_API_ENV, } satisfies SecretRef; expectTalkConfig(res.payload?.config?.talk, { - provider: "elevenlabs", + provider: GENERIC_TALK_PROVIDER_ID, apiKey: secretRef, }); }); @@ -274,43 +298,57 @@ describe("gateway talk.config", () => { }); it("resolves plugin-owned Talk defaults before redaction", async () => { - const { writeConfigFile } = await import("../config/config.js"); - await writeConfigFile({ - talk: { - provider: "elevenlabs", - providers: { - elevenlabs: { - voiceId: "voice-from-config", - }, - }, - }, + await writeTalkConfig({ + provider: GENERIC_TALK_PROVIDER_ID, + voiceId: "voice-from-config", }); - await withEnvAsync({ ELEVENLABS_API_KEY: "env-elevenlabs-key" }, async () => { - await withServer(async (ws) => { - await connectOperator(ws, ["operator.read"]); - const res = await fetchTalkConfig(ws); - expect(res.ok, JSON.stringify(res.error)).toBe(true); - expectTalkConfig(res.payload?.config?.talk, { - provider: "elevenlabs", - voiceId: "voice-from-config", - apiKey: "__OPENCLAW_REDACTED__", - }); - }); + await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => { + await withSpeechProviders( + [ + { + pluginId: "acme-talk-defaults-test", + source: "test", + provider: { + id: GENERIC_TALK_PROVIDER_ID, + label: "Acme Speech", + isConfigured: () => true, + resolveTalkConfig: ({ talkProviderConfig }) => ({ + ...talkProviderConfig, + apiKey: + typeof process.env[GENERIC_TALK_API_ENV] === "string" + ? process.env[GENERIC_TALK_API_ENV] + : undefined, + }), + synthesize: async () => ({ + audioBuffer: Buffer.from([1]), + outputFormat: "mp3", + fileExtension: ".mp3", + voiceCompatible: false, + }), + }, + }, + ], + async () => { + await withServer(async (ws) => { + await connectOperator(ws, ["operator.read"]); + const res = await fetchTalkConfig(ws); + expect(res.ok, JSON.stringify(res.error)).toBe(true); + expectTalkConfig(res.payload?.config?.talk, { + provider: GENERIC_TALK_PROVIDER_ID, + voiceId: "voice-from-config", + apiKey: "__OPENCLAW_REDACTED__", + }); + }); + }, + ); }); }); it("returns canonical provider talk payloads", async () => { - const { writeConfigFile } = await import("../config/config.js"); - await writeConfigFile({ - talk: { - provider: "elevenlabs", - providers: { - elevenlabs: { - voiceId: "voice-normalized", - }, - }, - }, + await writeTalkConfig({ + provider: GENERIC_TALK_PROVIDER_ID, + voiceId: "voice-normalized", }); await withServer(async (ws) => { @@ -318,7 +356,7 @@ describe("gateway talk.config", () => { const res = await fetchTalkConfig(ws); expect(res.ok).toBe(true); expectTalkConfig(res.payload?.config?.talk, { - provider: "elevenlabs", + provider: GENERIC_TALK_PROVIDER_ID, voiceId: "voice-normalized", }); }); @@ -385,9 +423,9 @@ describe("gateway talk.config", () => { providers: { elevenlabs: { apiKey: "elevenlabs-talk-key", // pragma: allowlist secret - voiceId: "voice-default", + voiceId: DEFAULT_STUB_VOICE_ID, voiceAliases: { - Clawd: "EXAVITQu4vr4xnSDxMaL", + Clawd: ALIAS_STUB_VOICE_ID, }, }, }, @@ -407,12 +445,75 @@ describe("gateway talk.config", () => { globalThis.fetch = withFetchPreconnect(fetchMock); try { - const res = await invokeTalkSpeakDirect({ - text: "Hello from talk mode.", - voiceId: "clawd", - outputFormat: "pcm_44100", - latencyTier: 3, - }); + const res = await withSpeechProviders( + [ + { + pluginId: "elevenlabs-test", + source: "test", + provider: { + id: "elevenlabs", + label: "ElevenLabs", + isConfigured: () => true, + resolveTalkOverrides: ({ params }) => ({ + ...(typeof params.voiceId === "string" && params.voiceId.trim().length > 0 + ? { voiceId: params.voiceId.trim() } + : {}), + ...(typeof params.modelId === "string" && params.modelId.trim().length > 0 + ? { modelId: params.modelId.trim() } + : {}), + ...(typeof params.outputFormat === "string" && params.outputFormat.trim().length > 0 + ? { outputFormat: params.outputFormat.trim() } + : {}), + ...(typeof params.latencyTier === "number" + ? { latencyTier: params.latencyTier } + : {}), + }), + synthesize: async (req) => { + const config = req.providerConfig as Record; + const overrides = (req.providerOverrides ?? {}) as Record; + const voiceId = + (typeof overrides.voiceId === "string" && overrides.voiceId.trim().length > 0 + ? overrides.voiceId.trim() + : undefined) ?? + (typeof config.voiceId === "string" && config.voiceId.trim().length > 0 + ? config.voiceId.trim() + : undefined) ?? + DEFAULT_STUB_VOICE_ID; + const outputFormat = + typeof overrides.outputFormat === "string" && + overrides.outputFormat.trim().length > 0 + ? overrides.outputFormat.trim() + : "mp3"; + const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`); + url.searchParams.set("output_format", outputFormat); + const response = await globalThis.fetch(url.href, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + text: req.text, + ...(typeof overrides.latencyTier === "number" + ? { latency_optimization_level: overrides.latencyTier } + : {}), + }), + }); + return { + audioBuffer: Buffer.from(await response.arrayBuffer()), + outputFormat, + fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3", + voiceCompatible: false, + }; + }, + }, + }, + ], + async () => + await invokeTalkSpeakDirect({ + text: "Hello from talk mode.", + voiceId: "clawd", + outputFormat: "pcm_44100", + latencyTier: 3, + }), + ); expect(res?.ok, JSON.stringify(res?.error)).toBe(true); expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("elevenlabs"); expect((res?.payload as TalkSpeakPayload | undefined)?.outputFormat).toBe("pcm_44100"); @@ -421,7 +522,7 @@ describe("gateway talk.config", () => { ); expect(fetchMock).toHaveBeenCalled(); - expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL"); + expect(fetchUrl).toContain(`/v1/text-to-speech/${ALIAS_STUB_VOICE_ID}`); expect(fetchUrl).toContain("output_format=pcm_44100"); const init = requestInits[0]; const bodyText = typeof init?.body === "string" ? init.body : "{}"; @@ -446,10 +547,8 @@ describe("gateway talk.config", () => { }); await withServer(async () => { - const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry(); - setActivePluginRegistry({ - ...createEmptyPluginRegistry(), - speechProviders: [ + await withSpeechProviders( + [ { pluginId: "acme-plugin", source: "test", @@ -466,19 +565,17 @@ describe("gateway talk.config", () => { }, }, ], - }); - try { - const res = await invokeTalkSpeakDirect({ - text: "Hello from plugin talk mode.", - }); - expect(res?.ok, JSON.stringify(res?.error)).toBe(true); - expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("acme"); - expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe( - Buffer.from([7, 8, 9]).toString("base64"), - ); - } finally { - setActivePluginRegistry(previousRegistry); - } + async () => { + const res = await invokeTalkSpeakDirect({ + text: "Hello from plugin talk mode.", + }); + expect(res?.ok, JSON.stringify(res?.error)).toBe(true); + expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("acme"); + expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe( + Buffer.from([7, 8, 9]).toString("base64"), + ); + }, + ); }); }); @@ -511,10 +608,8 @@ describe("gateway talk.config", () => { }, }); - const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry(); - setActivePluginRegistry({ - ...createEmptyPluginRegistry(), - speechProviders: [ + await withSpeechProviders( + [ { pluginId: "acme-plugin", source: "test", @@ -528,18 +623,15 @@ describe("gateway talk.config", () => { }, }, ], - }); - - try { - const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." }); - expect(res?.ok).toBe(false); - expect(res?.error?.details).toEqual({ - reason: "synthesis_failed", - fallbackEligible: false, - }); - } finally { - setActivePluginRegistry(previousRegistry); - } + async () => { + const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." }); + expect(res?.ok).toBe(false); + expect(res?.error?.details).toEqual({ + reason: "synthesis_failed", + fallbackEligible: false, + }); + }, + ); }); it("rejects empty audio results as invalid_audio_result", async () => { @@ -555,10 +647,8 @@ describe("gateway talk.config", () => { }, }); - const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry(); - setActivePluginRegistry({ - ...createEmptyPluginRegistry(), - speechProviders: [ + await withSpeechProviders( + [ { pluginId: "acme-plugin", source: "test", @@ -575,17 +665,14 @@ describe("gateway talk.config", () => { }, }, ], - }); - - try { - const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." }); - expect(res?.ok).toBe(false); - expect(res?.error?.details).toEqual({ - reason: "invalid_audio_result", - fallbackEligible: false, - }); - } finally { - setActivePluginRegistry(previousRegistry); - } + async () => { + const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." }); + expect(res?.ok).toBe(false); + expect(res?.error?.details).toEqual({ + reason: "invalid_audio_result", + fallbackEligible: false, + }); + }, + ); }); }); diff --git a/src/gateway/test-helpers.mocks.ts b/src/gateway/test-helpers.mocks.ts index c4bb6c80e56..54f45884382 100644 --- a/src/gateway/test-helpers.mocks.ts +++ b/src/gateway/test-helpers.mocks.ts @@ -263,7 +263,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({ provider: createStubSpeechProvider({ id: "elevenlabs", label: "ElevenLabs", - voices: ["EXAVITQu4vr4xnSDxMaL", "voice-default"], + voices: ["stub-default-voice", "stub-alt-voice"], resolveTalkOverrides: ({ params }) => ({ ...(trimString(params.voiceId) == null ? {} : { voiceId: trimString(params.voiceId) }), ...(trimString(params.modelId) == null ? {} : { modelId: trimString(params.modelId) }), @@ -274,35 +274,6 @@ const createStubPluginRegistry = (): PluginRegistry => ({ ? {} : { latencyTier: asNumber(params.latencyTier) }), }), - synthesize: async (req) => { - const config = req.providerConfig as Record; - const overrides = (req.providerOverrides ?? {}) as Record; - const voiceId = - trimString(overrides.voiceId) ?? trimString(config.voiceId) ?? "voice-default"; - const outputFormat = trimString(overrides.outputFormat) ?? "mp3"; - const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`); - url.searchParams.set("output_format", outputFormat); - const audioBuffer = await fetchStubSpeechAudio( - url.href, - { - method: "POST", - headers: { "content-type": "application/json" }, - body: JSON.stringify({ - text: req.text, - ...(asNumber(overrides.latencyTier) == null - ? {} - : { latency_optimization_level: asNumber(overrides.latencyTier) }), - }), - }, - "elevenlabs", - ); - return { - audioBuffer, - outputFormat, - fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3", - voiceCompatible: false, - }; - }, }), }, ],