test: make talk gateway fixtures provider agnostic

2026-04-05 18:33:58 +01:00 · 2026-04-05 18:33:58 +01:00 · db0db3abdb
parent 962650f879
commit db0db3abdb
7 changed files with 203 additions and 145 deletions
--- a/extensions/elevenlabs/speech-provider.test.ts
+++ b/extensions/elevenlabs/speech-provider.test.ts
@ -6,7 +6,7 @@ describe("elevenlabs speech provider", () => {
    const cases = [
      { value: "pMsXgVXv3BLzUgSXRplE", expected: true },
      { value: "21m00Tcm4TlvDq8ikWAM", expected: true },
-      { value: "EXAVITQu4vr4xnSDxMaL", expected: true },
+      { value: "VoiceAlias1234567890", expected: true },
      { value: "a1b2c3d4e5", expected: true },
      { value: "a".repeat(40), expected: true },
      { value: "", expected: false },
--- a/src/commands/doctor-legacy-config.migrations.test.ts
+++ b/src/commands/doctor-legacy-config.migrations.test.ts
@ -736,7 +736,7 @@ describe("normalizeCompatibilityConfigValues", () => {
      talk: {
        voiceId: "voice-123",
        voiceAliases: {
-          Clawd: "EXAVITQu4vr4xnSDxMaL",
+          Clawd: "VoiceAlias1234567890",
        },
        modelId: "eleven_v3",
        outputFormat: "pcm_44100",
@ -751,7 +751,7 @@ describe("normalizeCompatibilityConfigValues", () => {
        elevenlabs: {
          voiceId: "voice-123",
          voiceAliases: {
-            Clawd: "EXAVITQu4vr4xnSDxMaL",
+            Clawd: "VoiceAlias1234567890",
          },
          modelId: "eleven_v3",
          outputFormat: "pcm_44100",
--- a/src/config/config.legacy-config-snapshot.test.ts
+++ b/src/config/config.legacy-config-snapshot.test.ts
@ -8,7 +8,7 @@ describe("talk.voiceAliases", () => {
      await writeOpenClawConfig(home, {
        talk: {
          voiceAliases: {
-            Clawd: "EXAVITQu4vr4xnSDxMaL",
+            Clawd: "VoiceAlias1234567890",
            Roger: "CwhRBWXzGAHq8TQ4Fs17",
          },
        },
@ -19,7 +19,7 @@ describe("talk.voiceAliases", () => {
      expect(snap.valid).toBe(true);
      expect(snap.legacyIssues.some((issue) => issue.path === "talk")).toBe(true);
      expect(snap.sourceConfig.talk?.providers?.elevenlabs?.voiceAliases).toEqual({
-        Clawd: "EXAVITQu4vr4xnSDxMaL",
+        Clawd: "VoiceAlias1234567890",
        Roger: "CwhRBWXzGAHq8TQ4Fs17",
      });
    });
--- a/src/config/talk.normalize.test.ts
+++ b/src/config/talk.normalize.test.ts
@ -23,7 +23,7 @@ describe("talk normalization", () => {
  it("keeps core Talk normalization generic and ignores legacy provider-flat fields", () => {
    const normalized = normalizeTalkSection({
      voiceId: "voice-123",
-      voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" }, // pragma: allowlist secret
+      voiceAliases: { Clawd: "VoiceAlias1234567890" },
      modelId: "eleven_v3",
      outputFormat: "pcm_44100",
      apiKey: "secret-key", // pragma: allowlist secret
--- a/src/gateway/server-methods/talk.test.ts
+++ b/src/gateway/server-methods/talk.test.ts
@ -27,11 +27,11 @@ vi.mock("../../tts/tts.js", () => ({
 function createTalkConfig(apiKey: unknown): OpenClawConfig {
  return {
    talk: {
-      provider: "elevenlabs",
+      provider: "acme",
      providers: {
-        elevenlabs: {
+        acme: {
          apiKey,
-          voiceId: "voice-default",
+          voiceId: "stub-default-voice",
        },
      },
    },
@ -44,11 +44,11 @@ describe("talk.speak handler", () => {
  });

  it("uses the active runtime config snapshot instead of the raw config snapshot", async () => {
-    const runtimeConfig = createTalkConfig("env-elevenlabs-key");
+    const runtimeConfig = createTalkConfig("env-acme-key");
    const diskConfig = createTalkConfig({
      source: "env",
      provider: "default",
-      id: "ELEVENLABS_API_KEY",
+      id: "ACME_SPEECH_API_KEY",
    });

    mocks.loadConfig.mockReturnValue(runtimeConfig);
@ -59,8 +59,8 @@ describe("talk.speak handler", () => {
      config: diskConfig,
    });
    mocks.getSpeechProvider.mockReturnValue({
-      id: "elevenlabs",
-      label: "ElevenLabs",
+      id: "acme",
+      label: "Acme Speech",
      resolveTalkConfig: ({
        talkProviderConfig,
      }: {
@ -69,11 +69,11 @@ describe("talk.speak handler", () => {
    });
    mocks.synthesizeSpeech.mockImplementation(
      async ({ cfg }: { cfg: OpenClawConfig; text: string; disableFallback: boolean }) => {
-        expect(cfg.messages?.tts?.provider).toBe("elevenlabs");
-        expect(cfg.messages?.tts?.providers?.elevenlabs?.apiKey).toBe("env-elevenlabs-key");
+        expect(cfg.messages?.tts?.provider).toBe("acme");
+        expect(cfg.messages?.tts?.providers?.acme?.apiKey).toBe("env-acme-key");
        return {
          success: true,
-          provider: "elevenlabs",
+          provider: "acme",
          audioBuffer: Buffer.from([1, 2, 3]),
          outputFormat: "mp3",
          voiceCompatible: false,
@ -103,7 +103,7 @@ describe("talk.speak handler", () => {
    expect(respond).toHaveBeenCalledWith(
      true,
      expect.objectContaining({
-        provider: "elevenlabs",
+        provider: "acme",
        audioBase64: Buffer.from([1, 2, 3]).toString("base64"),
        outputFormat: "mp3",
        mimeType: "audio/mpeg",
--- a/src/gateway/server.talk-config.test.ts
+++ b/src/gateway/server.talk-config.test.ts
@ -56,6 +56,10 @@ const TALK_CONFIG_DEVICE_PATH = path.join(
  `openclaw-talk-config-device-${process.pid}.json`,
 );
 const TALK_CONFIG_DEVICE = loadOrCreateDeviceIdentity(TALK_CONFIG_DEVICE_PATH);
+const GENERIC_TALK_PROVIDER_ID = "acme";
+const GENERIC_TALK_API_ENV = "ACME_SPEECH_API_KEY";
+const DEFAULT_STUB_VOICE_ID = "stub-default-voice";
+const ALIAS_STUB_VOICE_ID = "VoiceAlias1234567890";

 async function createFreshOperatorDevice(scopes: string[], nonce: string) {
  const signedAtMs = Date.now();
@ -90,18 +94,21 @@ async function connectOperator(ws: GatewaySocket, scopes: string[]) {
 }

 async function writeTalkConfig(config: {
+  provider?: string;
  apiKey?: string | { source: "env" | "file" | "exec"; provider: string; id: string };
  voiceId?: string;
  silenceTimeoutMs?: number;
 }) {
  const { writeConfigFile } = await import("../config/config.js");
+  const providerId = config.provider ?? GENERIC_TALK_PROVIDER_ID;
  await writeConfigFile({
    talk: {
+      provider: providerId,
      silenceTimeoutMs: config.silenceTimeoutMs,
      providers:
        config.apiKey !== undefined || config.voiceId !== undefined
          ? {
-              elevenlabs: {
+              [providerId]: {
                ...(config.apiKey !== undefined ? { apiKey: config.apiKey } : {}),
                ...(config.voiceId !== undefined ? { voiceId: config.voiceId } : {}),
              },
@ -147,6 +154,22 @@ async function invokeTalkSpeakDirect(params: Record<string, unknown>) {
  return response;
 }

+async function withSpeechProviders<T>(
+  speechProviders: NonNullable<ReturnType<typeof createEmptyPluginRegistry>["speechProviders"]>,
+  run: () => Promise<T>,
+): Promise<T> {
+  const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
+  setActivePluginRegistry({
+    ...createEmptyPluginRegistry(),
+    speechProviders,
+  });
+  try {
+    return await run();
+  } finally {
+    setActivePluginRegistry(previousRegistry);
+  }
+}
+
 function expectTalkConfig(
  talk: TalkConfig | undefined,
  expected: {
@ -175,8 +198,9 @@ describe("gateway talk.config", () => {
    const { writeConfigFile } = await import("../config/config.js");
    await writeConfigFile({
      talk: {
+        provider: GENERIC_TALK_PROVIDER_ID,
        providers: {
-          elevenlabs: {
+          [GENERIC_TALK_PROVIDER_ID]: {
            voiceId: "voice-123",
            apiKey: "secret-key-abc", // pragma: allowlist secret
          },
@ -196,7 +220,7 @@ describe("gateway talk.config", () => {
      const res = await fetchTalkConfig(ws);
      expect(res.ok).toBe(true);
      expectTalkConfig(res.payload?.config?.talk, {
-        provider: "elevenlabs",
+        provider: GENERIC_TALK_PROVIDER_ID,
        voiceId: "voice-123",
        apiKey: "__OPENCLAW_REDACTED__",
        silenceTimeoutMs: 1500,
@ -239,7 +263,7 @@ describe("gateway talk.config", () => {
      const res = await fetchTalkConfig(ws, { includeSecrets: true });
      expect(res.ok).toBe(true);
      expectTalkConfig(res.payload?.config?.talk, {
-        provider: "elevenlabs",
+        provider: GENERIC_TALK_PROVIDER_ID,
        apiKey: "secret-key-abc",
      });
    });
@ -250,11 +274,11 @@ describe("gateway talk.config", () => {
      apiKey: {
        source: "env",
        provider: "default",
-        id: "ELEVENLABS_API_KEY",
+        id: GENERIC_TALK_API_ENV,
      },
    });

-    await withEnvAsync({ ELEVENLABS_API_KEY: "env-elevenlabs-key" }, async () => {
+    await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => {
      await withServer(async (ws) => {
        await connectOperator(ws, ["operator.read", "operator.write", "operator.talk.secrets"]);
        const res = await fetchTalkConfig(ws, { includeSecrets: true });
@ -263,10 +287,10 @@ describe("gateway talk.config", () => {
        const secretRef = {
          source: "env",
          provider: "default",
-          id: "ELEVENLABS_API_KEY",
+          id: GENERIC_TALK_API_ENV,
        } satisfies SecretRef;
        expectTalkConfig(res.payload?.config?.talk, {
-          provider: "elevenlabs",
+          provider: GENERIC_TALK_PROVIDER_ID,
          apiKey: secretRef,
        });
      });
@ -274,43 +298,57 @@ describe("gateway talk.config", () => {
  });

  it("resolves plugin-owned Talk defaults before redaction", async () => {
-    const { writeConfigFile } = await import("../config/config.js");
-    await writeConfigFile({
-      talk: {
-        provider: "elevenlabs",
-        providers: {
-          elevenlabs: {
-            voiceId: "voice-from-config",
-          },
-        },
-      },
+    await writeTalkConfig({
+      provider: GENERIC_TALK_PROVIDER_ID,
+      voiceId: "voice-from-config",
    });

-    await withEnvAsync({ ELEVENLABS_API_KEY: "env-elevenlabs-key" }, async () => {
-      await withServer(async (ws) => {
-        await connectOperator(ws, ["operator.read"]);
-        const res = await fetchTalkConfig(ws);
-        expect(res.ok, JSON.stringify(res.error)).toBe(true);
-        expectTalkConfig(res.payload?.config?.talk, {
-          provider: "elevenlabs",
-          voiceId: "voice-from-config",
-          apiKey: "__OPENCLAW_REDACTED__",
-        });
-      });
+    await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => {
+      await withSpeechProviders(
+        [
+          {
+            pluginId: "acme-talk-defaults-test",
+            source: "test",
+            provider: {
+              id: GENERIC_TALK_PROVIDER_ID,
+              label: "Acme Speech",
+              isConfigured: () => true,
+              resolveTalkConfig: ({ talkProviderConfig }) => ({
+                ...talkProviderConfig,
+                apiKey:
+                  typeof process.env[GENERIC_TALK_API_ENV] === "string"
+                    ? process.env[GENERIC_TALK_API_ENV]
+                    : undefined,
+              }),
+              synthesize: async () => ({
+                audioBuffer: Buffer.from([1]),
+                outputFormat: "mp3",
+                fileExtension: ".mp3",
+                voiceCompatible: false,
+              }),
+            },
+          },
+        ],
+        async () => {
+          await withServer(async (ws) => {
+            await connectOperator(ws, ["operator.read"]);
+            const res = await fetchTalkConfig(ws);
+            expect(res.ok, JSON.stringify(res.error)).toBe(true);
+            expectTalkConfig(res.payload?.config?.talk, {
+              provider: GENERIC_TALK_PROVIDER_ID,
+              voiceId: "voice-from-config",
+              apiKey: "__OPENCLAW_REDACTED__",
+            });
+          });
+        },
+      );
    });
  });

  it("returns canonical provider talk payloads", async () => {
-    const { writeConfigFile } = await import("../config/config.js");
-    await writeConfigFile({
-      talk: {
-        provider: "elevenlabs",
-        providers: {
-          elevenlabs: {
-            voiceId: "voice-normalized",
-          },
-        },
-      },
+    await writeTalkConfig({
+      provider: GENERIC_TALK_PROVIDER_ID,
+      voiceId: "voice-normalized",
    });

    await withServer(async (ws) => {
@ -318,7 +356,7 @@ describe("gateway talk.config", () => {
      const res = await fetchTalkConfig(ws);
      expect(res.ok).toBe(true);
      expectTalkConfig(res.payload?.config?.talk, {
-        provider: "elevenlabs",
+        provider: GENERIC_TALK_PROVIDER_ID,
        voiceId: "voice-normalized",
      });
    });
@ -385,9 +423,9 @@ describe("gateway talk.config", () => {
        providers: {
          elevenlabs: {
            apiKey: "elevenlabs-talk-key", // pragma: allowlist secret
-            voiceId: "voice-default",
+            voiceId: DEFAULT_STUB_VOICE_ID,
            voiceAliases: {
-              Clawd: "EXAVITQu4vr4xnSDxMaL",
+              Clawd: ALIAS_STUB_VOICE_ID,
            },
          },
        },
@ -407,12 +445,75 @@ describe("gateway talk.config", () => {
    globalThis.fetch = withFetchPreconnect(fetchMock);

    try {
-      const res = await invokeTalkSpeakDirect({
-        text: "Hello from talk mode.",
-        voiceId: "clawd",
-        outputFormat: "pcm_44100",
-        latencyTier: 3,
-      });
+      const res = await withSpeechProviders(
+        [
+          {
+            pluginId: "elevenlabs-test",
+            source: "test",
+            provider: {
+              id: "elevenlabs",
+              label: "ElevenLabs",
+              isConfigured: () => true,
+              resolveTalkOverrides: ({ params }) => ({
+                ...(typeof params.voiceId === "string" && params.voiceId.trim().length > 0
+                  ? { voiceId: params.voiceId.trim() }
+                  : {}),
+                ...(typeof params.modelId === "string" && params.modelId.trim().length > 0
+                  ? { modelId: params.modelId.trim() }
+                  : {}),
+                ...(typeof params.outputFormat === "string" && params.outputFormat.trim().length > 0
+                  ? { outputFormat: params.outputFormat.trim() }
+                  : {}),
+                ...(typeof params.latencyTier === "number"
+                  ? { latencyTier: params.latencyTier }
+                  : {}),
+              }),
+              synthesize: async (req) => {
+                const config = req.providerConfig as Record<string, unknown>;
+                const overrides = (req.providerOverrides ?? {}) as Record<string, unknown>;
+                const voiceId =
+                  (typeof overrides.voiceId === "string" && overrides.voiceId.trim().length > 0
+                    ? overrides.voiceId.trim()
+                    : undefined) ??
+                  (typeof config.voiceId === "string" && config.voiceId.trim().length > 0
+                    ? config.voiceId.trim()
+                    : undefined) ??
+                  DEFAULT_STUB_VOICE_ID;
+                const outputFormat =
+                  typeof overrides.outputFormat === "string" &&
+                  overrides.outputFormat.trim().length > 0
+                    ? overrides.outputFormat.trim()
+                    : "mp3";
+                const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`);
+                url.searchParams.set("output_format", outputFormat);
+                const response = await globalThis.fetch(url.href, {
+                  method: "POST",
+                  headers: { "content-type": "application/json" },
+                  body: JSON.stringify({
+                    text: req.text,
+                    ...(typeof overrides.latencyTier === "number"
+                      ? { latency_optimization_level: overrides.latencyTier }
+                      : {}),
+                  }),
+                });
+                return {
+                  audioBuffer: Buffer.from(await response.arrayBuffer()),
+                  outputFormat,
+                  fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3",
+                  voiceCompatible: false,
+                };
+              },
+            },
+          },
+        ],
+        async () =>
+          await invokeTalkSpeakDirect({
+            text: "Hello from talk mode.",
+            voiceId: "clawd",
+            outputFormat: "pcm_44100",
+            latencyTier: 3,
+          }),
+      );
      expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
      expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("elevenlabs");
      expect((res?.payload as TalkSpeakPayload | undefined)?.outputFormat).toBe("pcm_44100");
@ -421,7 +522,7 @@ describe("gateway talk.config", () => {
      );

      expect(fetchMock).toHaveBeenCalled();
-      expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL");
+      expect(fetchUrl).toContain(`/v1/text-to-speech/${ALIAS_STUB_VOICE_ID}`);
      expect(fetchUrl).toContain("output_format=pcm_44100");
      const init = requestInits[0];
      const bodyText = typeof init?.body === "string" ? init.body : "{}";
@ -446,10 +547,8 @@ describe("gateway talk.config", () => {
    });

    await withServer(async () => {
-      const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
-      setActivePluginRegistry({
-        ...createEmptyPluginRegistry(),
-        speechProviders: [
+      await withSpeechProviders(
+        [
          {
            pluginId: "acme-plugin",
            source: "test",
@ -466,19 +565,17 @@ describe("gateway talk.config", () => {
            },
          },
        ],
-      });
-      try {
-        const res = await invokeTalkSpeakDirect({
-          text: "Hello from plugin talk mode.",
-        });
-        expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
-        expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("acme");
-        expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe(
-          Buffer.from([7, 8, 9]).toString("base64"),
-        );
-      } finally {
-        setActivePluginRegistry(previousRegistry);
-      }
+        async () => {
+          const res = await invokeTalkSpeakDirect({
+            text: "Hello from plugin talk mode.",
+          });
+          expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
+          expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("acme");
+          expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe(
+            Buffer.from([7, 8, 9]).toString("base64"),
+          );
+        },
+      );
    });
  });

@ -511,10 +608,8 @@ describe("gateway talk.config", () => {
      },
    });

-    const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
-    setActivePluginRegistry({
-      ...createEmptyPluginRegistry(),
-      speechProviders: [
+    await withSpeechProviders(
+      [
        {
          pluginId: "acme-plugin",
          source: "test",
@ -528,18 +623,15 @@ describe("gateway talk.config", () => {
          },
        },
      ],
-    });
-
-    try {
-      const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
-      expect(res?.ok).toBe(false);
-      expect(res?.error?.details).toEqual({
-        reason: "synthesis_failed",
-        fallbackEligible: false,
-      });
-    } finally {
-      setActivePluginRegistry(previousRegistry);
-    }
+      async () => {
+        const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
+        expect(res?.ok).toBe(false);
+        expect(res?.error?.details).toEqual({
+          reason: "synthesis_failed",
+          fallbackEligible: false,
+        });
+      },
+    );
  });

  it("rejects empty audio results as invalid_audio_result", async () => {
@ -555,10 +647,8 @@ describe("gateway talk.config", () => {
      },
    });

-    const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
-    setActivePluginRegistry({
-      ...createEmptyPluginRegistry(),
-      speechProviders: [
+    await withSpeechProviders(
+      [
        {
          pluginId: "acme-plugin",
          source: "test",
@ -575,17 +665,14 @@ describe("gateway talk.config", () => {
          },
        },
      ],
-    });
-
-    try {
-      const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
-      expect(res?.ok).toBe(false);
-      expect(res?.error?.details).toEqual({
-        reason: "invalid_audio_result",
-        fallbackEligible: false,
-      });
-    } finally {
-      setActivePluginRegistry(previousRegistry);
-    }
+      async () => {
+        const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
+        expect(res?.ok).toBe(false);
+        expect(res?.error?.details).toEqual({
+          reason: "invalid_audio_result",
+          fallbackEligible: false,
+        });
+      },
+    );
  });
 });
--- a/src/gateway/test-helpers.mocks.ts
+++ b/src/gateway/test-helpers.mocks.ts
@ -263,7 +263,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({
      provider: createStubSpeechProvider({
        id: "elevenlabs",
        label: "ElevenLabs",
-        voices: ["EXAVITQu4vr4xnSDxMaL", "voice-default"],
+        voices: ["stub-default-voice", "stub-alt-voice"],
        resolveTalkOverrides: ({ params }) => ({
          ...(trimString(params.voiceId) == null ? {} : { voiceId: trimString(params.voiceId) }),
          ...(trimString(params.modelId) == null ? {} : { modelId: trimString(params.modelId) }),
@ -274,35 +274,6 @@ const createStubPluginRegistry = (): PluginRegistry => ({
            ? {}
            : { latencyTier: asNumber(params.latencyTier) }),
        }),
-        synthesize: async (req) => {
-          const config = req.providerConfig as Record<string, unknown>;
-          const overrides = (req.providerOverrides ?? {}) as Record<string, unknown>;
-          const voiceId =
-            trimString(overrides.voiceId) ?? trimString(config.voiceId) ?? "voice-default";
-          const outputFormat = trimString(overrides.outputFormat) ?? "mp3";
-          const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`);
-          url.searchParams.set("output_format", outputFormat);
-          const audioBuffer = await fetchStubSpeechAudio(
-            url.href,
-            {
-              method: "POST",
-              headers: { "content-type": "application/json" },
-              body: JSON.stringify({
-                text: req.text,
-                ...(asNumber(overrides.latencyTier) == null
-                  ? {}
-                  : { latency_optimization_level: asNumber(overrides.latencyTier) }),
-              }),
-            },
-            "elevenlabs",
-          );
-          return {
-            audioBuffer,
-            outputFormat,
-            fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3",
-            voiceCompatible: false,
-          };
-        },
      }),
    },
  ],