test: make talk gateway fixtures provider agnostic

This commit is contained in:
Peter Steinberger 2026-04-05 18:33:58 +01:00
parent 962650f879
commit db0db3abdb
No known key found for this signature in database
7 changed files with 203 additions and 145 deletions

View File

@ -6,7 +6,7 @@ describe("elevenlabs speech provider", () => {
const cases = [
{ value: "pMsXgVXv3BLzUgSXRplE", expected: true },
{ value: "21m00Tcm4TlvDq8ikWAM", expected: true },
{ value: "EXAVITQu4vr4xnSDxMaL", expected: true },
{ value: "VoiceAlias1234567890", expected: true },
{ value: "a1b2c3d4e5", expected: true },
{ value: "a".repeat(40), expected: true },
{ value: "", expected: false },

View File

@ -736,7 +736,7 @@ describe("normalizeCompatibilityConfigValues", () => {
talk: {
voiceId: "voice-123",
voiceAliases: {
Clawd: "EXAVITQu4vr4xnSDxMaL",
Clawd: "VoiceAlias1234567890",
},
modelId: "eleven_v3",
outputFormat: "pcm_44100",
@ -751,7 +751,7 @@ describe("normalizeCompatibilityConfigValues", () => {
elevenlabs: {
voiceId: "voice-123",
voiceAliases: {
Clawd: "EXAVITQu4vr4xnSDxMaL",
Clawd: "VoiceAlias1234567890",
},
modelId: "eleven_v3",
outputFormat: "pcm_44100",

View File

@ -8,7 +8,7 @@ describe("talk.voiceAliases", () => {
await writeOpenClawConfig(home, {
talk: {
voiceAliases: {
Clawd: "EXAVITQu4vr4xnSDxMaL",
Clawd: "VoiceAlias1234567890",
Roger: "CwhRBWXzGAHq8TQ4Fs17",
},
},
@ -19,7 +19,7 @@ describe("talk.voiceAliases", () => {
expect(snap.valid).toBe(true);
expect(snap.legacyIssues.some((issue) => issue.path === "talk")).toBe(true);
expect(snap.sourceConfig.talk?.providers?.elevenlabs?.voiceAliases).toEqual({
Clawd: "EXAVITQu4vr4xnSDxMaL",
Clawd: "VoiceAlias1234567890",
Roger: "CwhRBWXzGAHq8TQ4Fs17",
});
});

View File

@ -23,7 +23,7 @@ describe("talk normalization", () => {
it("keeps core Talk normalization generic and ignores legacy provider-flat fields", () => {
const normalized = normalizeTalkSection({
voiceId: "voice-123",
voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" }, // pragma: allowlist secret
voiceAliases: { Clawd: "VoiceAlias1234567890" },
modelId: "eleven_v3",
outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret

View File

@ -27,11 +27,11 @@ vi.mock("../../tts/tts.js", () => ({
function createTalkConfig(apiKey: unknown): OpenClawConfig {
return {
talk: {
provider: "elevenlabs",
provider: "acme",
providers: {
elevenlabs: {
acme: {
apiKey,
voiceId: "voice-default",
voiceId: "stub-default-voice",
},
},
},
@ -44,11 +44,11 @@ describe("talk.speak handler", () => {
});
it("uses the active runtime config snapshot instead of the raw config snapshot", async () => {
const runtimeConfig = createTalkConfig("env-elevenlabs-key");
const runtimeConfig = createTalkConfig("env-acme-key");
const diskConfig = createTalkConfig({
source: "env",
provider: "default",
id: "ELEVENLABS_API_KEY",
id: "ACME_SPEECH_API_KEY",
});
mocks.loadConfig.mockReturnValue(runtimeConfig);
@ -59,8 +59,8 @@ describe("talk.speak handler", () => {
config: diskConfig,
});
mocks.getSpeechProvider.mockReturnValue({
id: "elevenlabs",
label: "ElevenLabs",
id: "acme",
label: "Acme Speech",
resolveTalkConfig: ({
talkProviderConfig,
}: {
@ -69,11 +69,11 @@ describe("talk.speak handler", () => {
});
mocks.synthesizeSpeech.mockImplementation(
async ({ cfg }: { cfg: OpenClawConfig; text: string; disableFallback: boolean }) => {
expect(cfg.messages?.tts?.provider).toBe("elevenlabs");
expect(cfg.messages?.tts?.providers?.elevenlabs?.apiKey).toBe("env-elevenlabs-key");
expect(cfg.messages?.tts?.provider).toBe("acme");
expect(cfg.messages?.tts?.providers?.acme?.apiKey).toBe("env-acme-key");
return {
success: true,
provider: "elevenlabs",
provider: "acme",
audioBuffer: Buffer.from([1, 2, 3]),
outputFormat: "mp3",
voiceCompatible: false,
@ -103,7 +103,7 @@ describe("talk.speak handler", () => {
expect(respond).toHaveBeenCalledWith(
true,
expect.objectContaining({
provider: "elevenlabs",
provider: "acme",
audioBase64: Buffer.from([1, 2, 3]).toString("base64"),
outputFormat: "mp3",
mimeType: "audio/mpeg",

View File

@ -56,6 +56,10 @@ const TALK_CONFIG_DEVICE_PATH = path.join(
`openclaw-talk-config-device-${process.pid}.json`,
);
const TALK_CONFIG_DEVICE = loadOrCreateDeviceIdentity(TALK_CONFIG_DEVICE_PATH);
// Vendor-neutral provider id for the Talk gateway fixtures; writeTalkConfig uses it
// when the caller does not pass an explicit provider.
const GENERIC_TALK_PROVIDER_ID = "acme";
// Environment variable name used as the `id` of env-sourced apiKey secret refs and
// as the variable set through withEnvAsync in these tests.
const GENERIC_TALK_API_ENV = "ACME_SPEECH_API_KEY";
// Voice id the stub provider falls back to when neither overrides nor config supply one.
const DEFAULT_STUB_VOICE_ID = "stub-default-voice";
// Voice id the "Clawd" alias maps to; the alias test expects it in the synthesized request URL.
const ALIAS_STUB_VOICE_ID = "VoiceAlias1234567890";
async function createFreshOperatorDevice(scopes: string[], nonce: string) {
const signedAtMs = Date.now();
@ -90,18 +94,21 @@ async function connectOperator(ws: GatewaySocket, scopes: string[]) {
}
async function writeTalkConfig(config: {
provider?: string;
apiKey?: string | { source: "env" | "file" | "exec"; provider: string; id: string };
voiceId?: string;
silenceTimeoutMs?: number;
}) {
const { writeConfigFile } = await import("../config/config.js");
const providerId = config.provider ?? GENERIC_TALK_PROVIDER_ID;
await writeConfigFile({
talk: {
provider: providerId,
silenceTimeoutMs: config.silenceTimeoutMs,
providers:
config.apiKey !== undefined || config.voiceId !== undefined
? {
elevenlabs: {
[providerId]: {
...(config.apiKey !== undefined ? { apiKey: config.apiKey } : {}),
...(config.voiceId !== undefined ? { voiceId: config.voiceId } : {}),
},
@ -147,6 +154,22 @@ async function invokeTalkSpeakDirect(params: Record<string, unknown>) {
return response;
}
/**
 * Runs `run` with the active plugin registry temporarily replaced by an
 * otherwise-empty registry that carries only `speechProviders`.
 *
 * The registry that was active beforehand (or a fresh empty registry when none
 * was active) is reinstated once `run` settles, whether it resolves or rejects.
 *
 * @param speechProviders Speech provider registrations to expose while `run` executes.
 * @param run Async callback executed against the scoped registry.
 * @returns Whatever `run` resolves to.
 */
async function withSpeechProviders<T>(
  speechProviders: NonNullable<ReturnType<typeof createEmptyPluginRegistry>["speechProviders"]>,
  run: () => Promise<T>,
): Promise<T> {
  // Capture the registry to restore before anything is swapped.
  const registryToRestore = getActivePluginRegistry() ?? createEmptyPluginRegistry();
  const scopedRegistry = { ...createEmptyPluginRegistry(), speechProviders };
  setActivePluginRegistry(scopedRegistry);
  try {
    const outcome = await run();
    return outcome;
  } finally {
    // Always put the previous registry back so later tests are not affected.
    setActivePluginRegistry(registryToRestore);
  }
}
function expectTalkConfig(
talk: TalkConfig | undefined,
expected: {
@ -175,8 +198,9 @@ describe("gateway talk.config", () => {
const { writeConfigFile } = await import("../config/config.js");
await writeConfigFile({
talk: {
provider: GENERIC_TALK_PROVIDER_ID,
providers: {
elevenlabs: {
[GENERIC_TALK_PROVIDER_ID]: {
voiceId: "voice-123",
apiKey: "secret-key-abc", // pragma: allowlist secret
},
@ -196,7 +220,7 @@ describe("gateway talk.config", () => {
const res = await fetchTalkConfig(ws);
expect(res.ok).toBe(true);
expectTalkConfig(res.payload?.config?.talk, {
provider: "elevenlabs",
provider: GENERIC_TALK_PROVIDER_ID,
voiceId: "voice-123",
apiKey: "__OPENCLAW_REDACTED__",
silenceTimeoutMs: 1500,
@ -239,7 +263,7 @@ describe("gateway talk.config", () => {
const res = await fetchTalkConfig(ws, { includeSecrets: true });
expect(res.ok).toBe(true);
expectTalkConfig(res.payload?.config?.talk, {
provider: "elevenlabs",
provider: GENERIC_TALK_PROVIDER_ID,
apiKey: "secret-key-abc",
});
});
@ -250,11 +274,11 @@ describe("gateway talk.config", () => {
apiKey: {
source: "env",
provider: "default",
id: "ELEVENLABS_API_KEY",
id: GENERIC_TALK_API_ENV,
},
});
await withEnvAsync({ ELEVENLABS_API_KEY: "env-elevenlabs-key" }, async () => {
await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => {
await withServer(async (ws) => {
await connectOperator(ws, ["operator.read", "operator.write", "operator.talk.secrets"]);
const res = await fetchTalkConfig(ws, { includeSecrets: true });
@ -263,10 +287,10 @@ describe("gateway talk.config", () => {
const secretRef = {
source: "env",
provider: "default",
id: "ELEVENLABS_API_KEY",
id: GENERIC_TALK_API_ENV,
} satisfies SecretRef;
expectTalkConfig(res.payload?.config?.talk, {
provider: "elevenlabs",
provider: GENERIC_TALK_PROVIDER_ID,
apiKey: secretRef,
});
});
@ -274,43 +298,57 @@ describe("gateway talk.config", () => {
});
it("resolves plugin-owned Talk defaults before redaction", async () => {
const { writeConfigFile } = await import("../config/config.js");
await writeConfigFile({
talk: {
provider: "elevenlabs",
providers: {
elevenlabs: {
voiceId: "voice-from-config",
},
},
},
await writeTalkConfig({
provider: GENERIC_TALK_PROVIDER_ID,
voiceId: "voice-from-config",
});
await withEnvAsync({ ELEVENLABS_API_KEY: "env-elevenlabs-key" }, async () => {
await withServer(async (ws) => {
await connectOperator(ws, ["operator.read"]);
const res = await fetchTalkConfig(ws);
expect(res.ok, JSON.stringify(res.error)).toBe(true);
expectTalkConfig(res.payload?.config?.talk, {
provider: "elevenlabs",
voiceId: "voice-from-config",
apiKey: "__OPENCLAW_REDACTED__",
});
});
await withEnvAsync({ [GENERIC_TALK_API_ENV]: "env-acme-key" }, async () => {
await withSpeechProviders(
[
{
pluginId: "acme-talk-defaults-test",
source: "test",
provider: {
id: GENERIC_TALK_PROVIDER_ID,
label: "Acme Speech",
isConfigured: () => true,
resolveTalkConfig: ({ talkProviderConfig }) => ({
...talkProviderConfig,
apiKey:
typeof process.env[GENERIC_TALK_API_ENV] === "string"
? process.env[GENERIC_TALK_API_ENV]
: undefined,
}),
synthesize: async () => ({
audioBuffer: Buffer.from([1]),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: false,
}),
},
},
],
async () => {
await withServer(async (ws) => {
await connectOperator(ws, ["operator.read"]);
const res = await fetchTalkConfig(ws);
expect(res.ok, JSON.stringify(res.error)).toBe(true);
expectTalkConfig(res.payload?.config?.talk, {
provider: GENERIC_TALK_PROVIDER_ID,
voiceId: "voice-from-config",
apiKey: "__OPENCLAW_REDACTED__",
});
});
},
);
});
});
it("returns canonical provider talk payloads", async () => {
const { writeConfigFile } = await import("../config/config.js");
await writeConfigFile({
talk: {
provider: "elevenlabs",
providers: {
elevenlabs: {
voiceId: "voice-normalized",
},
},
},
await writeTalkConfig({
provider: GENERIC_TALK_PROVIDER_ID,
voiceId: "voice-normalized",
});
await withServer(async (ws) => {
@ -318,7 +356,7 @@ describe("gateway talk.config", () => {
const res = await fetchTalkConfig(ws);
expect(res.ok).toBe(true);
expectTalkConfig(res.payload?.config?.talk, {
provider: "elevenlabs",
provider: GENERIC_TALK_PROVIDER_ID,
voiceId: "voice-normalized",
});
});
@ -385,9 +423,9 @@ describe("gateway talk.config", () => {
providers: {
elevenlabs: {
apiKey: "elevenlabs-talk-key", // pragma: allowlist secret
voiceId: "voice-default",
voiceId: DEFAULT_STUB_VOICE_ID,
voiceAliases: {
Clawd: "EXAVITQu4vr4xnSDxMaL",
Clawd: ALIAS_STUB_VOICE_ID,
},
},
},
@ -407,12 +445,75 @@ describe("gateway talk.config", () => {
globalThis.fetch = withFetchPreconnect(fetchMock);
try {
const res = await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
voiceId: "clawd",
outputFormat: "pcm_44100",
latencyTier: 3,
});
const res = await withSpeechProviders(
[
{
pluginId: "elevenlabs-test",
source: "test",
provider: {
id: "elevenlabs",
label: "ElevenLabs",
isConfigured: () => true,
resolveTalkOverrides: ({ params }) => ({
...(typeof params.voiceId === "string" && params.voiceId.trim().length > 0
? { voiceId: params.voiceId.trim() }
: {}),
...(typeof params.modelId === "string" && params.modelId.trim().length > 0
? { modelId: params.modelId.trim() }
: {}),
...(typeof params.outputFormat === "string" && params.outputFormat.trim().length > 0
? { outputFormat: params.outputFormat.trim() }
: {}),
...(typeof params.latencyTier === "number"
? { latencyTier: params.latencyTier }
: {}),
}),
synthesize: async (req) => {
const config = req.providerConfig as Record<string, unknown>;
const overrides = (req.providerOverrides ?? {}) as Record<string, unknown>;
const voiceId =
(typeof overrides.voiceId === "string" && overrides.voiceId.trim().length > 0
? overrides.voiceId.trim()
: undefined) ??
(typeof config.voiceId === "string" && config.voiceId.trim().length > 0
? config.voiceId.trim()
: undefined) ??
DEFAULT_STUB_VOICE_ID;
const outputFormat =
typeof overrides.outputFormat === "string" &&
overrides.outputFormat.trim().length > 0
? overrides.outputFormat.trim()
: "mp3";
const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`);
url.searchParams.set("output_format", outputFormat);
const response = await globalThis.fetch(url.href, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
text: req.text,
...(typeof overrides.latencyTier === "number"
? { latency_optimization_level: overrides.latencyTier }
: {}),
}),
});
return {
audioBuffer: Buffer.from(await response.arrayBuffer()),
outputFormat,
fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3",
voiceCompatible: false,
};
},
},
},
],
async () =>
await invokeTalkSpeakDirect({
text: "Hello from talk mode.",
voiceId: "clawd",
outputFormat: "pcm_44100",
latencyTier: 3,
}),
);
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("elevenlabs");
expect((res?.payload as TalkSpeakPayload | undefined)?.outputFormat).toBe("pcm_44100");
@ -421,7 +522,7 @@ describe("gateway talk.config", () => {
);
expect(fetchMock).toHaveBeenCalled();
expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL");
expect(fetchUrl).toContain(`/v1/text-to-speech/${ALIAS_STUB_VOICE_ID}`);
expect(fetchUrl).toContain("output_format=pcm_44100");
const init = requestInits[0];
const bodyText = typeof init?.body === "string" ? init.body : "{}";
@ -446,10 +547,8 @@ describe("gateway talk.config", () => {
});
await withServer(async () => {
const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
setActivePluginRegistry({
...createEmptyPluginRegistry(),
speechProviders: [
await withSpeechProviders(
[
{
pluginId: "acme-plugin",
source: "test",
@ -466,19 +565,17 @@ describe("gateway talk.config", () => {
},
},
],
});
try {
const res = await invokeTalkSpeakDirect({
text: "Hello from plugin talk mode.",
});
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("acme");
expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe(
Buffer.from([7, 8, 9]).toString("base64"),
);
} finally {
setActivePluginRegistry(previousRegistry);
}
async () => {
const res = await invokeTalkSpeakDirect({
text: "Hello from plugin talk mode.",
});
expect(res?.ok, JSON.stringify(res?.error)).toBe(true);
expect((res?.payload as TalkSpeakPayload | undefined)?.provider).toBe("acme");
expect((res?.payload as TalkSpeakPayload | undefined)?.audioBase64).toBe(
Buffer.from([7, 8, 9]).toString("base64"),
);
},
);
});
});
@ -511,10 +608,8 @@ describe("gateway talk.config", () => {
},
});
const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
setActivePluginRegistry({
...createEmptyPluginRegistry(),
speechProviders: [
await withSpeechProviders(
[
{
pluginId: "acme-plugin",
source: "test",
@ -528,18 +623,15 @@ describe("gateway talk.config", () => {
},
},
],
});
try {
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
reason: "synthesis_failed",
fallbackEligible: false,
});
} finally {
setActivePluginRegistry(previousRegistry);
}
async () => {
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
reason: "synthesis_failed",
fallbackEligible: false,
});
},
);
});
it("rejects empty audio results as invalid_audio_result", async () => {
@ -555,10 +647,8 @@ describe("gateway talk.config", () => {
},
});
const previousRegistry = getActivePluginRegistry() ?? createEmptyPluginRegistry();
setActivePluginRegistry({
...createEmptyPluginRegistry(),
speechProviders: [
await withSpeechProviders(
[
{
pluginId: "acme-plugin",
source: "test",
@ -575,17 +665,14 @@ describe("gateway talk.config", () => {
},
},
],
});
try {
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
reason: "invalid_audio_result",
fallbackEligible: false,
});
} finally {
setActivePluginRegistry(previousRegistry);
}
async () => {
const res = await invokeTalkSpeakDirect({ text: "Hello from talk mode." });
expect(res?.ok).toBe(false);
expect(res?.error?.details).toEqual({
reason: "invalid_audio_result",
fallbackEligible: false,
});
},
);
});
});

View File

@ -263,7 +263,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({
provider: createStubSpeechProvider({
id: "elevenlabs",
label: "ElevenLabs",
voices: ["EXAVITQu4vr4xnSDxMaL", "voice-default"],
voices: ["stub-default-voice", "stub-alt-voice"],
resolveTalkOverrides: ({ params }) => ({
...(trimString(params.voiceId) == null ? {} : { voiceId: trimString(params.voiceId) }),
...(trimString(params.modelId) == null ? {} : { modelId: trimString(params.modelId) }),
@ -274,35 +274,6 @@ const createStubPluginRegistry = (): PluginRegistry => ({
? {}
: { latencyTier: asNumber(params.latencyTier) }),
}),
synthesize: async (req) => {
const config = req.providerConfig as Record<string, unknown>;
const overrides = (req.providerOverrides ?? {}) as Record<string, unknown>;
const voiceId =
trimString(overrides.voiceId) ?? trimString(config.voiceId) ?? "voice-default";
const outputFormat = trimString(overrides.outputFormat) ?? "mp3";
const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`);
url.searchParams.set("output_format", outputFormat);
const audioBuffer = await fetchStubSpeechAudio(
url.href,
{
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
text: req.text,
...(asNumber(overrides.latencyTier) == null
? {}
: { latency_optimization_level: asNumber(overrides.latencyTier) }),
}),
},
"elevenlabs",
);
return {
audioBuffer,
outputFormat,
fileExtension: outputFormat.startsWith("pcm") ? ".pcm" : ".mp3",
voiceCompatible: false,
};
},
}),
},
],