mirror of https://github.com/openclaw/openclaw.git
refactor(tts): move speech providers into plugins
This commit is contained in:
parent
1d08ad4bac
commit
de6bf58e79
|
|
@ -1,5 +1,5 @@
|
|||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { buildElevenLabsSpeechProvider } from "openclaw/plugin-sdk/speech";
|
||||
import { buildElevenLabsSpeechProvider } from "./speech-provider.js";
|
||||
|
||||
export default definePluginEntry({
|
||||
id: "elevenlabs",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core";
|
||||
import { elevenLabsTTS, type SpeechVoiceOption } from "openclaw/plugin-sdk/speech-core";
|
||||
|
||||
/** Model ids the ElevenLabs speech provider advertises through its `models` field. */
const ELEVENLABS_TTS_MODELS = [
  "eleven_multilingual_v2",
  "eleven_turbo_v2_5",
  "eleven_monolingual_v1",
] as const;
|
||||
|
||||
/**
 * Resolves the ElevenLabs API origin to call.
 *
 * Surrounding whitespace and any trailing slashes are stripped from `baseUrl`.
 * When no value is given, or nothing is left after stripping, the public
 * endpoint `https://api.elevenlabs.io` is returned.
 */
function normalizeElevenLabsBaseUrl(baseUrl: string | undefined): string {
  const publicEndpoint = "https://api.elevenlabs.io";
  if (baseUrl == null) {
    return publicEndpoint;
  }
  const withoutTrailingSlashes = baseUrl.trim().replace(/\/+$/, "");
  return withoutTrailingSlashes.length > 0 ? withoutTrailingSlashes : publicEndpoint;
}
|
||||
|
||||
/**
 * Fetches the voices available to an ElevenLabs account.
 *
 * Issues `GET {baseUrl}/v1/voices` with the key in the `xi-api-key` header and
 * maps every usable entry to the shared voice-option shape. String fields are
 * trimmed; blank optional fields become `undefined`.
 *
 * @param params.apiKey  Key sent in the `xi-api-key` header.
 * @param params.baseUrl Optional API origin, normalized by `normalizeElevenLabsBaseUrl`.
 * @returns Voices that carry a non-empty id, or `[]` when the payload has no `voices` array.
 * @throws Error when the HTTP response is not OK (the message includes the status code).
 */
export async function listElevenLabsVoices(params: {
  apiKey: string;
  baseUrl?: string;
}): Promise<SpeechVoiceOption[]> {
  const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, {
    headers: {
      "xi-api-key": params.apiKey,
    },
  });
  if (!res.ok) {
    throw new Error(`ElevenLabs voices API error (${res.status})`);
  }

  // The body is untrusted JSON: validate its shape instead of casting, so a
  // `null` payload, a `null` list entry, or a non-string field cannot throw.
  const payload: unknown = await res.json();
  const rawVoices =
    typeof payload === "object" && payload !== null
      ? (payload as { voices?: unknown }).voices
      : undefined;
  if (!Array.isArray(rawVoices)) {
    return [];
  }

  const text = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
  const voices: SpeechVoiceOption[] = [];
  for (const raw of rawVoices) {
    if (typeof raw !== "object" || raw === null) {
      continue;
    }
    const entry = raw as Record<string, unknown>;
    const id = text(entry.voice_id);
    if (id.length === 0) {
      // Entries without an id cannot be selected, so they are dropped.
      continue;
    }
    voices.push({
      id,
      name: text(entry.name) || undefined,
      category: text(entry.category) || undefined,
      description: text(entry.description) || undefined,
    });
  }
  return voices;
}
|
||||
|
||||
/** Reads the ElevenLabs key from the environment: `ELEVENLABS_API_KEY` first, then `XI_API_KEY`. */
function elevenLabsApiKeyFromEnv(): string | undefined {
  return process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY;
}

/**
 * Builds the ElevenLabs speech provider plugin.
 *
 * Every operation resolves its API key from the ElevenLabs config section and
 * then from the environment; `listVoices` additionally prefers a key passed on
 * the request. Operations throw `"ElevenLabs API key missing"` when none is found.
 */
export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "elevenlabs",
    label: "ElevenLabs",
    models: ELEVENLABS_TTS_MODELS,

    // Key precedence: request, then config, then environment.
    listVoices: async (req) => {
      const apiKey = req.apiKey || req.config?.elevenlabs.apiKey || elevenLabsApiKeyFromEnv();
      if (!apiKey) {
        throw new Error("ElevenLabs API key missing");
      }
      const baseUrl = req.baseUrl ?? req.config?.elevenlabs.baseUrl;
      return listElevenLabsVoices({ apiKey, baseUrl });
    },

    isConfigured: ({ config }) => Boolean(config.elevenlabs.apiKey || elevenLabsApiKeyFromEnv()),

    // Per-request overrides win over config for every tunable except the base URL and timeout.
    synthesize: async (req) => {
      const settings = req.config.elevenlabs;
      const overrides = req.overrides?.elevenlabs;
      const apiKey = settings.apiKey || elevenLabsApiKeyFromEnv();
      if (!apiKey) {
        throw new Error("ElevenLabs API key missing");
      }

      const forVoiceNote = req.target === "voice-note";
      const outputFormat =
        overrides?.outputFormat ?? (forVoiceNote ? "opus_48000_64" : "mp3_44100_128");

      const audioBuffer = await elevenLabsTTS({
        text: req.text,
        apiKey,
        baseUrl: settings.baseUrl,
        voiceId: overrides?.voiceId ?? settings.voiceId,
        modelId: overrides?.modelId ?? settings.modelId,
        outputFormat,
        seed: overrides?.seed ?? settings.seed,
        applyTextNormalization:
          overrides?.applyTextNormalization ?? settings.applyTextNormalization,
        languageCode: overrides?.languageCode ?? settings.languageCode,
        // Override settings are layered on top of the configured ones.
        voiceSettings: {
          ...settings.voiceSettings,
          ...overrides?.voiceSettings,
        },
        timeoutMs: req.config.timeoutMs,
      });

      return {
        audioBuffer,
        outputFormat,
        // NOTE(review): extension and voice compatibility follow the target only;
        // an overridden outputFormat is not reflected here. Confirm this is intended.
        fileExtension: forVoiceNote ? ".opus" : ".mp3",
        voiceCompatible: forVoiceNote,
      };
    },

    // Telephony always requests raw PCM at 22.05 kHz and ignores per-request overrides.
    synthesizeTelephony: async (req) => {
      const settings = req.config.elevenlabs;
      const apiKey = settings.apiKey || elevenLabsApiKeyFromEnv();
      if (!apiKey) {
        throw new Error("ElevenLabs API key missing");
      }

      const outputFormat = "pcm_22050";
      const sampleRate = 22_050;
      const audioBuffer = await elevenLabsTTS({
        text: req.text,
        apiKey,
        baseUrl: settings.baseUrl,
        voiceId: settings.voiceId,
        modelId: settings.modelId,
        outputFormat,
        seed: settings.seed,
        applyTextNormalization: settings.applyTextNormalization,
        languageCode: settings.languageCode,
        voiceSettings: settings.voiceSettings,
        timeoutMs: req.config.timeoutMs,
      });

      return { audioBuffer, outputFormat, sampleRate };
    },
  };
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { buildMicrosoftSpeechProvider } from "openclaw/plugin-sdk/speech";
|
||||
import { buildMicrosoftSpeechProvider } from "./speech-provider.js";
|
||||
|
||||
export default definePluginEntry({
|
||||
id: "microsoft",
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
"private": true,
|
||||
"description": "OpenClaw Microsoft speech plugin",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"node-edge-tts": "^1.2.10"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import { listMicrosoftVoices } from "./speech-provider.js";
|
||||
|
||||
// Shared fetch stub; installed per test via vi.stubGlobal and reset in afterEach.
const fetchMock = vi.fn<typeof fetch>();

describe("listMicrosoftVoices", () => {
  afterEach(() => {
    fetchMock.mockReset();
    vi.unstubAllGlobals();
  });

  it("maps Microsoft voices to the shared speech voice shape", async () => {
    fetchMock.mockResolvedValueOnce({
      ok: true,
      json: async () => [
        {
          ShortName: "en-US-AvaMultilingualNeural",
          FriendlyName: "Microsoft Ava",
          Locale: "en-US",
          Gender: "Female",
          VoiceTag: {
            ContentCategories: ["General"],
            VoicePersonalities: ["Friendly", "Warm"],
          },
        },
      ],
    } as Response);
    vi.stubGlobal("fetch", fetchMock);

    await expect(listMicrosoftVoices()).resolves.toEqual([
      {
        id: "en-US-AvaMultilingualNeural",
        name: "Microsoft Ava",
        category: "General",
        description: "Friendly, Warm",
        locale: "en-US",
        gender: "Female",
        personalities: ["Friendly", "Warm"],
      },
    ]);

    // The request must carry the trusted-client token and the DRM headers.
    expect(fetchMock).toHaveBeenCalledWith(
      expect.stringContaining("/voices/list?trustedclienttoken="),
      expect.objectContaining({
        headers: expect.objectContaining({
          Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
          "Sec-MS-GEC": expect.any(String),
          "Sec-MS-GEC-Version": expect.stringContaining("1-"),
        }),
      }),
    );
  });

  it("throws on Microsoft voice list failures", async () => {
    fetchMock.mockResolvedValueOnce({ ok: false, status: 503 } as Response);
    vi.stubGlobal("fetch", fetchMock);

    await expect(listMicrosoftVoices()).rejects.toThrow("Microsoft voices API error (503)");
  });
});
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import {
|
||||
CHROMIUM_FULL_VERSION,
|
||||
TRUSTED_CLIENT_TOKEN,
|
||||
generateSecMsGecToken,
|
||||
} from "node-edge-tts/dist/drm.js";
|
||||
import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core";
|
||||
import {
|
||||
edgeTTS,
|
||||
inferEdgeExtension,
|
||||
isVoiceCompatibleAudio,
|
||||
resolvePreferredOpenClawTmpDir,
|
||||
type SpeechVoiceOption,
|
||||
} from "openclaw/plugin-sdk/speech-core";
|
||||
|
||||
/** Output format the Microsoft provider retries with when a different requested format fails. */
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";

/** Tag metadata attached to a voice-list entry. */
type MicrosoftVoiceTag = {
  ContentCategories?: string[];
  VoicePersonalities?: string[];
};

/**
 * One entry of the voice-list JSON, limited to the fields this module reads.
 * Every field is optional because the payload is not validated upstream.
 */
type MicrosoftVoiceListEntry = {
  ShortName?: string;
  FriendlyName?: string;
  Locale?: string;
  Gender?: string;
  VoiceTag?: MicrosoftVoiceTag;
};
|
||||
|
||||
/**
 * Builds the request headers sent to the Microsoft voice-list endpoint.
 *
 * The User-Agent carries the major component of `CHROMIUM_FULL_VERSION`
 * (falling back to "0" when it is empty), and a freshly generated
 * `Sec-MS-GEC` token is paired with the matching `Sec-MS-GEC-Version`.
 */
function buildMicrosoftVoiceHeaders(): Record<string, string> {
  const [majorPart] = CHROMIUM_FULL_VERSION.split(".");
  const major = majorPart || "0";
  const userAgent = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    `(KHTML, like Gecko) Chrome/${major}.0.0.0 Safari/537.36 Edg/${major}.0.0.0`,
  ].join(" ");

  const headers: Record<string, string> = {
    Authority: "speech.platform.bing.com",
    Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
    Accept: "*/*",
    "User-Agent": userAgent,
    "Sec-MS-GEC": generateSecMsGecToken(),
    "Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`,
  };
  return headers;
}
|
||||
|
||||
/**
 * Produces a human-readable description from a voice's personality tags.
 *
 * Tags are trimmed, and blank or non-string tags are dropped, so the result
 * agrees with the trim-based `personalities` filter in `listMicrosoftVoices`.
 *
 * @returns The remaining tags joined with ", ", or `undefined` when none remain.
 */
function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined {
  // Read as unknown: the entry originates from unvalidated JSON.
  const rawTags: unknown = entry.VoiceTag?.VoicePersonalities;
  if (!Array.isArray(rawTags)) {
    return undefined;
  }
  const personalities = rawTags
    .filter((value): value is string => typeof value === "string")
    .map((value) => value.trim())
    .filter((value) => value.length > 0);
  return personalities.length > 0 ? personalities.join(", ") : undefined;
}
|
||||
|
||||
/**
 * Fetches the Microsoft read-aloud voice list and maps it to the shared
 * voice-option shape.
 *
 * String fields are trimmed; blank optional fields become `undefined`. The
 * category is the first non-blank content category, and `name` falls back to
 * the voice id when no friendly name is present.
 *
 * @returns Voices that carry a non-empty `ShortName`, or `[]` when the payload is not an array.
 * @throws Error when the HTTP response is not OK (the message includes the status code).
 */
export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
  const response = await fetch(
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
      `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`,
    {
      headers: buildMicrosoftVoiceHeaders(),
    },
  );
  if (!response.ok) {
    throw new Error(`Microsoft voices API error (${response.status})`);
  }

  // The body is untrusted JSON: validate before dereferencing so `null`
  // entries or non-string fields cannot throw.
  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    return [];
  }

  const text = (value: unknown): string => (typeof value === "string" ? value.trim() : "");
  // Trimmed, non-blank strings of a list; `undefined` when the value is not a list.
  const textList = (value: unknown): string[] | undefined =>
    Array.isArray(value)
      ? value.map((item) => text(item)).filter((item) => item.length > 0)
      : undefined;

  const voices: SpeechVoiceOption[] = [];
  for (const raw of payload) {
    if (typeof raw !== "object" || raw === null) {
      continue;
    }
    const entry = raw as MicrosoftVoiceListEntry;
    const id = text(entry.ShortName);
    if (id.length === 0) {
      // Entries without a short name cannot be selected, so they are dropped.
      continue;
    }
    voices.push({
      id,
      name: text(entry.FriendlyName) || id,
      category: textList(entry.VoiceTag?.ContentCategories)?.[0],
      description: formatMicrosoftVoiceDescription(entry),
      locale: text(entry.Locale) || undefined,
      gender: text(entry.Gender) || undefined,
      personalities: textList(entry.VoiceTag?.VoicePersonalities),
    });
  }
  return voices;
}
|
||||
|
||||
/**
 * Builds the Microsoft (Edge read-aloud) speech provider plugin, also
 * reachable through the legacy `edge` alias.
 *
 * Synthesis writes into a private temp directory that is removed afterwards.
 * When the requested output format fails and it is not already the default
 * format, synthesis is retried once with `DEFAULT_EDGE_OUTPUT_FORMAT`.
 */
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "microsoft",
    label: "Microsoft",
    aliases: ["edge"],
    listVoices: async () => await listMicrosoftVoices(),
    isConfigured: ({ config }) => config.edge.enabled,

    synthesize: async (req) => {
      const tempRoot = resolvePreferredOpenClawTmpDir();
      mkdirSync(tempRoot, { recursive: true, mode: 0o700 });
      const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-"));
      const requestedFormat =
        req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat;

      // Synthesizes into the temp dir with the given format and reads the result back.
      const synthesizeAs = async (format: string) => {
        const fileExtension = inferEdgeExtension(format);
        const outputPath = path.join(tempDir, `speech${fileExtension}`);
        const edgeConfig = {
          ...req.config.edge,
          voice: req.overrides?.microsoft?.voice ?? req.config.edge.voice,
          outputFormat: format,
        };
        await edgeTTS({
          text: req.text,
          outputPath,
          config: edgeConfig,
          timeoutMs: req.config.timeoutMs,
        });
        return {
          audioBuffer: readFileSync(outputPath),
          outputFormat: format,
          fileExtension,
          voiceCompatible: isVoiceCompatibleAudio({ fileName: outputPath }),
        };
      };

      try {
        try {
          // `await` keeps the temp dir alive until synthesis has finished.
          return await synthesizeAs(requestedFormat);
        } catch (err) {
          // There is nothing to fall back to when the default format itself failed.
          if (requestedFormat === DEFAULT_EDGE_OUTPUT_FORMAT) {
            throw err;
          }
          return await synthesizeAs(DEFAULT_EDGE_OUTPUT_FORMAT);
        }
      } finally {
        rmSync(tempDir, { recursive: true, force: true });
      }
    },
  };
}
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
import { buildOpenAIImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
|
||||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { buildOpenAISpeechProvider } from "openclaw/plugin-sdk/speech";
|
||||
import { openaiMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
import { buildOpenAICodexProviderPlugin } from "./openai-codex-provider.js";
|
||||
import { buildOpenAIProvider } from "./openai-provider.js";
|
||||
import { buildOpenAISpeechProvider } from "./speech-provider.js";
|
||||
|
||||
export default definePluginEntry({
|
||||
id: "openai",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core";
|
||||
import { OPENAI_TTS_MODELS, OPENAI_TTS_VOICES, openaiTTS } from "openclaw/plugin-sdk/speech-core";
|
||||
|
||||
/**
 * Builds the OpenAI speech provider plugin.
 *
 * The API key is taken from the OpenAI config section, falling back to the
 * `OPENAI_API_KEY` environment variable; synthesis throws
 * `"OpenAI API key missing"` when neither is set.
 */
export function buildOpenAISpeechProvider(): SpeechProviderPlugin {
  return {
    id: "openai",
    label: "OpenAI",
    models: OPENAI_TTS_MODELS,
    voices: OPENAI_TTS_VOICES,

    // The voice list is static: each known voice id doubles as its display name.
    listVoices: async () => OPENAI_TTS_VOICES.map((voiceId) => ({ id: voiceId, name: voiceId })),

    isConfigured: ({ config }) => Boolean(config.openai.apiKey || process.env.OPENAI_API_KEY),

    // Model, voice and speed accept per-request overrides; instructions come from config only.
    synthesize: async (req) => {
      const settings = req.config.openai;
      const overrides = req.overrides?.openai;
      const apiKey = settings.apiKey || process.env.OPENAI_API_KEY;
      if (!apiKey) {
        throw new Error("OpenAI API key missing");
      }

      const forVoiceNote = req.target === "voice-note";
      const responseFormat = forVoiceNote ? "opus" : "mp3";
      const audioBuffer = await openaiTTS({
        text: req.text,
        apiKey,
        baseUrl: settings.baseUrl,
        model: overrides?.model ?? settings.model,
        voice: overrides?.voice ?? settings.voice,
        speed: overrides?.speed ?? settings.speed,
        instructions: settings.instructions,
        responseFormat,
        timeoutMs: req.config.timeoutMs,
      });

      return {
        audioBuffer,
        outputFormat: responseFormat,
        fileExtension: forVoiceNote ? ".opus" : ".mp3",
        voiceCompatible: forVoiceNote,
      };
    },

    // Telephony always requests raw PCM at 24 kHz and ignores per-request overrides.
    synthesizeTelephony: async (req) => {
      const settings = req.config.openai;
      const apiKey = settings.apiKey || process.env.OPENAI_API_KEY;
      if (!apiKey) {
        throw new Error("OpenAI API key missing");
      }

      const outputFormat = "pcm";
      const sampleRate = 24_000;
      const audioBuffer = await openaiTTS({
        text: req.text,
        apiKey,
        baseUrl: settings.baseUrl,
        model: settings.model,
        voice: settings.voice,
        speed: settings.speed,
        instructions: settings.instructions,
        responseFormat: outputFormat,
        timeoutMs: req.config.timeoutMs,
      });

      return { audioBuffer, outputFormat, sampleRate };
    },
  };
}
|
||||
|
|
@ -165,6 +165,10 @@
|
|||
"types": "./dist/plugin-sdk/speech-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/speech-runtime.js"
|
||||
},
|
||||
"./plugin-sdk/speech-core": {
|
||||
"types": "./dist/plugin-sdk/speech-core.d.ts",
|
||||
"default": "./dist/plugin-sdk/speech-core.js"
|
||||
},
|
||||
"./plugin-sdk/plugin-runtime": {
|
||||
"types": "./dist/plugin-sdk/plugin-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/plugin-runtime.js"
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
"text-runtime",
|
||||
"agent-runtime",
|
||||
"speech-runtime",
|
||||
"speech-core",
|
||||
"plugin-runtime",
|
||||
"security-runtime",
|
||||
"gateway-runtime",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
// Shared speech-provider implementation helpers for bundled and third-party plugins.
|
||||
|
||||
export type { SpeechProviderPlugin } from "../plugins/types.js";
|
||||
export type { SpeechVoiceOption } from "../tts/provider-types.js";
|
||||
|
||||
export {
|
||||
edgeTTS,
|
||||
elevenLabsTTS,
|
||||
inferEdgeExtension,
|
||||
OPENAI_TTS_MODELS,
|
||||
OPENAI_TTS_VOICES,
|
||||
openaiTTS,
|
||||
parseTtsDirectives,
|
||||
} from "../tts/tts-core.js";
|
||||
|
||||
export { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
|
||||
export { isVoiceCompatibleAudio } from "../media/audio.js";
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
// Public speech-provider builders for bundled or third-party plugins.
|
||||
|
||||
export { buildElevenLabsSpeechProvider } from "../tts/providers/elevenlabs.js";
|
||||
export { buildMicrosoftSpeechProvider } from "../tts/providers/microsoft.js";
|
||||
export { buildOpenAISpeechProvider } from "../tts/providers/openai.js";
|
||||
export { buildElevenLabsSpeechProvider } from "../../extensions/elevenlabs/speech-provider.js";
|
||||
export { buildMicrosoftSpeechProvider } from "../../extensions/microsoft/speech-provider.js";
|
||||
export { buildOpenAISpeechProvider } from "../../extensions/openai/speech-provider.js";
|
||||
export { parseTtsDirectives } from "../tts/tts-core.js";
|
||||
export type { SpeechVoiceOption } from "../tts/provider-types.js";
|
||||
|
|
|
|||
|
|
@ -1,62 +1,84 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createEmptyPluginRegistry } from "../plugins/registry.js";
|
||||
import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plugins/runtime.js";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const { loadOpenClawPluginsMock } = vi.hoisted(() => ({
|
||||
loadOpenClawPluginsMock: vi.fn(() => createEmptyPluginRegistry()),
|
||||
}));
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createEmptyPluginRegistry } from "../plugins/registry-empty.js";
|
||||
import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plugins/runtime.js";
|
||||
import type { SpeechProviderPlugin } from "../plugins/types.js";
|
||||
import { getSpeechProvider, listSpeechProviders, normalizeSpeechProviderId } from "./provider-registry.js";
|
||||
|
||||
const loadOpenClawPluginsMock = vi.fn();
|
||||
|
||||
vi.mock("../plugins/loader.js", () => ({
|
||||
loadOpenClawPlugins: loadOpenClawPluginsMock,
|
||||
loadOpenClawPlugins: (...args: Parameters<typeof loadOpenClawPluginsMock>) =>
|
||||
loadOpenClawPluginsMock(...args),
|
||||
}));
|
||||
|
||||
import { getSpeechProvider, listSpeechProviders } from "./provider-registry.js";
|
||||
/**
 * Test helper: creates a minimal speech provider that reports itself as
 * configured and always synthesizes the same small MP3-labelled buffer.
 * The `aliases` key is present only when an alias list is passed.
 */
function createSpeechProvider(id: string, aliases?: string[]): SpeechProviderPlugin {
  const aliasFields = aliases ? { aliases } : {};
  const synthesize = async () => ({
    audioBuffer: Buffer.from("audio"),
    outputFormat: "mp3",
    voiceCompatible: false,
    fileExtension: ".mp3",
  });
  return {
    id,
    ...aliasFields,
    isConfigured: () => true,
    synthesize,
  };
}
|
||||
|
||||
describe("speech provider registry", () => {
|
||||
afterEach(() => {
|
||||
beforeEach(() => {
|
||||
resetPluginRuntimeStateForTest();
|
||||
loadOpenClawPluginsMock.mockReset();
|
||||
loadOpenClawPluginsMock.mockReturnValue(createEmptyPluginRegistry());
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
resetPluginRuntimeStateForTest();
|
||||
});
|
||||
|
||||
it("does not load plugins for builtin provider lookup", () => {
|
||||
const provider = getSpeechProvider("openai", {} as OpenClawConfig);
|
||||
it("uses active plugin speech providers without reloading plugins", () => {
|
||||
setActivePluginRegistry({
|
||||
...createEmptyPluginRegistry(),
|
||||
speechProviders: [
|
||||
{
|
||||
pluginId: "test-openai",
|
||||
provider: createSpeechProvider("openai"),
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(provider?.id).toBe("openai");
|
||||
expect(loadOpenClawPluginsMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("does not load plugins when listing without config", () => {
|
||||
const providers = listSpeechProviders();
|
||||
|
||||
expect(providers.map((provider) => provider.id)).toEqual(["openai", "elevenlabs", "microsoft"]);
|
||||
expect(providers.map((provider) => provider.id)).toEqual(["openai"]);
|
||||
expect(loadOpenClawPluginsMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("uses active plugin speech providers without loading from disk", () => {
|
||||
const registry = createEmptyPluginRegistry();
|
||||
registry.speechProviders.push({
|
||||
pluginId: "custom-speech",
|
||||
pluginName: "Custom Speech",
|
||||
source: "test",
|
||||
provider: {
|
||||
id: "custom-speech",
|
||||
label: "Custom Speech",
|
||||
isConfigured: () => true,
|
||||
synthesize: async () => ({
|
||||
audioBuffer: Buffer.from("audio"),
|
||||
outputFormat: "mp3",
|
||||
fileExtension: ".mp3",
|
||||
voiceCompatible: false,
|
||||
}),
|
||||
},
|
||||
it("loads speech providers from plugins when config is provided", () => {
|
||||
loadOpenClawPluginsMock.mockReturnValue({
|
||||
...createEmptyPluginRegistry(),
|
||||
speechProviders: [
|
||||
{
|
||||
pluginId: "test-microsoft",
|
||||
provider: createSpeechProvider("microsoft", ["edge"]),
|
||||
},
|
||||
],
|
||||
});
|
||||
setActivePluginRegistry(registry);
|
||||
|
||||
const provider = getSpeechProvider("custom-speech");
|
||||
const cfg = {} as OpenClawConfig;
|
||||
|
||||
expect(provider?.id).toBe("custom-speech");
|
||||
expect(loadOpenClawPluginsMock).not.toHaveBeenCalled();
|
||||
expect(listSpeechProviders(cfg).map((provider) => provider.id)).toEqual(["microsoft"]);
|
||||
expect(getSpeechProvider("edge", cfg)?.id).toBe("microsoft");
|
||||
expect(loadOpenClawPluginsMock).toHaveBeenCalledWith({ config: cfg });
|
||||
});
|
||||
|
||||
it("returns no providers when neither plugins nor active registry provide speech support", () => {
|
||||
expect(listSpeechProviders()).toEqual([]);
|
||||
expect(getSpeechProvider("openai")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("normalizes the legacy edge alias to microsoft", () => {
|
||||
expect(normalizeSpeechProviderId("edge")).toBe("microsoft");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,15 +3,6 @@ import { loadOpenClawPlugins } from "../plugins/loader.js";
|
|||
import { getActivePluginRegistry } from "../plugins/runtime.js";
|
||||
import type { SpeechProviderPlugin } from "../plugins/types.js";
|
||||
import type { SpeechProviderId } from "./provider-types.js";
|
||||
import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js";
|
||||
import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js";
|
||||
import { buildOpenAISpeechProvider } from "./providers/openai.js";
|
||||
|
||||
const BUILTIN_SPEECH_PROVIDER_BUILDERS = [
|
||||
buildOpenAISpeechProvider,
|
||||
buildElevenLabsSpeechProvider,
|
||||
buildMicrosoftSpeechProvider,
|
||||
] as const satisfies readonly (() => SpeechProviderPlugin)[];
|
||||
|
||||
function trimToUndefined(value: string | undefined): string | undefined {
|
||||
const trimmed = value?.trim().toLowerCase();
|
||||
|
|
@ -66,9 +57,6 @@ function buildProviderMaps(cfg?: OpenClawConfig): {
|
|||
const aliases = new Map<string, SpeechProviderPlugin>();
|
||||
const maps = { canonical, aliases };
|
||||
|
||||
for (const buildProvider of BUILTIN_SPEECH_PROVIDER_BUILDERS) {
|
||||
registerSpeechProvider(maps, buildProvider());
|
||||
}
|
||||
for (const provider of resolveSpeechProviderPluginEntries(cfg)) {
|
||||
registerSpeechProvider(maps, provider);
|
||||
}
|
||||
|
|
@ -88,10 +76,5 @@ export function getSpeechProvider(
|
|||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const local = buildProviderMaps().aliases.get(normalized);
|
||||
if (local || !cfg) {
|
||||
return local;
|
||||
}
|
||||
return buildProviderMaps(cfg).aliases.get(normalized);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,66 +0,0 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
|
||||
import { listMicrosoftVoices } from "./microsoft.js";
|
||||
|
||||
describe("listMicrosoftVoices", () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("maps Microsoft voice metadata into speech voice options", async () => {
|
||||
globalThis.fetch = withFetchPreconnect(
|
||||
vi.fn().mockResolvedValue(
|
||||
new Response(
|
||||
JSON.stringify([
|
||||
{
|
||||
ShortName: "en-US-AvaNeural",
|
||||
FriendlyName: "Microsoft Ava Online (Natural) - English (United States)",
|
||||
Locale: "en-US",
|
||||
Gender: "Female",
|
||||
VoiceTag: {
|
||||
ContentCategories: ["General"],
|
||||
VoicePersonalities: ["Friendly", "Positive"],
|
||||
},
|
||||
},
|
||||
]),
|
||||
{ status: 200 },
|
||||
),
|
||||
),
|
||||
);
|
||||
|
||||
const voices = await listMicrosoftVoices();
|
||||
|
||||
expect(voices).toEqual([
|
||||
{
|
||||
id: "en-US-AvaNeural",
|
||||
name: "Microsoft Ava Online (Natural) - English (United States)",
|
||||
category: "General",
|
||||
description: "Friendly, Positive",
|
||||
locale: "en-US",
|
||||
gender: "Female",
|
||||
personalities: ["Friendly", "Positive"],
|
||||
},
|
||||
]);
|
||||
expect(globalThis.fetch).toHaveBeenCalledWith(
|
||||
expect.stringContaining("/voices/list?trustedclienttoken="),
|
||||
expect.objectContaining({
|
||||
headers: expect.objectContaining({
|
||||
Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
|
||||
"Sec-MS-GEC": expect.any(String),
|
||||
"Sec-MS-GEC-Version": expect.stringContaining("1-"),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("throws on Microsoft voice list failures", async () => {
|
||||
globalThis.fetch = withFetchPreconnect(
|
||||
vi.fn().mockResolvedValue(new Response("nope", { status: 503 })),
|
||||
);
|
||||
|
||||
await expect(listMicrosoftVoices()).rejects.toThrow("Microsoft voices API error (503)");
|
||||
});
|
||||
});
|
||||
Loading…
Reference in New Issue