diff --git a/src/extension-host/tts-runtime-setup.test.ts b/src/extension-host/tts-runtime-setup.test.ts
new file mode 100644
index 00000000000..4f02fa60325
--- /dev/null
+++ b/src/extension-host/tts-runtime-setup.test.ts
@@ -0,0 +1,131 @@
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, describe, expect, it } from "vitest";
+import { withEnv } from "../test-utils/env.js";
+import type { ResolvedTtsConfig } from "../tts/tts.js";
+import {
+  resolveExtensionHostTtsProvider,
+  resolveExtensionHostTtsRequestSetup,
+} from "./tts-runtime-setup.js";
+
+// Temp directories created by createPrefsPath; removed after each test.
+const tempDirs: string[] = [];
+
+/** Writes `contents` as JSON to `tts.json` in a fresh temp dir and returns its path. */
+function createPrefsPath(contents: object): string {
+  const tempDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-tts-setup-"));
+  tempDirs.push(tempDir);
+  const prefsPath = path.join(tempDir, "tts.json");
+  writeFileSync(prefsPath, JSON.stringify(contents), "utf8");
+  return prefsPath;
+}
+
+/** Builds a baseline resolved TTS config; `overrides` shallow-replace top-level fields. */
+function createResolvedConfig(overrides?: Partial<ResolvedTtsConfig>): ResolvedTtsConfig {
+  return {
+    auto: "off",
+    mode: "final",
+    provider: "edge",
+    providerSource: "default",
+    modelOverrides: {
+      enabled: true,
+      allowText: true,
+      allowProvider: false,
+      allowVoice: true,
+      allowModelId: true,
+      allowVoiceSettings: true,
+      allowNormalization: true,
+      allowSeed: true,
+    },
+    elevenlabs: {
+      baseUrl: "https://api.elevenlabs.io",
+      voiceId: "voice-id",
+      modelId: "eleven_multilingual_v2",
+      voiceSettings: {
+        stability: 0.5,
+        similarityBoost: 0.75,
+        style: 0,
+        useSpeakerBoost: true,
+        speed: 1,
+      },
+    },
+    openai: {
+      baseUrl: "https://api.openai.com/v1",
+      model: "gpt-4o-mini-tts",
+      voice: "alloy",
+    },
+    edge: {
+      enabled: true,
+      voice: "en-US-MichelleNeural",
+      lang: "en-US",
+      outputFormat: "audio-24khz-48kbitrate-mono-mp3",
+      outputFormatConfigured: false,
+      saveSubtitles: false,
+    },
+    maxTextLength: 4096,
+    timeoutMs: 30_000,
+    ...overrides,
+  };
+}
+
+afterEach(() => {
+  for (const tempDir of tempDirs.splice(0)) {
+    rmSync(tempDir, { recursive: true, force: true });
+  }
+});
+
+describe("tts-runtime-setup", () => {
+  it("prefers the stored provider over config and environment", () => {
+    const prefsPath = createPrefsPath({ tts: { provider: "elevenlabs" } });
+    const config = createResolvedConfig({
+      provider: "openai",
+      providerSource: "config",
+      openai: {
+        baseUrl: "https://api.openai.com/v1",
+        model: "gpt-4o-mini-tts",
+        voice: "alloy",
+        apiKey: "config-openai-key",
+      },
+    });
+
+    withEnv({ OPENAI_API_KEY: "env-openai-key", ELEVENLABS_API_KEY: undefined }, () => {
+      expect(resolveExtensionHostTtsProvider(config, prefsPath)).toBe("elevenlabs");
+    });
+  });
+
+  it("returns a validation error when text exceeds the configured hard limit", () => {
+    const config = createResolvedConfig({ maxTextLength: 5 });
+    const prefsPath = createPrefsPath({});
+
+    expect(
+      resolveExtensionHostTtsRequestSetup({
+        text: "too-long",
+        config,
+        prefsPath,
+      }),
+    ).toEqual({
+      error: "Text too long (8 chars, max 5)",
+    });
+  });
+
+  it("uses the override provider to build the host-owned fallback order", () => {
+    const config = createResolvedConfig({
+      provider: "edge",
+      providerSource: "config",
+    });
+    const prefsPath = createPrefsPath({});
+
+    expect(
+      resolveExtensionHostTtsRequestSetup({
+        text: "hello world",
+        config,
+        prefsPath,
+        providerOverride: "elevenlabs",
+      }),
+    ).toEqual({
+      config,
+      providers: ["elevenlabs", "openai", "edge"],
+    });
+  });
+});
diff --git a/src/extension-host/tts-runtime-setup.ts b/src/extension-host/tts-runtime-setup.ts
new file mode 100644
index 00000000000..f9521070dba
--- /dev/null
+++ b/src/extension-host/tts-runtime-setup.ts
@@ -0,0 +1,106 @@
+import { existsSync, readFileSync } from "node:fs";
+import type { TtsProvider } from "../config/types.tts.js";
+import type { ResolvedTtsConfig } from "../tts/tts.js";
+import {
+  resolveExtensionHostTtsApiKey,
+  resolveExtensionHostTtsProviderOrder,
+} from "./tts-runtime-registry.js";
+
+/** Shape of the prefs-file fields this module reads. */
+type TtsUserPrefs = {
+  tts?: {
+    provider?: TtsProvider;
+  };
+};
+
+/**
+ * Reads and JSON-parses the prefs file at `prefsPath`.
+ *
+ * Best-effort: a missing or unreadable file, malformed JSON, or a payload
+ * that is not a non-null object yields `{}` instead of throwing. The parsed
+ * shape is not validated beyond that check.
+ */
+function readExtensionHostTtsPrefs(prefsPath: string): TtsUserPrefs {
+  try {
+    if (!existsSync(prefsPath)) {
+      return {};
+    }
+    const raw = readFileSync(prefsPath, "utf8");
+    const parsed = JSON.parse(raw) as TtsUserPrefs;
+    return parsed && typeof parsed === "object" ? parsed : {};
+  } catch {
+    return {};
+  }
+}
+
+/**
+ * Picks the TTS provider to use.
+ *
+ * Precedence:
+ * 1. the provider stored in the prefs file, if set;
+ * 2. `config.provider` when `config.providerSource` is `"config"`;
+ * 3. `"openai"` if an API key resolves for it, otherwise `"elevenlabs"` if
+ *    one resolves for it;
+ * 4. `"edge"` as the final fallback.
+ *
+ * @param config - Resolved TTS configuration.
+ * @param prefsPath - Path to the JSON prefs file.
+ * @returns The selected provider.
+ */
+export function resolveExtensionHostTtsProvider(
+  config: ResolvedTtsConfig,
+  prefsPath: string,
+): TtsProvider {
+  const prefs = readExtensionHostTtsPrefs(prefsPath);
+  if (prefs.tts?.provider) {
+    return prefs.tts.provider;
+  }
+  if (config.providerSource === "config") {
+    return config.provider;
+  }
+
+  if (resolveExtensionHostTtsApiKey(config, "openai")) {
+    return "openai";
+  }
+  if (resolveExtensionHostTtsApiKey(config, "elevenlabs")) {
+    return "elevenlabs";
+  }
+  return "edge";
+}
+
+/**
+ * Validates a TTS request and resolves the provider order to try.
+ *
+ * @param params - Request inputs: `text` is length-checked against
+ *   `config.maxTextLength`; `providerOverride`, when set, takes the place of
+ *   the provider chosen by {@link resolveExtensionHostTtsProvider}.
+ * @returns `{ error }` when the text is too long; otherwise the given config
+ *   plus the result of `resolveExtensionHostTtsProviderOrder` for the
+ *   chosen provider.
+ */
+export function resolveExtensionHostTtsRequestSetup(params: {
+  text: string;
+  config: ResolvedTtsConfig;
+  prefsPath: string;
+  providerOverride?: TtsProvider;
+}):
+  | {
+      config: ResolvedTtsConfig;
+      providers: TtsProvider[];
+    }
+  | {
+      error: string;
+    } {
+  if (params.text.length > params.config.maxTextLength) {
+    return {
+      error: `Text too long (${params.text.length} chars, max ${params.config.maxTextLength})`,
+    };
+  }
+
+  const provider =
+    params.providerOverride ?? resolveExtensionHostTtsProvider(params.config, params.prefsPath);
+  return {
+    config: params.config,
+    providers: resolveExtensionHostTtsProviderOrder(provider),
+  };
+}
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index 619820cdd5f..2fd61dbb3ea 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -24,6 +24,10 @@ import {
   resolveExtensionHostTtsApiKey,
   resolveExtensionHostTtsProviderOrder,
 } from "../extension-host/tts-runtime-registry.js";
+import {
+  resolveExtensionHostTtsProvider,
+  resolveExtensionHostTtsRequestSetup,
+} from "../extension-host/tts-runtime-setup.js";
 import { logVerbose } from "../globals.js";
 import { stripMarkdown } from "../line/markdown-to-line.js";
 import { CONFIG_DIR, resolveUserPath } from "../utils.js";
@@ -420,23 +424,7 @@ export function setTtsEnabled(prefsPath: string, enabled: boolean): void {
   setTtsAutoMode(prefsPath, enabled ? "always" : "off");
 }
 
-export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): TtsProvider {
-  const prefs = readPrefs(prefsPath);
-  if (prefs.tts?.provider) {
-    return prefs.tts.provider;
-  }
-  if (config.providerSource === "config") {
-    return config.provider;
-  }
-
-  if (resolveTtsApiKey(config, "openai")) {
-    return "openai";
-  }
-  if (resolveTtsApiKey(config, "elevenlabs")) {
-    return "elevenlabs";
-  }
-  return "edge";
-}
+export const getTtsProvider = resolveExtensionHostTtsProvider;
 
 export function setTtsProvider(prefsPath: string, provider: TtsProvider): void {
   updatePrefs(prefsPath, (prefs) => {
@@ -482,35 +470,6 @@ export const resolveTtsProviderOrder = resolveExtensionHostTtsProviderOrder;
 
 export const isTtsProviderConfigured = isExtensionHostTtsProviderConfigured;
 
-function resolveTtsRequestSetup(params: {
-  text: string;
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-  providerOverride?: TtsProvider;
-}):
-  | {
-      config: ResolvedTtsConfig;
-      providers: TtsProvider[];
-    }
-  | {
-      error: string;
-    } {
-  const config = resolveTtsConfig(params.cfg);
-  const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
-  if (params.text.length > config.maxTextLength) {
-    return {
-      error: `Text too long (${params.text.length} chars, max ${config.maxTextLength})`,
-    };
-  }
-
-  const userProvider = getTtsProvider(config, prefsPath);
-  const provider = params.providerOverride ?? userProvider;
-  return {
-    config,
-    providers: resolveExtensionHostTtsProviderOrder(provider),
-  };
-}
-
 export async function textToSpeech(params: {
   text: string;
   cfg: OpenClawConfig;
@@ -518,10 +477,12 @@ export async function textToSpeech(params: {
   channel?: string;
   overrides?: TtsDirectiveOverrides;
 }): Promise {
-  const setup = resolveTtsRequestSetup({
+  const config = resolveTtsConfig(params.cfg);
+  const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
+  const setup = resolveExtensionHostTtsRequestSetup({
     text: params.text,
-    cfg: params.cfg,
-    prefsPath: params.prefsPath,
+    config,
+    prefsPath,
     providerOverride: params.overrides?.provider,
   });
   if ("error" in setup) {
@@ -542,10 +503,12 @@ export async function textToSpeechTelephony(params: {
   cfg: OpenClawConfig;
   prefsPath?: string;
 }): Promise {
-  const setup = resolveTtsRequestSetup({
+  const config = resolveTtsConfig(params.cfg);
+  const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
+  const setup = resolveExtensionHostTtsRequestSetup({
     text: params.text,
-    cfg: params.cfg,
-    prefsPath: params.prefsPath,
+    config,
+    prefsPath,
   });
   if ("error" in setup) {
     return { success: false, error: setup.error };