TTS: extract runtime setup

This commit is contained in:
Gustavo Madeira Santana 2026-03-15 19:30:44 +00:00
parent aa5cdff3cf
commit fa4f53896e
3 changed files with 217 additions and 52 deletions

View File

@ -0,0 +1,128 @@
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { withEnv } from "../test-utils/env.js";
import type { ResolvedTtsConfig } from "../tts/tts.js";
import {
resolveExtensionHostTtsProvider,
resolveExtensionHostTtsRequestSetup,
} from "./tts-runtime-setup.js";
/** Scratch directories created by `createPrefsPath`; the `afterEach` hook drains and deletes them. */
const tempDirs: string[] = [];

/**
 * Writes `contents` as JSON to a `tts.json` file inside a freshly created
 * temporary directory and records that directory in `tempDirs`.
 *
 * @param contents - Object to serialise into the preferences file.
 * @returns Absolute path of the written `tts.json` file.
 */
function createPrefsPath(contents: object): string {
  const dirPrefix = path.join(os.tmpdir(), "openclaw-tts-setup-");
  const scratchDir = mkdtempSync(dirPrefix);
  // Register the directory before writing so it is tracked even if the write throws.
  tempDirs.push(scratchDir);

  const filePath = path.join(scratchDir, "tts.json");
  const serialized = JSON.stringify(contents);
  writeFileSync(filePath, serialized, "utf8");
  return filePath;
}
/**
 * Builds a complete `ResolvedTtsConfig` fixture for the tests in this file.
 *
 * The baseline selects the `edge` provider with `providerSource: "default"`.
 * Keys present in `overrides` replace the matching top-level baseline keys
 * wholesale (shallow spread — nested sections are not merged).
 *
 * @param overrides - Optional top-level fields to replace in the baseline.
 * @returns A freshly allocated config object; nothing is shared between calls.
 */
function createResolvedConfig(overrides?: Partial<ResolvedTtsConfig>): ResolvedTtsConfig {
  const modelOverrides: ResolvedTtsConfig["modelOverrides"] = {
    enabled: true,
    allowText: true,
    allowProvider: false,
    allowVoice: true,
    allowModelId: true,
    allowVoiceSettings: true,
    allowNormalization: true,
    allowSeed: true,
  };

  const elevenlabs: ResolvedTtsConfig["elevenlabs"] = {
    baseUrl: "https://api.elevenlabs.io",
    voiceId: "voice-id",
    modelId: "eleven_multilingual_v2",
    voiceSettings: {
      stability: 0.5,
      similarityBoost: 0.75,
      style: 0,
      useSpeakerBoost: true,
      speed: 1,
    },
  };

  const openai: ResolvedTtsConfig["openai"] = {
    baseUrl: "https://api.openai.com/v1",
    model: "gpt-4o-mini-tts",
    voice: "alloy",
  };

  const edge: ResolvedTtsConfig["edge"] = {
    enabled: true,
    voice: "en-US-MichelleNeural",
    lang: "en-US",
    outputFormat: "audio-24khz-48kbitrate-mono-mp3",
    outputFormatConfigured: false,
    saveSubtitles: false,
  };

  const baseline: ResolvedTtsConfig = {
    auto: "off",
    mode: "final",
    provider: "edge",
    providerSource: "default",
    modelOverrides,
    elevenlabs,
    openai,
    edge,
    maxTextLength: 4096,
    timeoutMs: 30_000,
  };

  // Overrides are applied last so any caller-supplied key wins over the baseline.
  return { ...baseline, ...overrides };
}
// After each test, empty the tracking list and delete every scratch directory it held.
afterEach(() => {
  // `splice(0)` hands back all tracked paths and leaves `tempDirs` empty for the next test.
  const pendingDirs = tempDirs.splice(0);
  pendingDirs.forEach((scratchDir) => {
    rmSync(scratchDir, { recursive: true, force: true });
  });
});
describe("tts-runtime-setup", () => {
  it("prefers the stored provider over config and environment", () => {
    // The prefs file names elevenlabs, while both the config and the environment point at openai.
    const prefsPath = createPrefsPath({ tts: { provider: "elevenlabs" } });
    const openaiWithKey = {
      baseUrl: "https://api.openai.com/v1",
      model: "gpt-4o-mini-tts",
      voice: "alloy",
      apiKey: "config-openai-key",
    };
    const config = createResolvedConfig({
      provider: "openai",
      providerSource: "config",
      openai: openaiWithKey,
    });
    const env = { OPENAI_API_KEY: "env-openai-key", ELEVENLABS_API_KEY: undefined };

    withEnv(env, () => {
      const resolved = resolveExtensionHostTtsProvider(config, prefsPath);
      expect(resolved).toBe("elevenlabs");
    });
  });

  it("returns a validation error when text exceeds the configured hard limit", () => {
    const config = createResolvedConfig({ maxTextLength: 5 });
    const prefsPath = createPrefsPath({});

    // "too-long" is 8 characters, which is over the limit of 5 configured above.
    const outcome = resolveExtensionHostTtsRequestSetup({
      text: "too-long",
      config,
      prefsPath,
    });

    expect(outcome).toEqual({
      error: "Text too long (8 chars, max 5)",
    });
  });

  it("uses the override provider to build the host-owned fallback order", () => {
    const config = createResolvedConfig({
      provider: "edge",
      providerSource: "config",
    });
    const prefsPath = createPrefsPath({});

    // The config says edge, but the explicit override should head the provider list.
    const outcome = resolveExtensionHostTtsRequestSetup({
      text: "hello world",
      config,
      prefsPath,
      providerOverride: "elevenlabs",
    });

    expect(outcome).toEqual({
      config,
      providers: ["elevenlabs", "openai", "edge"],
    });
  });
});

View File

@ -0,0 +1,74 @@
import { existsSync, readFileSync } from "node:fs";
import type { TtsProvider } from "../config/types.tts.js";
import type { ResolvedTtsConfig } from "../tts/tts.js";
import {
resolveExtensionHostTtsApiKey,
resolveExtensionHostTtsProviderOrder,
} from "./tts-runtime-registry.js";
/**
 * Shape of the user preferences JSON file as far as this module reads it.
 * Only `tts.provider` is consulted here; every level is optional.
 */
type TtsUserPrefs = {
tts?: {
// Provider the user stored; when set it is returned ahead of config and API-key detection.
provider?: TtsProvider;
};
};
/**
 * Loads the user's TTS preferences from `prefsPath`.
 *
 * Reading is deliberately best-effort: a missing file, an unreadable file,
 * invalid JSON, or JSON whose top-level value is not an object (`null`, a
 * primitive, or an array) all produce `{}` instead of throwing, so callers can
 * fall through to their other provider sources.
 *
 * Only the top-level shape is checked; nested fields such as `tts.provider`
 * are returned exactly as they appear in the file.
 *
 * @param prefsPath - Path of the JSON preferences file.
 * @returns The parsed preferences object, or `{}` when nothing usable was read.
 */
function readExtensionHostTtsPrefs(prefsPath: string): TtsUserPrefs {
  try {
    if (!existsSync(prefsPath)) {
      return {};
    }
    // Keep the parse result as `unknown` until its top-level shape has been checked.
    const parsed: unknown = JSON.parse(readFileSync(prefsPath, "utf8"));
    // Arrays also report `typeof "object"`, but are never a valid preferences document.
    const isObjectDocument =
      typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    return isObjectDocument ? (parsed as TtsUserPrefs) : {};
  } catch {
    // Unreadable or malformed preferences are treated the same as absent ones.
    return {};
  }
}
/**
 * Decides which TTS provider to use.
 *
 * Precedence, first match wins:
 * 1. the provider stored in the preferences file at `prefsPath`;
 * 2. `config.provider`, when `config.providerSource` is `"config"`;
 * 3. `"openai"`, then `"elevenlabs"`, whichever first has an API key
 *    according to `resolveExtensionHostTtsApiKey`;
 * 4. `"edge"`.
 *
 * @param config - Resolved TTS configuration.
 * @param prefsPath - Path of the user preferences JSON file.
 * @returns The selected provider.
 */
export function resolveExtensionHostTtsProvider(
  config: ResolvedTtsConfig,
  prefsPath: string,
): TtsProvider {
  const storedProvider = readExtensionHostTtsPrefs(prefsPath).tts?.provider;
  if (storedProvider) {
    return storedProvider;
  }

  if (config.providerSource === "config") {
    return config.provider;
  }

  // Probe key-backed providers in priority order; `find` stops at the first one with a key.
  const keyBackedCandidates = ["openai", "elevenlabs"] as const;
  const firstWithKey = keyBackedCandidates.find((candidate) =>
    resolveExtensionHostTtsApiKey(config, candidate),
  );
  return firstWithKey ?? "edge";
}
/**
 * Validates a TTS request and works out which providers to try, in order.
 *
 * @param params.text - Text to synthesise; its `length` is compared with `config.maxTextLength`.
 * @param params.config - Resolved TTS configuration, returned unchanged on success.
 * @param params.prefsPath - Preferences file consulted when no override is given.
 * @param params.providerOverride - When set, used as the primary provider instead of
 *   the one resolved from preferences/config.
 * @returns `{ error }` when the text is longer than the configured limit; otherwise the
 *   config together with the provider order from `resolveExtensionHostTtsProviderOrder`.
 */
export function resolveExtensionHostTtsRequestSetup(params: {
  text: string;
  config: ResolvedTtsConfig;
  prefsPath: string;
  providerOverride?: TtsProvider;
}):
  | {
      config: ResolvedTtsConfig;
      providers: TtsProvider[];
    }
  | {
      error: string;
    } {
  const { text, config, prefsPath, providerOverride } = params;

  // Reject over-long input before touching preferences or provider resolution.
  const limit = config.maxTextLength;
  if (text.length > limit) {
    return { error: `Text too long (${text.length} chars, max ${limit})` };
  }

  // The preferences file is only read when the caller gave no explicit override.
  const primaryProvider =
    providerOverride ?? resolveExtensionHostTtsProvider(config, prefsPath);
  const providers = resolveExtensionHostTtsProviderOrder(primaryProvider);
  return { config, providers };
}

View File

@ -24,6 +24,10 @@ import {
resolveExtensionHostTtsApiKey,
resolveExtensionHostTtsProviderOrder,
} from "../extension-host/tts-runtime-registry.js";
import {
resolveExtensionHostTtsProvider,
resolveExtensionHostTtsRequestSetup,
} from "../extension-host/tts-runtime-setup.js";
import { logVerbose } from "../globals.js";
import { stripMarkdown } from "../line/markdown-to-line.js";
import { CONFIG_DIR, resolveUserPath } from "../utils.js";
@ -420,23 +424,7 @@ export function setTtsEnabled(prefsPath: string, enabled: boolean): void {
setTtsAutoMode(prefsPath, enabled ? "always" : "off");
}
/**
 * Picks the TTS provider for the given config and preferences file.
 *
 * Resolution order, first match wins:
 * 1. the provider stored in the preferences returned by `readPrefs(prefsPath)`;
 * 2. `config.provider`, when `config.providerSource` is `"config"`;
 * 3. `"openai"` if `resolveTtsApiKey` yields a value for it;
 * 4. `"elevenlabs"` if `resolveTtsApiKey` yields a value for it;
 * 5. `"edge"`.
 *
 * NOTE(review): the same name is declared again directly below as an alias of
 * `resolveExtensionHostTtsProvider`; in this diff view this function form looks
 * like the removed side of the change — confirm which declaration is live.
 */
export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): TtsProvider {
const prefs = readPrefs(prefsPath);
// A provider saved in the preferences file takes priority over everything else.
if (prefs.tts?.provider) {
return prefs.tts.provider;
}
// Next, honour a provider that came from the config itself.
if (config.providerSource === "config") {
return config.provider;
}
// Otherwise prefer a provider that has an API key, checking openai before elevenlabs.
if (resolveTtsApiKey(config, "openai")) {
return "openai";
}
if (resolveTtsApiKey(config, "elevenlabs")) {
return "elevenlabs";
}
// Last resort when nothing above matched.
return "edge";
}
export const getTtsProvider = resolveExtensionHostTtsProvider;
export function setTtsProvider(prefsPath: string, provider: TtsProvider): void {
updatePrefs(prefsPath, (prefs) => {
@ -482,35 +470,6 @@ export const resolveTtsProviderOrder = resolveExtensionHostTtsProviderOrder;
export const isTtsProviderConfigured = isExtensionHostTtsProviderConfigured;
/**
 * Validates a TTS request and determines the providers to try.
 *
 * Resolves the TTS config from `params.cfg`, and uses `params.prefsPath` or,
 * when it is absent, the path from `resolveTtsPrefsPath(config)`.
 *
 * @returns `{ error }` when `params.text.length` exceeds `config.maxTextLength`;
 *   otherwise the resolved config plus the provider list produced by
 *   `resolveExtensionHostTtsProviderOrder`.
 *
 * NOTE(review): the call sites visible in this diff use
 * `resolveExtensionHostTtsRequestSetup` instead, so this helper looks like the
 * removed side of the change — confirm before relying on it.
 */
function resolveTtsRequestSetup(params: {
text: string;
cfg: OpenClawConfig;
prefsPath?: string;
providerOverride?: TtsProvider;
}):
| {
config: ResolvedTtsConfig;
providers: TtsProvider[];
}
| {
error: string;
} {
const config = resolveTtsConfig(params.cfg);
// An explicit prefs path wins; otherwise derive it from the resolved config.
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
if (params.text.length > config.maxTextLength) {
return {
error: `Text too long (${params.text.length} chars, max ${config.maxTextLength})`,
};
}
// The stored/configured provider is resolved even when an override is supplied;
// the override only takes precedence on the next line.
const userProvider = getTtsProvider(config, prefsPath);
const provider = params.providerOverride ?? userProvider;
return {
config,
providers: resolveExtensionHostTtsProviderOrder(provider),
};
}
export async function textToSpeech(params: {
text: string;
cfg: OpenClawConfig;
@ -518,10 +477,12 @@ export async function textToSpeech(params: {
channel?: string;
overrides?: TtsDirectiveOverrides;
}): Promise<TtsResult> {
const setup = resolveTtsRequestSetup({
const config = resolveTtsConfig(params.cfg);
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
const setup = resolveExtensionHostTtsRequestSetup({
text: params.text,
cfg: params.cfg,
prefsPath: params.prefsPath,
config,
prefsPath,
providerOverride: params.overrides?.provider,
});
if ("error" in setup) {
@ -542,10 +503,12 @@ export async function textToSpeechTelephony(params: {
cfg: OpenClawConfig;
prefsPath?: string;
}): Promise<TtsTelephonyResult> {
const setup = resolveTtsRequestSetup({
const config = resolveTtsConfig(params.cfg);
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
const setup = resolveExtensionHostTtsRequestSetup({
text: params.text,
cfg: params.cfg,
prefsPath: params.prefsPath,
config,
prefsPath,
});
if ("error" in setup) {
return { success: false, error: setup.error };