diff --git a/src/extension-host/tts-preferences.test.ts b/src/extension-host/tts-preferences.test.ts new file mode 100644 index 00000000000..2b56552d9d0 --- /dev/null +++ b/src/extension-host/tts-preferences.test.ts @@ -0,0 +1,121 @@ +import { mkdtempSync, readFileSync, rmSync } from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { withEnv } from "../test-utils/env.js"; +import type { ResolvedTtsConfig } from "../tts/tts.js"; +import { + getExtensionHostTtsMaxLength, + isExtensionHostTtsEnabled, + isExtensionHostTtsSummarizationEnabled, + resolveExtensionHostTtsAutoMode, + resolveExtensionHostTtsPrefsPath, + setExtensionHostTtsAutoMode, + setExtensionHostTtsMaxLength, + setExtensionHostTtsSummarizationEnabled, +} from "./tts-preferences.js"; + +const tempDirs: string[] = []; + +function createPrefsPath(): string { + const tempDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-tts-prefs-")); + tempDirs.push(tempDir); + return path.join(tempDir, "tts.json"); +} + +function createResolvedConfig(overrides?: Partial): ResolvedTtsConfig { + return { + auto: "off", + mode: "final", + provider: "edge", + providerSource: "default", + modelOverrides: { + enabled: true, + allowText: true, + allowProvider: false, + allowVoice: true, + allowModelId: true, + allowVoiceSettings: true, + allowNormalization: true, + allowSeed: true, + }, + elevenlabs: { + baseUrl: "https://api.elevenlabs.io", + voiceId: "voice-id", + modelId: "eleven_multilingual_v2", + voiceSettings: { + stability: 0.5, + similarityBoost: 0.75, + style: 0, + useSpeakerBoost: true, + speed: 1, + }, + }, + openai: { + baseUrl: "https://api.openai.com/v1", + model: "gpt-4o-mini-tts", + voice: "alloy", + }, + edge: { + enabled: true, + voice: "en-US-MichelleNeural", + lang: "en-US", + outputFormat: "audio-24khz-48kbitrate-mono-mp3", + outputFormatConfigured: false, + saveSubtitles: false, + }, + maxTextLength: 4096, + timeoutMs: 30_000, + ...overrides, + }; +} + +afterEach(() => { + for (const tempDir of tempDirs.splice(0)) { + rmSync(tempDir, { recursive: true, force: true }); + } +}); + +describe("tts-preferences", () => { + it("prefers config prefsPath over env and default locations", () => { + const config = createResolvedConfig({ prefsPath: "~/custom-tts.json" }); + + withEnv({ OPENCLAW_TTS_PREFS: "/tmp/ignored-tts.json" }, () => { + expect(resolveExtensionHostTtsPrefsPath(config)).toContain("custom-tts.json"); + }); + }); + + it("resolves session, persisted, and config auto modes in precedence order", () => { + const prefsPath = createPrefsPath(); + const config = createResolvedConfig({ auto: "inbound" }); + + setExtensionHostTtsAutoMode(prefsPath, "tagged"); + + expect( + resolveExtensionHostTtsAutoMode({ + config, + prefsPath, + sessionAuto: "always", + }), + ).toBe("always"); + expect(resolveExtensionHostTtsAutoMode({ config, prefsPath })).toBe("tagged"); + + const persisted = JSON.parse(readFileSync(prefsPath, "utf8")) as { + tts?: { auto?: string; enabled?: boolean }; + }; + expect(persisted.tts?.auto).toBe("tagged"); + expect("enabled" in (persisted.tts ?? {})).toBe(false); + }); + + it("persists max-length and summarization preferences through the host helper", () => { + const prefsPath = createPrefsPath(); + const config = createResolvedConfig({ auto: "always" }); + + setExtensionHostTtsMaxLength(prefsPath, 900); + setExtensionHostTtsSummarizationEnabled(prefsPath, false); + + expect(getExtensionHostTtsMaxLength(prefsPath)).toBe(900); + expect(isExtensionHostTtsSummarizationEnabled(prefsPath)).toBe(false); + expect(isExtensionHostTtsEnabled(config, prefsPath)).toBe(true); + }); +}); diff --git a/src/extension-host/tts-preferences.ts b/src/extension-host/tts-preferences.ts new file mode 100644 index 00000000000..429abd56c41 --- /dev/null +++ b/src/extension-host/tts-preferences.ts @@ -0,0 +1,162 @@ +import { randomBytes } from "node:crypto"; +import { + existsSync, + mkdirSync, + readFileSync, + renameSync, + unlinkSync, + writeFileSync, +} from "node:fs"; +import path from "node:path"; +import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js"; +import type { ResolvedTtsConfig } from "../tts/tts.js"; +import { CONFIG_DIR, resolveUserPath } from "../utils.js"; + +export const DEFAULT_EXTENSION_HOST_TTS_MAX_LENGTH = 1500; +export const DEFAULT_EXTENSION_HOST_TTS_SUMMARIZE = true; + +type TtsUserPrefs = { + tts?: { + auto?: TtsAutoMode; + enabled?: boolean; + provider?: TtsProvider; + maxLength?: number; + summarize?: boolean; + }; +}; + +function readExtensionHostTtsPrefs(prefsPath: string): TtsUserPrefs { + try { + if (!existsSync(prefsPath)) { + return {}; + } + return JSON.parse(readFileSync(prefsPath, "utf8")) as TtsUserPrefs; + } catch { + return {}; + } +} + +function atomicWriteExtensionHostTtsPrefs(filePath: string, content: string): void { + const tmpPath = `${filePath}.tmp.${Date.now()}.${randomBytes(8).toString("hex")}`; + writeFileSync(tmpPath, content, { mode: 0o600 }); + try { + renameSync(tmpPath, filePath); + } catch (err) { + try { + unlinkSync(tmpPath); + } catch {} + throw err; + } +} + +function updateExtensionHostTtsPrefs( + prefsPath: string, + update: (prefs: TtsUserPrefs) => void, +): void { + const prefs = readExtensionHostTtsPrefs(prefsPath); + update(prefs); + mkdirSync(path.dirname(prefsPath), { recursive: true }); + atomicWriteExtensionHostTtsPrefs(prefsPath, JSON.stringify(prefs, null, 2)); +} + +export function normalizeExtensionHostTtsAutoMode(value: unknown): TtsAutoMode | undefined { + if (typeof value !== "string") { + return undefined; + } + const normalized = value.trim().toLowerCase(); + return normalized === "off" || + normalized === "always" || + normalized === "inbound" || + normalized === "tagged" + ? normalized + : undefined; +} + +export function resolveExtensionHostTtsPrefsPath(config: ResolvedTtsConfig): string { + if (config.prefsPath?.trim()) { + return resolveUserPath(config.prefsPath.trim()); + } + const envPath = process.env.OPENCLAW_TTS_PREFS?.trim(); + if (envPath) { + return resolveUserPath(envPath); + } + return path.join(CONFIG_DIR, "settings", "tts.json"); +} + +function resolveExtensionHostTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined { + const auto = normalizeExtensionHostTtsAutoMode(prefs.tts?.auto); + if (auto) { + return auto; + } + if (typeof prefs.tts?.enabled === "boolean") { + return prefs.tts.enabled ? "always" : "off"; + } + return undefined; +} + +export function resolveExtensionHostTtsAutoMode(params: { + config: ResolvedTtsConfig; + prefsPath: string; + sessionAuto?: string; +}): TtsAutoMode { + const sessionAuto = normalizeExtensionHostTtsAutoMode(params.sessionAuto); + if (sessionAuto) { + return sessionAuto; + } + const prefsAuto = resolveExtensionHostTtsAutoModeFromPrefs( + readExtensionHostTtsPrefs(params.prefsPath), + ); + if (prefsAuto) { + return prefsAuto; + } + return params.config.auto; +} + +export function isExtensionHostTtsEnabled( + config: ResolvedTtsConfig, + prefsPath: string, + sessionAuto?: string, +): boolean { + return resolveExtensionHostTtsAutoMode({ config, prefsPath, sessionAuto }) !== "off"; +} + +export function setExtensionHostTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void { + updateExtensionHostTtsPrefs(prefsPath, (prefs) => { + const next = { ...prefs.tts }; + delete next.enabled; + next.auto = mode; + prefs.tts = next; + }); +} + +export function setExtensionHostTtsEnabled(prefsPath: string, enabled: boolean): void { + setExtensionHostTtsAutoMode(prefsPath, enabled ? "always" : "off"); +} + +export function setExtensionHostTtsProvider(prefsPath: string, provider: TtsProvider): void { + updateExtensionHostTtsPrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, provider }; + }); +} + +export function getExtensionHostTtsMaxLength(prefsPath: string): number { + const prefs = readExtensionHostTtsPrefs(prefsPath); + return prefs.tts?.maxLength ?? DEFAULT_EXTENSION_HOST_TTS_MAX_LENGTH; +} + +export function setExtensionHostTtsMaxLength(prefsPath: string, maxLength: number): void { + updateExtensionHostTtsPrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, maxLength }; + }); +} + +export function isExtensionHostTtsSummarizationEnabled(prefsPath: string): boolean { + const prefs = readExtensionHostTtsPrefs(prefsPath); + return prefs.tts?.summarize ?? DEFAULT_EXTENSION_HOST_TTS_SUMMARIZE; +} + +export function setExtensionHostTtsSummarizationEnabled(prefsPath: string, enabled: boolean): void { + updateExtensionHostTtsPrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, summarize: enabled }; + }); +} diff --git a/src/tts/tts.ts b/src/tts/tts.ts index 2fd61dbb3ea..3cb8bb579cd 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -1,6 +1,3 @@ -import { randomBytes } from "node:crypto"; -import { existsSync, readFileSync, writeFileSync, renameSync, unlinkSync } from "node:fs"; -import path from "node:path"; import type { ReplyPayload } from "../auto-reply/types.js"; import type { OpenClawConfig } from "../config/config.js"; import { normalizeResolvedSecretInputString } from "../config/types.secrets.js"; @@ -11,6 +8,19 @@ import type { TtsProvider, TtsModelOverrideConfig, } from "../config/types.tts.js"; +import { + getExtensionHostTtsMaxLength, + isExtensionHostTtsEnabled, + isExtensionHostTtsSummarizationEnabled, + normalizeExtensionHostTtsAutoMode, + resolveExtensionHostTtsAutoMode, + resolveExtensionHostTtsPrefsPath, + setExtensionHostTtsAutoMode, + setExtensionHostTtsEnabled, + setExtensionHostTtsMaxLength, + setExtensionHostTtsProvider, + setExtensionHostTtsSummarizationEnabled, +} from "../extension-host/tts-preferences.js"; import { executeExtensionHostTextToSpeech, executeExtensionHostTextToSpeechTelephony, @@ -30,7 +40,6 @@ import { } from "../extension-host/tts-runtime-setup.js"; import { logVerbose } from "../globals.js"; import { stripMarkdown } from "../line/markdown-to-line.js"; -import { CONFIG_DIR, resolveUserPath } from "../utils.js"; import { DEFAULT_OPENAI_BASE_URL, isValidOpenAIModel, @@ -45,8 +54,6 @@ import { export { OPENAI_TTS_MODELS, OPENAI_TTS_VOICES } from "./tts-core.js"; const DEFAULT_TIMEOUT_MS = 30_000; -const DEFAULT_TTS_MAX_LENGTH = 1500; -const DEFAULT_TTS_SUMMARIZE = true; const DEFAULT_MAX_TEXT_LENGTH = 4096; const DEFAULT_ELEVENLABS_BASE_URL = "https://api.elevenlabs.io"; @@ -66,8 +73,6 @@ const DEFAULT_ELEVENLABS_VOICE_SETTINGS = { speed: 1.0, }; -const TTS_AUTO_MODES = new Set(["off", "always", "inbound", "tagged"]); - export type ResolvedTtsConfig = { auto: TtsAutoMode; mode: TtsMode; @@ -117,16 +122,6 @@ export type ResolvedTtsConfig = { timeoutMs: number; }; -type TtsUserPrefs = { - tts?: { - auto?: TtsAutoMode; - enabled?: boolean; - provider?: TtsProvider; - maxLength?: number; - summarize?: boolean; - }; -}; - export type ResolvedTtsModelOverrides = { enabled: boolean; allowText: boolean; @@ -195,16 +190,7 @@ type TtsStatusEntry = { let lastTtsAttempt: TtsStatusEntry | undefined; -export function normalizeTtsAutoMode(value: unknown): TtsAutoMode | undefined { - if (typeof value !== "string") { - return undefined; - } - const normalized = value.trim().toLowerCase(); - if (TTS_AUTO_MODES.has(normalized as TtsAutoMode)) { - return normalized as TtsAutoMode; - } - return undefined; -} +export const normalizeTtsAutoMode = normalizeExtensionHostTtsAutoMode; function resolveModelOverridePolicy( overrides: TtsModelOverrideConfig | undefined, @@ -307,43 +293,9 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig { }; } -export function resolveTtsPrefsPath(config: ResolvedTtsConfig): string { - if (config.prefsPath?.trim()) { - return resolveUserPath(config.prefsPath.trim()); - } - const envPath = process.env.OPENCLAW_TTS_PREFS?.trim(); - if (envPath) { - return resolveUserPath(envPath); - } - return path.join(CONFIG_DIR, "settings", "tts.json"); -} +export const resolveTtsPrefsPath = resolveExtensionHostTtsPrefsPath; -function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined { - const auto = normalizeTtsAutoMode(prefs.tts?.auto); - if (auto) { - return auto; - } - if (typeof prefs.tts?.enabled === "boolean") { - return prefs.tts.enabled ? "always" : "off"; - } - return undefined; -} - -export function resolveTtsAutoMode(params: { - config: ResolvedTtsConfig; - prefsPath: string; - sessionAuto?: string; -}): TtsAutoMode { - const sessionAuto = normalizeTtsAutoMode(params.sessionAuto); - if (sessionAuto) { - return sessionAuto; - } - const prefsAuto = resolveTtsAutoModeFromPrefs(readPrefs(params.prefsPath)); - if (prefsAuto) { - return prefsAuto; - } - return params.config.auto; -} +export const resolveTtsAutoMode = resolveExtensionHostTtsAutoMode; export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefined { const config = resolveTtsConfig(cfg); @@ -352,8 +304,8 @@ export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefine if (autoMode === "off") { return undefined; } - const maxLength = getTtsMaxLength(prefsPath); - const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off"; + const maxLength = getExtensionHostTtsMaxLength(prefsPath); + const summarize = isExtensionHostTtsSummarizationEnabled(prefsPath) ? "on" : "off"; const autoHint = autoMode === "inbound" ? "Only use TTS when the user's last message includes audio/voice." @@ -370,89 +322,23 @@ export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefine .join("\n"); } -function readPrefs(prefsPath: string): TtsUserPrefs { - try { - if (!existsSync(prefsPath)) { - return {}; - } - return JSON.parse(readFileSync(prefsPath, "utf8")) as TtsUserPrefs; - } catch { - return {}; - } -} +export const isTtsEnabled = isExtensionHostTtsEnabled; -function atomicWriteFileSync(filePath: string, content: string): void { - const tmpPath = `${filePath}.tmp.${Date.now()}.${randomBytes(8).toString("hex")}`; - writeFileSync(tmpPath, content, { mode: 0o600 }); - try { - renameSync(tmpPath, filePath); - } catch (err) { - try { - unlinkSync(tmpPath); - } catch { - // ignore - } - throw err; - } -} +export const setTtsAutoMode = setExtensionHostTtsAutoMode; -function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void): void { - const prefs = readPrefs(prefsPath); - update(prefs); - mkdirSync(path.dirname(prefsPath), { recursive: true }); - atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2)); -} - -export function isTtsEnabled( - config: ResolvedTtsConfig, - prefsPath: string, - sessionAuto?: string, -): boolean { - return resolveTtsAutoMode({ config, prefsPath, sessionAuto }) !== "off"; -} - -export function setTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void { - updatePrefs(prefsPath, (prefs) => { - const next = { ...prefs.tts }; - delete next.enabled; - next.auto = mode; - prefs.tts = next; - }); -} - -export function setTtsEnabled(prefsPath: string, enabled: boolean): void { - setTtsAutoMode(prefsPath, enabled ? "always" : "off"); -} +export const setTtsEnabled = setExtensionHostTtsEnabled; export const getTtsProvider = resolveExtensionHostTtsProvider; -export function setTtsProvider(prefsPath: string, provider: TtsProvider): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, provider }; - }); -} +export const setTtsProvider = setExtensionHostTtsProvider; -export function getTtsMaxLength(prefsPath: string): number { - const prefs = readPrefs(prefsPath); - return prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH; -} +export const getTtsMaxLength = getExtensionHostTtsMaxLength; -export function setTtsMaxLength(prefsPath: string, maxLength: number): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, maxLength }; - }); -} +export const setTtsMaxLength = setExtensionHostTtsMaxLength; -export function isSummarizationEnabled(prefsPath: string): boolean { - const prefs = readPrefs(prefsPath); - return prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE; -} +export const isSummarizationEnabled = isExtensionHostTtsSummarizationEnabled; -export function setSummarizationEnabled(prefsPath: string, enabled: boolean): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, summarize: enabled }; - }); -} +export const setSummarizationEnabled = setExtensionHostTtsSummarizationEnabled; export function getLastTtsAttempt(): TtsStatusEntry | undefined { return lastTtsAttempt;