TTS: extract preferences

This commit is contained in:
Gustavo Madeira Santana 2026-03-15 19:34:22 +00:00
parent 53cdec4ab9
commit 64353a2b16
3 changed files with 309 additions and 140 deletions

View File

@ -0,0 +1,121 @@
import { mkdtempSync, readFileSync, rmSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { withEnv } from "../test-utils/env.js";
import type { ResolvedTtsConfig } from "../tts/tts.js";
import {
getExtensionHostTtsMaxLength,
isExtensionHostTtsEnabled,
isExtensionHostTtsSummarizationEnabled,
resolveExtensionHostTtsAutoMode,
resolveExtensionHostTtsPrefsPath,
setExtensionHostTtsAutoMode,
setExtensionHostTtsMaxLength,
setExtensionHostTtsSummarizationEnabled,
} from "./tts-preferences.js";
/** Temporary directories handed out by `createPrefsPath`; the `afterEach` hook drains and deletes them. */
const tempDirs: string[] = [];

/**
 * Creates a unique temporary directory, registers it for cleanup, and returns
 * the path of a `tts.json` file inside it. The file itself is not created.
 */
function createPrefsPath(): string {
  const prefix = path.join(os.tmpdir(), "openclaw-tts-prefs-");
  const scratchDir = mkdtempSync(prefix);
  tempDirs.push(scratchDir);
  return path.join(scratchDir, "tts.json");
}
/**
 * Builds a complete `ResolvedTtsConfig` fixture for the tests.
 *
 * @param overrides Top-level fields that replace the fixture defaults (shallow merge).
 * @returns The default fixture with `overrides` applied on top.
 */
function createResolvedConfig(overrides?: Partial<ResolvedTtsConfig>): ResolvedTtsConfig {
  const defaults: ResolvedTtsConfig = {
    auto: "off",
    mode: "final",
    provider: "edge",
    providerSource: "default",
    modelOverrides: {
      enabled: true,
      allowText: true,
      allowProvider: false,
      allowVoice: true,
      allowModelId: true,
      allowVoiceSettings: true,
      allowNormalization: true,
      allowSeed: true,
    },
    elevenlabs: {
      baseUrl: "https://api.elevenlabs.io",
      voiceId: "voice-id",
      modelId: "eleven_multilingual_v2",
      voiceSettings: {
        stability: 0.5,
        similarityBoost: 0.75,
        style: 0,
        useSpeakerBoost: true,
        speed: 1,
      },
    },
    openai: {
      baseUrl: "https://api.openai.com/v1",
      model: "gpt-4o-mini-tts",
      voice: "alloy",
    },
    edge: {
      enabled: true,
      voice: "en-US-MichelleNeural",
      lang: "en-US",
      outputFormat: "audio-24khz-48kbitrate-mono-mp3",
      outputFormatConfigured: false,
      saveSubtitles: false,
    },
    maxTextLength: 4096,
    timeoutMs: 30_000,
  };
  // Later spread wins, so caller-supplied fields replace the defaults above.
  return { ...defaults, ...overrides };
}
// After every test, empty the temp-directory registry and delete each directory it held.
afterEach(() => {
  const created = tempDirs.splice(0, tempDirs.length);
  created.forEach((dir) => {
    rmSync(dir, { recursive: true, force: true });
  });
});
describe("tts-preferences", () => {
  it("prefers config prefsPath over env and default locations", () => {
    // Both the config path and the env override are set; the config path must win.
    const resolvedConfig = createResolvedConfig({ prefsPath: "~/custom-tts.json" });
    withEnv({ OPENCLAW_TTS_PREFS: "/tmp/ignored-tts.json" }, () => {
      const resolvedPath = resolveExtensionHostTtsPrefsPath(resolvedConfig);
      expect(resolvedPath).toContain("custom-tts.json");
    });
  });

  it("resolves session, persisted, and config auto modes in precedence order", () => {
    const prefsPath = createPrefsPath();
    const config = createResolvedConfig({ auto: "inbound" });
    setExtensionHostTtsAutoMode(prefsPath, "tagged");

    // A session-level value outranks the persisted preference.
    const withSession = resolveExtensionHostTtsAutoMode({
      config,
      prefsPath,
      sessionAuto: "always",
    });
    expect(withSession).toBe("always");

    // Without a session value, the persisted preference outranks the config.
    const withoutSession = resolveExtensionHostTtsAutoMode({ config, prefsPath });
    expect(withoutSession).toBe("tagged");

    // The file on disk stores `auto` and carries no `enabled` key.
    const rawPrefs = readFileSync(prefsPath, "utf8");
    const persisted = JSON.parse(rawPrefs) as {
      tts?: { auto?: string; enabled?: boolean };
    };
    expect(persisted.tts?.auto).toBe("tagged");
    expect("enabled" in (persisted.tts ?? {})).toBe(false);
  });

  it("persists max-length and summarization preferences through the host helper", () => {
    const prefsPath = createPrefsPath();
    const config = createResolvedConfig({ auto: "always" });

    setExtensionHostTtsMaxLength(prefsPath, 900);
    setExtensionHostTtsSummarizationEnabled(prefsPath, false);

    const storedMaxLength = getExtensionHostTtsMaxLength(prefsPath);
    expect(storedMaxLength).toBe(900);
    const summarizationOn = isExtensionHostTtsSummarizationEnabled(prefsPath);
    expect(summarizationOn).toBe(false);
    // No auto mode was persisted, so the config's "always" decides.
    const ttsOn = isExtensionHostTtsEnabled(config, prefsPath);
    expect(ttsOn).toBe(true);
  });
});

View File

@ -0,0 +1,162 @@
import { randomBytes } from "node:crypto";
import {
existsSync,
mkdirSync,
readFileSync,
renameSync,
unlinkSync,
writeFileSync,
} from "node:fs";
import path from "node:path";
import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
import type { ResolvedTtsConfig } from "../tts/tts.js";
import { CONFIG_DIR, resolveUserPath } from "../utils.js";
/**
 * Value returned by `getExtensionHostTtsMaxLength` when the prefs file stores no `tts.maxLength`.
 * NOTE(review): the unit is presumably characters — confirm against callers.
 */
export const DEFAULT_EXTENSION_HOST_TTS_MAX_LENGTH = 1500;
/** Value returned by `isExtensionHostTtsSummarizationEnabled` when the prefs file stores no `tts.summarize`. */
export const DEFAULT_EXTENSION_HOST_TTS_SUMMARIZE = true;
/**
 * Shape of the JSON document stored at the TTS preferences path.
 * Every field is optional; readers in this module fall back to defaults or to the resolved config.
 */
type TtsUserPrefs = {
tts?: {
// Persisted auto mode; takes priority over `enabled` when it normalizes to a valid mode.
auto?: TtsAutoMode;
// Boolean toggle read only when `auto` is absent or invalid (true -> "always", false -> "off");
// removed from the document whenever an auto mode is written.
enabled?: boolean;
// Provider selection written by `setExtensionHostTtsProvider`.
provider?: TtsProvider;
// Stored limit returned by `getExtensionHostTtsMaxLength`.
maxLength?: number;
// Stored toggle returned by `isExtensionHostTtsSummarizationEnabled`.
summarize?: boolean;
};
};
/**
 * Loads the persisted TTS preferences from `prefsPath`.
 *
 * Best-effort by design: a missing file, an unreadable file, malformed JSON,
 * or a JSON document whose top level is not a plain object all yield `{}`,
 * so callers can always dereference and assign `prefs.tts` safely.
 *
 * @param prefsPath Absolute path of the preferences JSON file.
 * @returns The parsed preferences object, or `{}` when none can be read.
 */
function readExtensionHostTtsPrefs(prefsPath: string): TtsUserPrefs {
  try {
    if (!existsSync(prefsPath)) {
      return {};
    }
    const parsed: unknown = JSON.parse(readFileSync(prefsPath, "utf8"));
    // `null`, arrays and primitives are valid JSON but not a prefs document:
    // `null` would make `prefs.tts` throw, and properties set on an array are
    // dropped by JSON.stringify, silently losing the update.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return {};
    }
    return parsed as TtsUserPrefs;
  } catch {
    // Unreadable or malformed file: behave as if no preferences were stored.
    return {};
  }
}
/**
 * Writes `content` to `filePath` by writing a uniquely named sibling temp file
 * (mode 0o600) and renaming it over the target, so readers never observe a
 * partially written file.
 *
 * @param filePath Destination file; its parent directory must already exist.
 * @param content Full text to store.
 * @throws Rethrows the write or rename error after attempting to remove the temp file.
 */
function atomicWriteExtensionHostTtsPrefs(filePath: string, content: string): void {
  const tmpPath = `${filePath}.tmp.${Date.now()}.${randomBytes(8).toString("hex")}`;
  try {
    // The write is inside the guarded region so a failed or partial write
    // does not leave the temp file behind.
    writeFileSync(tmpPath, content, { mode: 0o600 });
    renameSync(tmpPath, filePath);
  } catch (err) {
    try {
      unlinkSync(tmpPath);
    } catch {
      // Best-effort cleanup: the temp file may never have been created.
    }
    throw err;
  }
}
/**
 * Read-modify-write helper for the preferences file.
 *
 * Loads the current preferences, lets `update` mutate them in place, ensures
 * the parent directory exists, and writes the result back as 2-space-indented JSON.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param update Callback that mutates the loaded preferences object.
 */
function updateExtensionHostTtsPrefs(
  prefsPath: string,
  update: (prefs: TtsUserPrefs) => void,
): void {
  const current = readExtensionHostTtsPrefs(prefsPath);
  update(current);
  const parentDir = path.dirname(prefsPath);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(current, null, 2);
  atomicWriteExtensionHostTtsPrefs(prefsPath, serialized);
}
/**
 * Converts an arbitrary value into a `TtsAutoMode`.
 *
 * Strings are trimmed and lower-cased before matching.
 *
 * @param value Candidate value from config, prefs, or session state.
 * @returns `"off"`, `"always"`, `"inbound"` or `"tagged"`; `undefined` for non-strings and unknown strings.
 */
export function normalizeExtensionHostTtsAutoMode(value: unknown): TtsAutoMode | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const candidate = value.trim().toLowerCase();
  switch (candidate) {
    case "off":
    case "always":
    case "inbound":
    case "tagged":
      return candidate;
    default:
      return undefined;
  }
}
/**
 * Determines where the TTS preferences file lives.
 *
 * Precedence: non-blank `config.prefsPath`, then a non-blank
 * `OPENCLAW_TTS_PREFS` environment variable, then `settings/tts.json` under
 * `CONFIG_DIR`. The first two are passed through `resolveUserPath`.
 *
 * @param config Resolved TTS configuration.
 * @returns Path of the preferences file.
 */
export function resolveExtensionHostTtsPrefsPath(config: ResolvedTtsConfig): string {
  const configured = config.prefsPath?.trim();
  if (configured) {
    return resolveUserPath(configured);
  }
  const fromEnv = process.env.OPENCLAW_TTS_PREFS?.trim();
  return fromEnv ? resolveUserPath(fromEnv) : path.join(CONFIG_DIR, "settings", "tts.json");
}
/**
 * Derives the auto mode stored in a preferences document.
 *
 * A valid `tts.auto` wins; otherwise a boolean `tts.enabled` maps to
 * `"always"` (true) or `"off"` (false).
 *
 * @param prefs Loaded preferences document.
 * @returns The persisted mode, or `undefined` when the document expresses none.
 */
function resolveExtensionHostTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined {
  const explicitMode = normalizeExtensionHostTtsAutoMode(prefs.tts?.auto);
  if (explicitMode) {
    return explicitMode;
  }
  const enabledFlag = prefs.tts?.enabled;
  if (enabledFlag === true) {
    return "always";
  }
  if (enabledFlag === false) {
    return "off";
  }
  return undefined;
}
/**
 * Resolves the effective TTS auto mode.
 *
 * Precedence: valid session override, then the mode persisted in the prefs
 * file, then `config.auto`. The prefs file is only read when the session
 * override is absent or invalid.
 *
 * @param params.config Resolved TTS configuration supplying the final fallback.
 * @param params.prefsPath Path of the preferences JSON file.
 * @param params.sessionAuto Optional session-level mode string.
 * @returns The effective auto mode.
 */
export function resolveExtensionHostTtsAutoMode(params: {
  config: ResolvedTtsConfig;
  prefsPath: string;
  sessionAuto?: string;
}): TtsAutoMode {
  // `??` short-circuits, so each later source is consulted only when the earlier one yields nothing.
  return (
    normalizeExtensionHostTtsAutoMode(params.sessionAuto) ??
    resolveExtensionHostTtsAutoModeFromPrefs(readExtensionHostTtsPrefs(params.prefsPath)) ??
    params.config.auto
  );
}
/**
 * Reports whether TTS is active, i.e. the effective auto mode is anything other than `"off"`.
 *
 * @param config Resolved TTS configuration.
 * @param prefsPath Path of the preferences JSON file.
 * @param sessionAuto Optional session-level mode string.
 */
export function isExtensionHostTtsEnabled(
  config: ResolvedTtsConfig,
  prefsPath: string,
  sessionAuto?: string,
): boolean {
  const effectiveMode = resolveExtensionHostTtsAutoMode({ config, prefsPath, sessionAuto });
  return effectiveMode !== "off";
}
/**
 * Persists `mode` as `tts.auto` and removes any `tts.enabled` key from the preferences file.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param mode Auto mode to store.
 */
export function setExtensionHostTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void {
  updateExtensionHostTtsPrefs(prefsPath, (prefs) => {
    const updated: NonNullable<TtsUserPrefs["tts"]> = { ...prefs.tts, auto: mode };
    // `auto` is now the single source of truth, so the boolean flag is dropped.
    delete updated.enabled;
    prefs.tts = updated;
  });
}
/**
 * Boolean convenience wrapper: persists `"always"` when `enabled` is true, `"off"` otherwise.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param enabled Desired on/off state.
 */
export function setExtensionHostTtsEnabled(prefsPath: string, enabled: boolean): void {
  const mode: TtsAutoMode = enabled ? "always" : "off";
  setExtensionHostTtsAutoMode(prefsPath, mode);
}
/**
 * Persists `provider` as `tts.provider`, keeping the other stored TTS fields.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param provider Provider to store.
 */
export function setExtensionHostTtsProvider(prefsPath: string, provider: TtsProvider): void {
  updateExtensionHostTtsPrefs(prefsPath, (prefs) => {
    const merged = { ...prefs.tts, provider };
    prefs.tts = merged;
  });
}
/**
 * Reads the stored `tts.maxLength`.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @returns The stored value, or `DEFAULT_EXTENSION_HOST_TTS_MAX_LENGTH` when it is `null` or absent.
 */
export function getExtensionHostTtsMaxLength(prefsPath: string): number {
  const stored = readExtensionHostTtsPrefs(prefsPath).tts?.maxLength;
  return stored ?? DEFAULT_EXTENSION_HOST_TTS_MAX_LENGTH;
}
/**
 * Persists `maxLength` as `tts.maxLength`, keeping the other stored TTS fields.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param maxLength Limit to store.
 */
export function setExtensionHostTtsMaxLength(prefsPath: string, maxLength: number): void {
  updateExtensionHostTtsPrefs(prefsPath, (prefs) => {
    const merged = { ...prefs.tts, maxLength };
    prefs.tts = merged;
  });
}
/**
 * Reads the stored `tts.summarize` toggle.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @returns The stored value, or `DEFAULT_EXTENSION_HOST_TTS_SUMMARIZE` when it is `null` or absent.
 */
export function isExtensionHostTtsSummarizationEnabled(prefsPath: string): boolean {
  const stored = readExtensionHostTtsPrefs(prefsPath).tts?.summarize;
  return stored ?? DEFAULT_EXTENSION_HOST_TTS_SUMMARIZE;
}
/**
 * Persists `enabled` as `tts.summarize`, keeping the other stored TTS fields.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param enabled Whether summarization should be on.
 */
export function setExtensionHostTtsSummarizationEnabled(prefsPath: string, enabled: boolean): void {
  updateExtensionHostTtsPrefs(prefsPath, (prefs) => {
    const merged = { ...prefs.tts, summarize: enabled };
    prefs.tts = merged;
  });
}

View File

@ -1,6 +1,3 @@
import { randomBytes } from "node:crypto";
import { existsSync, readFileSync, writeFileSync, renameSync, unlinkSync } from "node:fs";
import path from "node:path";
import type { ReplyPayload } from "../auto-reply/types.js";
import type { OpenClawConfig } from "../config/config.js";
import { normalizeResolvedSecretInputString } from "../config/types.secrets.js";
@ -11,6 +8,19 @@ import type {
TtsProvider,
TtsModelOverrideConfig,
} from "../config/types.tts.js";
import {
getExtensionHostTtsMaxLength,
isExtensionHostTtsEnabled,
isExtensionHostTtsSummarizationEnabled,
normalizeExtensionHostTtsAutoMode,
resolveExtensionHostTtsAutoMode,
resolveExtensionHostTtsPrefsPath,
setExtensionHostTtsAutoMode,
setExtensionHostTtsEnabled,
setExtensionHostTtsMaxLength,
setExtensionHostTtsProvider,
setExtensionHostTtsSummarizationEnabled,
} from "../extension-host/tts-preferences.js";
import {
executeExtensionHostTextToSpeech,
executeExtensionHostTextToSpeechTelephony,
@ -30,7 +40,6 @@ import {
} from "../extension-host/tts-runtime-setup.js";
import { logVerbose } from "../globals.js";
import { stripMarkdown } from "../line/markdown-to-line.js";
import { CONFIG_DIR, resolveUserPath } from "../utils.js";
import {
DEFAULT_OPENAI_BASE_URL,
isValidOpenAIModel,
@ -45,8 +54,6 @@ import {
export { OPENAI_TTS_MODELS, OPENAI_TTS_VOICES } from "./tts-core.js";
// NOTE(review): consumers of DEFAULT_TIMEOUT_MS are outside this excerpt; presumably a timeout in milliseconds — confirm.
const DEFAULT_TIMEOUT_MS = 30_000;
// Fallback used by getTtsMaxLength when the prefs file stores no `tts.maxLength`.
const DEFAULT_TTS_MAX_LENGTH = 1500;
// Fallback used by isSummarizationEnabled when the prefs file stores no `tts.summarize`.
const DEFAULT_TTS_SUMMARIZE = true;
// NOTE(review): consumers of DEFAULT_MAX_TEXT_LENGTH are outside this excerpt — confirm what it bounds.
const DEFAULT_MAX_TEXT_LENGTH = 4096;
// NOTE(review): presumably the base URL used when no ElevenLabs URL is configured — confirm.
const DEFAULT_ELEVENLABS_BASE_URL = "https://api.elevenlabs.io";
@ -66,8 +73,6 @@ const DEFAULT_ELEVENLABS_VOICE_SETTINGS = {
speed: 1.0,
};
// Accepted auto-mode strings; normalizeTtsAutoMode checks trimmed, lower-cased input against this set.
const TTS_AUTO_MODES = new Set<TtsAutoMode>(["off", "always", "inbound", "tagged"]);
export type ResolvedTtsConfig = {
auto: TtsAutoMode;
mode: TtsMode;
@ -117,16 +122,6 @@ export type ResolvedTtsConfig = {
timeoutMs: number;
};
/**
 * Shape of the JSON document stored at the TTS preferences path.
 * Every field is optional; readers fall back to defaults or to the resolved config.
 */
type TtsUserPrefs = {
tts?: {
// Persisted auto mode; takes priority over `enabled` when it normalizes to a valid mode.
auto?: TtsAutoMode;
// Boolean toggle read only when `auto` is absent or invalid (true -> "always", false -> "off");
// removed from the document whenever setTtsAutoMode writes a mode.
enabled?: boolean;
// Provider selection written by setTtsProvider.
provider?: TtsProvider;
// Stored limit returned by getTtsMaxLength.
maxLength?: number;
// Stored toggle returned by isSummarizationEnabled.
summarize?: boolean;
};
};
export type ResolvedTtsModelOverrides = {
enabled: boolean;
allowText: boolean;
@ -195,16 +190,7 @@ type TtsStatusEntry = {
// Module-level status entry returned by getLastTtsAttempt(); `undefined` until assigned (writers are outside this excerpt).
let lastTtsAttempt: TtsStatusEntry | undefined;
/**
 * Converts an arbitrary value into a `TtsAutoMode`.
 *
 * Strings are trimmed and lower-cased, then checked against `TTS_AUTO_MODES`.
 *
 * @param value Candidate value from config, prefs, or session state.
 * @returns The matching mode, or `undefined` for non-strings and unknown strings.
 */
export function normalizeTtsAutoMode(value: unknown): TtsAutoMode | undefined {
  const candidate = typeof value === "string" ? value.trim().toLowerCase() : undefined;
  if (candidate === undefined) {
    return undefined;
  }
  return TTS_AUTO_MODES.has(candidate as TtsAutoMode) ? (candidate as TtsAutoMode) : undefined;
}
/** Exposes `normalizeExtensionHostTtsAutoMode` (imported from ../extension-host/tts-preferences.js) as `normalizeTtsAutoMode`. */
export const normalizeTtsAutoMode = normalizeExtensionHostTtsAutoMode;
function resolveModelOverridePolicy(
overrides: TtsModelOverrideConfig | undefined,
@ -307,43 +293,9 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
};
}
/**
 * Determines where the TTS preferences file lives.
 *
 * Precedence: non-blank `config.prefsPath`, then a non-blank
 * `OPENCLAW_TTS_PREFS` environment variable, then `settings/tts.json` under
 * `CONFIG_DIR`. The first two are passed through `resolveUserPath`.
 *
 * @param config Resolved TTS configuration.
 * @returns Path of the preferences file.
 */
export function resolveTtsPrefsPath(config: ResolvedTtsConfig): string {
  const configured = config.prefsPath?.trim();
  if (configured) {
    return resolveUserPath(configured);
  }
  const fromEnv = process.env.OPENCLAW_TTS_PREFS?.trim();
  return fromEnv ? resolveUserPath(fromEnv) : path.join(CONFIG_DIR, "settings", "tts.json");
}
/** Exposes `resolveExtensionHostTtsPrefsPath` (imported from ../extension-host/tts-preferences.js) as `resolveTtsPrefsPath`. */
export const resolveTtsPrefsPath = resolveExtensionHostTtsPrefsPath;
/**
 * Derives the auto mode stored in a preferences document.
 *
 * A valid `tts.auto` wins; otherwise a boolean `tts.enabled` maps to
 * `"always"` (true) or `"off"` (false).
 *
 * @param prefs Loaded preferences document.
 * @returns The persisted mode, or `undefined` when the document expresses none.
 */
function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined {
  const explicitMode = normalizeTtsAutoMode(prefs.tts?.auto);
  if (explicitMode) {
    return explicitMode;
  }
  const enabledFlag = prefs.tts?.enabled;
  if (enabledFlag === true) {
    return "always";
  }
  if (enabledFlag === false) {
    return "off";
  }
  return undefined;
}
/**
 * Resolves the effective TTS auto mode.
 *
 * Precedence: valid session override, then the mode persisted in the prefs
 * file, then `config.auto`. The prefs file is only read when the session
 * override is absent or invalid.
 *
 * @param params.config Resolved TTS configuration supplying the final fallback.
 * @param params.prefsPath Path of the preferences JSON file.
 * @param params.sessionAuto Optional session-level mode string.
 * @returns The effective auto mode.
 */
export function resolveTtsAutoMode(params: {
  config: ResolvedTtsConfig;
  prefsPath: string;
  sessionAuto?: string;
}): TtsAutoMode {
  // `??` short-circuits, so each later source is consulted only when the earlier one yields nothing.
  return (
    normalizeTtsAutoMode(params.sessionAuto) ??
    resolveTtsAutoModeFromPrefs(readPrefs(params.prefsPath)) ??
    params.config.auto
  );
}
/** Exposes `resolveExtensionHostTtsAutoMode` (imported from ../extension-host/tts-preferences.js) as `resolveTtsAutoMode`. */
export const resolveTtsAutoMode = resolveExtensionHostTtsAutoMode;
export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefined {
const config = resolveTtsConfig(cfg);
@ -352,8 +304,8 @@ export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefine
if (autoMode === "off") {
return undefined;
}
const maxLength = getTtsMaxLength(prefsPath);
const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off";
const maxLength = getExtensionHostTtsMaxLength(prefsPath);
const summarize = isExtensionHostTtsSummarizationEnabled(prefsPath) ? "on" : "off";
const autoHint =
autoMode === "inbound"
? "Only use TTS when the user's last message includes audio/voice."
@ -370,89 +322,23 @@ export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefine
.join("\n");
}
/**
 * Loads the persisted TTS preferences from `prefsPath`.
 *
 * Best-effort by design: a missing file, an unreadable file, malformed JSON,
 * or a JSON document whose top level is not a plain object all yield `{}`,
 * so callers can always dereference and assign `prefs.tts` safely.
 *
 * @param prefsPath Absolute path of the preferences JSON file.
 * @returns The parsed preferences object, or `{}` when none can be read.
 */
function readPrefs(prefsPath: string): TtsUserPrefs {
  try {
    if (!existsSync(prefsPath)) {
      return {};
    }
    const parsed: unknown = JSON.parse(readFileSync(prefsPath, "utf8"));
    // `null`, arrays and primitives are valid JSON but not a prefs document:
    // `null` would make `prefs.tts` throw, and properties set on an array are
    // dropped by JSON.stringify, silently losing the update.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return {};
    }
    return parsed as TtsUserPrefs;
  } catch {
    // Unreadable or malformed file: behave as if no preferences were stored.
    return {};
  }
}
/** Exposes `isExtensionHostTtsEnabled` (imported from ../extension-host/tts-preferences.js) as `isTtsEnabled`. */
export const isTtsEnabled = isExtensionHostTtsEnabled;
/**
 * Writes `content` to `filePath` by writing a uniquely named sibling temp file
 * (mode 0o600) and renaming it over the target, so readers never observe a
 * partially written file.
 *
 * @param filePath Destination file; its parent directory must already exist.
 * @param content Full text to store.
 * @throws Rethrows the write or rename error after attempting to remove the temp file.
 */
function atomicWriteFileSync(filePath: string, content: string): void {
  const tmpPath = `${filePath}.tmp.${Date.now()}.${randomBytes(8).toString("hex")}`;
  try {
    // The write is inside the guarded region so a failed or partial write
    // does not leave the temp file behind.
    writeFileSync(tmpPath, content, { mode: 0o600 });
    renameSync(tmpPath, filePath);
  } catch (err) {
    try {
      unlinkSync(tmpPath);
    } catch {
      // Best-effort cleanup: the temp file may never have been created.
    }
    throw err;
  }
}
/** Exposes `setExtensionHostTtsAutoMode` (imported from ../extension-host/tts-preferences.js) as `setTtsAutoMode`. */
export const setTtsAutoMode = setExtensionHostTtsAutoMode;
/**
 * Read-modify-write helper for the preferences file.
 *
 * Loads the current preferences, lets `update` mutate them in place, ensures
 * the parent directory exists, and writes the result back as 2-space-indented JSON.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param update Callback that mutates the loaded preferences object.
 */
function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void): void {
  const current = readPrefs(prefsPath);
  update(current);
  const parentDir = path.dirname(prefsPath);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(current, null, 2);
  atomicWriteFileSync(prefsPath, serialized);
}
/**
 * Reports whether TTS is active, i.e. the effective auto mode is anything other than `"off"`.
 *
 * @param config Resolved TTS configuration.
 * @param prefsPath Path of the preferences JSON file.
 * @param sessionAuto Optional session-level mode string.
 */
export function isTtsEnabled(
  config: ResolvedTtsConfig,
  prefsPath: string,
  sessionAuto?: string,
): boolean {
  const effectiveMode = resolveTtsAutoMode({ config, prefsPath, sessionAuto });
  return effectiveMode !== "off";
}
/**
 * Persists `mode` as `tts.auto` and removes any `tts.enabled` key from the preferences file.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param mode Auto mode to store.
 */
export function setTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void {
  updatePrefs(prefsPath, (prefs) => {
    const updated: NonNullable<TtsUserPrefs["tts"]> = { ...prefs.tts, auto: mode };
    // `auto` is now the single source of truth, so the boolean flag is dropped.
    delete updated.enabled;
    prefs.tts = updated;
  });
}
/**
 * Boolean convenience wrapper: persists `"always"` when `enabled` is true, `"off"` otherwise.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param enabled Desired on/off state.
 */
export function setTtsEnabled(prefsPath: string, enabled: boolean): void {
  const mode: TtsAutoMode = enabled ? "always" : "off";
  setTtsAutoMode(prefsPath, mode);
}
/** Exposes `setExtensionHostTtsEnabled` (imported from ../extension-host/tts-preferences.js) as `setTtsEnabled`. */
export const setTtsEnabled = setExtensionHostTtsEnabled;
/** Exposes `resolveExtensionHostTtsProvider` as `getTtsProvider`. NOTE(review): its import is not visible in this excerpt — confirm the source module. */
export const getTtsProvider = resolveExtensionHostTtsProvider;
/**
 * Persists `provider` as `tts.provider`, keeping the other stored TTS fields.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param provider Provider to store.
 */
export function setTtsProvider(prefsPath: string, provider: TtsProvider): void {
  updatePrefs(prefsPath, (prefs) => {
    const merged = { ...prefs.tts, provider };
    prefs.tts = merged;
  });
}
/** Exposes `setExtensionHostTtsProvider` (imported from ../extension-host/tts-preferences.js) as `setTtsProvider`. */
export const setTtsProvider = setExtensionHostTtsProvider;
/**
 * Reads the stored `tts.maxLength`.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @returns The stored value, or `DEFAULT_TTS_MAX_LENGTH` when it is `null` or absent.
 */
export function getTtsMaxLength(prefsPath: string): number {
  const stored = readPrefs(prefsPath).tts?.maxLength;
  return stored ?? DEFAULT_TTS_MAX_LENGTH;
}
/** Exposes `getExtensionHostTtsMaxLength` (imported from ../extension-host/tts-preferences.js) as `getTtsMaxLength`. */
export const getTtsMaxLength = getExtensionHostTtsMaxLength;
/**
 * Persists `maxLength` as `tts.maxLength`, keeping the other stored TTS fields.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param maxLength Limit to store.
 */
export function setTtsMaxLength(prefsPath: string, maxLength: number): void {
  updatePrefs(prefsPath, (prefs) => {
    const merged = { ...prefs.tts, maxLength };
    prefs.tts = merged;
  });
}
/** Exposes `setExtensionHostTtsMaxLength` (imported from ../extension-host/tts-preferences.js) as `setTtsMaxLength`. */
export const setTtsMaxLength = setExtensionHostTtsMaxLength;
/**
 * Reads the stored `tts.summarize` toggle.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @returns The stored value, or `DEFAULT_TTS_SUMMARIZE` when it is `null` or absent.
 */
export function isSummarizationEnabled(prefsPath: string): boolean {
  const stored = readPrefs(prefsPath).tts?.summarize;
  return stored ?? DEFAULT_TTS_SUMMARIZE;
}
/** Exposes `isExtensionHostTtsSummarizationEnabled` (imported from ../extension-host/tts-preferences.js) as `isSummarizationEnabled`. */
export const isSummarizationEnabled = isExtensionHostTtsSummarizationEnabled;
/**
 * Persists `enabled` as `tts.summarize`, keeping the other stored TTS fields.
 *
 * @param prefsPath Path of the preferences JSON file.
 * @param enabled Whether summarization should be on.
 */
export function setSummarizationEnabled(prefsPath: string, enabled: boolean): void {
  updatePrefs(prefsPath, (prefs) => {
    const merged = { ...prefs.tts, summarize: enabled };
    prefs.tts = merged;
  });
}
/** Exposes `setExtensionHostTtsSummarizationEnabled` (imported from ../extension-host/tts-preferences.js) as `setSummarizationEnabled`. */
export const setSummarizationEnabled = setExtensionHostTtsSummarizationEnabled;
export function getLastTtsAttempt(): TtsStatusEntry | undefined {
return lastTtsAttempt;