feat(tts): add structured provider diagnostics and fallback attempt analytics (#57954)

* feat(tts): add structured fallback diagnostics and attempt analytics

* docs(tts): document attempt-detail and provider error diagnostics

* TTS: harden fallback loops and share error helpers

* TTS: bound provider error-body reads

* tts: add double-prefix regression test and clean baseline drift

* tests(tts): satisfy error narrowing in double-prefix regression

* changelog

Signed-off-by: joshavant <830519+joshavant@users.noreply.github.com>

---------

Signed-off-by: joshavant <830519+joshavant@users.noreply.github.com>
This commit is contained in:
Josh Avant 2026-03-30 22:55:28 -05:00 committed by GitHub
parent 329d4bf1a8
commit 44674525f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 825 additions and 32 deletions

View File

@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
- Matrix/history: add optional room history context for Matrix group triggers via `channels.matrix.historyLimit`, with per-agent watermarks and retry-safe snapshots so failed trigger retries do not drift into newer room messages. (#57022) thanks @chain710.
- Diffs: skip unused viewer-versus-file SSR preload work so `diffs` view-only and file-only runs do less render work while keeping mode outputs aligned. (#57909) thanks @gumadeiras.
- Matrix/threads: add per-DM `threadReplies` overrides and keep thread session isolation aligned with the effective room or DM thread policy from the triggering message onward. (#57995) thanks @teconomix.
- TTS: Add structured provider diagnostics and fallback attempt analytics. (#57954) Thanks @joshavant.
### Fixes
@ -122,6 +123,7 @@ Docs: https://docs.openclaw.ai
- Exec/env: block Python package index override variables from request-scoped host exec environment sanitization so package fetches cannot be redirected through a caller-supplied index. Thanks @nexrin and @vincentkoc.
- Telegram/audio: transcode Telegram voice-note `.ogg` attachments before the local `whisper-cli` auto fallback runs, and keep mention-preflight transcription enabled in auto mode when `tools.media.audio` is unset.
- Matrix/direct rooms: recover fresh auto-joined 1:1 DMs without eagerly persisting invite-only `m.direct` mappings, while keeping named, aliased, and explicitly configured rooms on the room path. (#58024) Thanks @gumadeiras.
- TTS: Restore 3.28 schema compatibility and fallback observability. (#57953) Thanks @joshavant.
## 2026.3.28

View File

@ -395,6 +395,8 @@ Notes:
- `/tts status` includes fallback visibility for the latest attempt:
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
- failure: `Error: ...` plus `Attempts: ...`
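- detailed diagnostics: `Attempt details: <provider>:<outcome>(<reasonCode>) <latency>ms`, one comma-separated entry per attempt (a successful attempt shows `ok` as its reason code; the latency is omitted when it was not measured)
- OpenAI and ElevenLabs API failures now include the parsed provider error detail and the request ID (when the provider returns one); both are surfaced in TTS errors and logs.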
## Agent tool

View File

@ -395,6 +395,8 @@ Notes:
- `/tts status` includes fallback visibility for the latest attempt:
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
- failure: `Error: ...` plus `Attempts: ...`
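- detailed diagnostics: `Attempt details: <provider>:<outcome>(<reasonCode>) <latency>ms`, one comma-separated entry per attempt (a successful attempt shows `ok` as its reason code; the latency is omitted when it was not measured)
- OpenAI and ElevenLabs API failures now include the parsed provider error detail and the request ID (when the provider returns one); both are surfaced in TTS errors and logs.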
## Agent tool

View File

@ -0,0 +1,133 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { elevenLabsTTS } from "./tts.js";
describe("elevenlabs tts diagnostics", () => {
const originalFetch = globalThis.fetch;
/**
 * Builds an error `Response` whose body is a pull-driven stream of identical
 * chunks, plus a counter telling how many chunks have been handed out so far.
 * Tests use the counter to check that a reader stopped before draining the body.
 */
function createStreamingErrorResponse(params: {
  status: number;
  chunkCount: number;
  chunkSize: number;
  byte: number;
}): { response: Response; getReadCount: () => number } {
  const { status, chunkCount, chunkSize, byte } = params;
  let emitted = 0;
  const body = new ReadableStream<Uint8Array>({
    pull(controller) {
      if (emitted < chunkCount) {
        emitted += 1;
        const chunk = new Uint8Array(chunkSize);
        chunk.fill(byte);
        controller.enqueue(chunk);
      } else {
        // All chunks delivered: end the stream.
        controller.close();
      }
    },
  });
  const response = new Response(body, { status });
  return { response, getReadCount: () => emitted };
}
// Each test replaces `globalThis.fetch`; put the captured original back and
// restore vitest mocks so tests do not leak into one another.
afterEach(() => {
globalThis.fetch = originalFetch;
vi.restoreAllMocks();
});
// A JSON error body with a nested `detail` object plus an `x-request-id` header
// must be rendered as "<message> [code=<status>] [request_id=<id>]".
it("includes parsed provider detail and request id for JSON API errors", async () => {
// Stubbed fetch: always answers 429 with a structured ElevenLabs-style error payload.
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
detail: {
message: "Quota exceeded",
status: "quota_exceeded",
},
}),
{
status: 429,
headers: {
"Content-Type": "application/json",
"x-request-id": "el_req_456",
},
},
),
);
globalThis.fetch = fetchMock as unknown as typeof fetch;
await expect(
elevenLabsTTS({
text: "hello",
apiKey: "test-key",
baseUrl: "https://api.elevenlabs.io",
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
outputFormat: "mp3_44100_128",
voiceSettings: {
stability: 0.5,
similarityBoost: 0.75,
style: 0,
useSpeakerBoost: true,
speed: 1.0,
},
timeoutMs: 5_000,
}),
).rejects.toThrow(
"ElevenLabs API error (429): Quota exceeded [code=quota_exceeded] [request_id=el_req_456]",
);
});
// A plain-text (non-JSON) error body must be appended verbatim after the status.
it("falls back to raw body text when the error body is non-JSON", async () => {
// Stubbed fetch: 503 with a body that JSON.parse would reject.
const fetchMock = vi.fn(async () => new Response("service unavailable", { status: 503 }));
globalThis.fetch = fetchMock as unknown as typeof fetch;
await expect(
elevenLabsTTS({
text: "hello",
apiKey: "test-key",
baseUrl: "https://api.elevenlabs.io",
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
outputFormat: "mp3_44100_128",
voiceSettings: {
stability: 0.5,
similarityBoost: 0.75,
style: 0,
useSpeakerBoost: true,
speed: 1.0,
},
timeoutMs: 5_000,
}),
).rejects.toThrow("ElevenLabs API error (503): service unavailable");
});
// The error-body reader must stop early: with 200 chunks of 1024 bytes on offer,
// fewer than 200 chunks may be pulled from the stream.
it("caps streamed non-JSON error reads instead of consuming full response bodies", async () => {
// 200 KiB of the byte 121 ("y"), delivered lazily one chunk per pull.
const streamed = createStreamingErrorResponse({
status: 503,
chunkCount: 200,
chunkSize: 1024,
byte: 121,
});
const fetchMock = vi.fn(async () => streamed.response);
globalThis.fetch = fetchMock as unknown as typeof fetch;
await expect(
elevenLabsTTS({
text: "hello",
apiKey: "test-key",
baseUrl: "https://api.elevenlabs.io",
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
outputFormat: "mp3_44100_128",
voiceSettings: {
stability: 0.5,
similarityBoost: 0.75,
style: 0,
useSpeakerBoost: true,
speed: 1.0,
},
timeoutMs: 5_000,
}),
).rejects.toThrow("ElevenLabs API error (503)");
// Only the prefix is asserted above; the read counter proves the body was not drained.
expect(streamed.getReadCount()).toBeLessThan(200);
});
});

View File

@ -1,8 +1,12 @@
import {
asObject,
normalizeApplyTextNormalization,
normalizeLanguageCode,
normalizeSeed,
readResponseTextLimited,
requireInRange,
trimToUndefined,
truncateErrorDetail,
} from "openclaw/plugin-sdk/speech";
const DEFAULT_ELEVENLABS_BASE_URL = "https://api.elevenlabs.io";
@ -19,6 +23,45 @@ function normalizeElevenLabsBaseUrl(baseUrl?: string): string {
return trimmed.replace(/\/+$/, "");
}
/**
 * Turns a parsed ElevenLabs error payload into a short human-readable summary.
 *
 * The message is taken from the first non-blank string among `message`,
 * `detail.message`, `detail.detail`, a string-valued `detail`, and `error`.
 * The code is taken from the first non-blank string among `code`,
 * `detail.code`, and `detail.status`.
 *
 * @param payload - The already JSON-parsed error body (any shape).
 * @returns `"<message> [code=<code>]"`, just the message, just `"[code=<code>]"`,
 *   or `undefined` when the payload is not an object or carries neither field.
 */
function formatElevenLabsErrorPayload(payload: unknown): string | undefined {
  const root = asObject(payload);
  if (!root) {
    return undefined;
  }
  const detailObject = asObject(root.detail);
  const message =
    trimToUndefined(root.message) ??
    trimToUndefined(detailObject?.message) ??
    trimToUndefined(detailObject?.detail) ??
    // `detail` may be a bare string (e.g. {"detail": "Not Found"}) instead of an
    // object; use it directly rather than falling back to the raw JSON body.
    trimToUndefined(root.detail) ??
    trimToUndefined(root.error);
  const code =
    trimToUndefined(root.code) ??
    trimToUndefined(detailObject?.code) ??
    trimToUndefined(detailObject?.status);
  if (message && code) {
    return `${truncateErrorDetail(message)} [code=${code}]`;
  }
  if (message) {
    return truncateErrorDetail(message);
  }
  if (code) {
    return `[code=${code}]`;
  }
  return undefined;
}
/**
 * Reads a bounded amount of the error response body and condenses it into a
 * detail string: the formatted JSON payload when the body parses and yields a
 * summary, otherwise the truncated raw text. Resolves to `undefined` when the
 * body is empty or whitespace only.
 */
async function extractElevenLabsErrorDetail(response: Response): Promise<string | undefined> {
  const bodyText = trimToUndefined(await readResponseTextLimited(response));
  if (bodyText === undefined) {
    return undefined;
  }
  let summary: string | undefined;
  try {
    summary = formatElevenLabsErrorPayload(JSON.parse(bodyText)) ?? truncateErrorDetail(bodyText);
  } catch {
    // Body is not valid JSON: report the raw text instead.
    summary = truncateErrorDetail(bodyText);
  }
  return summary;
}
function assertElevenLabsVoiceSettings(settings: {
stability: number;
similarityBoost: number;
@ -106,7 +149,15 @@ export async function elevenLabsTTS(params: {
});
if (!response.ok) {
throw new Error(`ElevenLabs API error (${response.status})`);
const detail = await extractElevenLabsErrorDetail(response);
const requestId =
trimToUndefined(response.headers.get("x-request-id")) ??
trimToUndefined(response.headers.get("request-id"));
throw new Error(
`ElevenLabs API error (${response.status})` +
(detail ? `: ${detail}` : "") +
(requestId ? ` [request_id=${requestId}]` : ""),
);
}
return Buffer.from(await response.arrayBuffer());

View File

@ -1,13 +1,21 @@
import { describe, expect, it } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
isValidOpenAIModel,
isValidOpenAIVoice,
OPENAI_TTS_MODELS,
OPENAI_TTS_VOICES,
openaiTTS,
resolveOpenAITtsInstructions,
} from "./tts.js";
describe("openai tts", () => {
// Capture the real fetch once so tests that stub `globalThis.fetch` can be undone.
const originalFetch = globalThis.fetch;
// Put the captured fetch back and restore vitest mocks after every test.
afterEach(() => {
globalThis.fetch = originalFetch;
vi.restoreAllMocks();
});
describe("isValidOpenAIVoice", () => {
it("accepts all valid OpenAI voices including newer additions", () => {
for (const voice of OPENAI_TTS_VOICES) {
@ -70,4 +78,110 @@ describe("openai tts", () => {
expect(resolveOpenAITtsInstructions("gpt-4o-mini-tts", " ")).toBeUndefined();
});
});
describe("openaiTTS diagnostics", () => {
/**
 * Creates an error `Response` backed by a lazily pulled stream of identical
 * chunks and exposes how many chunks have been produced, so a test can verify
 * that the consumer gave up before reading the whole body.
 */
function createStreamingErrorResponse(params: {
  status: number;
  chunkCount: number;
  chunkSize: number;
  byte: number;
}): { response: Response; getReadCount: () => number } {
  const { status, chunkCount, chunkSize, byte } = params;
  let produced = 0;
  const source = new ReadableStream<Uint8Array>({
    pull(controller) {
      if (produced < chunkCount) {
        produced += 1;
        const filled = new Uint8Array(chunkSize);
        filled.fill(byte);
        controller.enqueue(filled);
      } else {
        // Nothing left to hand out: close the stream.
        controller.close();
      }
    },
  });
  const response = new Response(source, { status });
  return { response, getReadCount: () => produced };
}
// A JSON body of the form { error: { message, type, code } } plus an
// `x-request-id` header must be rendered as
// "<message> [type=<type>, code=<code>] [request_id=<id>]".
it("includes parsed provider detail and request id for JSON API errors", async () => {
// Stubbed fetch: always answers 401 with a structured OpenAI-style error payload.
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
error: {
message: "Invalid API key",
type: "invalid_request_error",
code: "invalid_api_key",
},
}),
{
status: 401,
headers: {
"Content-Type": "application/json",
"x-request-id": "req_123",
},
},
),
);
globalThis.fetch = fetchMock as unknown as typeof fetch;
await expect(
openaiTTS({
text: "hello",
apiKey: "bad-key",
baseUrl: "https://api.openai.com/v1",
model: "gpt-4o-mini-tts",
voice: "alloy",
responseFormat: "mp3",
timeoutMs: 5_000,
}),
).rejects.toThrow(
"OpenAI TTS API error (401): Invalid API key [type=invalid_request_error, code=invalid_api_key] [request_id=req_123]",
);
});
// A plain-text (non-JSON) error body must be appended verbatim after the status.
it("falls back to raw body text when the error body is non-JSON", async () => {
// Stubbed fetch: 503 with a body that JSON.parse would reject.
const fetchMock = vi.fn(
async () => new Response("temporary upstream outage", { status: 503 }),
);
globalThis.fetch = fetchMock as unknown as typeof fetch;
await expect(
openaiTTS({
text: "hello",
apiKey: "test-key",
baseUrl: "https://api.openai.com/v1",
model: "gpt-4o-mini-tts",
voice: "alloy",
responseFormat: "mp3",
timeoutMs: 5_000,
}),
).rejects.toThrow("OpenAI TTS API error (503): temporary upstream outage");
});
// The error-body reader must stop early: with 200 chunks of 1024 bytes on offer,
// fewer than 200 chunks may be pulled from the stream.
it("caps streamed non-JSON error reads instead of consuming full response bodies", async () => {
// 200 KiB of the byte 120 ("x"), delivered lazily one chunk per pull.
const streamed = createStreamingErrorResponse({
status: 503,
chunkCount: 200,
chunkSize: 1024,
byte: 120,
});
const fetchMock = vi.fn(async () => streamed.response);
globalThis.fetch = fetchMock as unknown as typeof fetch;
await expect(
openaiTTS({
text: "hello",
apiKey: "test-key",
baseUrl: "https://api.openai.com/v1",
model: "gpt-4o-mini-tts",
voice: "alloy",
responseFormat: "mp3",
timeoutMs: 5_000,
}),
).rejects.toThrow("OpenAI TTS API error (503)");
// Only the prefix is asserted above; the read counter proves the body was not drained.
expect(streamed.getReadCount()).toBeLessThan(200);
});
});
});

View File

@ -1,3 +1,10 @@
import {
asObject,
readResponseTextLimited,
trimToUndefined,
truncateErrorDetail,
} from "openclaw/plugin-sdk/speech";
export const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
@ -58,6 +65,45 @@ export function resolveOpenAITtsInstructions(
return next && model.includes("gpt-4o-mini-tts") ? next : undefined;
}
/**
 * Summarizes a parsed OpenAI error payload.
 *
 * Fields are read from the nested `error` object when there is one, otherwise
 * from the payload itself. The result is `"<message> [type=…, code=…]"`, with
 * whichever parts are present; `undefined` when the payload is not an object
 * or none of the fields is a non-blank string.
 */
function formatOpenAiErrorPayload(payload: unknown): string | undefined {
  const root = asObject(payload);
  const subject = asObject(root?.error) ?? root;
  if (subject === undefined) {
    return undefined;
  }

  const message =
    trimToUndefined(subject.message) ??
    trimToUndefined(subject.detail) ??
    trimToUndefined(root?.message);

  // Collect the "key=value" tags that are actually present, in a fixed order.
  const tags: string[] = [];
  const type = trimToUndefined(subject.type);
  if (type) {
    tags.push(`type=${type}`);
  }
  const code = trimToUndefined(subject.code);
  if (code) {
    tags.push(`code=${code}`);
  }
  const suffix = tags.length > 0 ? `[${tags.join(", ")}]` : undefined;

  if (!message) {
    return suffix;
  }
  const summary = truncateErrorDetail(message);
  return suffix ? `${summary} ${suffix}` : summary;
}
/**
 * Reads a bounded amount of the error response body and condenses it into a
 * detail string: the formatted JSON payload when the body parses and yields a
 * summary, otherwise the truncated raw text. Resolves to `undefined` when the
 * body is empty or whitespace only.
 */
async function extractOpenAiErrorDetail(response: Response): Promise<string | undefined> {
  const bodyText = trimToUndefined(await readResponseTextLimited(response));
  if (bodyText === undefined) {
    return undefined;
  }
  let summary: string | undefined;
  try {
    summary = formatOpenAiErrorPayload(JSON.parse(bodyText)) ?? truncateErrorDetail(bodyText);
  } catch {
    // Body is not valid JSON: report the raw text instead.
    summary = truncateErrorDetail(bodyText);
  }
  return summary;
}
export async function openaiTTS(params: {
text: string;
apiKey: string;
@ -102,7 +148,15 @@ export async function openaiTTS(params: {
});
if (!response.ok) {
throw new Error(`OpenAI TTS API error (${response.status})`);
const detail = await extractOpenAiErrorDetail(response);
const requestId =
trimToUndefined(response.headers.get("x-request-id")) ??
trimToUndefined(response.headers.get("request-id"));
throw new Error(
`OpenAI TTS API error (${response.status})` +
(detail ? `: ${detail}` : "") +
(requestId ? ` [request_id=${requestId}]` : ""),
);
}
return Buffer.from(await response.arrayBuffer());

View File

@ -74,6 +74,22 @@ type TtsUserPrefs = {
export type ResolvedTtsModelOverrides = SpeechModelOverridePolicy;
/**
 * Machine-readable reason attached to each provider attempt.
 *
 * - `success`: the provider produced audio.
 * - `no_provider_registered`, `not_configured`, `unsupported_for_telephony`:
 *   the provider was skipped before synthesis was attempted.
 * - `timeout`: the provider threw an `Error` whose `name` is `"AbortError"`.
 * - `provider_error`: the provider threw anything else.
 */
export type TtsAttemptReasonCode =
| "success"
| "no_provider_registered"
| "not_configured"
| "unsupported_for_telephony"
| "timeout"
| "provider_error";
/**
 * One entry in the per-request provider attempt log.
 * `latencyMs` is recorded for attempts that actually ran (success or failure)
 * and left unset for skipped providers; `error` carries the skip or failure
 * message and is unset on success.
 */
export type TtsProviderAttempt = {
provider: string;
outcome: "success" | "skipped" | "failed";
reasonCode: TtsAttemptReasonCode;
latencyMs?: number;
error?: string;
};
export type TtsResult = {
success: boolean;
audioPath?: string;
@ -82,6 +98,7 @@ export type TtsResult = {
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
voiceCompatible?: boolean;
};
@ -94,6 +111,7 @@ export type TtsSynthesisResult = {
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
voiceCompatible?: boolean;
fileExtension?: string;
@ -107,6 +125,7 @@ export type TtsTelephonyResult = {
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
sampleRate?: number;
};
@ -119,6 +138,7 @@ type TtsStatusEntry = {
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
latencyMs?: number;
error?: string;
};
@ -556,25 +576,46 @@ function sanitizeTtsErrorForLog(err: unknown): string {
function buildTtsFailureResult(
errors: string[],
attemptedProviders?: string[],
): { success: false; error: string; attemptedProviders?: string[] } {
attempts?: TtsProviderAttempt[],
): {
success: false;
error: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
} {
return {
success: false,
error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`,
attemptedProviders,
attempts,
};
}
/**
 * Result of checking whether a provider can be used for a request.
 * `ready` carries the resolved provider together with its resolved config;
 * `skip` carries the reason code and a human-readable message explaining why
 * the provider was passed over.
 */
type TtsProviderReadyResolution =
| {
kind: "ready";
provider: NonNullable<ReturnType<typeof getSpeechProvider>>;
providerConfig: SpeechProviderConfig;
}
| {
kind: "skip";
reasonCode: "no_provider_registered" | "not_configured" | "unsupported_for_telephony";
message: string;
};
function resolveReadySpeechProvider(params: {
provider: TtsProvider;
cfg: OpenClawConfig;
config: ResolvedTtsConfig;
errors: string[];
requireTelephony?: boolean;
}): NonNullable<ReturnType<typeof getSpeechProvider>> | null {
}): TtsProviderReadyResolution {
const resolvedProvider = getSpeechProvider(params.provider, params.cfg);
if (!resolvedProvider) {
params.errors.push(`${params.provider}: no provider registered`);
return null;
return {
kind: "skip",
reasonCode: "no_provider_registered",
message: `${params.provider}: no provider registered`,
};
}
const providerConfig = getResolvedSpeechProviderConfig(
params.config,
@ -588,14 +629,24 @@ function resolveReadySpeechProvider(params: {
timeoutMs: params.config.timeoutMs,
})
) {
params.errors.push(`${params.provider}: not configured`);
return null;
return {
kind: "skip",
reasonCode: "not_configured",
message: `${params.provider}: not configured`,
};
}
if (params.requireTelephony && !resolvedProvider.synthesizeTelephony) {
params.errors.push(`${params.provider}: unsupported for telephony`);
return null;
return {
kind: "skip",
reasonCode: "unsupported_for_telephony",
message: `${params.provider}: unsupported for telephony`,
};
}
return resolvedProvider;
return {
kind: "ready",
provider: resolvedProvider,
providerConfig,
};
}
function resolveTtsRequestSetup(params: {
@ -639,10 +690,12 @@ export async function textToSpeech(params: {
}): Promise<TtsResult> {
const synthesis = await synthesizeSpeech(params);
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
return buildTtsFailureResult(
[synthesis.error ?? "TTS conversion failed"],
synthesis.attemptedProviders,
);
return {
success: false,
error: synthesis.error ?? "TTS conversion failed",
attemptedProviders: synthesis.attemptedProviders,
attempts: synthesis.attempts,
};
}
const tempRoot = resolvePreferredOpenClawTmpDir();
@ -659,6 +712,7 @@ export async function textToSpeech(params: {
provider: synthesis.provider,
fallbackFrom: synthesis.fallbackFrom,
attemptedProviders: synthesis.attemptedProviders,
attempts: synthesis.attempts,
outputFormat: synthesis.outputFormat,
voiceCompatible: synthesis.voiceCompatible,
};
@ -689,6 +743,7 @@ export async function synthesizeSpeech(params: {
const errors: string[] = [];
const attemptedProviders: string[] = [];
const attempts: TtsProviderAttempt[] = [];
const primaryProvider = providers[0];
logVerbose(
`TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
@ -702,34 +757,57 @@ export async function synthesizeSpeech(params: {
provider,
cfg: params.cfg,
config,
errors,
});
if (!resolvedProvider) {
logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
if (resolvedProvider.kind === "skip") {
errors.push(resolvedProvider.message);
attempts.push({
provider,
outcome: "skipped",
reasonCode: resolvedProvider.reasonCode,
error: resolvedProvider.message,
});
logVerbose(`TTS: provider ${provider} skipped (${resolvedProvider.message})`);
continue;
}
const synthesis = await resolvedProvider.synthesize({
const synthesis = await resolvedProvider.provider.synthesize({
text: params.text,
cfg: params.cfg,
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
providerConfig: resolvedProvider.providerConfig,
target,
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.id],
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
timeoutMs: config.timeoutMs,
});
const latencyMs = Date.now() - providerStart;
attempts.push({
provider,
outcome: "success",
reasonCode: "success",
latencyMs,
});
return {
success: true,
audioBuffer: synthesis.audioBuffer,
latencyMs: Date.now() - providerStart,
latencyMs,
provider,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
attempts,
outputFormat: synthesis.outputFormat,
voiceCompatible: synthesis.voiceCompatible,
fileExtension: synthesis.fileExtension,
};
} catch (err) {
const errorMsg = formatTtsProviderError(provider, err);
const latencyMs = Date.now() - providerStart;
errors.push(errorMsg);
attempts.push({
provider,
outcome: "failed",
reasonCode:
err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
latencyMs,
error: errorMsg,
});
const rawError = sanitizeTtsErrorForLog(err);
if (provider === primaryProvider) {
const hasFallbacks = providers.length > 1;
@ -742,7 +820,7 @@ export async function synthesizeSpeech(params: {
}
}
return buildTtsFailureResult(errors, attemptedProviders);
return buildTtsFailureResult(errors, attemptedProviders, attempts);
}
export async function textToSpeechTelephony(params: {
@ -762,7 +840,11 @@ export async function textToSpeechTelephony(params: {
const { config, providers } = setup;
const errors: string[] = [];
const attemptedProviders: string[] = [];
const attempts: TtsProviderAttempt[] = [];
const primaryProvider = providers[0];
logVerbose(
`TTS telephony: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
);
for (const provider of providers) {
attemptedProviders.push(provider);
@ -772,35 +854,72 @@ export async function textToSpeechTelephony(params: {
provider,
cfg: params.cfg,
config,
errors,
requireTelephony: true,
});
if (!resolvedProvider?.synthesizeTelephony) {
if (resolvedProvider.kind === "skip") {
errors.push(resolvedProvider.message);
attempts.push({
provider,
outcome: "skipped",
reasonCode: resolvedProvider.reasonCode,
error: resolvedProvider.message,
});
logVerbose(`TTS telephony: provider ${provider} skipped (${resolvedProvider.message})`);
continue;
}
const synthesis = await resolvedProvider.synthesizeTelephony({
const synthesizeTelephony = resolvedProvider.provider.synthesizeTelephony as NonNullable<
typeof resolvedProvider.provider.synthesizeTelephony
>;
const synthesis = await synthesizeTelephony({
text: params.text,
cfg: params.cfg,
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
providerConfig: resolvedProvider.providerConfig,
timeoutMs: config.timeoutMs,
});
const latencyMs = Date.now() - providerStart;
attempts.push({
provider,
outcome: "success",
reasonCode: "success",
latencyMs,
});
return {
success: true,
audioBuffer: synthesis.audioBuffer,
latencyMs: Date.now() - providerStart,
latencyMs,
provider,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
attempts,
outputFormat: synthesis.outputFormat,
sampleRate: synthesis.sampleRate,
};
} catch (err) {
errors.push(formatTtsProviderError(provider, err));
const errorMsg = formatTtsProviderError(provider, err);
const latencyMs = Date.now() - providerStart;
errors.push(errorMsg);
attempts.push({
provider,
outcome: "failed",
reasonCode:
err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
latencyMs,
error: errorMsg,
});
const rawError = sanitizeTtsErrorForLog(err);
if (provider === primaryProvider) {
const hasFallbacks = providers.length > 1;
logVerbose(
`TTS telephony: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
);
} else {
logVerbose(`TTS telephony: ${provider} failed (${rawError}); trying next provider.`);
}
}
}
return buildTtsFailureResult(errors, attemptedProviders);
return buildTtsFailureResult(errors, attemptedProviders, attempts);
}
export async function listSpeechVoices(params: {
@ -969,6 +1088,7 @@ export async function maybeApplyTtsToPayload(params: {
provider: result.provider,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
latencyMs: result.latencyMs,
};
@ -988,6 +1108,7 @@ export async function maybeApplyTtsToPayload(params: {
textLength: text.length,
summarized: wasSummarized,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
error: result.error,
};

View File

@ -65,6 +65,20 @@ describe("handleTtsCommands status fallback reporting", () => {
provider: "microsoft",
fallbackFrom: "elevenlabs",
attemptedProviders: ["elevenlabs", "microsoft"],
attempts: [
{
provider: "elevenlabs",
outcome: "failed",
reasonCode: "provider_error",
latencyMs: 73,
},
{
provider: "microsoft",
outcome: "success",
reasonCode: "success",
latencyMs: 420,
},
],
latencyMs: 420,
});
@ -72,6 +86,9 @@ describe("handleTtsCommands status fallback reporting", () => {
expect(result?.shouldContinue).toBe(false);
expect(result?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
expect(result?.reply?.text).toContain(
"Attempt details: elevenlabs:failed(provider_error) 73ms, microsoft:success(ok) 420ms",
);
});
it("shows attempted provider chain for failed attempts", async () => {
@ -82,6 +99,14 @@ describe("handleTtsCommands status fallback reporting", () => {
summarized: false,
error: "TTS conversion failed",
attemptedProviders: ["elevenlabs", "microsoft"],
attempts: [
{
provider: "elevenlabs",
outcome: "failed",
reasonCode: "timeout",
latencyMs: 999,
},
],
latencyMs: 420,
});
@ -89,6 +114,7 @@ describe("handleTtsCommands status fallback reporting", () => {
expect(result?.shouldContinue).toBe(false);
expect(result?.reply?.text).toContain("Error: TTS conversion failed");
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
expect(result?.reply?.text).toContain("Attempt details: elevenlabs:failed(timeout) 999ms");
});
it("persists fallback metadata from /tts audio and renders it in /tts status", async () => {
@ -103,6 +129,20 @@ describe("handleTtsCommands status fallback reporting", () => {
provider: "microsoft",
fallbackFrom: "elevenlabs",
attemptedProviders: ["elevenlabs", "microsoft"],
attempts: [
{
provider: "elevenlabs",
outcome: "failed",
reasonCode: "provider_error",
latencyMs: 65,
},
{
provider: "microsoft",
outcome: "success",
reasonCode: "success",
latencyMs: 175,
},
],
latencyMs: 175,
voiceCompatible: true,
});
@ -116,5 +156,8 @@ describe("handleTtsCommands status fallback reporting", () => {
expect(statusResult?.reply?.text).toContain("Provider: microsoft");
expect(statusResult?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
expect(statusResult?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
expect(statusResult?.reply?.text).toContain(
"Attempt details: elevenlabs:failed(provider_error) 65ms, microsoft:success(ok) 175ms",
);
});
});

View File

@ -29,6 +29,10 @@ type ParsedTtsCommand = {
args: string;
};
/**
 * Element type of the `attempts` array on the value returned by
 * `getLastTtsAttempt`, derived from that function's return type rather than
 * imported by name.
 */
type TtsAttemptDetail = NonNullable<
NonNullable<ReturnType<typeof getLastTtsAttempt>>["attempts"]
>[number];
function parseTtsCommand(normalized: string): ParsedTtsCommand | null {
// Accept `/tts` and `/tts <action> [args]` as a single control surface.
if (normalized === "/tts") {
@ -45,6 +49,19 @@ function parseTtsCommand(normalized: string): ParsedTtsCommand | null {
return { action: action.toLowerCase(), args: tail.join(" ").trim() };
}
/**
 * Renders the attempt log as a comma-separated list of
 * `provider:outcome(reason)` entries, each followed by ` <n>ms` when a finite
 * latency was recorded. The `success` reason code is shown as `ok`.
 * Returns `undefined` when there are no attempts to show.
 */
function formatAttemptDetails(attempts: TtsAttemptDetail[] | undefined): string | undefined {
  if (!attempts?.length) {
    return undefined;
  }
  const rendered: string[] = [];
  for (const entry of attempts) {
    const reasonLabel = entry.reasonCode === "success" ? "ok" : entry.reasonCode;
    let line = `${entry.provider}:${entry.outcome}(${reasonLabel})`;
    if (Number.isFinite(entry.latencyMs)) {
      line += ` ${entry.latencyMs}ms`;
    }
    rendered.push(line);
  }
  return rendered.join(", ");
}
function ttsUsage(): ReplyPayload {
// Keep usage in one place so help/validation stays consistent.
return {
@ -137,6 +154,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
provider: result.provider,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
latencyMs: result.latencyMs,
});
const payload: ReplyPayload = {
@ -153,6 +171,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
textLength: args.length,
summarized: false,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
error: result.error,
latencyMs: Date.now() - start,
});
@ -294,12 +313,20 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
if (last.attemptedProviders && last.attemptedProviders.length > 1) {
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
}
const details = formatAttemptDetails(last.attempts);
if (details) {
lines.push(`Attempt details: ${details}`);
}
lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
} else if (last.error) {
lines.push(`Error: ${last.error}`);
if (last.attemptedProviders && last.attemptedProviders.length > 0) {
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
}
const details = formatAttemptDetails(last.attempts);
if (details) {
lines.push(`Attempt details: ${details}`);
}
}
}
return { shouldContinue: false, reply: { text: lines.join("\n") } };

View File

@ -38,3 +38,9 @@ export {
normalizeSpeechProviderId,
} from "../tts/provider-registry.js";
export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js";
export {
asObject,
readResponseTextLimited,
trimToUndefined,
truncateErrorDetail,
} from "../tts/provider-error-utils.js";

View File

@ -680,6 +680,182 @@ describe("tts", () => {
});
});
describe("fallback readiness errors", () => {
// If the primary provider's readiness check (`isConfigured`) throws, synthesis
// must record that as a failed attempt and still succeed via the next provider.
it("continues synthesize fallback when primary readiness checks throw", async () => {
// Primary: throws from `isConfigured`; its `synthesize` must never be reached.
const throwingPrimary: SpeechProviderPlugin = {
id: "openai",
label: "OpenAI",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => {
throw new Error("Authorization: Bearer sk-readiness-throw-token-1234567890\nboom");
},
synthesize: async () => {
throw new Error("unexpected synthesize call");
},
};
// Fallback: always configured and returns a small audio buffer.
const fallback: SpeechProviderPlugin = {
id: "microsoft",
label: "Microsoft",
autoSelectOrder: 20,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: true,
}),
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "openai", provider: throwingPrimary, source: "test" },
{ pluginId: "microsoft", provider: fallback, source: "test" },
];
const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
setActivePluginRegistry(registry, cacheKey);
const result = await tts.synthesizeSpeech({
text: "hello fallback",
cfg: {
messages: {
tts: {
provider: "openai",
},
},
},
});
expect(result.success).toBe(true);
// Explicit throw doubles as a type-narrowing guard for the assertions below.
if (!result.success) {
throw new Error("expected fallback synthesis success");
}
expect(result.provider).toBe("microsoft");
expect(result.fallbackFrom).toBe("openai");
expect(result.attemptedProviders).toEqual(["openai", "microsoft"]);
// The thrown readiness check is reported as a failed attempt, not a skip.
expect(result.attempts?.[0]).toMatchObject({
provider: "openai",
outcome: "failed",
reasonCode: "provider_error",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "microsoft",
outcome: "success",
reasonCode: "success",
});
});
// Telephony counterpart of the test above: a throwing readiness check on the
// primary must not prevent the telephony-capable fallback from being used.
it("continues telephony fallback when primary readiness checks throw", async () => {
// Primary: throws from `isConfigured` and offers no `synthesizeTelephony`.
const throwingPrimary: SpeechProviderPlugin = {
id: "primary-throws",
label: "PrimaryThrows",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => {
throw new Error("Authorization: Bearer sk-telephony-throw-token-1234567890\tboom");
},
synthesize: async () => {
throw new Error("unexpected synthesize call");
},
};
// Fallback: configured and implements `synthesizeTelephony`.
const fallback: SpeechProviderPlugin = {
id: "microsoft",
label: "Microsoft",
autoSelectOrder: 20,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: true,
}),
synthesizeTelephony: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
sampleRate: 24000,
}),
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "primary-throws", provider: throwingPrimary, source: "test" },
{ pluginId: "microsoft", provider: fallback, source: "test" },
];
const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
setActivePluginRegistry(registry, cacheKey);
const result = await tts.textToSpeechTelephony({
text: "hello telephony fallback",
cfg: {
messages: {
tts: {
provider: "primary-throws",
},
},
},
});
expect(result.success).toBe(true);
// Explicit throw doubles as a type-narrowing guard for the assertions below.
if (!result.success) {
throw new Error("expected telephony fallback success");
}
expect(result.provider).toBe("microsoft");
expect(result.fallbackFrom).toBe("primary-throws");
expect(result.attemptedProviders).toEqual(["primary-throws", "microsoft"]);
// The thrown readiness check is reported as a failed attempt, not a skip.
expect(result.attempts?.[0]).toMatchObject({
provider: "primary-throws",
outcome: "failed",
reasonCode: "provider_error",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "microsoft",
outcome: "success",
reasonCode: "success",
});
});
// Regression test: `textToSpeech` must pass the synthesis error through as-is,
// so the "TTS conversion failed:" prefix appears exactly once.
it("does not double-prefix textToSpeech failure messages", async () => {
// Single configured provider whose synthesis always fails.
const failingProvider: SpeechProviderPlugin = {
id: "openai",
label: "OpenAI",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => {
throw new Error("provider failed");
},
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "openai", provider: failingProvider, source: "test" },
];
const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
setActivePluginRegistry(registry, cacheKey);
const result = await tts.textToSpeech({
text: "hello",
cfg: {
messages: {
tts: {
provider: "openai",
},
},
},
disableFallback: true,
});
expect(result.success).toBe(false);
// Explicit throw doubles as a type-narrowing guard for the assertions below.
if (result.success) {
throw new Error("expected synthesis failure");
}
expect(result.error).toBeDefined();
const errorMessage = result.error ?? "";
expect(errorMessage).toBe("TTS conversion failed: openai: provider failed");
expect(errorMessage).not.toContain("TTS conversion failed: TTS conversion failed:");
expect(errorMessage.match(/TTS conversion failed:/g)).toHaveLength(1);
});
});
describe("resolveTtsConfig openai.baseUrl", () => {
const baseCfg: OpenClawConfig = {
agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },

View File

@ -0,0 +1,62 @@
/**
 * Returns the trimmed string when `value` is a string with non-whitespace
 * content; returns `undefined` for blank strings and for any non-string value.
 */
export function trimToUndefined(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === "" ? undefined : trimmed;
}
/**
 * Narrows an unknown value to a plain key/value record.
 * Returns the same reference for non-null, non-array objects and `undefined`
 * for everything else (primitives, `null`, arrays).
 */
export function asObject(value: unknown): Record<string, unknown> | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
/**
 * Shortens an error detail string to at most `limit` UTF-16 code units.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut and
 * end with a single "…" so readers can tell the text was truncated; the
 * ellipsis counts toward the limit.
 *
 * @param detail - Text to shorten.
 * @param limit - Maximum length of the result (default 220). Fractions are
 *   rounded down; values below 1 (or NaN) yield an empty string.
 * @returns The original text, or a truncated copy ending in "…".
 */
export function truncateErrorDetail(detail: string, limit = 220): string {
  const max = Math.floor(limit);
  if (!(max >= 1)) {
    // A non-positive limit would otherwise become a negative slice index and
    // return almost the entire string.
    return "";
  }
  if (detail.length <= max) {
    return detail;
  }
  let end = max - 1;
  if (end > 0) {
    // Do not leave a dangling high surrogate at the cut point.
    const last = detail.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) {
      end -= 1;
    }
  }
  return `${detail.slice(0, end)}…`;
}
/**
 * Reads at most `limitBytes` bytes of a response body and decodes them as UTF-8.
 *
 * Reading stops as soon as the cap is reached and the rest of the body is
 * cancelled instead of being downloaded. If the cap falls in the middle of a
 * multi-byte character, the incomplete character is dropped rather than
 * replaced with U+FFFD.
 *
 * @param response - Response whose body should be read; its body is consumed.
 * @param limitBytes - Maximum number of bytes to read (default 16 KiB).
 *   Fractions are rounded down; values below 1 (or NaN) read nothing.
 * @returns The decoded text, or "" when there is no body or the limit is
 *   non-positive.
 * @throws Whatever the underlying stream read rejects with; the stream is
 *   cancelled before the error propagates.
 */
export async function readResponseTextLimited(
  response: Response,
  limitBytes = 16 * 1024,
): Promise<string> {
  // Whole bytes only: a fractional remainder could never be satisfied and
  // would let the loop drain the entire body.
  const limit = Math.floor(limitBytes);
  if (!(limit > 0)) {
    return "";
  }
  const reader = response.body?.getReader();
  if (!reader) {
    return "";
  }

  const decoder = new TextDecoder();
  let total = 0;
  let text = "";
  let completed = false;
  try {
    while (total < limit) {
      const { value, done } = await reader.read();
      if (done) {
        completed = true;
        break;
      }
      if (!value || value.byteLength === 0) {
        continue;
      }
      const remaining = limit - total;
      const chunk = value.byteLength > remaining ? value.subarray(0, remaining) : value;
      total += chunk.byteLength;
      text += decoder.decode(chunk, { stream: true });
    }
    if (completed) {
      // Flush only when the body ended naturally; after truncation any
      // buffered bytes are an incomplete character and are discarded.
      text += decoder.decode();
    }
  } finally {
    if (!completed) {
      // Cap reached or read failed: stop the transfer and release the body.
      await reader.cancel().catch(() => {});
    }
  }
  return text;
}