mirror of https://github.com/openclaw/openclaw.git
feat(tts): add structured provider diagnostics and fallback attempt analytics (#57954)
* feat(tts): add structured fallback diagnostics and attempt analytics * docs(tts): document attempt-detail and provider error diagnostics * TTS: harden fallback loops and share error helpers * TTS: bound provider error-body reads * tts: add double-prefix regression test and clean baseline drift * tests(tts): satisfy error narrowing in double-prefix regression * changelog Signed-off-by: joshavant <830519+joshavant@users.noreply.github.com> --------- Signed-off-by: joshavant <830519+joshavant@users.noreply.github.com>
This commit is contained in:
parent
329d4bf1a8
commit
44674525f2
|
|
@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Matrix/history: add optional room history context for Matrix group triggers via `channels.matrix.historyLimit`, with per-agent watermarks and retry-safe snapshots so failed trigger retries do not drift into newer room messages. (#57022) thanks @chain710.
|
||||
- Diffs: skip unused viewer-versus-file SSR preload work so `diffs` view-only and file-only runs do less render work while keeping mode outputs aligned. (#57909) thanks @gumadeiras.
|
||||
- Matrix/threads: add per-DM `threadReplies` overrides and keep thread session isolation aligned with the effective room or DM thread policy from the triggering message onward. (#57995) thanks @teconomix.
|
||||
- TTS: Add structured provider diagnostics and fallback attempt analytics. (#57954) Thanks @joshavant.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
|
@ -122,6 +123,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Exec/env: block Python package index override variables from request-scoped host exec environment sanitization so package fetches cannot be redirected through a caller-supplied index. Thanks @nexrin and @vincentkoc.
|
||||
- Telegram/audio: transcode Telegram voice-note `.ogg` attachments before the local `whisper-cli` auto fallback runs, and keep mention-preflight transcription enabled in auto mode when `tools.media.audio` is unset.
|
||||
- Matrix/direct rooms: recover fresh auto-joined 1:1 DMs without eagerly persisting invite-only `m.direct` mappings, while keeping named, aliased, and explicitly configured rooms on the room path. (#58024) Thanks @gumadeiras.
|
||||
- TTS: Restore 3.28 schema compatibility and fallback observability. (#57953) Thanks @joshavant.
|
||||
|
||||
## 2026.3.28
|
||||
|
||||
|
|
|
|||
|
|
@ -395,6 +395,8 @@ Notes:
|
|||
- `/tts status` includes fallback visibility for the latest attempt:
|
||||
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
|
||||
- failure: `Error: ...` plus `Attempts: ...`
|
||||
- detailed diagnostics: `Attempt details: provider:outcome(reasonCode) latency`
|
||||
- OpenAI and ElevenLabs API failures now include parsed provider error detail and request id (when returned by the provider), which is surfaced in TTS errors/logs.
|
||||
|
||||
## Agent tool
|
||||
|
||||
|
|
|
|||
|
|
@ -395,6 +395,8 @@ Notes:
|
|||
- `/tts status` includes fallback visibility for the latest attempt:
|
||||
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
|
||||
- failure: `Error: ...` plus `Attempts: ...`
|
||||
- detailed diagnostics: `Attempt details: provider:outcome(reasonCode) latency`
|
||||
- OpenAI and ElevenLabs API failures now include parsed provider error detail and request id (when returned by the provider), which is surfaced in TTS errors/logs.
|
||||
|
||||
## Agent tool
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,133 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { elevenLabsTTS } from "./tts.js";
|
||||
|
||||
describe("elevenlabs tts diagnostics", () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
function createStreamingErrorResponse(params: {
|
||||
status: number;
|
||||
chunkCount: number;
|
||||
chunkSize: number;
|
||||
byte: number;
|
||||
}): { response: Response; getReadCount: () => number } {
|
||||
let reads = 0;
|
||||
const stream = new ReadableStream<Uint8Array>({
|
||||
pull(controller) {
|
||||
if (reads >= params.chunkCount) {
|
||||
controller.close();
|
||||
return;
|
||||
}
|
||||
reads += 1;
|
||||
controller.enqueue(new Uint8Array(params.chunkSize).fill(params.byte));
|
||||
},
|
||||
});
|
||||
return {
|
||||
response: new Response(stream, { status: params.status }),
|
||||
getReadCount: () => reads,
|
||||
};
|
||||
}
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("includes parsed provider detail and request id for JSON API errors", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
detail: {
|
||||
message: "Quota exceeded",
|
||||
status: "quota_exceeded",
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 429,
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"x-request-id": "el_req_456",
|
||||
},
|
||||
},
|
||||
),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
elevenLabsTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://api.elevenlabs.io",
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
outputFormat: "mp3_44100_128",
|
||||
voiceSettings: {
|
||||
stability: 0.5,
|
||||
similarityBoost: 0.75,
|
||||
style: 0,
|
||||
useSpeakerBoost: true,
|
||||
speed: 1.0,
|
||||
},
|
||||
timeoutMs: 5_000,
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
"ElevenLabs API error (429): Quota exceeded [code=quota_exceeded] [request_id=el_req_456]",
|
||||
);
|
||||
});
|
||||
|
||||
it("falls back to raw body text when the error body is non-JSON", async () => {
|
||||
const fetchMock = vi.fn(async () => new Response("service unavailable", { status: 503 }));
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
elevenLabsTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://api.elevenlabs.io",
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
outputFormat: "mp3_44100_128",
|
||||
voiceSettings: {
|
||||
stability: 0.5,
|
||||
similarityBoost: 0.75,
|
||||
style: 0,
|
||||
useSpeakerBoost: true,
|
||||
speed: 1.0,
|
||||
},
|
||||
timeoutMs: 5_000,
|
||||
}),
|
||||
).rejects.toThrow("ElevenLabs API error (503): service unavailable");
|
||||
});
|
||||
|
||||
it("caps streamed non-JSON error reads instead of consuming full response bodies", async () => {
|
||||
const streamed = createStreamingErrorResponse({
|
||||
status: 503,
|
||||
chunkCount: 200,
|
||||
chunkSize: 1024,
|
||||
byte: 121,
|
||||
});
|
||||
const fetchMock = vi.fn(async () => streamed.response);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
elevenLabsTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://api.elevenlabs.io",
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
outputFormat: "mp3_44100_128",
|
||||
voiceSettings: {
|
||||
stability: 0.5,
|
||||
similarityBoost: 0.75,
|
||||
style: 0,
|
||||
useSpeakerBoost: true,
|
||||
speed: 1.0,
|
||||
},
|
||||
timeoutMs: 5_000,
|
||||
}),
|
||||
).rejects.toThrow("ElevenLabs API error (503)");
|
||||
|
||||
expect(streamed.getReadCount()).toBeLessThan(200);
|
||||
});
|
||||
});
|
||||
|
|
@ -1,8 +1,12 @@
|
|||
import {
|
||||
asObject,
|
||||
normalizeApplyTextNormalization,
|
||||
normalizeLanguageCode,
|
||||
normalizeSeed,
|
||||
readResponseTextLimited,
|
||||
requireInRange,
|
||||
trimToUndefined,
|
||||
truncateErrorDetail,
|
||||
} from "openclaw/plugin-sdk/speech";
|
||||
|
||||
const DEFAULT_ELEVENLABS_BASE_URL = "https://api.elevenlabs.io";
|
||||
|
|
@ -19,6 +23,45 @@ function normalizeElevenLabsBaseUrl(baseUrl?: string): string {
|
|||
return trimmed.replace(/\/+$/, "");
|
||||
}
|
||||
|
||||
function formatElevenLabsErrorPayload(payload: unknown): string | undefined {
|
||||
const root = asObject(payload);
|
||||
if (!root) {
|
||||
return undefined;
|
||||
}
|
||||
const detailObject = asObject(root.detail);
|
||||
const message =
|
||||
trimToUndefined(root.message) ??
|
||||
trimToUndefined(detailObject?.message) ??
|
||||
trimToUndefined(detailObject?.detail) ??
|
||||
trimToUndefined(root.error);
|
||||
const code =
|
||||
trimToUndefined(root.code) ??
|
||||
trimToUndefined(detailObject?.code) ??
|
||||
trimToUndefined(detailObject?.status);
|
||||
if (message && code) {
|
||||
return `${truncateErrorDetail(message)} [code=${code}]`;
|
||||
}
|
||||
if (message) {
|
||||
return truncateErrorDetail(message);
|
||||
}
|
||||
if (code) {
|
||||
return `[code=${code}]`;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async function extractElevenLabsErrorDetail(response: Response): Promise<string | undefined> {
|
||||
const rawBody = trimToUndefined(await readResponseTextLimited(response));
|
||||
if (!rawBody) {
|
||||
return undefined;
|
||||
}
|
||||
try {
|
||||
return formatElevenLabsErrorPayload(JSON.parse(rawBody)) ?? truncateErrorDetail(rawBody);
|
||||
} catch {
|
||||
return truncateErrorDetail(rawBody);
|
||||
}
|
||||
}
|
||||
|
||||
function assertElevenLabsVoiceSettings(settings: {
|
||||
stability: number;
|
||||
similarityBoost: number;
|
||||
|
|
@ -106,7 +149,15 @@ export async function elevenLabsTTS(params: {
|
|||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`ElevenLabs API error (${response.status})`);
|
||||
const detail = await extractElevenLabsErrorDetail(response);
|
||||
const requestId =
|
||||
trimToUndefined(response.headers.get("x-request-id")) ??
|
||||
trimToUndefined(response.headers.get("request-id"));
|
||||
throw new Error(
|
||||
`ElevenLabs API error (${response.status})` +
|
||||
(detail ? `: ${detail}` : "") +
|
||||
(requestId ? ` [request_id=${requestId}]` : ""),
|
||||
);
|
||||
}
|
||||
|
||||
return Buffer.from(await response.arrayBuffer());
|
||||
|
|
|
|||
|
|
@ -1,13 +1,21 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
isValidOpenAIModel,
|
||||
isValidOpenAIVoice,
|
||||
OPENAI_TTS_MODELS,
|
||||
OPENAI_TTS_VOICES,
|
||||
openaiTTS,
|
||||
resolveOpenAITtsInstructions,
|
||||
} from "./tts.js";
|
||||
|
||||
describe("openai tts", () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("isValidOpenAIVoice", () => {
|
||||
it("accepts all valid OpenAI voices including newer additions", () => {
|
||||
for (const voice of OPENAI_TTS_VOICES) {
|
||||
|
|
@ -70,4 +78,110 @@ describe("openai tts", () => {
|
|||
expect(resolveOpenAITtsInstructions("gpt-4o-mini-tts", " ")).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("openaiTTS diagnostics", () => {
|
||||
function createStreamingErrorResponse(params: {
|
||||
status: number;
|
||||
chunkCount: number;
|
||||
chunkSize: number;
|
||||
byte: number;
|
||||
}): { response: Response; getReadCount: () => number } {
|
||||
let reads = 0;
|
||||
const stream = new ReadableStream<Uint8Array>({
|
||||
pull(controller) {
|
||||
if (reads >= params.chunkCount) {
|
||||
controller.close();
|
||||
return;
|
||||
}
|
||||
reads += 1;
|
||||
controller.enqueue(new Uint8Array(params.chunkSize).fill(params.byte));
|
||||
},
|
||||
});
|
||||
return {
|
||||
response: new Response(stream, { status: params.status }),
|
||||
getReadCount: () => reads,
|
||||
};
|
||||
}
|
||||
|
||||
it("includes parsed provider detail and request id for JSON API errors", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
error: {
|
||||
message: "Invalid API key",
|
||||
type: "invalid_request_error",
|
||||
code: "invalid_api_key",
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 401,
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"x-request-id": "req_123",
|
||||
},
|
||||
},
|
||||
),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
openaiTTS({
|
||||
text: "hello",
|
||||
apiKey: "bad-key",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
responseFormat: "mp3",
|
||||
timeoutMs: 5_000,
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
"OpenAI TTS API error (401): Invalid API key [type=invalid_request_error, code=invalid_api_key] [request_id=req_123]",
|
||||
);
|
||||
});
|
||||
|
||||
it("falls back to raw body text when the error body is non-JSON", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () => new Response("temporary upstream outage", { status: 503 }),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
openaiTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
responseFormat: "mp3",
|
||||
timeoutMs: 5_000,
|
||||
}),
|
||||
).rejects.toThrow("OpenAI TTS API error (503): temporary upstream outage");
|
||||
});
|
||||
|
||||
it("caps streamed non-JSON error reads instead of consuming full response bodies", async () => {
|
||||
const streamed = createStreamingErrorResponse({
|
||||
status: 503,
|
||||
chunkCount: 200,
|
||||
chunkSize: 1024,
|
||||
byte: 120,
|
||||
});
|
||||
const fetchMock = vi.fn(async () => streamed.response);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
openaiTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
responseFormat: "mp3",
|
||||
timeoutMs: 5_000,
|
||||
}),
|
||||
).rejects.toThrow("OpenAI TTS API error (503)");
|
||||
|
||||
expect(streamed.getReadCount()).toBeLessThan(200);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,3 +1,10 @@
|
|||
import {
|
||||
asObject,
|
||||
readResponseTextLimited,
|
||||
trimToUndefined,
|
||||
truncateErrorDetail,
|
||||
} from "openclaw/plugin-sdk/speech";
|
||||
|
||||
export const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
||||
|
||||
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
|
||||
|
|
@ -58,6 +65,45 @@ export function resolveOpenAITtsInstructions(
|
|||
return next && model.includes("gpt-4o-mini-tts") ? next : undefined;
|
||||
}
|
||||
|
||||
function formatOpenAiErrorPayload(payload: unknown): string | undefined {
|
||||
const root = asObject(payload);
|
||||
const subject = asObject(root?.error) ?? root;
|
||||
if (!subject) {
|
||||
return undefined;
|
||||
}
|
||||
const message =
|
||||
trimToUndefined(subject.message) ??
|
||||
trimToUndefined(subject.detail) ??
|
||||
trimToUndefined(root?.message);
|
||||
const type = trimToUndefined(subject.type);
|
||||
const code = trimToUndefined(subject.code);
|
||||
const metadata = [type ? `type=${type}` : undefined, code ? `code=${code}` : undefined]
|
||||
.filter((value): value is string => Boolean(value))
|
||||
.join(", ");
|
||||
if (message && metadata) {
|
||||
return `${truncateErrorDetail(message)} [${metadata}]`;
|
||||
}
|
||||
if (message) {
|
||||
return truncateErrorDetail(message);
|
||||
}
|
||||
if (metadata) {
|
||||
return `[${metadata}]`;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async function extractOpenAiErrorDetail(response: Response): Promise<string | undefined> {
|
||||
const rawBody = trimToUndefined(await readResponseTextLimited(response));
|
||||
if (!rawBody) {
|
||||
return undefined;
|
||||
}
|
||||
try {
|
||||
return formatOpenAiErrorPayload(JSON.parse(rawBody)) ?? truncateErrorDetail(rawBody);
|
||||
} catch {
|
||||
return truncateErrorDetail(rawBody);
|
||||
}
|
||||
}
|
||||
|
||||
export async function openaiTTS(params: {
|
||||
text: string;
|
||||
apiKey: string;
|
||||
|
|
@ -102,7 +148,15 @@ export async function openaiTTS(params: {
|
|||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`OpenAI TTS API error (${response.status})`);
|
||||
const detail = await extractOpenAiErrorDetail(response);
|
||||
const requestId =
|
||||
trimToUndefined(response.headers.get("x-request-id")) ??
|
||||
trimToUndefined(response.headers.get("request-id"));
|
||||
throw new Error(
|
||||
`OpenAI TTS API error (${response.status})` +
|
||||
(detail ? `: ${detail}` : "") +
|
||||
(requestId ? ` [request_id=${requestId}]` : ""),
|
||||
);
|
||||
}
|
||||
|
||||
return Buffer.from(await response.arrayBuffer());
|
||||
|
|
|
|||
|
|
@ -74,6 +74,22 @@ type TtsUserPrefs = {
|
|||
|
||||
export type ResolvedTtsModelOverrides = SpeechModelOverridePolicy;
|
||||
|
||||
export type TtsAttemptReasonCode =
|
||||
| "success"
|
||||
| "no_provider_registered"
|
||||
| "not_configured"
|
||||
| "unsupported_for_telephony"
|
||||
| "timeout"
|
||||
| "provider_error";
|
||||
|
||||
export type TtsProviderAttempt = {
|
||||
provider: string;
|
||||
outcome: "success" | "skipped" | "failed";
|
||||
reasonCode: TtsAttemptReasonCode;
|
||||
latencyMs?: number;
|
||||
error?: string;
|
||||
};
|
||||
|
||||
export type TtsResult = {
|
||||
success: boolean;
|
||||
audioPath?: string;
|
||||
|
|
@ -82,6 +98,7 @@ export type TtsResult = {
|
|||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
attempts?: TtsProviderAttempt[];
|
||||
outputFormat?: string;
|
||||
voiceCompatible?: boolean;
|
||||
};
|
||||
|
|
@ -94,6 +111,7 @@ export type TtsSynthesisResult = {
|
|||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
attempts?: TtsProviderAttempt[];
|
||||
outputFormat?: string;
|
||||
voiceCompatible?: boolean;
|
||||
fileExtension?: string;
|
||||
|
|
@ -107,6 +125,7 @@ export type TtsTelephonyResult = {
|
|||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
attempts?: TtsProviderAttempt[];
|
||||
outputFormat?: string;
|
||||
sampleRate?: number;
|
||||
};
|
||||
|
|
@ -119,6 +138,7 @@ type TtsStatusEntry = {
|
|||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
attempts?: TtsProviderAttempt[];
|
||||
latencyMs?: number;
|
||||
error?: string;
|
||||
};
|
||||
|
|
@ -556,25 +576,46 @@ function sanitizeTtsErrorForLog(err: unknown): string {
|
|||
function buildTtsFailureResult(
|
||||
errors: string[],
|
||||
attemptedProviders?: string[],
|
||||
): { success: false; error: string; attemptedProviders?: string[] } {
|
||||
attempts?: TtsProviderAttempt[],
|
||||
): {
|
||||
success: false;
|
||||
error: string;
|
||||
attemptedProviders?: string[];
|
||||
attempts?: TtsProviderAttempt[];
|
||||
} {
|
||||
return {
|
||||
success: false,
|
||||
error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`,
|
||||
attemptedProviders,
|
||||
attempts,
|
||||
};
|
||||
}
|
||||
|
||||
type TtsProviderReadyResolution =
|
||||
| {
|
||||
kind: "ready";
|
||||
provider: NonNullable<ReturnType<typeof getSpeechProvider>>;
|
||||
providerConfig: SpeechProviderConfig;
|
||||
}
|
||||
| {
|
||||
kind: "skip";
|
||||
reasonCode: "no_provider_registered" | "not_configured" | "unsupported_for_telephony";
|
||||
message: string;
|
||||
};
|
||||
|
||||
function resolveReadySpeechProvider(params: {
|
||||
provider: TtsProvider;
|
||||
cfg: OpenClawConfig;
|
||||
config: ResolvedTtsConfig;
|
||||
errors: string[];
|
||||
requireTelephony?: boolean;
|
||||
}): NonNullable<ReturnType<typeof getSpeechProvider>> | null {
|
||||
}): TtsProviderReadyResolution {
|
||||
const resolvedProvider = getSpeechProvider(params.provider, params.cfg);
|
||||
if (!resolvedProvider) {
|
||||
params.errors.push(`${params.provider}: no provider registered`);
|
||||
return null;
|
||||
return {
|
||||
kind: "skip",
|
||||
reasonCode: "no_provider_registered",
|
||||
message: `${params.provider}: no provider registered`,
|
||||
};
|
||||
}
|
||||
const providerConfig = getResolvedSpeechProviderConfig(
|
||||
params.config,
|
||||
|
|
@ -588,14 +629,24 @@ function resolveReadySpeechProvider(params: {
|
|||
timeoutMs: params.config.timeoutMs,
|
||||
})
|
||||
) {
|
||||
params.errors.push(`${params.provider}: not configured`);
|
||||
return null;
|
||||
return {
|
||||
kind: "skip",
|
||||
reasonCode: "not_configured",
|
||||
message: `${params.provider}: not configured`,
|
||||
};
|
||||
}
|
||||
if (params.requireTelephony && !resolvedProvider.synthesizeTelephony) {
|
||||
params.errors.push(`${params.provider}: unsupported for telephony`);
|
||||
return null;
|
||||
return {
|
||||
kind: "skip",
|
||||
reasonCode: "unsupported_for_telephony",
|
||||
message: `${params.provider}: unsupported for telephony`,
|
||||
};
|
||||
}
|
||||
return resolvedProvider;
|
||||
return {
|
||||
kind: "ready",
|
||||
provider: resolvedProvider,
|
||||
providerConfig,
|
||||
};
|
||||
}
|
||||
|
||||
function resolveTtsRequestSetup(params: {
|
||||
|
|
@ -639,10 +690,12 @@ export async function textToSpeech(params: {
|
|||
}): Promise<TtsResult> {
|
||||
const synthesis = await synthesizeSpeech(params);
|
||||
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
|
||||
return buildTtsFailureResult(
|
||||
[synthesis.error ?? "TTS conversion failed"],
|
||||
synthesis.attemptedProviders,
|
||||
);
|
||||
return {
|
||||
success: false,
|
||||
error: synthesis.error ?? "TTS conversion failed",
|
||||
attemptedProviders: synthesis.attemptedProviders,
|
||||
attempts: synthesis.attempts,
|
||||
};
|
||||
}
|
||||
|
||||
const tempRoot = resolvePreferredOpenClawTmpDir();
|
||||
|
|
@ -659,6 +712,7 @@ export async function textToSpeech(params: {
|
|||
provider: synthesis.provider,
|
||||
fallbackFrom: synthesis.fallbackFrom,
|
||||
attemptedProviders: synthesis.attemptedProviders,
|
||||
attempts: synthesis.attempts,
|
||||
outputFormat: synthesis.outputFormat,
|
||||
voiceCompatible: synthesis.voiceCompatible,
|
||||
};
|
||||
|
|
@ -689,6 +743,7 @@ export async function synthesizeSpeech(params: {
|
|||
|
||||
const errors: string[] = [];
|
||||
const attemptedProviders: string[] = [];
|
||||
const attempts: TtsProviderAttempt[] = [];
|
||||
const primaryProvider = providers[0];
|
||||
logVerbose(
|
||||
`TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
|
||||
|
|
@ -702,34 +757,57 @@ export async function synthesizeSpeech(params: {
|
|||
provider,
|
||||
cfg: params.cfg,
|
||||
config,
|
||||
errors,
|
||||
});
|
||||
if (!resolvedProvider) {
|
||||
logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
|
||||
if (resolvedProvider.kind === "skip") {
|
||||
errors.push(resolvedProvider.message);
|
||||
attempts.push({
|
||||
provider,
|
||||
outcome: "skipped",
|
||||
reasonCode: resolvedProvider.reasonCode,
|
||||
error: resolvedProvider.message,
|
||||
});
|
||||
logVerbose(`TTS: provider ${provider} skipped (${resolvedProvider.message})`);
|
||||
continue;
|
||||
}
|
||||
const synthesis = await resolvedProvider.synthesize({
|
||||
const synthesis = await resolvedProvider.provider.synthesize({
|
||||
text: params.text,
|
||||
cfg: params.cfg,
|
||||
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
|
||||
providerConfig: resolvedProvider.providerConfig,
|
||||
target,
|
||||
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.id],
|
||||
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
|
||||
timeoutMs: config.timeoutMs,
|
||||
});
|
||||
const latencyMs = Date.now() - providerStart;
|
||||
attempts.push({
|
||||
provider,
|
||||
outcome: "success",
|
||||
reasonCode: "success",
|
||||
latencyMs,
|
||||
});
|
||||
return {
|
||||
success: true,
|
||||
audioBuffer: synthesis.audioBuffer,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
latencyMs,
|
||||
provider,
|
||||
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
|
||||
attemptedProviders,
|
||||
attempts,
|
||||
outputFormat: synthesis.outputFormat,
|
||||
voiceCompatible: synthesis.voiceCompatible,
|
||||
fileExtension: synthesis.fileExtension,
|
||||
};
|
||||
} catch (err) {
|
||||
const errorMsg = formatTtsProviderError(provider, err);
|
||||
const latencyMs = Date.now() - providerStart;
|
||||
errors.push(errorMsg);
|
||||
attempts.push({
|
||||
provider,
|
||||
outcome: "failed",
|
||||
reasonCode:
|
||||
err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
|
||||
latencyMs,
|
||||
error: errorMsg,
|
||||
});
|
||||
const rawError = sanitizeTtsErrorForLog(err);
|
||||
if (provider === primaryProvider) {
|
||||
const hasFallbacks = providers.length > 1;
|
||||
|
|
@ -742,7 +820,7 @@ export async function synthesizeSpeech(params: {
|
|||
}
|
||||
}
|
||||
|
||||
return buildTtsFailureResult(errors, attemptedProviders);
|
||||
return buildTtsFailureResult(errors, attemptedProviders, attempts);
|
||||
}
|
||||
|
||||
export async function textToSpeechTelephony(params: {
|
||||
|
|
@ -762,7 +840,11 @@ export async function textToSpeechTelephony(params: {
|
|||
const { config, providers } = setup;
|
||||
const errors: string[] = [];
|
||||
const attemptedProviders: string[] = [];
|
||||
const attempts: TtsProviderAttempt[] = [];
|
||||
const primaryProvider = providers[0];
|
||||
logVerbose(
|
||||
`TTS telephony: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
|
||||
);
|
||||
|
||||
for (const provider of providers) {
|
||||
attemptedProviders.push(provider);
|
||||
|
|
@ -772,35 +854,72 @@ export async function textToSpeechTelephony(params: {
|
|||
provider,
|
||||
cfg: params.cfg,
|
||||
config,
|
||||
errors,
|
||||
requireTelephony: true,
|
||||
});
|
||||
if (!resolvedProvider?.synthesizeTelephony) {
|
||||
if (resolvedProvider.kind === "skip") {
|
||||
errors.push(resolvedProvider.message);
|
||||
attempts.push({
|
||||
provider,
|
||||
outcome: "skipped",
|
||||
reasonCode: resolvedProvider.reasonCode,
|
||||
error: resolvedProvider.message,
|
||||
});
|
||||
logVerbose(`TTS telephony: provider ${provider} skipped (${resolvedProvider.message})`);
|
||||
continue;
|
||||
}
|
||||
const synthesis = await resolvedProvider.synthesizeTelephony({
|
||||
const synthesizeTelephony = resolvedProvider.provider.synthesizeTelephony as NonNullable<
|
||||
typeof resolvedProvider.provider.synthesizeTelephony
|
||||
>;
|
||||
const synthesis = await synthesizeTelephony({
|
||||
text: params.text,
|
||||
cfg: params.cfg,
|
||||
providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
|
||||
providerConfig: resolvedProvider.providerConfig,
|
||||
timeoutMs: config.timeoutMs,
|
||||
});
|
||||
const latencyMs = Date.now() - providerStart;
|
||||
attempts.push({
|
||||
provider,
|
||||
outcome: "success",
|
||||
reasonCode: "success",
|
||||
latencyMs,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioBuffer: synthesis.audioBuffer,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
latencyMs,
|
||||
provider,
|
||||
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
|
||||
attemptedProviders,
|
||||
attempts,
|
||||
outputFormat: synthesis.outputFormat,
|
||||
sampleRate: synthesis.sampleRate,
|
||||
};
|
||||
} catch (err) {
|
||||
errors.push(formatTtsProviderError(provider, err));
|
||||
const errorMsg = formatTtsProviderError(provider, err);
|
||||
const latencyMs = Date.now() - providerStart;
|
||||
errors.push(errorMsg);
|
||||
attempts.push({
|
||||
provider,
|
||||
outcome: "failed",
|
||||
reasonCode:
|
||||
err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
|
||||
latencyMs,
|
||||
error: errorMsg,
|
||||
});
|
||||
const rawError = sanitizeTtsErrorForLog(err);
|
||||
if (provider === primaryProvider) {
|
||||
const hasFallbacks = providers.length > 1;
|
||||
logVerbose(
|
||||
`TTS telephony: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
|
||||
);
|
||||
} else {
|
||||
logVerbose(`TTS telephony: ${provider} failed (${rawError}); trying next provider.`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buildTtsFailureResult(errors, attemptedProviders);
|
||||
return buildTtsFailureResult(errors, attemptedProviders, attempts);
|
||||
}
|
||||
|
||||
export async function listSpeechVoices(params: {
|
||||
|
|
@ -969,6 +1088,7 @@ export async function maybeApplyTtsToPayload(params: {
|
|||
provider: result.provider,
|
||||
fallbackFrom: result.fallbackFrom,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
attempts: result.attempts,
|
||||
latencyMs: result.latencyMs,
|
||||
};
|
||||
|
||||
|
|
@ -988,6 +1108,7 @@ export async function maybeApplyTtsToPayload(params: {
|
|||
textLength: text.length,
|
||||
summarized: wasSummarized,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
attempts: result.attempts,
|
||||
error: result.error,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -65,6 +65,20 @@ describe("handleTtsCommands status fallback reporting", () => {
|
|||
provider: "microsoft",
|
||||
fallbackFrom: "elevenlabs",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
attempts: [
|
||||
{
|
||||
provider: "elevenlabs",
|
||||
outcome: "failed",
|
||||
reasonCode: "provider_error",
|
||||
latencyMs: 73,
|
||||
},
|
||||
{
|
||||
provider: "microsoft",
|
||||
outcome: "success",
|
||||
reasonCode: "success",
|
||||
latencyMs: 420,
|
||||
},
|
||||
],
|
||||
latencyMs: 420,
|
||||
});
|
||||
|
||||
|
|
@ -72,6 +86,9 @@ describe("handleTtsCommands status fallback reporting", () => {
|
|||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(result?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
|
||||
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
|
||||
expect(result?.reply?.text).toContain(
|
||||
"Attempt details: elevenlabs:failed(provider_error) 73ms, microsoft:success(ok) 420ms",
|
||||
);
|
||||
});
|
||||
|
||||
it("shows attempted provider chain for failed attempts", async () => {
|
||||
|
|
@ -82,6 +99,14 @@ describe("handleTtsCommands status fallback reporting", () => {
|
|||
summarized: false,
|
||||
error: "TTS conversion failed",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
attempts: [
|
||||
{
|
||||
provider: "elevenlabs",
|
||||
outcome: "failed",
|
||||
reasonCode: "timeout",
|
||||
latencyMs: 999,
|
||||
},
|
||||
],
|
||||
latencyMs: 420,
|
||||
});
|
||||
|
||||
|
|
@ -89,6 +114,7 @@ describe("handleTtsCommands status fallback reporting", () => {
|
|||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(result?.reply?.text).toContain("Error: TTS conversion failed");
|
||||
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
|
||||
expect(result?.reply?.text).toContain("Attempt details: elevenlabs:failed(timeout) 999ms");
|
||||
});
|
||||
|
||||
it("persists fallback metadata from /tts audio and renders it in /tts status", async () => {
|
||||
|
|
@ -103,6 +129,20 @@ describe("handleTtsCommands status fallback reporting", () => {
|
|||
provider: "microsoft",
|
||||
fallbackFrom: "elevenlabs",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
attempts: [
|
||||
{
|
||||
provider: "elevenlabs",
|
||||
outcome: "failed",
|
||||
reasonCode: "provider_error",
|
||||
latencyMs: 65,
|
||||
},
|
||||
{
|
||||
provider: "microsoft",
|
||||
outcome: "success",
|
||||
reasonCode: "success",
|
||||
latencyMs: 175,
|
||||
},
|
||||
],
|
||||
latencyMs: 175,
|
||||
voiceCompatible: true,
|
||||
});
|
||||
|
|
@ -116,5 +156,8 @@ describe("handleTtsCommands status fallback reporting", () => {
|
|||
expect(statusResult?.reply?.text).toContain("Provider: microsoft");
|
||||
expect(statusResult?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
|
||||
expect(statusResult?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
|
||||
expect(statusResult?.reply?.text).toContain(
|
||||
"Attempt details: elevenlabs:failed(provider_error) 65ms, microsoft:success(ok) 175ms",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -29,6 +29,10 @@ type ParsedTtsCommand = {
|
|||
args: string;
|
||||
};
|
||||
|
||||
// Element type of the `attempts` array recorded by the TTS subsystem
// (derived from getLastTtsAttempt so it always stays in sync with the
// shape the tracker actually persists: provider/outcome/reasonCode/latencyMs).
type TtsAttemptDetail = NonNullable<
  NonNullable<ReturnType<typeof getLastTtsAttempt>>["attempts"]
>[number];
|
||||
|
||||
function parseTtsCommand(normalized: string): ParsedTtsCommand | null {
|
||||
// Accept `/tts` and `/tts <action> [args]` as a single control surface.
|
||||
if (normalized === "/tts") {
|
||||
|
|
@ -45,6 +49,19 @@ function parseTtsCommand(normalized: string): ParsedTtsCommand | null {
|
|||
return { action: action.toLowerCase(), args: tail.join(" ").trim() };
|
||||
}
|
||||
|
||||
function formatAttemptDetails(attempts: TtsAttemptDetail[] | undefined): string | undefined {
|
||||
if (!attempts || attempts.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
return attempts
|
||||
.map((attempt) => {
|
||||
const reason = attempt.reasonCode === "success" ? "ok" : attempt.reasonCode;
|
||||
const latency = Number.isFinite(attempt.latencyMs) ? ` ${attempt.latencyMs}ms` : "";
|
||||
return `${attempt.provider}:${attempt.outcome}(${reason})${latency}`;
|
||||
})
|
||||
.join(", ");
|
||||
}
|
||||
|
||||
function ttsUsage(): ReplyPayload {
|
||||
// Keep usage in one place so help/validation stays consistent.
|
||||
return {
|
||||
|
|
@ -137,6 +154,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
|||
provider: result.provider,
|
||||
fallbackFrom: result.fallbackFrom,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
attempts: result.attempts,
|
||||
latencyMs: result.latencyMs,
|
||||
});
|
||||
const payload: ReplyPayload = {
|
||||
|
|
@ -153,6 +171,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
|||
textLength: args.length,
|
||||
summarized: false,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
attempts: result.attempts,
|
||||
error: result.error,
|
||||
latencyMs: Date.now() - start,
|
||||
});
|
||||
|
|
@ -294,12 +313,20 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
|||
if (last.attemptedProviders && last.attemptedProviders.length > 1) {
|
||||
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
|
||||
}
|
||||
const details = formatAttemptDetails(last.attempts);
|
||||
if (details) {
|
||||
lines.push(`Attempt details: ${details}`);
|
||||
}
|
||||
lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
|
||||
} else if (last.error) {
|
||||
lines.push(`Error: ${last.error}`);
|
||||
if (last.attemptedProviders && last.attemptedProviders.length > 0) {
|
||||
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
|
||||
}
|
||||
const details = formatAttemptDetails(last.attempts);
|
||||
if (details) {
|
||||
lines.push(`Attempt details: ${details}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
return { shouldContinue: false, reply: { text: lines.join("\n") } };
|
||||
|
|
|
|||
|
|
@ -38,3 +38,9 @@ export {
|
|||
normalizeSpeechProviderId,
|
||||
} from "../tts/provider-registry.js";
|
||||
export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js";
|
||||
export {
|
||||
asObject,
|
||||
readResponseTextLimited,
|
||||
trimToUndefined,
|
||||
truncateErrorDetail,
|
||||
} from "../tts/provider-error-utils.js";
|
||||
|
|
|
|||
|
|
@ -680,6 +680,182 @@ describe("tts", () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Regression coverage for the fallback loop hardening: a provider whose
// readiness check (isConfigured) THROWS must be treated like a failed
// attempt — recorded in attempts/attemptedProviders — instead of aborting
// the whole fallback chain. Also pins that failure messages are prefixed
// exactly once.
describe("fallback readiness errors", () => {
  it("continues synthesize fallback when primary readiness checks throw", async () => {
    // Primary provider: readiness check throws (message deliberately contains
    // a secret-looking bearer token to exercise redaction-sensitive paths);
    // synthesize must never be reached.
    const throwingPrimary: SpeechProviderPlugin = {
      id: "openai",
      label: "OpenAI",
      autoSelectOrder: 10,
      resolveConfig: () => ({}),
      isConfigured: () => {
        throw new Error("Authorization: Bearer sk-readiness-throw-token-1234567890\nboom");
      },
      synthesize: async () => {
        throw new Error("unexpected synthesize call");
      },
    };
    // Fallback provider: fully configured and returns a minimal valid clip.
    const fallback: SpeechProviderPlugin = {
      id: "microsoft",
      label: "Microsoft",
      autoSelectOrder: 20,
      resolveConfig: () => ({}),
      isConfigured: () => true,
      synthesize: async () => ({
        audioBuffer: createAudioBuffer(2),
        outputFormat: "mp3",
        fileExtension: ".mp3",
        voiceCompatible: true,
      }),
    };
    const registry = createEmptyPluginRegistry();
    registry.speechProviders = [
      { pluginId: "openai", provider: throwingPrimary, source: "test" },
      { pluginId: "microsoft", provider: fallback, source: "test" },
    ];
    const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
    setActivePluginRegistry(registry, cacheKey);

    // Explicitly request the throwing provider so the fallback chain starts there.
    const result = await tts.synthesizeSpeech({
      text: "hello fallback",
      cfg: {
        messages: {
          tts: {
            provider: "openai",
          },
        },
      },
    });

    expect(result.success).toBe(true);
    if (!result.success) {
      throw new Error("expected fallback synthesis success");
    }
    // Fallback succeeded, and the analytics record BOTH attempts in order.
    expect(result.provider).toBe("microsoft");
    expect(result.fallbackFrom).toBe("openai");
    expect(result.attemptedProviders).toEqual(["openai", "microsoft"]);
    expect(result.attempts?.[0]).toMatchObject({
      provider: "openai",
      outcome: "failed",
      reasonCode: "provider_error",
    });
    expect(result.attempts?.[1]).toMatchObject({
      provider: "microsoft",
      outcome: "success",
      reasonCode: "success",
    });
  });

  it("continues telephony fallback when primary readiness checks throw", async () => {
    // Same scenario as above but through the telephony synthesis path,
    // which uses synthesizeTelephony on the fallback provider.
    const throwingPrimary: SpeechProviderPlugin = {
      id: "primary-throws",
      label: "PrimaryThrows",
      autoSelectOrder: 10,
      resolveConfig: () => ({}),
      isConfigured: () => {
        throw new Error("Authorization: Bearer sk-telephony-throw-token-1234567890\tboom");
      },
      synthesize: async () => {
        throw new Error("unexpected synthesize call");
      },
    };
    const fallback: SpeechProviderPlugin = {
      id: "microsoft",
      label: "Microsoft",
      autoSelectOrder: 20,
      resolveConfig: () => ({}),
      isConfigured: () => true,
      synthesize: async () => ({
        audioBuffer: createAudioBuffer(2),
        outputFormat: "mp3",
        fileExtension: ".mp3",
        voiceCompatible: true,
      }),
      // Telephony-capable: returns PCM-style metadata (sampleRate).
      synthesizeTelephony: async () => ({
        audioBuffer: createAudioBuffer(2),
        outputFormat: "mp3",
        sampleRate: 24000,
      }),
    };
    const registry = createEmptyPluginRegistry();
    registry.speechProviders = [
      { pluginId: "primary-throws", provider: throwingPrimary, source: "test" },
      { pluginId: "microsoft", provider: fallback, source: "test" },
    ];
    const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
    setActivePluginRegistry(registry, cacheKey);

    const result = await tts.textToSpeechTelephony({
      text: "hello telephony fallback",
      cfg: {
        messages: {
          tts: {
            provider: "primary-throws",
          },
        },
      },
    });

    expect(result.success).toBe(true);
    if (!result.success) {
      throw new Error("expected telephony fallback success");
    }
    expect(result.provider).toBe("microsoft");
    expect(result.fallbackFrom).toBe("primary-throws");
    expect(result.attemptedProviders).toEqual(["primary-throws", "microsoft"]);
    expect(result.attempts?.[0]).toMatchObject({
      provider: "primary-throws",
      outcome: "failed",
      reasonCode: "provider_error",
    });
    expect(result.attempts?.[1]).toMatchObject({
      provider: "microsoft",
      outcome: "success",
      reasonCode: "success",
    });
  });

  it("does not double-prefix textToSpeech failure messages", async () => {
    // Single provider that fails at synthesize time; fallback disabled so
    // the failure surfaces directly to the caller.
    const failingProvider: SpeechProviderPlugin = {
      id: "openai",
      label: "OpenAI",
      autoSelectOrder: 10,
      resolveConfig: () => ({}),
      isConfigured: () => true,
      synthesize: async () => {
        throw new Error("provider failed");
      },
    };
    const registry = createEmptyPluginRegistry();
    registry.speechProviders = [
      { pluginId: "openai", provider: failingProvider, source: "test" },
    ];
    const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
    setActivePluginRegistry(registry, cacheKey);

    const result = await tts.textToSpeech({
      text: "hello",
      cfg: {
        messages: {
          tts: {
            provider: "openai",
          },
        },
      },
      disableFallback: true,
    });

    expect(result.success).toBe(false);
    if (result.success) {
      throw new Error("expected synthesis failure");
    }
    expect(result.error).toBeDefined();
    const errorMessage = result.error ?? "";
    // The "TTS conversion failed:" prefix must appear exactly once even when
    // inner layers re-wrap the provider error.
    expect(errorMessage).toBe("TTS conversion failed: openai: provider failed");
    expect(errorMessage).not.toContain("TTS conversion failed: TTS conversion failed:");
    expect(errorMessage.match(/TTS conversion failed:/g)).toHaveLength(1);
  });
});
|
||||
|
||||
describe("resolveTtsConfig – openai.baseUrl", () => {
|
||||
const baseCfg: OpenClawConfig = {
|
||||
agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
export function trimToUndefined(value: unknown): string | undefined {
|
||||
return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined;
|
||||
}
|
||||
|
||||
export function asObject(value: unknown): Record<string, unknown> | undefined {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value)
|
||||
? (value as Record<string, unknown>)
|
||||
: undefined;
|
||||
}
|
||||
|
||||
export function truncateErrorDetail(detail: string, limit = 220): string {
|
||||
return detail.length <= limit ? detail : `${detail.slice(0, limit - 1)}…`;
|
||||
}
|
||||
|
||||
export async function readResponseTextLimited(
|
||||
response: Response,
|
||||
limitBytes = 16 * 1024,
|
||||
): Promise<string> {
|
||||
if (limitBytes <= 0) {
|
||||
return "";
|
||||
}
|
||||
const reader = response.body?.getReader();
|
||||
if (!reader) {
|
||||
return "";
|
||||
}
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
let total = 0;
|
||||
let text = "";
|
||||
let reachedLimit = false;
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
if (!value || value.byteLength === 0) {
|
||||
continue;
|
||||
}
|
||||
const remaining = limitBytes - total;
|
||||
if (remaining <= 0) {
|
||||
reachedLimit = true;
|
||||
break;
|
||||
}
|
||||
const chunk = value.byteLength > remaining ? value.subarray(0, remaining) : value;
|
||||
total += chunk.byteLength;
|
||||
text += decoder.decode(chunk, { stream: true });
|
||||
if (total >= limitBytes) {
|
||||
reachedLimit = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
text += decoder.decode();
|
||||
} finally {
|
||||
if (reachedLimit) {
|
||||
await reader.cancel().catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
Loading…
Reference in New Issue