feat(tts): add structured provider diagnostics and fallback attempt analytics (#57954)

* feat(tts): add structured fallback diagnostics and attempt analytics
* docs(tts): document attempt-detail and provider error diagnostics
* TTS: harden fallback loops and share error helpers
* TTS: bound provider error-body reads
* tts: add double-prefix regression test and clean baseline drift
* tests(tts): satisfy error narrowing in double-prefix regression
* changelog

Signed-off-by: joshavant <830519+joshavant@users.noreply.github.com>

---------

Signed-off-by: joshavant <830519+joshavant@users.noreply.github.com>
2026-03-30 22:55:28 -05:00 · 2026-03-30 22:55:28 -05:00 · 44674525f2
parent 329d4bf1a8
commit 44674525f2
13 changed files with 825 additions and 32 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
 - Matrix/history: add optional room history context for Matrix group triggers via `channels.matrix.historyLimit`, with per-agent watermarks and retry-safe snapshots so failed trigger retries do not drift into newer room messages. (#57022) thanks @chain710.
 - Diffs: skip unused viewer-versus-file SSR preload work so `diffs` view-only and file-only runs do less render work while keeping mode outputs aligned. (#57909) thanks @gumadeiras.
 - Matrix/threads: add per-DM `threadReplies` overrides and keep thread session isolation aligned with the effective room or DM thread policy from the triggering message onward. (#57995) thanks @teconomix.
+- TTS: Add structured provider diagnostics and fallback attempt analytics. (#57954) Thanks @joshavant.

 ### Fixes

@ -122,6 +123,7 @@ Docs: https://docs.openclaw.ai
 - Exec/env: block Python package index override variables from request-scoped host exec environment sanitization so package fetches cannot be redirected through a caller-supplied index. Thanks @nexrin and @vincentkoc.
 - Telegram/audio: transcode Telegram voice-note `.ogg` attachments before the local `whisper-cli` auto fallback runs, and keep mention-preflight transcription enabled in auto mode when `tools.media.audio` is unset.
 - Matrix/direct rooms: recover fresh auto-joined 1:1 DMs without eagerly persisting invite-only `m.direct` mappings, while keeping named, aliased, and explicitly configured rooms on the room path. (#58024) Thanks @gumadeiras.
+- TTS: Restore 3.28 schema compatibility and fallback observability. (#57953) Thanks @joshavant.

 ## 2026.3.28

--- a/docs/tools/tts.md
+++ b/docs/tools/tts.md
@ -395,6 +395,8 @@ Notes:
 - `/tts status` includes fallback visibility for the latest attempt:
  - success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
  - failure: `Error: ...` plus `Attempts: ...`
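+  - detailed diagnostics: `Attempt details: <provider>:<outcome>(<reasonCode>) <latency>ms`, one comma-separated entry per attempt (a successful attempt shows `ok` as its reason; the latency is omitted when it was not measured)
+- OpenAI and ElevenLabs API failures now include the parsed provider error detail and the request id (when returned by the provider), which are surfaced in TTS errors/logs.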

 ## Agent tool

--- a/docs/tts.md
+++ b/docs/tts.md
@ -395,6 +395,8 @@ Notes:
 - `/tts status` includes fallback visibility for the latest attempt:
  - success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
  - failure: `Error: ...` plus `Attempts: ...`
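+  - detailed diagnostics: `Attempt details: <provider>:<outcome>(<reasonCode>) <latency>ms`, one comma-separated entry per attempt (a successful attempt shows `ok` as its reason; the latency is omitted when it was not measured)
+- OpenAI and ElevenLabs API failures now include the parsed provider error detail and the request id (when returned by the provider), which are surfaced in TTS errors/logs.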

 ## Agent tool

--- a/extensions/elevenlabs/tts.test.ts
+++ b/extensions/elevenlabs/tts.test.ts
@ -0,0 +1,133 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { elevenLabsTTS } from "./tts.js";
+
+describe("elevenlabs tts diagnostics", () => {
+  const originalFetch = globalThis.fetch;
+
+  /**
+   * Builds a `Response` whose body is generated lazily, one chunk per `pull`,
+   * so a test can observe how many chunks the code under test actually consumed.
+   *
+   * @param params.status - HTTP status of the response.
+   * @param params.chunkCount - Number of chunks emitted before the stream closes.
+   * @param params.chunkSize - Size in bytes of every chunk.
+   * @param params.byte - Value every byte of every chunk is filled with.
+   * @returns The response plus a getter for the number of chunks produced so far.
+   */
+  function createStreamingErrorResponse(params: {
+    status: number;
+    chunkCount: number;
+    chunkSize: number;
+    byte: number;
+  }): { response: Response; getReadCount: () => number } {
+    const { status, chunkCount, chunkSize, byte } = params;
+    let producedChunks = 0;
+    const body = new ReadableStream<Uint8Array>({
+      pull(controller) {
+        if (producedChunks < chunkCount) {
+          // Count first, then hand out the chunk, so the counter reflects every pull served.
+          producedChunks += 1;
+          controller.enqueue(new Uint8Array(chunkSize).fill(byte));
+        } else {
+          controller.close();
+        }
+      },
+    });
+    const response = new Response(body, { status });
+    return { response, getReadCount: () => producedChunks };
+  }
+
+  // Every case replaces `globalThis.fetch`; put the real one back and reset mocks afterwards.
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+    vi.restoreAllMocks();
+  });
+
+  // JSON error body with a nested `detail` object plus an `x-request-id` header:
+  // the thrown message is expected to carry the message, the status as code, and the request id.
+  it("includes parsed provider detail and request id for JSON API errors", async () => {
+    const fetchMock = vi.fn(
+      async () =>
+        new Response(
+          JSON.stringify({
+            detail: {
+              message: "Quota exceeded",
+              status: "quota_exceeded",
+            },
+          }),
+          {
+            status: 429,
+            headers: {
+              "Content-Type": "application/json",
+              "x-request-id": "el_req_456",
+            },
+          },
+        ),
+    );
+    globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+    await expect(
+      elevenLabsTTS({
+        text: "hello",
+        apiKey: "test-key",
+        baseUrl: "https://api.elevenlabs.io",
+        voiceId: "pMsXgVXv3BLzUgSXRplE",
+        modelId: "eleven_multilingual_v2",
+        outputFormat: "mp3_44100_128",
+        voiceSettings: {
+          stability: 0.5,
+          similarityBoost: 0.75,
+          style: 0,
+          useSpeakerBoost: true,
+          speed: 1.0,
+        },
+        timeoutMs: 5_000,
+      }),
+    ).rejects.toThrow(
+      "ElevenLabs API error (429): Quota exceeded [code=quota_exceeded] [request_id=el_req_456]",
+    );
+  });
+
+  // Plain-text error body and no request-id header: the raw text is appended after the status.
+  it("falls back to raw body text when the error body is non-JSON", async () => {
+    const fetchMock = vi.fn(async () => new Response("service unavailable", { status: 503 }));
+    globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+    await expect(
+      elevenLabsTTS({
+        text: "hello",
+        apiKey: "test-key",
+        baseUrl: "https://api.elevenlabs.io",
+        voiceId: "pMsXgVXv3BLzUgSXRplE",
+        modelId: "eleven_multilingual_v2",
+        outputFormat: "mp3_44100_128",
+        voiceSettings: {
+          stability: 0.5,
+          similarityBoost: 0.75,
+          style: 0,
+          useSpeakerBoost: true,
+          speed: 1.0,
+        },
+        timeoutMs: 5_000,
+      }),
+    ).rejects.toThrow("ElevenLabs API error (503): service unavailable");
+  });
+
+  // Streams up to 200 chunks of 1024 bytes (byte 121 is ASCII "y") as the error body and
+  // checks that the stream was asked for fewer than all 200 chunks.
+  it("caps streamed non-JSON error reads instead of consuming full response bodies", async () => {
+    const streamed = createStreamingErrorResponse({
+      status: 503,
+      chunkCount: 200,
+      chunkSize: 1024,
+      byte: 121,
+    });
+    const fetchMock = vi.fn(async () => streamed.response);
+    globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+    await expect(
+      elevenLabsTTS({
+        text: "hello",
+        apiKey: "test-key",
+        baseUrl: "https://api.elevenlabs.io",
+        voiceId: "pMsXgVXv3BLzUgSXRplE",
+        modelId: "eleven_multilingual_v2",
+        outputFormat: "mp3_44100_128",
+        voiceSettings: {
+          stability: 0.5,
+          similarityBoost: 0.75,
+          style: 0,
+          useSpeakerBoost: true,
+          speed: 1.0,
+        },
+        timeoutMs: 5_000,
+      }),
+    ).rejects.toThrow("ElevenLabs API error (503)");
+
+    expect(streamed.getReadCount()).toBeLessThan(200);
+  });
+});
--- a/extensions/elevenlabs/tts.ts
+++ b/extensions/elevenlabs/tts.ts
@ -1,8 +1,12 @@
 import {
+  asObject,
  normalizeApplyTextNormalization,
  normalizeLanguageCode,
  normalizeSeed,
+  readResponseTextLimited,
  requireInRange,
+  trimToUndefined,
+  truncateErrorDetail,
 } from "openclaw/plugin-sdk/speech";

 const DEFAULT_ELEVENLABS_BASE_URL = "https://api.elevenlabs.io";
@ -19,6 +23,45 @@ function normalizeElevenLabsBaseUrl(baseUrl?: string): string {
  return trimmed.replace(/\/+$/, "");
 }

+/**
+ * Condenses a JSON-decoded ElevenLabs error body into a short
+ * `message [code=...]` string for inclusion in thrown errors.
+ *
+ * Message lookup order: `message`, `detail.message`, `detail.detail`, a
+ * plain-string `detail`, then `error`. Code lookup order: `code`,
+ * `detail.code`, `detail.status`.
+ *
+ * @param payload - The decoded response body; any shape is accepted.
+ * @returns The formatted detail, or `undefined` when the payload is not an
+ *   object or carries none of the recognised fields.
+ */
+function formatElevenLabsErrorPayload(payload: unknown): string | undefined {
+  const root = asObject(payload);
+  if (!root) {
+    return undefined;
+  }
+  const detailObject = asObject(root.detail);
+  const message =
+    trimToUndefined(root.message) ??
+    trimToUndefined(detailObject?.message) ??
+    trimToUndefined(detailObject?.detail) ??
+    // `detail` may also be a bare string (e.g. `{"detail": "Not Found"}`); use it as the
+    // message so the caller does not have to fall back to the raw JSON text.
+    trimToUndefined(root.detail) ??
+    trimToUndefined(root.error);
+  const code =
+    trimToUndefined(root.code) ??
+    trimToUndefined(detailObject?.code) ??
+    trimToUndefined(detailObject?.status);
+  if (message && code) {
+    return `${truncateErrorDetail(message)} [code=${code}]`;
+  }
+  if (message) {
+    return truncateErrorDetail(message);
+  }
+  if (code) {
+    return `[code=${code}]`;
+  }
+  return undefined;
+}
+
+/**
+ * Reads the error body of a failed ElevenLabs response (through
+ * `readResponseTextLimited`) and turns it into a short diagnostic string.
+ *
+ * @param response - The non-OK response whose body should be inspected.
+ * @returns The formatted JSON error detail when the body parses and has
+ *   recognised fields, otherwise the truncated raw body text; `undefined` when
+ *   the body is empty.
+ */
+async function extractElevenLabsErrorDetail(response: Response): Promise<string | undefined> {
+  const bodyText = trimToUndefined(await readResponseTextLimited(response));
+  if (!bodyText) {
+    return undefined;
+  }
+  let formatted: string | undefined;
+  try {
+    formatted = formatElevenLabsErrorPayload(JSON.parse(bodyText));
+  } catch {
+    // Body is not valid JSON; the raw text below is the best detail available.
+    formatted = undefined;
+  }
+  return formatted ?? truncateErrorDetail(bodyText);
+}
+
 function assertElevenLabsVoiceSettings(settings: {
  stability: number;
  similarityBoost: number;
@ -106,7 +149,15 @@ export async function elevenLabsTTS(params: {
    });

    if (!response.ok) {
-      throw new Error(`ElevenLabs API error (${response.status})`);
+      const detail = await extractElevenLabsErrorDetail(response);
+      const requestId =
+        trimToUndefined(response.headers.get("x-request-id")) ??
+        trimToUndefined(response.headers.get("request-id"));
+      throw new Error(
+        `ElevenLabs API error (${response.status})` +
+          (detail ? `: ${detail}` : "") +
+          (requestId ? ` [request_id=${requestId}]` : ""),
+      );
    }

    return Buffer.from(await response.arrayBuffer());
--- a/extensions/openai/tts.test.ts
+++ b/extensions/openai/tts.test.ts
@ -1,13 +1,21 @@
-import { describe, expect, it } from "vitest";
+import { afterEach, describe, expect, it, vi } from "vitest";
 import {
  isValidOpenAIModel,
  isValidOpenAIVoice,
  OPENAI_TTS_MODELS,
  OPENAI_TTS_VOICES,
+  openaiTTS,
  resolveOpenAITtsInstructions,
 } from "./tts.js";

 describe("openai tts", () => {
+  const originalFetch = globalThis.fetch;
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+    vi.restoreAllMocks();
+  });
+
  describe("isValidOpenAIVoice", () => {
    it("accepts all valid OpenAI voices including newer additions", () => {
      for (const voice of OPENAI_TTS_VOICES) {
@ -70,4 +78,110 @@ describe("openai tts", () => {
      expect(resolveOpenAITtsInstructions("gpt-4o-mini-tts", "   ")).toBeUndefined();
    });
  });
+
+  describe("openaiTTS diagnostics", () => {
+    /**
+     * Creates a `Response` backed by a lazily generated stream and exposes how
+     * many chunks have been produced, letting a test verify that the error-body
+     * read stopped before the stream was exhausted.
+     *
+     * @param params.status - HTTP status of the response.
+     * @param params.chunkCount - Number of chunks emitted before the stream closes.
+     * @param params.chunkSize - Size in bytes of every chunk.
+     * @param params.byte - Value every byte of every chunk is filled with.
+     * @returns The response plus a getter for the number of chunks produced so far.
+     */
+    function createStreamingErrorResponse(params: {
+      status: number;
+      chunkCount: number;
+      chunkSize: number;
+      byte: number;
+    }): { response: Response; getReadCount: () => number } {
+      let servedPulls = 0;
+      const nextChunk = (): Uint8Array => new Uint8Array(params.chunkSize).fill(params.byte);
+      const source = new ReadableStream<Uint8Array>({
+        pull(controller) {
+          const exhausted = servedPulls >= params.chunkCount;
+          if (exhausted) {
+            controller.close();
+            return;
+          }
+          servedPulls += 1;
+          controller.enqueue(nextChunk());
+        },
+      });
+      const getReadCount = (): number => servedPulls;
+      return { response: new Response(source, { status: params.status }), getReadCount };
+    }
+
+    // JSON error body in the `{ error: { message, type, code } }` shape plus an
+    // `x-request-id` header: all four pieces are expected in the thrown message.
+    it("includes parsed provider detail and request id for JSON API errors", async () => {
+      const fetchMock = vi.fn(
+        async () =>
+          new Response(
+            JSON.stringify({
+              error: {
+                message: "Invalid API key",
+                type: "invalid_request_error",
+                code: "invalid_api_key",
+              },
+            }),
+            {
+              status: 401,
+              headers: {
+                "Content-Type": "application/json",
+                "x-request-id": "req_123",
+              },
+            },
+          ),
+      );
+      globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+      await expect(
+        openaiTTS({
+          text: "hello",
+          apiKey: "bad-key",
+          baseUrl: "https://api.openai.com/v1",
+          model: "gpt-4o-mini-tts",
+          voice: "alloy",
+          responseFormat: "mp3",
+          timeoutMs: 5_000,
+        }),
+      ).rejects.toThrow(
+        "OpenAI TTS API error (401): Invalid API key [type=invalid_request_error, code=invalid_api_key] [request_id=req_123]",
+      );
+    });
+
+    // Plain-text error body and no request-id header: the raw text is appended after the status.
+    it("falls back to raw body text when the error body is non-JSON", async () => {
+      const fetchMock = vi.fn(
+        async () => new Response("temporary upstream outage", { status: 503 }),
+      );
+      globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+      await expect(
+        openaiTTS({
+          text: "hello",
+          apiKey: "test-key",
+          baseUrl: "https://api.openai.com/v1",
+          model: "gpt-4o-mini-tts",
+          voice: "alloy",
+          responseFormat: "mp3",
+          timeoutMs: 5_000,
+        }),
+      ).rejects.toThrow("OpenAI TTS API error (503): temporary upstream outage");
+    });
+
+    // Streams up to 200 chunks of 1024 bytes (byte 120 is ASCII "x") as the error body and
+    // checks that the stream was asked for fewer than all 200 chunks.
+    it("caps streamed non-JSON error reads instead of consuming full response bodies", async () => {
+      const streamed = createStreamingErrorResponse({
+        status: 503,
+        chunkCount: 200,
+        chunkSize: 1024,
+        byte: 120,
+      });
+      const fetchMock = vi.fn(async () => streamed.response);
+      globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+      await expect(
+        openaiTTS({
+          text: "hello",
+          apiKey: "test-key",
+          baseUrl: "https://api.openai.com/v1",
+          model: "gpt-4o-mini-tts",
+          voice: "alloy",
+          responseFormat: "mp3",
+          timeoutMs: 5_000,
+        }),
+      ).rejects.toThrow("OpenAI TTS API error (503)");
+
+      expect(streamed.getReadCount()).toBeLessThan(200);
+    });
+  });
 });
--- a/extensions/openai/tts.ts
+++ b/extensions/openai/tts.ts
@ -1,3 +1,10 @@
+import {
+  asObject,
+  readResponseTextLimited,
+  trimToUndefined,
+  truncateErrorDetail,
+} from "openclaw/plugin-sdk/speech";
+
 export const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";

 export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
@ -58,6 +65,45 @@ export function resolveOpenAITtsInstructions(
  return next && model.includes("gpt-4o-mini-tts") ? next : undefined;
 }

+/**
+ * Condenses a JSON-decoded OpenAI error body into a short
+ * `message [type=..., code=...]` string for inclusion in thrown errors.
+ *
+ * Fields are read from the nested `error` object when there is one, otherwise
+ * from the top-level object. The message falls back from `message` to
+ * `detail` and finally to the top-level `message`.
+ *
+ * @param payload - The decoded response body; any shape is accepted.
+ * @returns The formatted detail, or `undefined` when the payload is not an
+ *   object or carries none of the recognised fields.
+ */
+function formatOpenAiErrorPayload(payload: unknown): string | undefined {
+  const body = asObject(payload);
+  const errorNode = asObject(body?.error) ?? body;
+  if (!errorNode) {
+    return undefined;
+  }
+  const text =
+    trimToUndefined(errorNode.message) ??
+    trimToUndefined(errorNode.detail) ??
+    trimToUndefined(body?.message);
+
+  // Collect the optional `type=` / `code=` tags in a fixed order.
+  const tags: string[] = [];
+  const type = trimToUndefined(errorNode.type);
+  if (type) {
+    tags.push(`type=${type}`);
+  }
+  const code = trimToUndefined(errorNode.code);
+  if (code) {
+    tags.push(`code=${code}`);
+  }
+  const suffix = tags.length > 0 ? `[${tags.join(", ")}]` : undefined;
+
+  if (!text) {
+    return suffix;
+  }
+  const shortened = truncateErrorDetail(text);
+  return suffix ? `${shortened} ${suffix}` : shortened;
+}
+
+/**
+ * Reads the error body of a failed OpenAI response (through
+ * `readResponseTextLimited`) and turns it into a short diagnostic string.
+ *
+ * @param response - The non-OK response whose body should be inspected.
+ * @returns The formatted JSON error detail when the body parses and has
+ *   recognised fields, otherwise the truncated raw body text; `undefined` when
+ *   the body is empty.
+ */
+async function extractOpenAiErrorDetail(response: Response): Promise<string | undefined> {
+  const bodyText = trimToUndefined(await readResponseTextLimited(response));
+  if (!bodyText) {
+    return undefined;
+  }
+  let formatted: string | undefined;
+  try {
+    formatted = formatOpenAiErrorPayload(JSON.parse(bodyText));
+  } catch {
+    // Body is not valid JSON; the raw text below is the best detail available.
+    formatted = undefined;
+  }
+  return formatted ?? truncateErrorDetail(bodyText);
+}
+
 export async function openaiTTS(params: {
  text: string;
  apiKey: string;
@ -102,7 +148,15 @@ export async function openaiTTS(params: {
    });

    if (!response.ok) {
-      throw new Error(`OpenAI TTS API error (${response.status})`);
+      const detail = await extractOpenAiErrorDetail(response);
+      const requestId =
+        trimToUndefined(response.headers.get("x-request-id")) ??
+        trimToUndefined(response.headers.get("request-id"));
+      throw new Error(
+        `OpenAI TTS API error (${response.status})` +
+          (detail ? `: ${detail}` : "") +
+          (requestId ? ` [request_id=${requestId}]` : ""),
+      );
    }

    return Buffer.from(await response.arrayBuffer());
--- a/extensions/speech-core/src/tts.ts
+++ b/extensions/speech-core/src/tts.ts
@ -74,6 +74,22 @@ type TtsUserPrefs = {

 export type ResolvedTtsModelOverrides = SpeechModelOverridePolicy;

+/**
+ * Machine-readable reason attached to each provider attempt.
+ *
+ * - `success`: the provider produced audio.
+ * - `no_provider_registered` / `not_configured` / `unsupported_for_telephony`:
+ *   the provider was skipped before synthesis was attempted.
+ * - `timeout`: the provider threw an `Error` whose `name` is `"AbortError"`.
+ * - `provider_error`: the provider threw anything else.
+ */
+export type TtsAttemptReasonCode =
+  | "success"
+  | "no_provider_registered"
+  | "not_configured"
+  | "unsupported_for_telephony"
+  | "timeout"
+  | "provider_error";
+
+/** One entry in the per-request record of providers tried, in the order they were tried. */
+export type TtsProviderAttempt = {
+  // Provider id the attempt was made against.
+  provider: string;
+  // "skipped" means synthesis was never called for this provider.
+  outcome: "success" | "skipped" | "failed";
+  reasonCode: TtsAttemptReasonCode;
+  // Recorded for successful and failed attempts; not set on skipped ones.
+  latencyMs?: number;
+  // Skip message or formatted provider error; not set on success.
+  error?: string;
+};
+
 export type TtsResult = {
  success: boolean;
  audioPath?: string;
@ -82,6 +98,7 @@ export type TtsResult = {
  provider?: string;
  fallbackFrom?: string;
  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
  outputFormat?: string;
  voiceCompatible?: boolean;
 };
@ -94,6 +111,7 @@ export type TtsSynthesisResult = {
  provider?: string;
  fallbackFrom?: string;
  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
  outputFormat?: string;
  voiceCompatible?: boolean;
  fileExtension?: string;
@ -107,6 +125,7 @@ export type TtsTelephonyResult = {
  provider?: string;
  fallbackFrom?: string;
  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
  outputFormat?: string;
  sampleRate?: number;
 };
@ -119,6 +138,7 @@ type TtsStatusEntry = {
  provider?: string;
  fallbackFrom?: string;
  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
  latencyMs?: number;
  error?: string;
 };
@ -556,25 +576,46 @@ function sanitizeTtsErrorForLog(err: unknown): string {
 function buildTtsFailureResult(
  errors: string[],
  attemptedProviders?: string[],
-): { success: false; error: string; attemptedProviders?: string[] } {
+  attempts?: TtsProviderAttempt[],
+): {
+  success: false;
+  error: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+} {
  return {
    success: false,
    error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`,
    attemptedProviders,
+    attempts,
  };
 }

+/**
+ * Result of checking whether a provider can be used for a request: either the
+ * resolved provider together with its config (`kind: "ready"`), or the reason
+ * it has to be skipped (`kind: "skip"`).
+ */
+type TtsProviderReadyResolution =
+  | {
+      kind: "ready";
+      provider: NonNullable<ReturnType<typeof getSpeechProvider>>;
+      providerConfig: SpeechProviderConfig;
+    }
+  | {
+      kind: "skip";
+      reasonCode: "no_provider_registered" | "not_configured" | "unsupported_for_telephony";
+      // Human-readable text of the form `<provider>: <reason>`.
+      message: string;
+    };
+
 function resolveReadySpeechProvider(params: {
  provider: TtsProvider;
  cfg: OpenClawConfig;
  config: ResolvedTtsConfig;
-  errors: string[];
  requireTelephony?: boolean;
-}): NonNullable<ReturnType<typeof getSpeechProvider>> | null {
+}): TtsProviderReadyResolution {
  const resolvedProvider = getSpeechProvider(params.provider, params.cfg);
  if (!resolvedProvider) {
-    params.errors.push(`${params.provider}: no provider registered`);
-    return null;
+    return {
+      kind: "skip",
+      reasonCode: "no_provider_registered",
+      message: `${params.provider}: no provider registered`,
+    };
  }
  const providerConfig = getResolvedSpeechProviderConfig(
    params.config,
@ -588,14 +629,24 @@ function resolveReadySpeechProvider(params: {
      timeoutMs: params.config.timeoutMs,
    })
  ) {
-    params.errors.push(`${params.provider}: not configured`);
-    return null;
+    return {
+      kind: "skip",
+      reasonCode: "not_configured",
+      message: `${params.provider}: not configured`,
+    };
  }
  if (params.requireTelephony && !resolvedProvider.synthesizeTelephony) {
-    params.errors.push(`${params.provider}: unsupported for telephony`);
-    return null;
+    return {
+      kind: "skip",
+      reasonCode: "unsupported_for_telephony",
+      message: `${params.provider}: unsupported for telephony`,
+    };
  }
-  return resolvedProvider;
+  return {
+    kind: "ready",
+    provider: resolvedProvider,
+    providerConfig,
+  };
 }

 function resolveTtsRequestSetup(params: {
@ -639,10 +690,12 @@ export async function textToSpeech(params: {
 }): Promise<TtsResult> {
  const synthesis = await synthesizeSpeech(params);
  if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
-    return buildTtsFailureResult(
-      [synthesis.error ?? "TTS conversion failed"],
-      synthesis.attemptedProviders,
-    );
+    return {
+      success: false,
+      error: synthesis.error ?? "TTS conversion failed",
+      attemptedProviders: synthesis.attemptedProviders,
+      attempts: synthesis.attempts,
+    };
  }

  const tempRoot = resolvePreferredOpenClawTmpDir();
@ -659,6 +712,7 @@ export async function textToSpeech(params: {
    provider: synthesis.provider,
    fallbackFrom: synthesis.fallbackFrom,
    attemptedProviders: synthesis.attemptedProviders,
+    attempts: synthesis.attempts,
    outputFormat: synthesis.outputFormat,
    voiceCompatible: synthesis.voiceCompatible,
  };
@ -689,6 +743,7 @@ export async function synthesizeSpeech(params: {

  const errors: string[] = [];
  const attemptedProviders: string[] = [];
+  const attempts: TtsProviderAttempt[] = [];
  const primaryProvider = providers[0];
  logVerbose(
    `TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
@ -702,34 +757,57 @@ export async function synthesizeSpeech(params: {
        provider,
        cfg: params.cfg,
        config,
-        errors,
      });
-      if (!resolvedProvider) {
-        logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
+      if (resolvedProvider.kind === "skip") {
+        errors.push(resolvedProvider.message);
+        attempts.push({
+          provider,
+          outcome: "skipped",
+          reasonCode: resolvedProvider.reasonCode,
+          error: resolvedProvider.message,
+        });
+        logVerbose(`TTS: provider ${provider} skipped (${resolvedProvider.message})`);
        continue;
      }
-      const synthesis = await resolvedProvider.synthesize({
+      const synthesis = await resolvedProvider.provider.synthesize({
        text: params.text,
        cfg: params.cfg,
-        providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
+        providerConfig: resolvedProvider.providerConfig,
        target,
-        providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.id],
+        providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
        timeoutMs: config.timeoutMs,
      });
+      const latencyMs = Date.now() - providerStart;
+      attempts.push({
+        provider,
+        outcome: "success",
+        reasonCode: "success",
+        latencyMs,
+      });
      return {
        success: true,
        audioBuffer: synthesis.audioBuffer,
-        latencyMs: Date.now() - providerStart,
+        latencyMs,
        provider,
        fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
        attemptedProviders,
+        attempts,
        outputFormat: synthesis.outputFormat,
        voiceCompatible: synthesis.voiceCompatible,
        fileExtension: synthesis.fileExtension,
      };
    } catch (err) {
      const errorMsg = formatTtsProviderError(provider, err);
+      const latencyMs = Date.now() - providerStart;
      errors.push(errorMsg);
+      attempts.push({
+        provider,
+        outcome: "failed",
+        reasonCode:
+          err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
+        latencyMs,
+        error: errorMsg,
+      });
      const rawError = sanitizeTtsErrorForLog(err);
      if (provider === primaryProvider) {
        const hasFallbacks = providers.length > 1;
@ -742,7 +820,7 @@ export async function synthesizeSpeech(params: {
    }
  }

-  return buildTtsFailureResult(errors, attemptedProviders);
+  return buildTtsFailureResult(errors, attemptedProviders, attempts);
 }

 export async function textToSpeechTelephony(params: {
@ -762,7 +840,11 @@ export async function textToSpeechTelephony(params: {
  const { config, providers } = setup;
  const errors: string[] = [];
  const attemptedProviders: string[] = [];
+  const attempts: TtsProviderAttempt[] = [];
  const primaryProvider = providers[0];
+  logVerbose(
+    `TTS telephony: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
+  );

  for (const provider of providers) {
    attemptedProviders.push(provider);
@ -772,35 +854,72 @@ export async function textToSpeechTelephony(params: {
        provider,
        cfg: params.cfg,
        config,
-        errors,
        requireTelephony: true,
      });
-      if (!resolvedProvider?.synthesizeTelephony) {
+      if (resolvedProvider.kind === "skip") {
+        errors.push(resolvedProvider.message);
+        attempts.push({
+          provider,
+          outcome: "skipped",
+          reasonCode: resolvedProvider.reasonCode,
+          error: resolvedProvider.message,
+        });
+        logVerbose(`TTS telephony: provider ${provider} skipped (${resolvedProvider.message})`);
        continue;
      }
-      const synthesis = await resolvedProvider.synthesizeTelephony({
+      const synthesizeTelephony = resolvedProvider.provider.synthesizeTelephony as NonNullable<
+        typeof resolvedProvider.provider.synthesizeTelephony
+      >;
+      const synthesis = await synthesizeTelephony({
        text: params.text,
        cfg: params.cfg,
-        providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg),
+        providerConfig: resolvedProvider.providerConfig,
        timeoutMs: config.timeoutMs,
      });
+      const latencyMs = Date.now() - providerStart;
+      attempts.push({
+        provider,
+        outcome: "success",
+        reasonCode: "success",
+        latencyMs,
+      });

      return {
        success: true,
        audioBuffer: synthesis.audioBuffer,
-        latencyMs: Date.now() - providerStart,
+        latencyMs,
        provider,
        fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
        attemptedProviders,
+        attempts,
        outputFormat: synthesis.outputFormat,
        sampleRate: synthesis.sampleRate,
      };
    } catch (err) {
-      errors.push(formatTtsProviderError(provider, err));
+      const errorMsg = formatTtsProviderError(provider, err);
+      const latencyMs = Date.now() - providerStart;
+      errors.push(errorMsg);
+      attempts.push({
+        provider,
+        outcome: "failed",
+        reasonCode:
+          err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
+        latencyMs,
+        error: errorMsg,
+      });
+      const rawError = sanitizeTtsErrorForLog(err);
+      if (provider === primaryProvider) {
+        const hasFallbacks = providers.length > 1;
+        logVerbose(
+          `TTS telephony: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
+        );
+      } else {
+        logVerbose(`TTS telephony: ${provider} failed (${rawError}); trying next provider.`);
+      }
    }
  }

-  return buildTtsFailureResult(errors, attemptedProviders);
+  return buildTtsFailureResult(errors, attemptedProviders, attempts);
 }

 export async function listSpeechVoices(params: {
@ -969,6 +1088,7 @@ export async function maybeApplyTtsToPayload(params: {
      provider: result.provider,
      fallbackFrom: result.fallbackFrom,
      attemptedProviders: result.attemptedProviders,
+      attempts: result.attempts,
      latencyMs: result.latencyMs,
    };

@ -988,6 +1108,7 @@ export async function maybeApplyTtsToPayload(params: {
    textLength: text.length,
    summarized: wasSummarized,
    attemptedProviders: result.attemptedProviders,
+    attempts: result.attempts,
    error: result.error,
  };

--- a/src/auto-reply/reply/commands-tts.test.ts
+++ b/src/auto-reply/reply/commands-tts.test.ts
@ -65,6 +65,20 @@ describe("handleTtsCommands status fallback reporting", () => {
      provider: "microsoft",
      fallbackFrom: "elevenlabs",
      attemptedProviders: ["elevenlabs", "microsoft"],
+      attempts: [
+        {
+          provider: "elevenlabs",
+          outcome: "failed",
+          reasonCode: "provider_error",
+          latencyMs: 73,
+        },
+        {
+          provider: "microsoft",
+          outcome: "success",
+          reasonCode: "success",
+          latencyMs: 420,
+        },
+      ],
      latencyMs: 420,
    });

@ -72,6 +86,9 @@ describe("handleTtsCommands status fallback reporting", () => {
    expect(result?.shouldContinue).toBe(false);
    expect(result?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
    expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
+    expect(result?.reply?.text).toContain(
+      "Attempt details: elevenlabs:failed(provider_error) 73ms, microsoft:success(ok) 420ms",
+    );
  });

  it("shows attempted provider chain for failed attempts", async () => {
@ -82,6 +99,14 @@ describe("handleTtsCommands status fallback reporting", () => {
      summarized: false,
      error: "TTS conversion failed",
      attemptedProviders: ["elevenlabs", "microsoft"],
+      attempts: [
+        {
+          provider: "elevenlabs",
+          outcome: "failed",
+          reasonCode: "timeout",
+          latencyMs: 999,
+        },
+      ],
      latencyMs: 420,
    });

@ -89,6 +114,7 @@ describe("handleTtsCommands status fallback reporting", () => {
    expect(result?.shouldContinue).toBe(false);
    expect(result?.reply?.text).toContain("Error: TTS conversion failed");
    expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
+    expect(result?.reply?.text).toContain("Attempt details: elevenlabs:failed(timeout) 999ms");
  });

  it("persists fallback metadata from /tts audio and renders it in /tts status", async () => {
@ -103,6 +129,20 @@ describe("handleTtsCommands status fallback reporting", () => {
      provider: "microsoft",
      fallbackFrom: "elevenlabs",
      attemptedProviders: ["elevenlabs", "microsoft"],
+      attempts: [
+        {
+          provider: "elevenlabs",
+          outcome: "failed",
+          reasonCode: "provider_error",
+          latencyMs: 65,
+        },
+        {
+          provider: "microsoft",
+          outcome: "success",
+          reasonCode: "success",
+          latencyMs: 175,
+        },
+      ],
      latencyMs: 175,
      voiceCompatible: true,
    });
@ -116,5 +156,8 @@ describe("handleTtsCommands status fallback reporting", () => {
    expect(statusResult?.reply?.text).toContain("Provider: microsoft");
    expect(statusResult?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
    expect(statusResult?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
+    expect(statusResult?.reply?.text).toContain(
+      "Attempt details: elevenlabs:failed(provider_error) 65ms, microsoft:success(ok) 175ms",
+    );
  });
 });
--- a/src/auto-reply/reply/commands-tts.ts
+++ b/src/auto-reply/reply/commands-tts.ts
@ -29,6 +29,10 @@ type ParsedTtsCommand = {
  args: string;
 };

+/** Element type of the `attempts` array on the value returned by `getLastTtsAttempt`. */
+type TtsAttemptDetail = NonNullable<
+  NonNullable<ReturnType<typeof getLastTtsAttempt>>["attempts"]
+>[number];
+
 function parseTtsCommand(normalized: string): ParsedTtsCommand | null {
  // Accept `/tts` and `/tts <action> [args]` as a single control surface.
  if (normalized === "/tts") {
@ -45,6 +49,19 @@ function parseTtsCommand(normalized: string): ParsedTtsCommand | null {
  return { action: action.toLowerCase(), args: tail.join(" ").trim() };
 }

+/**
+ * Renders per-provider attempt records as one comma-separated line, e.g.
+ * `elevenlabs:failed(provider_error) 65ms, microsoft:success(ok) 175ms`.
+ *
+ * @param attempts - Attempt records to render; may be missing or empty.
+ * @returns The rendered line, or `undefined` when there is nothing to render.
+ */
+function formatAttemptDetails(attempts: TtsAttemptDetail[] | undefined): string | undefined {
+  if (!attempts?.length) {
+    return undefined;
+  }
+  const renderAttempt = ({ provider, outcome, reasonCode, latencyMs }: TtsAttemptDetail): string => {
+    // The "success" reason code is displayed as "ok"; every other code is shown verbatim.
+    const reasonLabel = reasonCode === "success" ? "ok" : reasonCode;
+    let entry = `${provider}:${outcome}(${reasonLabel})`;
+    // The latency suffix is added only for finite numeric values.
+    if (Number.isFinite(latencyMs)) {
+      entry += ` ${latencyMs}ms`;
+    }
+    return entry;
+  };
+  return attempts.map((attempt) => renderAttempt(attempt)).join(", ");
+}
+
 function ttsUsage(): ReplyPayload {
  // Keep usage in one place so help/validation stays consistent.
  return {
@ -137,6 +154,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
        provider: result.provider,
        fallbackFrom: result.fallbackFrom,
        attemptedProviders: result.attemptedProviders,
+        attempts: result.attempts,
        latencyMs: result.latencyMs,
      });
      const payload: ReplyPayload = {
@ -153,6 +171,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
      textLength: args.length,
      summarized: false,
      attemptedProviders: result.attemptedProviders,
+      attempts: result.attempts,
      error: result.error,
      latencyMs: Date.now() - start,
    });
@ -294,12 +313,20 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
        if (last.attemptedProviders && last.attemptedProviders.length > 1) {
          lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
        }
+        const details = formatAttemptDetails(last.attempts);
+        if (details) {
+          lines.push(`Attempt details: ${details}`);
+        }
        lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
      } else if (last.error) {
        lines.push(`Error: ${last.error}`);
        if (last.attemptedProviders && last.attemptedProviders.length > 0) {
          lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
        }
+        const details = formatAttemptDetails(last.attempts);
+        if (details) {
+          lines.push(`Attempt details: ${details}`);
+        }
      }
    }
    return { shouldContinue: false, reply: { text: lines.join("\n") } };
--- a/src/plugin-sdk/speech.ts
+++ b/src/plugin-sdk/speech.ts
@ -38,3 +38,9 @@ export {
  normalizeSpeechProviderId,
 } from "../tts/provider-registry.js";
 export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js";
+export {
+  asObject,
+  readResponseTextLimited,
+  trimToUndefined,
+  truncateErrorDetail,
+} from "../tts/provider-error-utils.js";
--- a/src/plugins/contracts/tts.contract.test.ts
+++ b/src/plugins/contracts/tts.contract.test.ts
@ -680,6 +680,182 @@ describe("tts", () => {
    });
  });

+  describe("fallback readiness errors", () => {
+    it("continues synthesize fallback when primary readiness checks throw", async () => { // primary isConfigured() throws -> the second provider is expected to serve the request
+      const throwingPrimary: SpeechProviderPlugin = {
+        id: "openai",
+        label: "OpenAI",
+        autoSelectOrder: 10,
+        resolveConfig: () => ({}),
+        isConfigured: () => {
+          throw new Error("Authorization: Bearer sk-readiness-throw-token-1234567890\nboom"); // NOTE(review): the token-like text is not asserted on in this test — presumably relevant to error sanitization elsewhere; confirm
+        },
+        synthesize: async () => {
+          throw new Error("unexpected synthesize call"); // not expected to run; the message flags an unintended call
+        },
+      };
+      const fallback: SpeechProviderPlugin = {
+        id: "microsoft",
+        label: "Microsoft",
+        autoSelectOrder: 20,
+        resolveConfig: () => ({}),
+        isConfigured: () => true,
+        synthesize: async () => ({
+          audioBuffer: createAudioBuffer(2),
+          outputFormat: "mp3",
+          fileExtension: ".mp3",
+          voiceCompatible: true,
+        }),
+      };
+      const registry = createEmptyPluginRegistry();
+      registry.speechProviders = [
+        { pluginId: "openai", provider: throwingPrimary, source: "test" },
+        { pluginId: "microsoft", provider: fallback, source: "test" },
+      ];
+      const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
+      setActivePluginRegistry(registry, cacheKey); // install the two stub providers as the active registry
+
+      const result = await tts.synthesizeSpeech({
+        text: "hello fallback",
+        cfg: {
+          messages: {
+            tts: {
+              provider: "openai", // request the throwing provider as primary
+            },
+          },
+        },
+      });
+
+      expect(result.success).toBe(true);
+      if (!result.success) { // guard; presumably also narrows the result type for the fields read below
+        throw new Error("expected fallback synthesis success");
+      }
+      expect(result.provider).toBe("microsoft");
+      expect(result.fallbackFrom).toBe("openai");
+      expect(result.attemptedProviders).toEqual(["openai", "microsoft"]);
+      expect(result.attempts?.[0]).toMatchObject({ // first attempt: the throwing primary
+        provider: "openai",
+        outcome: "failed",
+        reasonCode: "provider_error",
+      });
+      expect(result.attempts?.[1]).toMatchObject({ // second attempt: the fallback that returned audio
+        provider: "microsoft",
+        outcome: "success",
+        reasonCode: "success",
+      });
+    });
+
+    it("continues telephony fallback when primary readiness checks throw", async () => { // telephony variant: primary isConfigured() throws -> the second provider is expected to serve the request
+      const throwingPrimary: SpeechProviderPlugin = {
+        id: "primary-throws",
+        label: "PrimaryThrows",
+        autoSelectOrder: 10,
+        resolveConfig: () => ({}),
+        isConfigured: () => {
+          throw new Error("Authorization: Bearer sk-telephony-throw-token-1234567890\tboom"); // NOTE(review): the token-like text is not asserted on in this test — presumably relevant to error sanitization elsewhere; confirm
+        },
+        synthesize: async () => {
+          throw new Error("unexpected synthesize call"); // not expected to run; the message flags an unintended call
+        },
+      };
+      const fallback: SpeechProviderPlugin = {
+        id: "microsoft",
+        label: "Microsoft",
+        autoSelectOrder: 20,
+        resolveConfig: () => ({}),
+        isConfigured: () => true,
+        synthesize: async () => ({
+          audioBuffer: createAudioBuffer(2),
+          outputFormat: "mp3",
+          fileExtension: ".mp3",
+          voiceCompatible: true,
+        }),
+        synthesizeTelephony: async () => ({ // only the fallback stub defines a telephony synthesizer
+          audioBuffer: createAudioBuffer(2),
+          outputFormat: "mp3",
+          sampleRate: 24000,
+        }),
+      };
+      const registry = createEmptyPluginRegistry();
+      registry.speechProviders = [
+        { pluginId: "primary-throws", provider: throwingPrimary, source: "test" },
+        { pluginId: "microsoft", provider: fallback, source: "test" },
+      ];
+      const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
+      setActivePluginRegistry(registry, cacheKey); // install the two stub providers as the active registry
+
+      const result = await tts.textToSpeechTelephony({
+        text: "hello telephony fallback",
+        cfg: {
+          messages: {
+            tts: {
+              provider: "primary-throws", // request the throwing provider as primary
+            },
+          },
+        },
+      });
+
+      expect(result.success).toBe(true);
+      if (!result.success) { // guard; presumably also narrows the result type for the fields read below
+        throw new Error("expected telephony fallback success");
+      }
+      expect(result.provider).toBe("microsoft");
+      expect(result.fallbackFrom).toBe("primary-throws");
+      expect(result.attemptedProviders).toEqual(["primary-throws", "microsoft"]);
+      expect(result.attempts?.[0]).toMatchObject({ // first attempt: the throwing primary
+        provider: "primary-throws",
+        outcome: "failed",
+        reasonCode: "provider_error",
+      });
+      expect(result.attempts?.[1]).toMatchObject({ // second attempt: the fallback that returned audio
+        provider: "microsoft",
+        outcome: "success",
+        reasonCode: "success",
+      });
+    });
+
+    it("does not double-prefix textToSpeech failure messages", async () => { // regression: the "TTS conversion failed:" prefix must appear exactly once
+      const failingProvider: SpeechProviderPlugin = {
+        id: "openai",
+        label: "OpenAI",
+        autoSelectOrder: 10,
+        resolveConfig: () => ({}),
+        isConfigured: () => true,
+        synthesize: async () => {
+          throw new Error("provider failed"); // raw provider message expected at the tail of the final error
+        },
+      };
+      const registry = createEmptyPluginRegistry();
+      registry.speechProviders = [
+        { pluginId: "openai", provider: failingProvider, source: "test" },
+      ];
+      const { cacheKey } = pluginLoaderTesting.resolvePluginLoadCacheContext({ config: {} });
+      setActivePluginRegistry(registry, cacheKey); // the failing stub is the only registered provider
+
+      const result = await tts.textToSpeech({
+        text: "hello",
+        cfg: {
+          messages: {
+            tts: {
+              provider: "openai",
+            },
+          },
+        },
+        disableFallback: true,
+      });
+
+      expect(result.success).toBe(false);
+      if (result.success) { // guard; presumably also narrows the result type so `error` can be read below
+        throw new Error("expected synthesis failure");
+      }
+      expect(result.error).toBeDefined();
+      const errorMessage = result.error ?? "";
+      expect(errorMessage).toBe("TTS conversion failed: openai: provider failed"); // exact shape: prefix, provider id, provider message
+      expect(errorMessage).not.toContain("TTS conversion failed: TTS conversion failed:");
+      expect(errorMessage.match(/TTS conversion failed:/g)).toHaveLength(1); // counts prefix occurrences
+    });
+  });
+
  describe("resolveTtsConfig – openai.baseUrl", () => {
    const baseCfg: OpenClawConfig = {
      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
--- a/src/tts/provider-error-utils.ts
+++ b/src/tts/provider-error-utils.ts
@ -0,0 +1,62 @@
+/**
+ * Normalizes an arbitrary value to a trimmed, non-empty string.
+ *
+ * @param value - Any value; only strings are considered.
+ * @returns The trimmed string, or `undefined` for non-strings and for strings
+ *   that are empty once surrounding whitespace is removed.
+ */
+export function trimToUndefined(value: unknown): string | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const trimmed = value.trim();
+  return trimmed.length > 0 ? trimmed : undefined;
+}
+
+/**
+ * Views a value as a string-keyed record when it is a non-null, non-array object.
+ *
+ * @param value - Any value.
+ * @returns The same reference typed as `Record<string, unknown>`, or `undefined`
+ *   for `null`, arrays, and anything whose `typeof` is not `"object"`.
+ */
+export function asObject(value: unknown): Record<string, unknown> | undefined {
+  if (value === null || typeof value !== "object" || Array.isArray(value)) {
+    return undefined;
+  }
+  return value as Record<string, unknown>;
+}
+
+/**
+ * Shortens an error detail string to at most `limit` UTF-16 code units,
+ * ending a shortened result with a single `…`.
+ *
+ * @param detail - Text to shorten.
+ * @param limit - Maximum length of the result, ellipsis included (default 220).
+ * @returns `detail` unchanged when it already fits; otherwise a prefix followed
+ *   by `…`, or an empty string when `limit` leaves no room even for the ellipsis.
+ */
+export function truncateErrorDetail(detail: string, limit = 220): string {
+  if (detail.length <= limit) {
+    return detail;
+  }
+  // A limit below 1 (or NaN) cannot hold the ellipsis. Without this guard a
+  // negative slice end would return nearly the whole string.
+  if (!(limit >= 1)) {
+    return "";
+  }
+  let head = detail.slice(0, limit - 1);
+  // Do not leave a lone high surrogate in front of the ellipsis when the cut
+  // lands in the middle of a surrogate pair.
+  const lastUnit = head.charCodeAt(head.length - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    head = head.slice(0, -1);
+  }
+  return `${head}…`;
+}
+
+/**
+ * Reads at most `limitBytes` bytes of a response body and decodes them with
+ * `TextDecoder`'s default encoding (UTF-8).
+ *
+ * The body stream is cancelled once the limit is reached, so the rest of the
+ * body is not read by this function.
+ *
+ * @param response - Response whose body stream is consumed.
+ * @param limitBytes - Maximum number of body bytes to read (default 16 KiB).
+ *   Fractional values are floored; zero, negative, and NaN values read nothing.
+ * @returns The decoded text, or `""` when the limit allows no bytes or the
+ *   response has no body stream.
+ */
+export async function readResponseTextLimited(
+  response: Response,
+  limitBytes = 16 * 1024,
+): Promise<string> {
+  // Floor so the byte count can actually reach the limit. The negated
+  // comparison also rejects NaN, which would otherwise disable the bound.
+  const maxBytes = Math.floor(limitBytes);
+  if (!(maxBytes > 0)) {
+    return "";
+  }
+  const reader = response.body?.getReader();
+  if (!reader) {
+    return "";
+  }
+
+  const decoder = new TextDecoder();
+  let total = 0;
+  let text = "";
+  let reachedLimit = false;
+
+  try {
+    while (true) {
+      const { value, done } = await reader.read();
+      if (done) {
+        break;
+      }
+      if (!value || value.byteLength === 0) {
+        continue;
+      }
+      // total < maxBytes holds here, so at least one more byte is accepted.
+      const remaining = maxBytes - total;
+      const chunk = value.byteLength > remaining ? value.subarray(0, remaining) : value;
+      total += chunk.byteLength;
+      text += decoder.decode(chunk, { stream: true });
+      if (total >= maxBytes) {
+        reachedLimit = true;
+        break;
+      }
+    }
+    // Flush only after a complete read. After truncation the decoder may hold
+    // the first bytes of a split multi-byte sequence, and flushing would
+    // append a spurious U+FFFD.
+    if (!reachedLimit) {
+      text += decoder.decode();
+    }
+  } finally {
+    if (reachedLimit) {
+      // Best-effort: stop the remaining body; a cancel failure is ignored.
+      await reader.cancel().catch(() => {});
+    }
+  }
+
+  return text;
+}