refactor: require legacy config migration on read

2026-03-26 23:22:35 +00:00 · 2026-03-26 23:22:35 +00:00 · 01bcbcf8d5
parent cad83db8b2
commit 01bcbcf8d5
18 changed files with 276 additions and 359 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
 - Agents/compaction: surface safeguard-specific cancel reasons and relabel benign manual `/compact` no-op cases as skipped instead of failed. (#51072) Thanks @afurm.
 - Plugins/CLI backends: move bundled Claude CLI, Codex CLI, and Gemini CLI inference defaults onto the plugin surface, add bundled Gemini CLI backend support, and replace `gateway run --claude-cli-logs` with generic `--cli-backend-logs` while keeping the old flag as a compatibility alias.
 - Plugins/startup: auto-load bundled provider and CLI-backend plugins from explicit config refs, so bundled Claude CLI, Codex CLI, and Gemini CLI message-provider setups no longer need manual `plugins.allow` entries.
+- Config/TTS: auto-migrate legacy speech config on normal reads and secret resolution, keep legacy diagnostics for Doctor, and remove regular-mode runtime fallback for old bundled `tts.<provider>` API-key shapes.

 ### Fixes

--- a/docs/tools/tts.md
+++ b/docs/tools/tts.md
@@ -15,7 +15,7 @@ It works anywhere OpenClaw can send audio.
 ## Supported services

 - **ElevenLabs** (primary or fallback provider)
-- **Microsoft** (primary or fallback provider; current bundled implementation uses `node-edge-tts`, default when no API keys)
+- **Microsoft** (primary or fallback provider; current bundled implementation uses `node-edge-tts`)
 - **OpenAI** (primary or fallback provider; also used for summaries)

 ### Microsoft speech notes
@@ -38,9 +38,7 @@ If you want OpenAI or ElevenLabs:
 - `ELEVENLABS_API_KEY` (or `XI_API_KEY`)
 - `OPENAI_API_KEY`

-Microsoft speech does **not** require an API key. If no API keys are found,
-OpenClaw defaults to Microsoft (unless disabled via
-`messages.tts.microsoft.enabled=false` or `messages.tts.edge.enabled=false`).
+Microsoft speech does **not** require an API key.

 If multiple providers are configured, the selected provider is used first and the others are fallback options.
 Auto-summary uses the configured `summaryModel` (or `agents.defaults.model.primary`),
@@ -60,8 +58,8 @@ so that provider must also be authenticated if you enable summaries.
 No. Auto‑TTS is **off** by default. Enable it in config with
 `messages.tts.auto` or per session with `/tts always` (alias: `/tts on`).

-Microsoft speech **is** enabled by default once TTS is on, and is used automatically
-when no OpenAI or ElevenLabs API keys are available.
+When `messages.tts.provider` is unset, OpenClaw picks the first configured
+speech provider in registry auto-select order.

 ## Config

@@ -93,26 +91,28 @@ Full schema is in [Gateway configuration](/gateway/configuration).
      modelOverrides: {
        enabled: true,
      },
-      openai: {
-        apiKey: "openai_api_key",
-        baseUrl: "https://api.openai.com/v1",
-        model: "gpt-4o-mini-tts",
-        voice: "alloy",
-      },
-      elevenlabs: {
-        apiKey: "elevenlabs_api_key",
-        baseUrl: "https://api.elevenlabs.io",
-        voiceId: "voice_id",
-        modelId: "eleven_multilingual_v2",
-        seed: 42,
-        applyTextNormalization: "auto",
-        languageCode: "en",
-        voiceSettings: {
-          stability: 0.5,
-          similarityBoost: 0.75,
-          style: 0.0,
-          useSpeakerBoost: true,
-          speed: 1.0,
+      providers: {
+        openai: {
+          apiKey: "openai_api_key",
+          baseUrl: "https://api.openai.com/v1",
+          model: "gpt-4o-mini-tts",
+          voice: "alloy",
+        },
+        elevenlabs: {
+          apiKey: "elevenlabs_api_key",
+          baseUrl: "https://api.elevenlabs.io",
+          voiceId: "voice_id",
+          modelId: "eleven_multilingual_v2",
+          seed: 42,
+          applyTextNormalization: "auto",
+          languageCode: "en",
+          voiceSettings: {
+            stability: 0.5,
+            similarityBoost: 0.75,
+            style: 0.0,
+            useSpeakerBoost: true,
+            speed: 1.0,
+          },
        },
      },
    },
@@ -128,13 +128,15 @@ Full schema is in [Gateway configuration](/gateway/configuration).
    tts: {
      auto: "always",
      provider: "microsoft",
-      microsoft: {
-        enabled: true,
-        voice: "en-US-MichelleNeural",
-        lang: "en-US",
-        outputFormat: "audio-24khz-48kbitrate-mono-mp3",
-        rate: "+10%",
-        pitch: "-5%",
+      providers: {
+        microsoft: {
+          enabled: true,
+          voice: "en-US-MichelleNeural",
+          lang: "en-US",
+          outputFormat: "audio-24khz-48kbitrate-mono-mp3",
+          rate: "+10%",
+          pitch: "-5%",
+        },
      },
    },
  },
@@ -147,8 +149,10 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 {
  messages: {
    tts: {
-      microsoft: {
-        enabled: false,
+      providers: {
+        microsoft: {
+          enabled: false,
+        },
      },
    },
  },
@@ -208,37 +212,37 @@ Then run:
 - `enabled`: legacy toggle (doctor migrates this to `auto`).
 - `mode`: `"final"` (default) or `"all"` (includes tool/block replies).
 - `provider`: speech provider id such as `"elevenlabs"`, `"microsoft"`, or `"openai"` (fallback is automatic).
-- If `provider` is **unset**, OpenClaw prefers `openai` (if key), then `elevenlabs` (if key),
-  otherwise `microsoft`.
+- If `provider` is **unset**, OpenClaw uses the first configured speech provider in registry auto-select order.
 - Legacy `provider: "edge"` still works and is normalized to `microsoft`.
 - `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`.
  - Accepts `provider/model` or a configured model alias.
 - `modelOverrides`: allow the model to emit TTS directives (on by default).
  - `allowProvider` defaults to `false` (provider switching is opt-in).
+- `providers.<id>`: provider-owned settings keyed by speech provider id.
 - `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
 - `timeoutMs`: request timeout (ms).
 - `prefsPath`: override the local prefs JSON path (provider/limit/summary).
 - `apiKey` values fall back to env vars (`ELEVENLABS_API_KEY`/`XI_API_KEY`, `OPENAI_API_KEY`).
-- `elevenlabs.baseUrl`: override ElevenLabs API base URL.
-- `openai.baseUrl`: override the OpenAI TTS endpoint.
-  - Resolution order: `messages.tts.openai.baseUrl` -> `OPENAI_TTS_BASE_URL` -> `https://api.openai.com/v1`
+- `providers.elevenlabs.baseUrl`: override ElevenLabs API base URL.
+- `providers.openai.baseUrl`: override the OpenAI TTS endpoint.
+  - Resolution order: `messages.tts.providers.openai.baseUrl` -> `OPENAI_TTS_BASE_URL` -> `https://api.openai.com/v1`
  - Non-default values are treated as OpenAI-compatible TTS endpoints, so custom model and voice names are accepted.
-- `elevenlabs.voiceSettings`:
+- `providers.elevenlabs.voiceSettings`:
  - `stability`, `similarityBoost`, `style`: `0..1`
  - `useSpeakerBoost`: `true|false`
  - `speed`: `0.5..2.0` (1.0 = normal)
-- `elevenlabs.applyTextNormalization`: `auto|on|off`
-- `elevenlabs.languageCode`: 2-letter ISO 639-1 (e.g. `en`, `de`)
-- `elevenlabs.seed`: integer `0..4294967295` (best-effort determinism)
-- `microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key).
-- `microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`).
-- `microsoft.lang`: language code (e.g. `en-US`).
-- `microsoft.outputFormat`: Microsoft output format (e.g. `audio-24khz-48kbitrate-mono-mp3`).
+- `providers.elevenlabs.applyTextNormalization`: `auto|on|off`
+- `providers.elevenlabs.languageCode`: 2-letter ISO 639-1 (e.g. `en`, `de`)
+- `providers.elevenlabs.seed`: integer `0..4294967295` (best-effort determinism)
+- `providers.microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key).
+- `providers.microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`).
+- `providers.microsoft.lang`: language code (e.g. `en-US`).
+- `providers.microsoft.outputFormat`: Microsoft output format (e.g. `audio-24khz-48kbitrate-mono-mp3`).
  - See Microsoft Speech output formats for valid values; not all formats are supported by the bundled Edge-backed transport.
-- `microsoft.rate` / `microsoft.pitch` / `microsoft.volume`: percent strings (e.g. `+10%`, `-5%`).
-- `microsoft.saveSubtitles`: write JSON subtitles alongside the audio file.
-- `microsoft.proxy`: proxy URL for Microsoft speech requests.
-- `microsoft.timeoutMs`: request timeout override (ms).
+- `providers.microsoft.rate` / `providers.microsoft.pitch` / `providers.microsoft.volume`: percent strings (e.g. `+10%`, `-5%`).
+- `providers.microsoft.saveSubtitles`: write JSON subtitles alongside the audio file.
+- `providers.microsoft.proxy`: proxy URL for Microsoft speech requests.
+- `providers.microsoft.timeoutMs`: request timeout override (ms).
 - `edge.*`: legacy alias for the same Microsoft settings.

 ## Model-driven overrides (default on)
--- a/docs/tts.md
+++ b/docs/tts.md
@@ -15,7 +15,7 @@ It works anywhere OpenClaw can send audio.
 ## Supported services

 - **ElevenLabs** (primary or fallback provider)
-- **Microsoft** (primary or fallback provider; current bundled implementation uses `node-edge-tts`, default when no API keys)
+- **Microsoft** (primary or fallback provider; current bundled implementation uses `node-edge-tts`)
 - **OpenAI** (primary or fallback provider; also used for summaries)

 ### Microsoft speech notes
@@ -38,9 +38,7 @@ If you want OpenAI or ElevenLabs:
 - `ELEVENLABS_API_KEY` (or `XI_API_KEY`)
 - `OPENAI_API_KEY`

-Microsoft speech does **not** require an API key. If no API keys are found,
-OpenClaw defaults to Microsoft (unless disabled via
-`messages.tts.microsoft.enabled=false` or `messages.tts.edge.enabled=false`).
+Microsoft speech does **not** require an API key.

 If multiple providers are configured, the selected provider is used first and the others are fallback options.
 Auto-summary uses the configured `summaryModel` (or `agents.defaults.model.primary`),
@@ -60,8 +58,8 @@ so that provider must also be authenticated if you enable summaries.
 No. Auto‑TTS is **off** by default. Enable it in config with
 `messages.tts.auto` or per session with `/tts always` (alias: `/tts on`).

-Microsoft speech **is** enabled by default once TTS is on, and is used automatically
-when no OpenAI or ElevenLabs API keys are available.
+When `messages.tts.provider` is unset, OpenClaw picks the first configured
+speech provider in registry auto-select order.

 ## Config

@@ -93,26 +91,28 @@ Full schema is in [Gateway configuration](/gateway/configuration).
      modelOverrides: {
        enabled: true,
      },
-      openai: {
-        apiKey: "openai_api_key",
-        baseUrl: "https://api.openai.com/v1",
-        model: "gpt-4o-mini-tts",
-        voice: "alloy",
-      },
-      elevenlabs: {
-        apiKey: "elevenlabs_api_key",
-        baseUrl: "https://api.elevenlabs.io",
-        voiceId: "voice_id",
-        modelId: "eleven_multilingual_v2",
-        seed: 42,
-        applyTextNormalization: "auto",
-        languageCode: "en",
-        voiceSettings: {
-          stability: 0.5,
-          similarityBoost: 0.75,
-          style: 0.0,
-          useSpeakerBoost: true,
-          speed: 1.0,
+      providers: {
+        openai: {
+          apiKey: "openai_api_key",
+          baseUrl: "https://api.openai.com/v1",
+          model: "gpt-4o-mini-tts",
+          voice: "alloy",
+        },
+        elevenlabs: {
+          apiKey: "elevenlabs_api_key",
+          baseUrl: "https://api.elevenlabs.io",
+          voiceId: "voice_id",
+          modelId: "eleven_multilingual_v2",
+          seed: 42,
+          applyTextNormalization: "auto",
+          languageCode: "en",
+          voiceSettings: {
+            stability: 0.5,
+            similarityBoost: 0.75,
+            style: 0.0,
+            useSpeakerBoost: true,
+            speed: 1.0,
+          },
        },
      },
    },
@@ -128,13 +128,15 @@ Full schema is in [Gateway configuration](/gateway/configuration).
    tts: {
      auto: "always",
      provider: "microsoft",
-      microsoft: {
-        enabled: true,
-        voice: "en-US-MichelleNeural",
-        lang: "en-US",
-        outputFormat: "audio-24khz-48kbitrate-mono-mp3",
-        rate: "+10%",
-        pitch: "-5%",
+      providers: {
+        microsoft: {
+          enabled: true,
+          voice: "en-US-MichelleNeural",
+          lang: "en-US",
+          outputFormat: "audio-24khz-48kbitrate-mono-mp3",
+          rate: "+10%",
+          pitch: "-5%",
+        },
      },
    },
  },
@@ -147,8 +149,10 @@ Full schema is in [Gateway configuration](/gateway/configuration).
 {
  messages: {
    tts: {
-      microsoft: {
-        enabled: false,
+      providers: {
+        microsoft: {
+          enabled: false,
+        },
      },
    },
  },
@@ -208,37 +212,37 @@ Then run:
 - `enabled`: legacy toggle (doctor migrates this to `auto`).
 - `mode`: `"final"` (default) or `"all"` (includes tool/block replies).
 - `provider`: speech provider id such as `"elevenlabs"`, `"microsoft"`, or `"openai"` (fallback is automatic).
-- If `provider` is **unset**, OpenClaw prefers `openai` (if key), then `elevenlabs` (if key),
-  otherwise `microsoft`.
+- If `provider` is **unset**, OpenClaw uses the first configured speech provider in registry auto-select order.
 - Legacy `provider: "edge"` still works and is normalized to `microsoft`.
 - `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`.
  - Accepts `provider/model` or a configured model alias.
 - `modelOverrides`: allow the model to emit TTS directives (on by default).
  - `allowProvider` defaults to `false` (provider switching is opt-in).
+- `providers.<id>`: provider-owned settings keyed by speech provider id.
 - `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
 - `timeoutMs`: request timeout (ms).
 - `prefsPath`: override the local prefs JSON path (provider/limit/summary).
 - `apiKey` values fall back to env vars (`ELEVENLABS_API_KEY`/`XI_API_KEY`, `OPENAI_API_KEY`).
-- `elevenlabs.baseUrl`: override ElevenLabs API base URL.
-- `openai.baseUrl`: override the OpenAI TTS endpoint.
-  - Resolution order: `messages.tts.openai.baseUrl` -> `OPENAI_TTS_BASE_URL` -> `https://api.openai.com/v1`
+- `providers.elevenlabs.baseUrl`: override ElevenLabs API base URL.
+- `providers.openai.baseUrl`: override the OpenAI TTS endpoint.
+  - Resolution order: `messages.tts.providers.openai.baseUrl` -> `OPENAI_TTS_BASE_URL` -> `https://api.openai.com/v1`
  - Non-default values are treated as OpenAI-compatible TTS endpoints, so custom model and voice names are accepted.
-- `elevenlabs.voiceSettings`:
+- `providers.elevenlabs.voiceSettings`:
  - `stability`, `similarityBoost`, `style`: `0..1`
  - `useSpeakerBoost`: `true|false`
  - `speed`: `0.5..2.0` (1.0 = normal)
-- `elevenlabs.applyTextNormalization`: `auto|on|off`
-- `elevenlabs.languageCode`: 2-letter ISO 639-1 (e.g. `en`, `de`)
-- `elevenlabs.seed`: integer `0..4294967295` (best-effort determinism)
-- `microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key).
-- `microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`).
-- `microsoft.lang`: language code (e.g. `en-US`).
-- `microsoft.outputFormat`: Microsoft output format (e.g. `audio-24khz-48kbitrate-mono-mp3`).
+- `providers.elevenlabs.applyTextNormalization`: `auto|on|off`
+- `providers.elevenlabs.languageCode`: 2-letter ISO 639-1 (e.g. `en`, `de`)
+- `providers.elevenlabs.seed`: integer `0..4294967295` (best-effort determinism)
+- `providers.microsoft.enabled`: allow Microsoft speech usage (default `true`; no API key).
+- `providers.microsoft.voice`: Microsoft neural voice name (e.g. `en-US-MichelleNeural`).
+- `providers.microsoft.lang`: language code (e.g. `en-US`).
+- `providers.microsoft.outputFormat`: Microsoft output format (e.g. `audio-24khz-48kbitrate-mono-mp3`).
  - See Microsoft Speech output formats for valid values; not all formats are supported by the bundled Edge-backed transport.
-- `microsoft.rate` / `microsoft.pitch` / `microsoft.volume`: percent strings (e.g. `+10%`, `-5%`).
-- `microsoft.saveSubtitles`: write JSON subtitles alongside the audio file.
-- `microsoft.proxy`: proxy URL for Microsoft speech requests.
-- `microsoft.timeoutMs`: request timeout override (ms).
+- `providers.microsoft.rate` / `providers.microsoft.pitch` / `providers.microsoft.volume`: percent strings (e.g. `+10%`, `-5%`).
+- `providers.microsoft.saveSubtitles`: write JSON subtitles alongside the audio file.
+- `providers.microsoft.proxy`: proxy URL for Microsoft speech requests.
+- `providers.microsoft.timeoutMs`: request timeout override (ms).
 - `edge.*`: legacy alias for the same Microsoft settings.

 ## Model-driven overrides (default on)
--- a/src/commands/doctor-legacy-config.migrations.test.ts
+++ b/src/commands/doctor-legacy-config.migrations.test.ts
@@ -523,7 +523,6 @@ describe("normalizeCompatibilityConfigValues", () => {
    });

    expect(res.config.talk).toEqual({
-      provider: "elevenlabs",
      providers: {
        elevenlabs: {
          voiceId: "voice-123",
@@ -545,9 +544,7 @@ describe("normalizeCompatibilityConfigValues", () => {
      interruptOnSpeech: false,
      silenceTimeoutMs: 1500,
    });
-    expect(res.changes).toEqual([
-      "Moved legacy talk flat fields → talk.provider/talk.providers.elevenlabs.",
-    ]);
+    expect(res.changes).toEqual(["Moved legacy talk flat fields → talk.providers.elevenlabs."]);
  });

  it("normalizes talk provider ids without overriding explicit provider config", () => {
--- a/src/commands/doctor-legacy-config.ts
+++ b/src/commands/doctor-legacy-config.ts
@@ -11,7 +11,7 @@ import {
  resolveTelegramPreviewStreamMode,
 } from "../config/discord-preview-streaming.js";
 import { migrateLegacyWebSearchConfig } from "../config/legacy-web-search.js";
-import { DEFAULT_TALK_PROVIDER, normalizeTalkSection } from "../config/talk.js";
+import { LEGACY_TALK_PROVIDER_ID, normalizeTalkSection } from "../config/talk.js";
 import { DEFAULT_GOOGLE_API_BASE_URL } from "../infra/google-api-base-url.js";
 import { DEFAULT_ACCOUNT_ID } from "../routing/session-key.js";

@@ -651,9 +651,7 @@ export function normalizeCompatibilityConfigValues(cfg: OpenClawConfig): {
      return;
    }

-    changes.push(
-      `Moved legacy talk flat fields → talk.provider/talk.providers.${DEFAULT_TALK_PROVIDER}.`,
-    );
+    changes.push(`Moved legacy talk flat fields → talk.providers.${LEGACY_TALK_PROVIDER_ID}.`);
  };

  const normalizeLegacyCrossContextMessageConfig = () => {
--- a/src/config/config-misc.test.ts
+++ b/src/config/config-misc.test.ts
@@ -482,7 +482,7 @@ describe("config strict validation", () => {

      const snap = await readConfigFileSnapshot();

-      expect(snap.valid).toBe(false);
+      expect(snap.valid).toBe(true);
      expect(snap.legacyIssues).not.toHaveLength(0);
    });
  });
@@ -517,7 +517,7 @@ describe("config strict validation", () => {
      });

      const snap = await readConfigFileSnapshot();
-      expect(snap.valid).toBe(false);
+      expect(snap.valid).toBe(true);
      expect(snap.legacyIssues.some((issue) => issue.path === "gateway.bind")).toBe(true);
    });
  });
--- a/src/config/config.legacy-config-detection.accepts-imessage-dmpolicy.test.ts
+++ b/src/config/config.legacy-config-detection.accepts-imessage-dmpolicy.test.ts
@@ -68,7 +68,7 @@ function expectRoutingAllowFromLegacySnapshot(
  ctx: { snapshot: ConfigSnapshot; parsed: unknown },
  expectedAllowFrom: string[],
 ) {
-  expect(ctx.snapshot.valid).toBe(false);
+  expect(ctx.snapshot.valid).toBe(true);
  expect(ctx.snapshot.legacyIssues.some((issue) => issue.path === "routing.allowFrom")).toBe(true);
  const parsed = ctx.parsed as {
    routing?: { allowFrom?: string[] };
@@ -269,7 +269,7 @@ describe("legacy config detection", () => {
    await withSnapshotForConfig(
      { memorySearch: { provider: "local", fallback: "none" } },
      async (ctx) => {
-        expect(ctx.snapshot.valid).toBe(false);
+        expect(ctx.snapshot.valid).toBe(true);
        expect(ctx.snapshot.legacyIssues.some((issue) => issue.path === "memorySearch")).toBe(true);
      },
    );
@@ -278,14 +278,14 @@ describe("legacy config detection", () => {
    await withSnapshotForConfig(
      { heartbeat: { model: "anthropic/claude-3-5-haiku-20241022", every: "30m" } },
      async (ctx) => {
-        expect(ctx.snapshot.valid).toBe(false);
+        expect(ctx.snapshot.valid).toBe(true);
        expect(ctx.snapshot.legacyIssues.some((issue) => issue.path === "heartbeat")).toBe(true);
      },
    );
  });
  it("flags legacy provider sections in snapshot", async () => {
    await withSnapshotForConfig({ whatsapp: { allowFrom: ["+1555"] } }, async (ctx) => {
-      expect(ctx.snapshot.valid).toBe(false);
+      expect(ctx.snapshot.valid).toBe(true);
      expect(ctx.snapshot.legacyIssues.some((issue) => issue.path === "whatsapp")).toBe(true);

      const parsed = ctx.parsed as {
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@@ -3,7 +3,7 @@ import { normalizeProviderId, parseModelRef } from "../agents/model-selection.js
 import { DEFAULT_AGENT_MAX_CONCURRENT, DEFAULT_SUBAGENT_MAX_CONCURRENT } from "./agent-limits.js";
 import { resolveAgentModelPrimaryValue } from "./model-input.js";
 import {
-  DEFAULT_TALK_PROVIDER,
+  LEGACY_TALK_PROVIDER_ID,
  normalizeTalkConfig,
  resolveActiveTalkProviderConfig,
  resolveTalkApiKey,
@@ -204,7 +204,7 @@ export function applyTalkApiKey(config: OpenClawConfig): OpenClawConfig {

  const talk = normalized.talk;
  const active = resolveActiveTalkProviderConfig(talk);
-  if (active?.provider && active.provider !== DEFAULT_TALK_PROVIDER) {
+  if (!active || active.provider !== LEGACY_TALK_PROVIDER_ID) {
    return normalized;
  }

@@ -214,7 +214,7 @@ export function applyTalkApiKey(config: OpenClawConfig): OpenClawConfig {
    return normalized;
  }

-  const providerId = active?.provider ?? DEFAULT_TALK_PROVIDER;
+  const providerId = active.provider;
  const providers = { ...talk?.providers };
  const providerConfig = { ...providers[providerId], apiKey: resolved };
  providers[providerId] = providerConfig;
@@ -222,7 +222,6 @@ export function applyTalkApiKey(config: OpenClawConfig): OpenClawConfig {
  const nextTalk = {
    ...talk,
    apiKey: resolved,
-    provider: talk?.provider ?? providerId,
    providers,
  };

--- a/src/config/io.ts
+++ b/src/config/io.ts
@@ -41,6 +41,7 @@ import {
  readConfigIncludeFileWithGuards,
  resolveConfigIncludes,
 } from "./includes.js";
+import { migrateLegacyConfig } from "./legacy-migrate.js";
 import { findLegacyConfigIssues } from "./legacy.js";
 import { applyMergePatch } from "./merge-patch.js";
 import { normalizeExecSafeBinProfilesInConfig } from "./normalize-exec-safe-bin.js";
@@ -1185,6 +1186,11 @@ type ConfigReadResolution = {
  envWarnings: EnvSubstitutionWarning[];
 };

+type LegacyMigrationResolution = {
+  effectiveConfigRaw: unknown;
+  sourceLegacyIssues: LegacyConfigIssue[];
+};
+
 function resolveConfigIncludesForRead(
  parsed: unknown,
  configPath: string,
@@ -1225,6 +1231,21 @@ function resolveConfigForRead(
  };
 }

+function resolveLegacyConfigForRead(
+  resolvedConfigRaw: unknown,
+  sourceRaw: unknown,
+): LegacyMigrationResolution {
+  const sourceLegacyIssues = findLegacyConfigIssues(resolvedConfigRaw, sourceRaw);
+  if (sourceLegacyIssues.length === 0) {
+    return { effectiveConfigRaw: resolvedConfigRaw, sourceLegacyIssues };
+  }
+  const migrated = migrateLegacyConfig(resolvedConfigRaw);
+  return {
+    effectiveConfigRaw: migrated.config ?? resolvedConfigRaw,
+    sourceLegacyIssues,
+  };
+}
+
 type ReadConfigFileSnapshotInternalResult = {
  snapshot: ConfigFileSnapshot;
  envSnapshotForRestore?: Record<string, string | undefined>;
@@ -1275,13 +1296,15 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
        deps.env,
      );
      const resolvedConfig = readResolution.resolvedConfigRaw;
+      const legacyResolution = resolveLegacyConfigForRead(resolvedConfig, parsed);
+      const effectiveConfigRaw = legacyResolution.effectiveConfigRaw;
      for (const w of readResolution.envWarnings) {
        deps.logger.warn(
          `Config (${configPath}): missing env var "${w.varName}" at ${w.configPath} — feature using this value will be unavailable`,
        );
      }
-      warnOnConfigMiskeys(resolvedConfig, deps.logger);
-      if (typeof resolvedConfig !== "object" || resolvedConfig === null) {
+      warnOnConfigMiskeys(effectiveConfigRaw, deps.logger);
+      if (typeof effectiveConfigRaw !== "object" || effectiveConfigRaw === null) {
        observeLoadConfigSnapshot({
          path: configPath,
          exists: true,
@@ -1293,31 +1316,31 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
          hash,
          issues: [],
          warnings: [],
-          legacyIssues: [],
+          legacyIssues: legacyResolution.sourceLegacyIssues,
        });
        return {};
      }
-      const preValidationDuplicates = findDuplicateAgentDirs(resolvedConfig as OpenClawConfig, {
+      const preValidationDuplicates = findDuplicateAgentDirs(effectiveConfigRaw as OpenClawConfig, {
        env: deps.env,
        homedir: deps.homedir,
      });
      if (preValidationDuplicates.length > 0) {
        throw new DuplicateAgentDirError(preValidationDuplicates);
      }
-      const validated = validateConfigObjectWithPlugins(resolvedConfig, { env: deps.env });
+      const validated = validateConfigObjectWithPlugins(effectiveConfigRaw, { env: deps.env });
      if (!validated.ok) {
        observeLoadConfigSnapshot({
          path: configPath,
          exists: true,
          raw,
          parsed,
-          resolved: coerceConfig(resolvedConfig),
+          resolved: coerceConfig(effectiveConfigRaw),
          valid: false,
-          config: coerceConfig(resolvedConfig),
+          config: coerceConfig(effectiveConfigRaw),
          hash,
          issues: validated.issues,
          warnings: validated.warnings,
-          legacyIssues: findLegacyConfigIssues(resolvedConfig, parsed),
+          legacyIssues: legacyResolution.sourceLegacyIssues,
        });
        const details = validated.issues
          .map(
@@ -1362,13 +1385,13 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
        exists: true,
        raw,
        parsed,
-        resolved: coerceConfig(resolvedConfig),
+        resolved: coerceConfig(effectiveConfigRaw),
        valid: true,
        config: cfg,
        hash,
        issues: [],
        warnings: validated.warnings,
-        legacyIssues: findLegacyConfigIssues(resolvedConfig, parsed),
+        legacyIssues: legacyResolution.sourceLegacyIssues,
      });

      const duplicates = findDuplicateAgentDirs(cfg, {
@@ -1536,11 +1559,10 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
      }));

      const resolvedConfigRaw = readResolution.resolvedConfigRaw;
-      // Detect legacy keys on resolved config, but only mark source-literal legacy
-      // entries (for auto-migration) when they are present in the parsed source.
-      const legacyIssues = findLegacyConfigIssues(resolvedConfigRaw, parsedRes.parsed);
+      const legacyResolution = resolveLegacyConfigForRead(resolvedConfigRaw, parsedRes.parsed);
+      const effectiveConfigRaw = legacyResolution.effectiveConfigRaw;

-      const validated = validateConfigObjectWithPlugins(resolvedConfigRaw, { env: deps.env });
+      const validated = validateConfigObjectWithPlugins(effectiveConfigRaw, { env: deps.env });
      if (!validated.ok) {
        return await finalizeReadConfigSnapshotInternalResult(deps, {
          snapshot: {
@@ -1548,13 +1570,13 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
            exists: true,
            raw,
            parsed: parsedRes.parsed,
-            resolved: coerceConfig(resolvedConfigRaw),
+            resolved: coerceConfig(effectiveConfigRaw),
            valid: false,
-            config: coerceConfig(resolvedConfigRaw),
+            config: coerceConfig(effectiveConfigRaw),
            hash,
            issues: validated.issues,
            warnings: [...validated.warnings, ...envVarWarnings],
-            legacyIssues,
+            legacyIssues: legacyResolution.sourceLegacyIssues,
          },
        });
      }
@@ -1580,13 +1602,13 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
          parsed: parsedRes.parsed,
          // Use resolvedConfigRaw (after $include and ${ENV} substitution but BEFORE runtime defaults)
          // for config set/unset operations (issue #6070)
-          resolved: coerceConfig(resolvedConfigRaw),
+          resolved: coerceConfig(effectiveConfigRaw),
          valid: true,
          config: snapshotConfig,
          hash,
          issues: [],
          warnings: [...validated.warnings, ...envVarWarnings],
-          legacyIssues,
+          legacyIssues: legacyResolution.sourceLegacyIssues,
        },
        envSnapshotForRestore: readResolution.envSnapshotForRestore,
      });
--- a/src/config/talk.normalize.test.ts
+++ b/src/config/talk.normalize.test.ts
@@ -36,7 +36,6 @@ describe("talk normalization", () => {
    });

    expect(normalized).toEqual({
-      provider: "elevenlabs",
      providers: {
        elevenlabs: {
          voiceId: "voice-123",
@@ -149,7 +148,7 @@ describe("talk normalization", () => {
        async (configPath) => {
          const io = createConfigIO({ configPath });
          const snapshot = await io.readConfigFileSnapshot();
-          expect(snapshot.config.talk?.provider).toBe("elevenlabs");
+          expect(snapshot.config.talk?.provider).toBeUndefined();
          expect(snapshot.config.talk?.providers?.elevenlabs?.voiceId).toBe("voice-123");
          expect(snapshot.config.talk?.providers?.elevenlabs?.apiKey).toBe(elevenLabsApiKey);
          expect(snapshot.config.talk?.apiKey).toBe(elevenLabsApiKey);
--- a/src/config/talk.ts
+++ b/src/config/talk.ts
@@ -16,7 +16,7 @@ type TalkApiKeyDeps = {
  path?: typeof path;
 };

-export const DEFAULT_TALK_PROVIDER = "elevenlabs";
+export const LEGACY_TALK_PROVIDER_ID = "elevenlabs";

 function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -225,19 +225,13 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig
    }
    if (provider) {
      normalized.provider = provider;
-    } else if (providers) {
-      const ids = Object.keys(providers);
-      if (ids.length === 1) {
-        normalized.provider = ids[0];
-      }
    }
    return Object.keys(normalized).length > 0 ? normalized : undefined;
  }

  const legacyProviderConfig = legacyProviderConfigFromTalk(source);
  if (legacyProviderConfig) {
-    normalized.provider = DEFAULT_TALK_PROVIDER;
-    normalized.providers = { [DEFAULT_TALK_PROVIDER]: legacyProviderConfig };
+    normalized.providers = { [LEGACY_TALK_PROVIDER_ID]: legacyProviderConfig };
  }
  return Object.keys(normalized).length > 0 ? normalized : undefined;
 }
--- a/src/config/types.tts.ts
+++ b/src/config/types.tts.ts
@@ -27,22 +27,8 @@ export type TtsModelOverrideConfig = {

 export type TtsProviderConfigMap = Record<string, Record<string, unknown>>;

-export type TtsConfig = {
-  /** Auto-TTS mode (preferred). */
-  auto?: TtsAutoMode;
-  /** Legacy: enable auto-TTS when `auto` is not set. */
-  enabled?: boolean;
-  /** Apply TTS to final replies only or to all replies (tool/block/final). */
-  mode?: TtsMode;
-  /** Primary TTS provider (fallbacks are automatic). */
-  provider?: TtsProvider;
-  /** Optional model override for TTS auto-summary (provider/model or alias). */
-  summaryModel?: string;
-  /** Allow the model to override TTS parameters. */
-  modelOverrides?: TtsModelOverrideConfig;
-  /** Provider-specific TTS settings keyed by speech provider id. */
-  providers?: TtsProviderConfigMap;
-  /** ElevenLabs configuration. */
+export type LegacyTtsConfigCompat = {
+  /** Legacy ElevenLabs configuration. Prefer providers.elevenlabs. */
  elevenlabs?: {
    apiKey?: SecretInput;
    baseUrl?: string;
@ -59,7 +45,7 @@ export type TtsConfig = {
      speed?: number;
    };
  };
-  /** OpenAI configuration. */
+  /** Legacy OpenAI configuration. Prefer providers.openai. */
  openai?: {
    apiKey?: SecretInput;
    baseUrl?: string;
@ -70,7 +56,7 @@ export type TtsConfig = {
    /** System-level instructions for the TTS model (gpt-4o-mini-tts only). */
    instructions?: string;
  };
-  /** Legacy alias for Microsoft speech configuration. */
+  /** Legacy alias for Microsoft speech configuration. Prefer providers.microsoft. */
  edge?: {
    /** Explicitly allow Microsoft speech usage (no API key required). */
    enabled?: boolean;
@ -84,7 +70,7 @@ export type TtsConfig = {
    proxy?: string;
    timeoutMs?: number;
  };
-  /** Preferred alias for Microsoft speech configuration. */
+  /** Legacy Microsoft speech configuration. Prefer providers.microsoft. */
  microsoft?: {
    enabled?: boolean;
    voice?: string;
@ -97,6 +83,23 @@ export type TtsConfig = {
    proxy?: string;
    timeoutMs?: number;
  };
+};
+
+export type TtsConfig = LegacyTtsConfigCompat & {
+  /** Auto-TTS mode (preferred). */
+  auto?: TtsAutoMode;
+  /** Legacy: enable auto-TTS when `auto` is not set. */
+  enabled?: boolean;
+  /** Apply TTS to final replies only or to all replies (tool/block/final). */
+  mode?: TtsMode;
+  /** Primary TTS provider (fallbacks are automatic). */
+  provider?: TtsProvider;
+  /** Optional model override for TTS auto-summary (provider/model or alias). */
+  summaryModel?: string;
+  /** Allow the model to override TTS parameters. */
+  modelOverrides?: TtsModelOverrideConfig;
+  /** Provider-specific TTS settings keyed by speech provider id. */
+  providers?: TtsProviderConfigMap;
  /** Optional path for local TTS user preferences JSON. */
  prefsPath?: string;
  /** Hard cap for text sent to TTS (chars). */
--- a/src/secrets/runtime-config-collectors-tts.ts
+++ b/src/secrets/runtime-config-collectors-tts.ts
@ -54,25 +54,4 @@ export function collectTtsApiKeyAssignments(params: {
    }
    return;
  }
-
-  // Legacy compatibility until migrated configs have been rewritten on disk.
-  const legacyProviders = ["elevenlabs", "openai"] as const;
-  for (const providerId of legacyProviders) {
-    const providerConfig = params.tts[providerId];
-    if (!isRecord(providerConfig)) {
-      continue;
-    }
-    collectSecretInputAssignment({
-      value: providerConfig.apiKey,
-      path: `${params.pathPrefix}.${providerId}.apiKey`,
-      expected: "string",
-      defaults: params.defaults,
-      context: params.context,
-      active: params.active,
-      inactiveReason: params.inactiveReason,
-      apply: (value) => {
-        providerConfig.apiKey = value;
-      },
-    });
-  }
 }
--- a/src/secrets/runtime.test.ts
+++ b/src/secrets/runtime.test.ts
@ -1949,20 +1949,22 @@ describe("secrets runtime snapshot", () => {
      loadAuthStore: () => ({ version: 1, profiles: {} }),
    });

-    expect(snapshot.config.channels?.discord?.voice?.tts?.openai?.apiKey).toEqual({
+    expect(snapshot.config.channels?.discord?.voice?.tts?.providers?.openai?.apiKey).toEqual({
      source: "env",
      provider: "default",
      id: "MISSING_DISCORD_VOICE_TTS_OPENAI",
    });
-    expect(snapshot.config.channels?.discord?.accounts?.work?.voice?.tts?.openai?.apiKey).toEqual({
+    expect(
+      snapshot.config.channels?.discord?.accounts?.work?.voice?.tts?.providers?.openai?.apiKey,
+    ).toEqual({
      source: "env",
      provider: "default",
      id: "MISSING_DISCORD_WORK_VOICE_TTS_OPENAI",
    });
    expect(snapshot.warnings.map((warning) => warning.path)).toEqual(
      expect.arrayContaining([
-        "channels.discord.voice.tts.openai.apiKey",
-        "channels.discord.accounts.work.voice.tts.openai.apiKey",
+        "channels.discord.voice.tts.providers.openai.apiKey",
+        "channels.discord.accounts.work.voice.tts.providers.openai.apiKey",
      ]),
    );
  });
@ -1974,8 +1976,10 @@ describe("secrets runtime snapshot", () => {
          discord: {
            voice: {
              tts: {
-                openai: {
-                  apiKey: { source: "env", provider: "default", id: "DISCORD_BASE_TTS_OPENAI" },
+                providers: {
+                  openai: {
+                    apiKey: { source: "env", provider: "default", id: "DISCORD_BASE_TTS_OPENAI" },
+                  },
                },
              },
            },
@ -1990,11 +1994,13 @@ describe("secrets runtime snapshot", () => {
                enabled: true,
                voice: {
                  tts: {
-                    openai: {
-                      apiKey: {
-                        source: "env",
-                        provider: "default",
-                        id: "DISCORD_ENABLED_OVERRIDE_TTS_OPENAI",
+                    providers: {
+                      openai: {
+                        apiKey: {
+                          source: "env",
+                          provider: "default",
+                          id: "DISCORD_ENABLED_OVERRIDE_TTS_OPENAI",
+                        },
                      },
                    },
                  },
@ -2004,11 +2010,13 @@ describe("secrets runtime snapshot", () => {
                enabled: false,
                voice: {
                  tts: {
-                    openai: {
-                      apiKey: {
-                        source: "env",
-                        provider: "default",
-                        id: "DISCORD_DISABLED_OVERRIDE_TTS_OPENAI",
+                    providers: {
+                      openai: {
+                        apiKey: {
+                          source: "env",
+                          provider: "default",
+                          id: "DISCORD_DISABLED_OVERRIDE_TTS_OPENAI",
+                        },
                      },
                    },
                  },
@ -2034,13 +2042,17 @@ describe("secrets runtime snapshot", () => {
      loadAuthStore: () => ({ version: 1, profiles: {} }),
    });

-    expect(snapshot.config.channels?.discord?.voice?.tts?.openai?.apiKey).toBe("base-tts-openai");
+    expect(snapshot.config.channels?.discord?.voice?.tts?.providers?.openai?.apiKey).toBe(
+      "base-tts-openai",
+    );
    expect(snapshot.config.channels?.discord?.pluralkit?.token).toBe("base-pk-token");
    expect(
-      snapshot.config.channels?.discord?.accounts?.enabledOverride?.voice?.tts?.openai?.apiKey,
+      snapshot.config.channels?.discord?.accounts?.enabledOverride?.voice?.tts?.providers?.openai
+        ?.apiKey,
    ).toBe("enabled-override-tts-openai");
    expect(
-      snapshot.config.channels?.discord?.accounts?.disabledOverride?.voice?.tts?.openai?.apiKey,
+      snapshot.config.channels?.discord?.accounts?.disabledOverride?.voice?.tts?.providers?.openai
+        ?.apiKey,
    ).toEqual({
      source: "env",
      provider: "default",
@ -2055,7 +2067,7 @@ describe("secrets runtime snapshot", () => {
    );
    expect(snapshot.warnings.map((warning) => warning.path)).toEqual(
      expect.arrayContaining([
-        "channels.discord.accounts.disabledOverride.voice.tts.openai.apiKey",
+        "channels.discord.accounts.disabledOverride.voice.tts.providers.openai.apiKey",
        "channels.discord.accounts.disabledOverride.pluralkit.token",
      ]),
    );
@ -2068,11 +2080,13 @@ describe("secrets runtime snapshot", () => {
          discord: {
            voice: {
              tts: {
-                openai: {
-                  apiKey: {
-                    source: "env",
-                    provider: "default",
-                    id: "DISCORD_UNUSED_BASE_TTS_OPENAI",
+                providers: {
+                  openai: {
+                    apiKey: {
+                      source: "env",
+                      provider: "default",
+                      id: "DISCORD_UNUSED_BASE_TTS_OPENAI",
+                    },
                  },
                },
              },
@ -2082,11 +2096,13 @@ describe("secrets runtime snapshot", () => {
                enabled: true,
                voice: {
                  tts: {
-                    openai: {
-                      apiKey: {
-                        source: "env",
-                        provider: "default",
-                        id: "DISCORD_ENABLED_ONLY_TTS_OPENAI",
+                    providers: {
+                      openai: {
+                        apiKey: {
+                          source: "env",
+                          provider: "default",
+                          id: "DISCORD_ENABLED_ONLY_TTS_OPENAI",
+                        },
                      },
                    },
                  },
@ -2107,15 +2123,16 @@ describe("secrets runtime snapshot", () => {
    });

    expect(
-      snapshot.config.channels?.discord?.accounts?.enabledOverride?.voice?.tts?.openai?.apiKey,
+      snapshot.config.channels?.discord?.accounts?.enabledOverride?.voice?.tts?.providers?.openai
+        ?.apiKey,
    ).toBe("enabled-only-tts-openai");
-    expect(snapshot.config.channels?.discord?.voice?.tts?.openai?.apiKey).toEqual({
+    expect(snapshot.config.channels?.discord?.voice?.tts?.providers?.openai?.apiKey).toEqual({
      source: "env",
      provider: "default",
      id: "DISCORD_UNUSED_BASE_TTS_OPENAI",
    });
    expect(snapshot.warnings.map((warning) => warning.path)).toContain(
-      "channels.discord.voice.tts.openai.apiKey",
+      "channels.discord.voice.tts.providers.openai.apiKey",
    );
  });

@ -2127,8 +2144,10 @@ describe("secrets runtime snapshot", () => {
            discord: {
              voice: {
                tts: {
-                  openai: {
-                    apiKey: { source: "env", provider: "default", id: "DISCORD_BASE_TTS_OK" },
+                  providers: {
+                    openai: {
+                      apiKey: { source: "env", provider: "default", id: "DISCORD_BASE_TTS_OK" },
+                    },
                  },
                },
              },
@ -2137,11 +2156,13 @@ describe("secrets runtime snapshot", () => {
                  enabled: true,
                  voice: {
                    tts: {
-                      openai: {
-                        apiKey: {
-                          source: "env",
-                          provider: "default",
-                          id: "DISCORD_ENABLED_OVERRIDE_TTS_MISSING",
+                      providers: {
+                        openai: {
+                          apiKey: {
+                            source: "env",
+                            provider: "default",
+                            id: "DISCORD_ENABLED_OVERRIDE_TTS_MISSING",
+                          },
                        },
                      },
                    },
--- a/src/secrets/runtime.ts
+++ b/src/secrets/runtime.ts
@ -12,6 +12,7 @@ import {
  setRuntimeConfigSnapshot,
  type OpenClawConfig,
 } from "../config/config.js";
+import { migrateLegacyConfig } from "../config/legacy-migrate.js";
 import { resolveUserPath } from "../utils.js";
 import {
  collectCommandSecretAssignmentsFromSnapshot,
@ -139,7 +140,9 @@ export async function prepareSecretsRuntimeSnapshot(params: {
 }): Promise<PreparedSecretsRuntimeSnapshot> {
  const runtimeEnv = mergeSecretsRuntimeEnv(params.env);
  const sourceConfig = structuredClone(params.config);
-  const resolvedConfig = structuredClone(params.config);
+  const resolvedConfig = structuredClone(
+    migrateLegacyConfig(params.config).config ?? params.config,
+  );
  const context = createResolverContext({
    sourceConfig,
    env: runtimeEnv,
--- a/src/tts/directives.ts
+++ b/src/tts/directives.ts
@ -4,7 +4,6 @@ import { listSpeechProviders } from "./provider-registry.js";
 import type {
  SpeechModelOverridePolicy,
  SpeechProviderConfig,
-  SpeechProviderOverrides,
  TtsDirectiveOverrides,
  TtsDirectiveParseResult,
 } from "./provider-types.js";
@ -13,7 +12,6 @@ type ParseTtsDirectiveOptions = {
  cfg?: OpenClawConfig;
  providers?: readonly SpeechProviderPlugin[];
  providerConfigs?: Record<string, SpeechProviderConfig>;
-  openaiBaseUrl?: string;
 };

 function buildProviderOrder(left: SpeechProviderPlugin, right: SpeechProviderPlugin): number {
@ -36,49 +34,18 @@ function resolveDirectiveProviderConfig(
  provider: SpeechProviderPlugin,
  options?: ParseTtsDirectiveOptions,
 ): SpeechProviderConfig | undefined {
-  const explicit = options?.providerConfigs?.[provider.id];
-  if (explicit) {
-    return explicit;
-  }
-  if (provider.id === "openai" && options?.openaiBaseUrl) {
-    return { baseUrl: options.openaiBaseUrl };
-  }
-  return undefined;
-}
-
-function mergeProviderOverrides(
-  target: TtsDirectiveOverrides,
-  providerId: string,
-  next: SpeechProviderOverrides,
-): void {
-  target.providerOverrides = {
-    ...target.providerOverrides,
-    [providerId]: {
-      ...target.providerOverrides?.[providerId],
-      ...next,
-    },
-  };
-}
-
-function resolveLegacyOptions(
-  optionsOrOpenaiBaseUrl?: ParseTtsDirectiveOptions | string,
-): ParseTtsDirectiveOptions | undefined {
-  if (typeof optionsOrOpenaiBaseUrl === "string") {
-    return { openaiBaseUrl: optionsOrOpenaiBaseUrl };
-  }
-  return optionsOrOpenaiBaseUrl;
+  return options?.providerConfigs?.[provider.id];
 }

 export function parseTtsDirectives(
  text: string,
  policy: SpeechModelOverridePolicy,
-  optionsOrOpenaiBaseUrl?: ParseTtsDirectiveOptions | string,
+  options?: ParseTtsDirectiveOptions,
 ): TtsDirectiveParseResult {
  if (!policy.enabled) {
    return { cleanedText: text, overrides: {}, warnings: [], hasDirective: false };
  }

-  const options = resolveLegacyOptions(optionsOrOpenaiBaseUrl);
  const providers = resolveDirectiveProviders(options);
  const overrides: TtsDirectiveOverrides = {};
  const warnings: string[] = [];
@ -135,7 +102,13 @@ export function parseTtsDirectives(
        }
        handled = true;
        if (parsed.overrides) {
-          mergeProviderOverrides(overrides, provider.id, parsed.overrides);
+          overrides.providerOverrides = {
+            ...overrides.providerOverrides,
+            [provider.id]: {
+              ...overrides.providerOverrides?.[provider.id],
+              ...parsed.overrides,
+            },
+          };
        }
        if (parsed.warnings?.length) {
          warnings.push(...parsed.warnings);
--- a/src/tts/tts.test.ts
+++ b/src/tts/tts.test.ts
@ -86,7 +86,6 @@ const {
  parseTtsDirectives,
  resolveModelOverridePolicy,
  summarizeText,
-  resolveOutputFormat,
  getResolvedSpeechProviderConfig,
 } = _test;

@ -234,65 +233,6 @@ describe("tts", () => {
    });
  });

-  describe("resolveOutputFormat", () => {
-    it("selects opus for opus channels (telegram/feishu/whatsapp/matrix) and mp3 for others", () => {
-      const cases = [
-        {
-          channel: "telegram",
-          expected: {
-            openai: "opus",
-            elevenlabs: "opus_48000_64",
-            extension: ".opus",
-            voiceCompatible: true,
-          },
-        },
-        {
-          channel: "feishu",
-          expected: {
-            openai: "opus",
-            elevenlabs: "opus_48000_64",
-            extension: ".opus",
-            voiceCompatible: true,
-          },
-        },
-        {
-          channel: "whatsapp",
-          expected: {
-            openai: "opus",
-            elevenlabs: "opus_48000_64",
-            extension: ".opus",
-            voiceCompatible: true,
-          },
-        },
-        {
-          channel: "matrix",
-          expected: {
-            openai: "opus",
-            elevenlabs: "opus_48000_64",
-            extension: ".opus",
-            voiceCompatible: true,
-          },
-        },
-        {
-          channel: "discord",
-          expected: {
-            openai: "mp3",
-            elevenlabs: "mp3_44100_128",
-            extension: ".mp3",
-            voiceCompatible: false,
-          },
-        },
-      ] as const;
-      for (const testCase of cases) {
-        const output = resolveOutputFormat(testCase.channel);
-        expect(output.openai, testCase.channel).toBe(testCase.expected.openai);
-        expect(output.elevenlabs, testCase.channel).toBe(testCase.expected.elevenlabs);
-        expect(output.extension, testCase.channel).toBe(testCase.expected.extension);
-        expect(output.voiceCompatible, testCase.channel).toBe(testCase.expected.voiceCompatible);
-      }
-    });
-  });
-
  describe("resolveEdgeOutputFormat", () => {
    const baseCfg: OpenClawConfig = {
      agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
@ -383,9 +323,11 @@ describe("tts", () => {
    it("accepts custom voices and models when openaiBaseUrl is a non-default endpoint", () => {
      const policy = resolveModelOverridePolicy({ enabled: true });
      const input = "Hello [[tts:voice=kokoro-chinese model=kokoro-v1]] world";
-      const customBaseUrl = "http://localhost:8880/v1";
-
-      const result = parseTtsDirectives(input, policy, customBaseUrl);
+      const result = parseTtsDirectives(input, policy, {
+        providerConfigs: {
+          openai: { baseUrl: "http://localhost:8880/v1" },
+        },
+      });
      const openaiOverrides = result.overrides.providerOverrides?.openai as
        | { voice?: string; model?: string }
        | undefined;
@ -398,9 +340,11 @@ describe("tts", () => {
    it("rejects unknown voices and models when openaiBaseUrl is the default OpenAI endpoint", () => {
      const policy = resolveModelOverridePolicy({ enabled: true });
      const input = "Hello [[tts:voice=kokoro-chinese model=kokoro-v1]] world";
-      const defaultBaseUrl = "https://api.openai.com/v1";
-
-      const result = parseTtsDirectives(input, policy, defaultBaseUrl);
+      const result = parseTtsDirectives(input, policy, {
+        providerConfigs: {
+          openai: { baseUrl: "https://api.openai.com/v1" },
+        },
+      });
      const openaiOverrides = result.overrides.providerOverrides?.openai as
        | { voice?: string }
        | undefined;
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@ -48,22 +48,6 @@ const DEFAULT_TTS_MAX_LENGTH = 1500;
 const DEFAULT_TTS_SUMMARIZE = true;
 const DEFAULT_MAX_TEXT_LENGTH = 4096;

-const OPUS_OUTPUT = {
-  openai: "opus" as const,
-  // ElevenLabs output formats use codec_sample_rate_bitrate naming.
-  // Opus @ 48kHz/64kbps is a good voice message tradeoff.
-  elevenlabs: "opus_48000_64",
-  extension: ".opus",
-  voiceCompatible: true,
-};
-
-const DEFAULT_OUTPUT = {
-  openai: "mp3" as const,
-  elevenlabs: "mp3_44100_128",
-  extension: ".mp3",
-  voiceCompatible: false,
-};
-
 export type ResolvedTtsConfig = {
  auto: TtsAutoMode;
  mode: TtsMode;
@ -418,16 +402,9 @@ export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void {
  lastTtsAttempt = entry;
 }

-/** Channels that require opus audio */
+/** Channels that require voice-note-compatible audio */
 const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix"]);

-function resolveOutputFormat(channelId?: string | null) {
-  if (channelId && OPUS_CHANNELS.has(channelId)) {
-    return OPUS_OUTPUT;
-  }
-  return DEFAULT_OUTPUT;
-}
-
 function resolveChannelId(channel: string | undefined): ChannelId | null {
  return channel ? normalizeChannelId(channel) : null;
 }
@ -876,6 +853,5 @@ export const _test = {
  parseTtsDirectives,
  resolveModelOverridePolicy,
  summarizeText,
-  resolveOutputFormat,
  getResolvedSpeechProviderConfig,
 };