refactor: centralize talk silence timeout defaults

This commit is contained in:
Peter Steinberger 2026-03-08 14:52:12 +00:00
parent 4e2290b841
commit b4c8950417
14 changed files with 89 additions and 24 deletions

View File

@ -0,0 +1,5 @@
package ai.openclaw.app.voice
/**
 * Module-internal holder for Talk mode default values on Android.
 */
internal object TalkDefaults {
/**
 * Silence timeout in milliseconds that `TalkModeManager.resolvedSilenceTimeoutMs` falls back to
 * when the Talk config has no usable `silenceTimeoutMs` (missing, a string, non-positive,
 * fractional, or too large for a Long).
 *
 * NOTE: a repository test matches the text of this declaration verbatim against the shared
 * per-platform defaults table, so keep the name, the literal, and the `L` suffix in this form.
 */
const val defaultSilenceTimeoutMs = 700L
}

View File

@ -60,7 +60,6 @@ class TalkModeManager(
private const val defaultModelIdFallback = "eleven_v3" private const val defaultModelIdFallback = "eleven_v3"
private const val defaultOutputFormatFallback = "pcm_24000" private const val defaultOutputFormatFallback = "pcm_24000"
private const val defaultTalkProvider = "elevenlabs" private const val defaultTalkProvider = "elevenlabs"
private const val defaultSilenceTimeoutMs = 700L
private const val listenWatchdogMs = 12_000L private const val listenWatchdogMs = 12_000L
private const val chatFinalWaitWithSubscribeMs = 45_000L private const val chatFinalWaitWithSubscribeMs = 45_000L
private const val chatFinalWaitWithoutSubscribeMs = 6_000L private const val chatFinalWaitWithoutSubscribeMs = 6_000L
@ -118,11 +117,12 @@ class TalkModeManager(
} }
internal fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long { internal fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long {
val primitive = talk?.get("silenceTimeoutMs") as? JsonPrimitive ?: return defaultSilenceTimeoutMs val fallback = TalkDefaults.defaultSilenceTimeoutMs
if (primitive.isString) return defaultSilenceTimeoutMs val primitive = talk?.get("silenceTimeoutMs") as? JsonPrimitive ?: return fallback
val timeout = primitive.content.toDoubleOrNull() ?: return defaultSilenceTimeoutMs if (primitive.isString) return fallback
val timeout = primitive.content.toDoubleOrNull() ?: return fallback
if (timeout <= 0 || timeout % 1.0 != 0.0 || timeout > Long.MAX_VALUE.toDouble()) { if (timeout <= 0 || timeout % 1.0 != 0.0 || timeout > Long.MAX_VALUE.toDouble()) {
return defaultSilenceTimeoutMs return fallback
} }
return timeout.toLong() return timeout.toLong()
} }
@ -155,7 +155,7 @@ class TalkModeManager(
private var listeningMode = false private var listeningMode = false
private var silenceJob: Job? = null private var silenceJob: Job? = null
private var silenceWindowMs = defaultSilenceTimeoutMs private var silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs
private var lastTranscript: String = "" private var lastTranscript: String = ""
private var lastHeardAtMs: Long? = null private var lastHeardAtMs: Long? = null
private var lastSpokenText: String? = null private var lastSpokenText: String? = null
@ -1467,7 +1467,7 @@ class TalkModeManager(
} }
configLoaded = true configLoaded = true
} catch (_: Throwable) { } catch (_: Throwable) {
silenceWindowMs = defaultSilenceTimeoutMs silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
defaultModelId = defaultModelIdFallback defaultModelId = defaultModelIdFallback
if (!modelOverrideActive) currentModelId = defaultModelId if (!modelOverrideActive) currentModelId = defaultModelId

View File

@ -94,20 +94,20 @@ class TalkModeConfigParsingTest {
@Test @Test
fun defaultsSilenceTimeoutMsWhenMissing() { fun defaultsSilenceTimeoutMsWhenMissing() {
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(null)) assertEquals(TalkDefaults.defaultSilenceTimeoutMs, TalkModeManager.resolvedSilenceTimeoutMs(null))
} }
@Test @Test
fun defaultsSilenceTimeoutMsWhenInvalid() { fun defaultsSilenceTimeoutMsWhenInvalid() {
val talk = buildJsonObject { put("silenceTimeoutMs", 0) } val talk = buildJsonObject { put("silenceTimeoutMs", 0) }
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(talk)) assertEquals(TalkDefaults.defaultSilenceTimeoutMs, TalkModeManager.resolvedSilenceTimeoutMs(talk))
} }
@Test @Test
fun defaultsSilenceTimeoutMsWhenString() { fun defaultsSilenceTimeoutMsWhenString() {
val talk = buildJsonObject { put("silenceTimeoutMs", "1500") } val talk = buildJsonObject { put("silenceTimeoutMs", "1500") }
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(talk)) assertEquals(TalkDefaults.defaultSilenceTimeoutMs, TalkModeManager.resolvedSilenceTimeoutMs(talk))
} }
} }

View File

@ -0,0 +1,3 @@
/// Caseless enum used as a namespace for Talk mode default values (iOS target).
enum TalkDefaults {
/// Default silence timeout in milliseconds; `TalkModeManager.defaultSilenceTimeoutMs` is initialised from it.
/// NOTE: a repository test matches the text of this declaration verbatim against the shared
/// per-platform defaults table, so keep the name and literal in this form.
static let silenceTimeoutMs = 900
}

View File

@ -34,7 +34,7 @@ final class TalkModeManager: NSObject {
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
private static let defaultModelIdFallback = "eleven_v3" private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs" private static let defaultTalkProvider = "elevenlabs"
private static let defaultSilenceTimeoutMs = 900 private static let defaultSilenceTimeoutMs = TalkDefaults.silenceTimeoutMs
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__" private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
var isEnabled: Bool = false var isEnabled: Bool = false
var isListening: Bool = false var isListening: Bool = false

View File

@ -60,7 +60,7 @@ import Testing
} }
@Test func defaultsSilenceTimeoutMsWhenMissing() { @Test func defaultsSilenceTimeoutMsWhenMissing() {
#expect(TalkModeManager.resolvedSilenceTimeoutMs(nil) == 900) #expect(TalkModeManager.resolvedSilenceTimeoutMs(nil) == TalkDefaults.silenceTimeoutMs)
} }
@Test func defaultsSilenceTimeoutMsWhenInvalid() { @Test func defaultsSilenceTimeoutMsWhenInvalid() {
@ -68,7 +68,7 @@ import Testing
"silenceTimeoutMs": 0, "silenceTimeoutMs": 0,
] ]
#expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 900) #expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == TalkDefaults.silenceTimeoutMs)
} }
@Test func defaultsSilenceTimeoutMsWhenBool() { @Test func defaultsSilenceTimeoutMsWhenBool() {
@ -76,6 +76,6 @@ import Testing
"silenceTimeoutMs": true, "silenceTimeoutMs": true,
] ]
#expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 900) #expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == TalkDefaults.silenceTimeoutMs)
} }
} }

View File

@ -0,0 +1,3 @@
/// Caseless enum used as a namespace for Talk mode default values (macOS target).
enum TalkDefaults {
/// Default silence timeout in milliseconds; `TalkModeRuntime.defaultSilenceTimeoutMs` is initialised from it.
/// NOTE: a repository test matches the text of this declaration verbatim against the shared
/// per-platform defaults table, so keep the name and literal in this form.
static let silenceTimeoutMs = 700
}

View File

@ -12,7 +12,7 @@ actor TalkModeRuntime {
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts") private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
private static let defaultModelIdFallback = "eleven_v3" private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs" private static let defaultTalkProvider = "elevenlabs"
private static let defaultSilenceTimeoutMs = 700 private static let defaultSilenceTimeoutMs = TalkDefaults.silenceTimeoutMs
private final class RMSMeter: @unchecked Sendable { private final class RMSMeter: @unchecked Sendable {
private let lock = NSLock() private let lock = NSLock()

View File

@ -33,7 +33,7 @@ struct TalkModeConfigParsingTests {
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key") #expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
} }
@Test func readsConfiguredSilenceTimeoutMs() { @Test func `reads configured silence timeout ms`() {
let talk: [String: AnyCodable] = [ let talk: [String: AnyCodable] = [
"silenceTimeoutMs": AnyCodable(1500), "silenceTimeoutMs": AnyCodable(1500),
] ]
@ -41,15 +41,15 @@ struct TalkModeConfigParsingTests {
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 1500) #expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 1500)
} }
@Test func defaultsSilenceTimeoutMsWhenMissing() { @Test func `defaults silence timeout ms when missing`() {
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(nil) == 700) #expect(TalkModeRuntime.resolvedSilenceTimeoutMs(nil) == TalkDefaults.silenceTimeoutMs)
} }
@Test func defaultsSilenceTimeoutMsWhenInvalid() { @Test func `defaults silence timeout ms when invalid`() {
let talk: [String: AnyCodable] = [ let talk: [String: AnyCodable] = [
"silenceTimeoutMs": AnyCodable(0), "silenceTimeoutMs": AnyCodable(0),
] ]
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 700) #expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == TalkDefaults.silenceTimeoutMs)
} }
} }

View File

@ -1669,7 +1669,7 @@ Defaults for Talk mode (macOS/iOS/Android).
- `apiKey` and `providers.*.apiKey` accept plaintext strings or SecretRef objects. - `apiKey` and `providers.*.apiKey` accept plaintext strings or SecretRef objects.
- `ELEVENLABS_API_KEY` fallback applies only when no Talk API key is configured. - `ELEVENLABS_API_KEY` fallback applies only when no Talk API key is configured.
- `voiceAliases` lets Talk directives use friendly names. - `voiceAliases` lets Talk directives use friendly names.
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700` ms on macOS and Android, `900` ms on iOS). - `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`).
--- ---

View File

@ -65,7 +65,7 @@ Supported keys:
Defaults: Defaults:
- `interruptOnSpeech`: true - `interruptOnSpeech`: true
- `silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700` ms on macOS and Android, `900` ms on iOS) - `silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`)
- `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID` (or first ElevenLabs voice when API key is available) - `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID` (or first ElevenLabs voice when API key is available)
- `modelId`: defaults to `eleven_v3` when unset - `modelId`: defaults to `eleven_v3` when unset
- `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available) - `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available)

View File

@ -4,6 +4,7 @@ import {
} from "../discord/monitor/timeouts.js"; } from "../discord/monitor/timeouts.js";
import { MEDIA_AUDIO_FIELD_HELP } from "./media-audio-field-metadata.js"; import { MEDIA_AUDIO_FIELD_HELP } from "./media-audio-field-metadata.js";
import { IRC_FIELD_HELP } from "./schema.irc.js"; import { IRC_FIELD_HELP } from "./schema.irc.js";
import { describeTalkSilenceTimeoutDefaults } from "./talk-defaults.js";
export const FIELD_HELP: Record<string, string> = { export const FIELD_HELP: Record<string, string> = {
meta: "Metadata fields automatically maintained by OpenClaw to record write/version history for this config file. Keep these values system-managed and avoid manual edits unless debugging migration history.", meta: "Metadata fields automatically maintained by OpenClaw to record write/version history for this config file. Keep these values system-managed and avoid manual edits unless debugging migration history.",
@ -163,8 +164,7 @@ export const FIELD_HELP: Record<string, string> = {
"Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).", "Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).",
"talk.interruptOnSpeech": "talk.interruptOnSpeech":
"If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.", "If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",
"talk.silenceTimeoutMs": "talk.silenceTimeoutMs": `Milliseconds of user silence before Talk mode finalizes and sends the current transcript. Leave unset to keep the platform default pause window (${describeTalkSilenceTimeoutDefaults()}).`,
"Milliseconds of user silence before Talk mode finalizes and sends the current transcript. Leave unset to keep the platform default pause window (700 ms on macOS and Android, 900 ms on iOS).",
acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.", acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.",
"acp.enabled": "acp.enabled":
"Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.", "Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.",

View File

@ -0,0 +1,43 @@
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
import { FIELD_HELP } from "./schema.help.js";
import {
describeTalkSilenceTimeoutDefaults,
TALK_SILENCE_TIMEOUT_MS_BY_PLATFORM,
} from "./talk-defaults.js";
// Directory two levels above the folder containing this test file; all paths below are resolved against it.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../..");

/**
 * Loads a file addressed relative to `repoRoot` and returns its contents as UTF-8 text.
 *
 * @param relativePath path of the file, relative to `repoRoot`
 * @returns the decoded file contents
 */
function readRepoFile(relativePath: string): string {
  const absolutePath = path.join(repoRoot, relativePath);
  return fs.readFileSync(absolutePath, { encoding: "utf8" });
}
// Guards against drift between the shared defaults table, the generated help text,
// the Markdown docs, and the constants declared in each native app.
describe("talk silence timeout defaults", () => {
  it("keeps help text and docs aligned with the policy", () => {
    const expectedSummary = describeTalkSilenceTimeoutDefaults();

    expect(FIELD_HELP["talk.silenceTimeoutMs"]).toContain(expectedSummary);

    // Both docs pages must quote the summary verbatim.
    const docPaths = ["docs/gateway/configuration-reference.md", "docs/nodes/talk.md"];
    for (const docPath of docPaths) {
      expect(readRepoFile(docPath)).toContain(expectedSummary);
    }
  });

  it("matches the Apple and Android runtime constants", () => {
    const { macos, ios, android } = TALK_SILENCE_TIMEOUT_MS_BY_PLATFORM;

    // Read every native source file up front, then check each for its exact declaration text.
    const checks = [
      {
        source: readRepoFile("apps/macos/Sources/OpenClaw/TalkDefaults.swift"),
        declaration: `static let silenceTimeoutMs = ${macos}`,
      },
      {
        source: readRepoFile("apps/ios/Sources/Voice/TalkDefaults.swift"),
        declaration: `static let silenceTimeoutMs = ${ios}`,
      },
      {
        source: readRepoFile(
          "apps/android/app/src/main/java/ai/openclaw/app/voice/TalkDefaults.kt",
        ),
        declaration: `const val defaultSilenceTimeoutMs = ${android}L`,
      },
    ];

    for (const { source, declaration } of checks) {
      expect(source).toContain(declaration);
    }
  });
});

View File

@ -0,0 +1,11 @@
/**
 * Default Talk mode silence timeout, in milliseconds, for each client platform.
 *
 * A repository test checks these numbers against the constants declared in the
 * native macOS, iOS, and Android sources, so update both sides together.
 */
export const TALK_SILENCE_TIMEOUT_MS_BY_PLATFORM = {
  macos: 700,
  android: 700,
  ios: 900,
} as const;

/**
 * Builds the human-readable summary of the per-platform silence timeout defaults
 * that is embedded in config help text and quoted in the docs.
 *
 * macOS and Android are reported together while they share a value. If they ever
 * diverge, each platform is listed separately instead of silently reporting the
 * macOS number for Android.
 *
 * @param defaults per-platform values to describe; defaults to
 *   `TALK_SILENCE_TIMEOUT_MS_BY_PLATFORM`
 * @returns e.g. `"700 ms on macOS and Android, 900 ms on iOS"`
 */
export function describeTalkSilenceTimeoutDefaults(
  // Typed as plain numbers (not the literal types from `as const`) so the equality
  // check below still type-checks if the table values stop matching.
  defaults: Readonly<
    Record<keyof typeof TALK_SILENCE_TIMEOUT_MS_BY_PLATFORM, number>
  > = TALK_SILENCE_TIMEOUT_MS_BY_PLATFORM,
): string {
  const { macos, android, ios } = defaults;
  const macosAndAndroid =
    macos === android
      ? `${macos} ms on macOS and Android`
      : `${macos} ms on macOS, ${android} ms on Android`;
  return `${macosAndAndroid}, ${ios} ms on iOS`;
}