mirror of https://github.com/openclaw/openclaw.git
refactor(voice-call): use config for realtime tuning
This commit is contained in:
parent
e636ba6ab0
commit
ed0cbcba2f
|
|
@ -1,13 +1,7 @@
|
|||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { buildOpenAIRealtimeTranscriptionProvider } from "./realtime-transcription-provider.js";
|
||||
|
||||
describe("buildOpenAIRealtimeTranscriptionProvider", () => {
|
||||
const originalEnv = { ...process.env };
|
||||
|
||||
afterEach(() => {
|
||||
process.env = { ...originalEnv };
|
||||
});
|
||||
|
||||
it("normalizes OpenAI config defaults", () => {
|
||||
const provider = buildOpenAIRealtimeTranscriptionProvider();
|
||||
const resolved = provider.resolveConfig?.({
|
||||
|
|
@ -26,15 +20,19 @@ describe("buildOpenAIRealtimeTranscriptionProvider", () => {
|
|||
});
|
||||
});
|
||||
|
||||
it("reads provider-owned env fallbacks", () => {
|
||||
process.env.REALTIME_TRANSCRIPTION_MODEL = "gpt-4o-transcribe";
|
||||
process.env.SILENCE_DURATION_MS = "900";
|
||||
process.env.VAD_THRESHOLD = "0.45";
|
||||
|
||||
it("keeps provider-owned transcription settings configurable via raw provider config", () => {
|
||||
const provider = buildOpenAIRealtimeTranscriptionProvider();
|
||||
const resolved = provider.resolveConfig?.({
|
||||
cfg: {} as never,
|
||||
rawConfig: {},
|
||||
rawConfig: {
|
||||
providers: {
|
||||
openai: {
|
||||
model: "gpt-4o-transcribe",
|
||||
silenceDurationMs: 900,
|
||||
vadThreshold: 0.45,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(resolved).toEqual({
|
||||
|
|
|
|||
|
|
@ -57,21 +57,9 @@ function normalizeProviderConfig(
|
|||
value: raw?.openaiApiKey,
|
||||
path: "plugins.entries.voice-call.config.streaming.openaiApiKey",
|
||||
}),
|
||||
model:
|
||||
trimToUndefined(raw?.model) ??
|
||||
trimToUndefined(raw?.sttModel) ??
|
||||
trimToUndefined(process.env.REALTIME_TRANSCRIPTION_MODEL) ??
|
||||
trimToUndefined(process.env.STREAMING_STT_MODEL),
|
||||
silenceDurationMs:
|
||||
asNumber(raw?.silenceDurationMs) ??
|
||||
(typeof process.env.SILENCE_DURATION_MS === "string"
|
||||
? Number.parseInt(process.env.SILENCE_DURATION_MS, 10)
|
||||
: undefined),
|
||||
vadThreshold:
|
||||
asNumber(raw?.vadThreshold) ??
|
||||
(typeof process.env.VAD_THRESHOLD === "string"
|
||||
? Number.parseFloat(process.env.VAD_THRESHOLD)
|
||||
: undefined),
|
||||
model: trimToUndefined(raw?.model) ?? trimToUndefined(raw?.sttModel),
|
||||
silenceDurationMs: asNumber(raw?.silenceDurationMs),
|
||||
vadThreshold: asNumber(raw?.vadThreshold),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,24 +1,22 @@
|
|||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { buildOpenAIRealtimeVoiceProvider } from "./realtime-voice-provider.js";
|
||||
|
||||
describe("buildOpenAIRealtimeVoiceProvider", () => {
|
||||
const originalEnv = { ...process.env };
|
||||
|
||||
afterEach(() => {
|
||||
process.env = { ...originalEnv };
|
||||
});
|
||||
|
||||
it("normalizes provider-owned env fallbacks", () => {
|
||||
process.env.REALTIME_VOICE_MODEL = "gpt-realtime";
|
||||
process.env.REALTIME_VOICE_VOICE = "verse";
|
||||
process.env.REALTIME_VOICE_TEMPERATURE = "0.6";
|
||||
process.env.SILENCE_DURATION_MS = "850";
|
||||
process.env.VAD_THRESHOLD = "0.35";
|
||||
|
||||
it("normalizes provider-owned voice settings from raw provider config", () => {
|
||||
const provider = buildOpenAIRealtimeVoiceProvider();
|
||||
const resolved = provider.resolveConfig?.({
|
||||
cfg: {} as never,
|
||||
rawConfig: {},
|
||||
rawConfig: {
|
||||
providers: {
|
||||
openai: {
|
||||
model: "gpt-realtime",
|
||||
voice: "verse",
|
||||
temperature: 0.6,
|
||||
silenceDurationMs: 850,
|
||||
vadThreshold: 0.35,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(resolved).toEqual({
|
||||
|
|
|
|||
|
|
@ -103,25 +103,11 @@ function normalizeProviderConfig(
|
|||
value: raw?.apiKey,
|
||||
path: "plugins.entries.voice-call.config.realtime.providers.openai.apiKey",
|
||||
}),
|
||||
model: trimToUndefined(raw?.model) ?? trimToUndefined(process.env.REALTIME_VOICE_MODEL),
|
||||
voice: (trimToUndefined(raw?.voice) ?? trimToUndefined(process.env.REALTIME_VOICE_VOICE)) as
|
||||
| OpenAIRealtimeVoice
|
||||
| undefined,
|
||||
temperature:
|
||||
asNumber(raw?.temperature) ??
|
||||
(typeof process.env.REALTIME_VOICE_TEMPERATURE === "string"
|
||||
? Number.parseFloat(process.env.REALTIME_VOICE_TEMPERATURE)
|
||||
: undefined),
|
||||
vadThreshold:
|
||||
asNumber(raw?.vadThreshold) ??
|
||||
(typeof process.env.VAD_THRESHOLD === "string"
|
||||
? Number.parseFloat(process.env.VAD_THRESHOLD)
|
||||
: undefined),
|
||||
silenceDurationMs:
|
||||
asNumber(raw?.silenceDurationMs) ??
|
||||
(typeof process.env.SILENCE_DURATION_MS === "string"
|
||||
? Number.parseInt(process.env.SILENCE_DURATION_MS, 10)
|
||||
: undefined),
|
||||
model: trimToUndefined(raw?.model),
|
||||
voice: trimToUndefined(raw?.voice) as OpenAIRealtimeVoice | undefined,
|
||||
temperature: asNumber(raw?.temperature),
|
||||
vadThreshold: asNumber(raw?.vadThreshold),
|
||||
silenceDurationMs: asNumber(raw?.silenceDurationMs),
|
||||
prefixPaddingMs: asNumber(raw?.prefixPaddingMs),
|
||||
azureEndpoint: trimToUndefined(raw?.azureEndpoint),
|
||||
azureDeployment: trimToUndefined(raw?.azureDeployment),
|
||||
|
|
|
|||
|
|
@ -273,12 +273,6 @@ describe("normalizeVoiceCallConfig", () => {
|
|||
});
|
||||
|
||||
describe("resolveVoiceCallConfig", () => {
|
||||
const originalEnv = { ...process.env };
|
||||
|
||||
afterEach(() => {
|
||||
process.env = { ...originalEnv };
|
||||
});
|
||||
|
||||
it("keeps legacy streaming OpenAI fields inside providers.openai without forcing provider selection", () => {
|
||||
const resolved = resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
|
|
@ -301,14 +295,13 @@ describe("resolveVoiceCallConfig", () => {
|
|||
});
|
||||
});
|
||||
|
||||
it("maps realtime instructions from the legacy env hook without altering provider selection", () => {
|
||||
process.env.REALTIME_VOICE_INSTRUCTIONS = "Stay concise.";
|
||||
|
||||
it("preserves configured realtime instructions without env indirection", () => {
|
||||
const resolved = resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
provider: "twilio",
|
||||
realtime: {
|
||||
enabled: true,
|
||||
instructions: "Stay concise.",
|
||||
},
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -605,12 +605,6 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC
|
|||
resolved.streaming = mergeLegacyStreamingOpenAICompat(resolved.streaming);
|
||||
|
||||
resolved.realtime = mergeLegacyRealtimeOpenAICompat(resolved.realtime);
|
||||
if (
|
||||
typeof resolved.realtime.instructions !== "string" &&
|
||||
typeof process.env.REALTIME_VOICE_INSTRUCTIONS === "string"
|
||||
) {
|
||||
resolved.realtime.instructions = process.env.REALTIME_VOICE_INSTRUCTIONS;
|
||||
}
|
||||
|
||||
return normalizeVoiceCallConfig(resolved);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue