From f28f0f29ba90d4637d2c3d38cf239aaa9d37485f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 2 Apr 2026 15:28:57 +0900 Subject: [PATCH] fix(providers): centralize media request shaping (#59469) * fix(providers): centralize media request shaping * style(providers): normalize shared request imports * fix(changelog): add media request shaping entry * fix(google): preserve private network guard --- CHANGELOG.md | 1 + extensions/deepgram/audio.ts | 26 +++---- .../google/image-generation-provider.ts | 18 +++-- .../google/media-understanding-provider.ts | 28 ++++---- .../moonshot/media-understanding-provider.ts | 25 ++++--- .../openai-compatible-audio.ts | 31 ++++----- src/media-understanding/shared.test.ts | 66 ++++++++++++++++++ src/media-understanding/shared.ts | 69 ++++++++++++++++++- src/plugin-sdk/provider-http.ts | 1 + 9 files changed, 202 insertions(+), 63 deletions(-) create mode 100644 src/media-understanding/shared.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 2abbd3a13c3..2542c84d151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai - Plugins/runtime: keep LINE reply directives and browser-backed cleanup/reset flows working even when those plugins are disabled while tightening bundled plugin activation guards. (#59412) Thanks @vincentkoc. - WhatsApp/presence: send `unavailable` presence on connect in self-chat mode so personal-phone users stop losing all push notifications while the gateway is running. (#59410) Thanks @mcaxtr. - Providers/OpenAI-compatible routing: centralize native-vs-proxy request policy so hidden attribution and related OpenAI-family defaults only apply on verified native endpoints across stream, websocket, and shared audio HTTP paths. (#59433) Thanks @vincentkoc. +- Providers/media HTTP: centralize base URL normalization, default auth/header injection, and explicit header override handling across shared OpenAI-compatible audio, Deepgram audio, Gemini media/image, and Moonshot video request paths. (#59469) Thanks @vincentkoc. - Exec approvals/doctor: report host policy sources from the real approvals file path and ignore malformed host override values when attributing effective policy conflicts. (#59367) Thanks @gumadeiras. - Matrix/onboarding: restore guided setup in `openclaw channels add` and `openclaw configure --section channels`, while keeping custom plugin wizards on the shared `setupWizard` seam. (#59462) Thanks @gumadeiras. diff --git a/extensions/deepgram/audio.ts b/extensions/deepgram/audio.ts index 77146ecfa90..dc5b83a482d 100644 --- a/extensions/deepgram/audio.ts +++ b/extensions/deepgram/audio.ts @@ -4,8 +4,8 @@ import type { } from "openclaw/plugin-sdk/media-understanding"; import { assertOkOrThrowHttpError, - normalizeBaseUrl, postTranscriptionRequest, + resolveProviderHttpRequestConfig, requireTranscriptionText, } from "openclaw/plugin-sdk/provider-http"; @@ -31,9 +31,19 @@ export async function transcribeDeepgramAudio( params: AudioTranscriptionRequest, ): Promise { const fetchFn = params.fetchFn ?? fetch; - const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_DEEPGRAM_AUDIO_BASE_URL); - const allowPrivate = Boolean(params.baseUrl?.trim()); const model = resolveModel(params.model); + const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({ + baseUrl: params.baseUrl, + defaultBaseUrl: DEFAULT_DEEPGRAM_AUDIO_BASE_URL, + headers: params.headers, + defaultHeaders: { + authorization: `Token ${params.apiKey}`, + "content-type": params.mime ?? "application/octet-stream", + }, + provider: "deepgram", + capability: "audio", + transport: "media-understanding", + }); const url = new URL(`${baseUrl}/listen`); url.searchParams.set("model", model); @@ -49,14 +59,6 @@ export async function transcribeDeepgramAudio( } } - const headers = new Headers(params.headers); - if (!headers.has("authorization")) { - headers.set("authorization", `Token ${params.apiKey}`); - } - if (!headers.has("content-type")) { - headers.set("content-type", params.mime ?? "application/octet-stream"); - } - const body = new Uint8Array(params.buffer); const { response: res, release } = await postTranscriptionRequest({ url: url.toString(), @@ -64,7 +66,7 @@ export async function transcribeDeepgramAudio( body, timeoutMs: params.timeoutMs, fetchFn, - allowPrivateNetwork: allowPrivate, + allowPrivateNetwork, }); try { diff --git a/extensions/google/image-generation-provider.ts b/extensions/google/image-generation-provider.ts index ae3898d3b32..4637bd853d0 100644 --- a/extensions/google/image-generation-provider.ts +++ b/extensions/google/image-generation-provider.ts @@ -2,8 +2,8 @@ import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generati import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, - normalizeBaseUrl, postJsonRequest, + resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import { DEFAULT_GOOGLE_API_BASE_URL, @@ -134,10 +134,16 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider { } const model = normalizeGoogleImageModel(req.model); - const baseUrl = normalizeBaseUrl(resolveGoogleBaseUrl(req.cfg), DEFAULT_GOOGLE_API_BASE_URL); - const allowPrivate = Boolean(req.cfg?.models?.providers?.google?.baseUrl?.trim()); - const authHeaders = parseGeminiAuth(auth.apiKey); - const headers = new Headers(authHeaders.headers); + const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({ + baseUrl: resolveGoogleBaseUrl(req.cfg), + defaultBaseUrl: DEFAULT_GOOGLE_API_BASE_URL, + allowPrivateNetwork: Boolean(req.cfg?.models?.providers?.google?.baseUrl?.trim()), + defaultHeaders: parseGeminiAuth(auth.apiKey).headers, + provider: "google", + api: "google-generative-ai", + capability: "image", + transport: "http", + }); const imageConfig = mapSizeToImageConfig(req.size); const inputParts = (req.inputImages ?? []).map((image) => ({ inlineData: { @@ -170,7 +176,7 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider { }, timeoutMs: 60_000, fetchFn: fetch, - allowPrivateNetwork: allowPrivate, + allowPrivateNetwork, }); try { diff --git a/extensions/google/media-understanding-provider.ts b/extensions/google/media-understanding-provider.ts index 2766dab3207..b705b95d1af 100644 --- a/extensions/google/media-understanding-provider.ts +++ b/extensions/google/media-understanding-provider.ts @@ -9,8 +9,8 @@ import { } from "openclaw/plugin-sdk/media-understanding"; import { assertOkOrThrowHttpError, - normalizeBaseUrl, postJsonRequest, + resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import { DEFAULT_GOOGLE_API_BASE_URL, @@ -44,11 +44,6 @@ async function generateGeminiInlineDataText(params: { missingTextError: string; }): Promise<{ text: string; model: string }> { const fetchFn = params.fetchFn ?? fetch; - const baseUrl = normalizeBaseUrl( - normalizeGoogleApiBaseUrl(params.baseUrl ?? params.defaultBaseUrl), - DEFAULT_GOOGLE_API_BASE_URL, - ); - const allowPrivate = Boolean(params.baseUrl?.trim()); const model = (() => { const trimmed = params.model?.trim(); if (!trimmed) { @@ -56,16 +51,19 @@ async function generateGeminiInlineDataText(params: { } return normalizeGoogleModelId(trimmed); })(); + const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({ + baseUrl: normalizeGoogleApiBaseUrl(params.baseUrl ?? params.defaultBaseUrl), + defaultBaseUrl: DEFAULT_GOOGLE_API_BASE_URL, + allowPrivateNetwork: Boolean(params.baseUrl?.trim()), + headers: params.headers, + defaultHeaders: parseGeminiAuth(params.apiKey).headers, + provider: "google", + api: "google-generative-ai", + capability: params.defaultMime.startsWith("audio/") ? "audio" : "video", + transport: "media-understanding", + }); const url = `${baseUrl}/models/${model}:generateContent`; - const authHeaders = parseGeminiAuth(params.apiKey); - const headers = new Headers(params.headers); - for (const [key, value] of Object.entries(authHeaders.headers)) { - if (!headers.has(key)) { - headers.set(key, value); - } - } - const prompt = (() => { const trimmed = params.prompt?.trim(); return trimmed || params.defaultPrompt; @@ -94,7 +92,7 @@ async function generateGeminiInlineDataText(params: { body, timeoutMs: params.timeoutMs, fetchFn, - allowPrivateNetwork: allowPrivate, + allowPrivateNetwork, }); try { diff --git a/extensions/moonshot/media-understanding-provider.ts b/extensions/moonshot/media-understanding-provider.ts index 7d7ace86ea0..022b80dbb35 100644 --- a/extensions/moonshot/media-understanding-provider.ts +++ b/extensions/moonshot/media-understanding-provider.ts @@ -7,8 +7,8 @@ import { } from "openclaw/plugin-sdk/media-understanding"; import { assertOkOrThrowHttpError, - normalizeBaseUrl, postJsonRequest, + resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; export const DEFAULT_MOONSHOT_VIDEO_BASE_URL = "https://api.moonshot.ai/v1"; @@ -62,20 +62,24 @@ export async function describeMoonshotVideo( params: VideoDescriptionRequest, ): Promise { const fetchFn = params.fetchFn ?? fetch; - const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_MOONSHOT_VIDEO_BASE_URL); const model = resolveModel(params.model); const mime = params.mime ?? "video/mp4"; const prompt = resolvePrompt(params.prompt); + const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({ + baseUrl: params.baseUrl, + defaultBaseUrl: DEFAULT_MOONSHOT_VIDEO_BASE_URL, + headers: params.headers, + defaultHeaders: { + "content-type": "application/json", + authorization: `Bearer ${params.apiKey}`, + }, + provider: "moonshot", + api: "openai-completions", + capability: "video", + transport: "media-understanding", + }); const url = `${baseUrl}/chat/completions`; - const headers = new Headers(params.headers); - if (!headers.has("content-type")) { - headers.set("content-type", "application/json"); - } - if (!headers.has("authorization")) { - headers.set("authorization", `Bearer ${params.apiKey}`); - } - const body = { model, messages: [ @@ -100,6 +104,7 @@ export async function describeMoonshotVideo( body, timeoutMs: params.timeoutMs, fetchFn, + allowPrivateNetwork, }); try { diff --git a/src/media-understanding/openai-compatible-audio.ts b/src/media-understanding/openai-compatible-audio.ts index 3e4bd79b28e..1e1a338258b 100644 --- a/src/media-understanding/openai-compatible-audio.ts +++ b/src/media-understanding/openai-compatible-audio.ts @@ -1,9 +1,8 @@ import path from "node:path"; import { - applyProviderRequestHeaders, assertOkOrThrowHttpError, - normalizeBaseUrl, postTranscriptionRequest, + resolveProviderHttpRequestConfig, requireTranscriptionText, } from "./shared.js"; import type { AudioTranscriptionRequest, AudioTranscriptionResult } from "./types.js"; @@ -23,8 +22,18 @@ export async function transcribeOpenAiCompatibleAudio( params: OpenAiCompatibleAudioParams, ): Promise { const fetchFn = params.fetchFn ?? fetch; - const baseUrl = normalizeBaseUrl(params.baseUrl, params.defaultBaseUrl); - const allowPrivate = Boolean(params.baseUrl?.trim()); + const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({ + baseUrl: params.baseUrl, + defaultBaseUrl: params.defaultBaseUrl, + headers: params.headers, + defaultHeaders: { + authorization: `Bearer ${params.apiKey}`, + }, + provider: params.provider, + api: "openai-audio-transcriptions", + capability: "audio", + transport: "media-understanding", + }); const url = `${baseUrl}/audio/transcriptions`; const model = resolveModel(params.model, params.defaultModel); @@ -43,25 +52,13 @@ export async function transcribeOpenAiCompatibleAudio( form.append("prompt", params.prompt.trim()); } - const headers = applyProviderRequestHeaders({ - headers: params.headers, - provider: params.provider, - api: "openai-audio-transcriptions", - baseUrl, - capability: "audio", - transport: "media-understanding", - }); - if (!headers.has("authorization")) { - headers.set("authorization", `Bearer ${params.apiKey}`); - } - const { response: res, release } = await postTranscriptionRequest({ url, headers, body: form, timeoutMs: params.timeoutMs, fetchFn, - allowPrivateNetwork: allowPrivate, + allowPrivateNetwork, }); try { diff --git a/src/media-understanding/shared.test.ts b/src/media-understanding/shared.test.ts new file mode 100644 index 00000000000..b4242ee2c0c --- /dev/null +++ b/src/media-understanding/shared.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from "vitest"; +import { resolveProviderHttpRequestConfig } from "./shared.js"; + +describe("resolveProviderHttpRequestConfig", () => { + it("preserves explicit caller headers over default and attribution headers", () => { + const resolved = resolveProviderHttpRequestConfig({ + baseUrl: "https://api.openai.com/v1/", + defaultBaseUrl: "https://api.openai.com/v1", + headers: { + authorization: "Bearer override", + "User-Agent": "custom-agent/1.0", + }, + defaultHeaders: { + authorization: "Bearer default-token", + "X-Default": "1", + }, + provider: "openai", + api: "openai-audio-transcriptions", + capability: "audio", + transport: "media-understanding", + }); + + expect(resolved.baseUrl).toBe("https://api.openai.com/v1"); + expect(resolved.allowPrivateNetwork).toBe(true); + expect(resolved.headers.get("authorization")).toBe("Bearer override"); + expect(resolved.headers.get("x-default")).toBe("1"); + expect(resolved.headers.get("user-agent")).toBe("custom-agent/1.0"); + expect(resolved.headers.get("originator")).toBe("openclaw"); + expect(resolved.headers.get("version")).toBeTruthy(); + }); + + it("uses the fallback base URL without enabling private-network access", () => { + const resolved = resolveProviderHttpRequestConfig({ + defaultBaseUrl: "https://api.deepgram.com/v1/", + defaultHeaders: { + authorization: "Token test-key", + }, + provider: "deepgram", + capability: "audio", + transport: "media-understanding", + }); + + expect(resolved.baseUrl).toBe("https://api.deepgram.com/v1"); + expect(resolved.allowPrivateNetwork).toBe(false); + expect(resolved.headers.get("authorization")).toBe("Token test-key"); + }); + + it("allows callers to preserve custom-base detection before URL normalization", () => { + const resolved = resolveProviderHttpRequestConfig({ + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + defaultBaseUrl: "https://generativelanguage.googleapis.com/v1beta", + allowPrivateNetwork: false, + defaultHeaders: { + "x-goog-api-key": "test-key", + }, + provider: "google", + api: "google-generative-ai", + capability: "image", + transport: "http", + }); + + expect(resolved.baseUrl).toBe("https://generativelanguage.googleapis.com/v1beta"); + expect(resolved.allowPrivateNetwork).toBe(false); + expect(resolved.headers.get("x-goog-api-key")).toBe("test-key"); + }); +}); diff --git a/src/media-understanding/shared.ts b/src/media-understanding/shared.ts index 58b51c5f07d..3c5578bbee3 100644 --- a/src/media-understanding/shared.ts +++ b/src/media-understanding/shared.ts @@ -1,7 +1,15 @@ +import type { + ProviderRequestCapability, + ProviderRequestTransport, +} from "../agents/provider-attribution.js"; +import { resolveProviderRequestAttributionHeaders } from "../agents/provider-attribution.js"; +import { + resolveProviderRequestConfig, + type ResolvedProviderRequestConfig, +} from "../agents/provider-request-config.js"; import type { GuardedFetchResult } from "../infra/net/fetch-guard.js"; import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js"; import type { LookupFn, SsrFPolicy } from "../infra/net/ssrf.js"; -import { resolveProviderRequestAttributionHeaders } from "../agents/provider-attribution.js"; export { fetchWithTimeout } from "../utils/fetch-timeout.js"; const MAX_ERROR_CHARS = 300; @@ -13,13 +21,21 @@ export function normalizeBaseUrl(baseUrl: string | undefined, fallback: string): export function applyProviderRequestHeaders(params: { headers?: HeadersInit; + defaultHeaders?: Record; provider?: string; api?: string; baseUrl?: string; - capability?: "audio" | "image" | "video" | "other"; - transport?: "http" | "media-understanding"; + capability?: ProviderRequestCapability; + transport?: ProviderRequestTransport; }): Headers { const headers = new Headers(params.headers); + if (params.defaultHeaders) { + for (const [key, value] of Object.entries(params.defaultHeaders)) { + if (!headers.has(key)) { + headers.set(key, value); + } + } + } const attributionHeaders = resolveProviderRequestAttributionHeaders({ provider: params.provider, api: params.api, @@ -38,6 +54,53 @@ export function applyProviderRequestHeaders(params: { return headers; } +export function resolveProviderHttpRequestConfig(params: { + baseUrl?: string; + defaultBaseUrl: string; + allowPrivateNetwork?: boolean; + headers?: HeadersInit; + defaultHeaders?: Record; + provider?: string; + api?: string; + capability?: ProviderRequestCapability; + transport?: ProviderRequestTransport; +}): { + baseUrl: string; + allowPrivateNetwork: boolean; + headers: Headers; + requestConfig: ResolvedProviderRequestConfig; +} { + const baseUrl = normalizeBaseUrl(params.baseUrl, params.defaultBaseUrl); + const requestConfigParams: Parameters[0] = { + provider: params.provider ?? "", + baseUrl, + capability: params.capability ?? "other", + transport: params.transport ?? "http", + }; + if (params.api !== undefined) { + requestConfigParams.api = params.api; + } + if (params.defaultHeaders !== undefined) { + requestConfigParams.providerHeaders = params.defaultHeaders; + } + const requestConfig = resolveProviderRequestConfig(requestConfigParams); + + return { + baseUrl, + allowPrivateNetwork: params.allowPrivateNetwork ?? Boolean(params.baseUrl?.trim()), + headers: applyProviderRequestHeaders({ + headers: params.headers, + defaultHeaders: requestConfig.headers, + provider: params.provider, + api: params.api, + baseUrl, + capability: params.capability, + transport: params.transport, + }), + requestConfig, + }; +} + export async function fetchWithTimeoutGuarded( url: string, init: RequestInit, diff --git a/src/plugin-sdk/provider-http.ts b/src/plugin-sdk/provider-http.ts index de59b4c029b..14c66be26b4 100644 --- a/src/plugin-sdk/provider-http.ts +++ b/src/plugin-sdk/provider-http.ts @@ -8,5 +8,6 @@ export { normalizeBaseUrl, postJsonRequest, postTranscriptionRequest, + resolveProviderHttpRequestConfig, requireTranscriptionText, } from "../media-understanding/shared.js";