fix(providers): centralize media request shaping (#59469)

* fix(providers): centralize media request shaping

* style(providers): normalize shared request imports

* fix(changelog): add media request shaping entry

* fix(google): preserve private network guard
This commit is contained in:
Vincent Koc 2026-04-02 15:28:57 +09:00 committed by GitHub
parent 9786946b2d
commit f28f0f29ba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 202 additions and 63 deletions

View File

@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
- Plugins/runtime: keep LINE reply directives and browser-backed cleanup/reset flows working even when those plugins are disabled while tightening bundled plugin activation guards. (#59412) Thanks @vincentkoc.
- WhatsApp/presence: send `unavailable` presence on connect in self-chat mode so personal-phone users stop losing all push notifications while the gateway is running. (#59410) Thanks @mcaxtr.
- Providers/OpenAI-compatible routing: centralize native-vs-proxy request policy so hidden attribution and related OpenAI-family defaults only apply on verified native endpoints across stream, websocket, and shared audio HTTP paths. (#59433) Thanks @vincentkoc.
- Providers/media HTTP: centralize base URL normalization, default auth/header injection, and explicit header override handling across shared OpenAI-compatible audio, Deepgram audio, Gemini media/image, and Moonshot video request paths. (#59469) Thanks @vincentkoc.
- Exec approvals/doctor: report host policy sources from the real approvals file path and ignore malformed host override values when attributing effective policy conflicts. (#59367) Thanks @gumadeiras.
- Matrix/onboarding: restore guided setup in `openclaw channels add` and `openclaw configure --section channels`, while keeping custom plugin wizards on the shared `setupWizard` seam. (#59462) Thanks @gumadeiras.

View File

@ -4,8 +4,8 @@ import type {
} from "openclaw/plugin-sdk/media-understanding";
import {
assertOkOrThrowHttpError,
normalizeBaseUrl,
postTranscriptionRequest,
resolveProviderHttpRequestConfig,
requireTranscriptionText,
} from "openclaw/plugin-sdk/provider-http";
@ -31,9 +31,19 @@ export async function transcribeDeepgramAudio(
params: AudioTranscriptionRequest,
): Promise<AudioTranscriptionResult> {
const fetchFn = params.fetchFn ?? fetch;
const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_DEEPGRAM_AUDIO_BASE_URL);
const allowPrivate = Boolean(params.baseUrl?.trim());
const model = resolveModel(params.model);
const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({
baseUrl: params.baseUrl,
defaultBaseUrl: DEFAULT_DEEPGRAM_AUDIO_BASE_URL,
headers: params.headers,
defaultHeaders: {
authorization: `Token ${params.apiKey}`,
"content-type": params.mime ?? "application/octet-stream",
},
provider: "deepgram",
capability: "audio",
transport: "media-understanding",
});
const url = new URL(`${baseUrl}/listen`);
url.searchParams.set("model", model);
@ -49,14 +59,6 @@ export async function transcribeDeepgramAudio(
}
}
const headers = new Headers(params.headers);
if (!headers.has("authorization")) {
headers.set("authorization", `Token ${params.apiKey}`);
}
if (!headers.has("content-type")) {
headers.set("content-type", params.mime ?? "application/octet-stream");
}
const body = new Uint8Array(params.buffer);
const { response: res, release } = await postTranscriptionRequest({
url: url.toString(),
@ -64,7 +66,7 @@ export async function transcribeDeepgramAudio(
body,
timeoutMs: params.timeoutMs,
fetchFn,
allowPrivateNetwork: allowPrivate,
allowPrivateNetwork,
});
try {

View File

@ -2,8 +2,8 @@ import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generati
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
assertOkOrThrowHttpError,
normalizeBaseUrl,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import {
DEFAULT_GOOGLE_API_BASE_URL,
@ -134,10 +134,16 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider {
}
const model = normalizeGoogleImageModel(req.model);
const baseUrl = normalizeBaseUrl(resolveGoogleBaseUrl(req.cfg), DEFAULT_GOOGLE_API_BASE_URL);
const allowPrivate = Boolean(req.cfg?.models?.providers?.google?.baseUrl?.trim());
const authHeaders = parseGeminiAuth(auth.apiKey);
const headers = new Headers(authHeaders.headers);
const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({
baseUrl: resolveGoogleBaseUrl(req.cfg),
defaultBaseUrl: DEFAULT_GOOGLE_API_BASE_URL,
allowPrivateNetwork: Boolean(req.cfg?.models?.providers?.google?.baseUrl?.trim()),
defaultHeaders: parseGeminiAuth(auth.apiKey).headers,
provider: "google",
api: "google-generative-ai",
capability: "image",
transport: "http",
});
const imageConfig = mapSizeToImageConfig(req.size);
const inputParts = (req.inputImages ?? []).map((image) => ({
inlineData: {
@ -170,7 +176,7 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider {
},
timeoutMs: 60_000,
fetchFn: fetch,
allowPrivateNetwork: allowPrivate,
allowPrivateNetwork,
});
try {

View File

@ -9,8 +9,8 @@ import {
} from "openclaw/plugin-sdk/media-understanding";
import {
assertOkOrThrowHttpError,
normalizeBaseUrl,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import {
DEFAULT_GOOGLE_API_BASE_URL,
@ -44,11 +44,6 @@ async function generateGeminiInlineDataText(params: {
missingTextError: string;
}): Promise<{ text: string; model: string }> {
const fetchFn = params.fetchFn ?? fetch;
const baseUrl = normalizeBaseUrl(
normalizeGoogleApiBaseUrl(params.baseUrl ?? params.defaultBaseUrl),
DEFAULT_GOOGLE_API_BASE_URL,
);
const allowPrivate = Boolean(params.baseUrl?.trim());
const model = (() => {
const trimmed = params.model?.trim();
if (!trimmed) {
@ -56,16 +51,19 @@ async function generateGeminiInlineDataText(params: {
}
return normalizeGoogleModelId(trimmed);
})();
const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({
baseUrl: normalizeGoogleApiBaseUrl(params.baseUrl ?? params.defaultBaseUrl),
defaultBaseUrl: DEFAULT_GOOGLE_API_BASE_URL,
allowPrivateNetwork: Boolean(params.baseUrl?.trim()),
headers: params.headers,
defaultHeaders: parseGeminiAuth(params.apiKey).headers,
provider: "google",
api: "google-generative-ai",
capability: params.defaultMime.startsWith("audio/") ? "audio" : "video",
transport: "media-understanding",
});
const url = `${baseUrl}/models/${model}:generateContent`;
const authHeaders = parseGeminiAuth(params.apiKey);
const headers = new Headers(params.headers);
for (const [key, value] of Object.entries(authHeaders.headers)) {
if (!headers.has(key)) {
headers.set(key, value);
}
}
const prompt = (() => {
const trimmed = params.prompt?.trim();
return trimmed || params.defaultPrompt;
@ -94,7 +92,7 @@ async function generateGeminiInlineDataText(params: {
body,
timeoutMs: params.timeoutMs,
fetchFn,
allowPrivateNetwork: allowPrivate,
allowPrivateNetwork,
});
try {

View File

@ -7,8 +7,8 @@ import {
} from "openclaw/plugin-sdk/media-understanding";
import {
assertOkOrThrowHttpError,
normalizeBaseUrl,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
export const DEFAULT_MOONSHOT_VIDEO_BASE_URL = "https://api.moonshot.ai/v1";
@ -62,20 +62,24 @@ export async function describeMoonshotVideo(
params: VideoDescriptionRequest,
): Promise<VideoDescriptionResult> {
const fetchFn = params.fetchFn ?? fetch;
const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_MOONSHOT_VIDEO_BASE_URL);
const model = resolveModel(params.model);
const mime = params.mime ?? "video/mp4";
const prompt = resolvePrompt(params.prompt);
const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({
baseUrl: params.baseUrl,
defaultBaseUrl: DEFAULT_MOONSHOT_VIDEO_BASE_URL,
headers: params.headers,
defaultHeaders: {
"content-type": "application/json",
authorization: `Bearer ${params.apiKey}`,
},
provider: "moonshot",
api: "openai-completions",
capability: "video",
transport: "media-understanding",
});
const url = `${baseUrl}/chat/completions`;
const headers = new Headers(params.headers);
if (!headers.has("content-type")) {
headers.set("content-type", "application/json");
}
if (!headers.has("authorization")) {
headers.set("authorization", `Bearer ${params.apiKey}`);
}
const body = {
model,
messages: [
@ -100,6 +104,7 @@ export async function describeMoonshotVideo(
body,
timeoutMs: params.timeoutMs,
fetchFn,
allowPrivateNetwork,
});
try {

View File

@ -1,9 +1,8 @@
import path from "node:path";
import {
applyProviderRequestHeaders,
assertOkOrThrowHttpError,
normalizeBaseUrl,
postTranscriptionRequest,
resolveProviderHttpRequestConfig,
requireTranscriptionText,
} from "./shared.js";
import type { AudioTranscriptionRequest, AudioTranscriptionResult } from "./types.js";
@ -23,8 +22,18 @@ export async function transcribeOpenAiCompatibleAudio(
params: OpenAiCompatibleAudioParams,
): Promise<AudioTranscriptionResult> {
const fetchFn = params.fetchFn ?? fetch;
const baseUrl = normalizeBaseUrl(params.baseUrl, params.defaultBaseUrl);
const allowPrivate = Boolean(params.baseUrl?.trim());
const { baseUrl, allowPrivateNetwork, headers } = resolveProviderHttpRequestConfig({
baseUrl: params.baseUrl,
defaultBaseUrl: params.defaultBaseUrl,
headers: params.headers,
defaultHeaders: {
authorization: `Bearer ${params.apiKey}`,
},
provider: params.provider,
api: "openai-audio-transcriptions",
capability: "audio",
transport: "media-understanding",
});
const url = `${baseUrl}/audio/transcriptions`;
const model = resolveModel(params.model, params.defaultModel);
@ -43,25 +52,13 @@ export async function transcribeOpenAiCompatibleAudio(
form.append("prompt", params.prompt.trim());
}
const headers = applyProviderRequestHeaders({
headers: params.headers,
provider: params.provider,
api: "openai-audio-transcriptions",
baseUrl,
capability: "audio",
transport: "media-understanding",
});
if (!headers.has("authorization")) {
headers.set("authorization", `Bearer ${params.apiKey}`);
}
const { response: res, release } = await postTranscriptionRequest({
url,
headers,
body: form,
timeoutMs: params.timeoutMs,
fetchFn,
allowPrivateNetwork: allowPrivate,
allowPrivateNetwork,
});
try {

View File

@ -0,0 +1,66 @@
import { describe, expect, it } from "vitest";
import { resolveProviderHttpRequestConfig } from "./shared.js";
describe("resolveProviderHttpRequestConfig", () => {
  it("preserves explicit caller headers over default and attribution headers", () => {
    // Caller-supplied headers must win over both defaultHeaders and the
    // injected attribution headers; unrelated defaults still flow through.
    const config = resolveProviderHttpRequestConfig({
      baseUrl: "https://api.openai.com/v1/",
      defaultBaseUrl: "https://api.openai.com/v1",
      headers: {
        authorization: "Bearer override",
        "User-Agent": "custom-agent/1.0",
      },
      defaultHeaders: {
        authorization: "Bearer default-token",
        "X-Default": "1",
      },
      provider: "openai",
      api: "openai-audio-transcriptions",
      capability: "audio",
      transport: "media-understanding",
    });

    // Trailing slash is normalized away; explicit baseUrl enables private networks.
    expect(config.baseUrl).toBe("https://api.openai.com/v1");
    expect(config.allowPrivateNetwork).toBe(true);
    expect(config.headers.get("authorization")).toBe("Bearer override");
    expect(config.headers.get("x-default")).toBe("1");
    expect(config.headers.get("user-agent")).toBe("custom-agent/1.0");
    expect(config.headers.get("originator")).toBe("openclaw");
    expect(config.headers.get("version")).toBeTruthy();
  });

  it("uses the fallback base URL without enabling private-network access", () => {
    // No caller baseUrl: fall back to the default and keep SSRF guard strict.
    const config = resolveProviderHttpRequestConfig({
      defaultBaseUrl: "https://api.deepgram.com/v1/",
      defaultHeaders: {
        authorization: "Token test-key",
      },
      provider: "deepgram",
      capability: "audio",
      transport: "media-understanding",
    });

    expect(config.baseUrl).toBe("https://api.deepgram.com/v1");
    expect(config.allowPrivateNetwork).toBe(false);
    expect(config.headers.get("authorization")).toBe("Token test-key");
  });

  it("allows callers to preserve custom-base detection before URL normalization", () => {
    // An explicit allowPrivateNetwork flag overrides the baseUrl-derived default.
    const config = resolveProviderHttpRequestConfig({
      baseUrl: "https://generativelanguage.googleapis.com/v1beta",
      defaultBaseUrl: "https://generativelanguage.googleapis.com/v1beta",
      allowPrivateNetwork: false,
      defaultHeaders: {
        "x-goog-api-key": "test-key",
      },
      provider: "google",
      api: "google-generative-ai",
      capability: "image",
      transport: "http",
    });

    expect(config.baseUrl).toBe("https://generativelanguage.googleapis.com/v1beta");
    expect(config.allowPrivateNetwork).toBe(false);
    expect(config.headers.get("x-goog-api-key")).toBe("test-key");
  });
});

View File

@ -1,7 +1,15 @@
import type {
ProviderRequestCapability,
ProviderRequestTransport,
} from "../agents/provider-attribution.js";
import { resolveProviderRequestAttributionHeaders } from "../agents/provider-attribution.js";
import {
resolveProviderRequestConfig,
type ResolvedProviderRequestConfig,
} from "../agents/provider-request-config.js";
import type { GuardedFetchResult } from "../infra/net/fetch-guard.js";
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
import type { LookupFn, SsrFPolicy } from "../infra/net/ssrf.js";
import { resolveProviderRequestAttributionHeaders } from "../agents/provider-attribution.js";
export { fetchWithTimeout } from "../utils/fetch-timeout.js";
const MAX_ERROR_CHARS = 300;
@ -13,13 +21,21 @@ export function normalizeBaseUrl(baseUrl: string | undefined, fallback: string):
export function applyProviderRequestHeaders(params: {
headers?: HeadersInit;
defaultHeaders?: Record<string, string>;
provider?: string;
api?: string;
baseUrl?: string;
capability?: "audio" | "image" | "video" | "other";
transport?: "http" | "media-understanding";
capability?: ProviderRequestCapability;
transport?: ProviderRequestTransport;
}): Headers {
const headers = new Headers(params.headers);
if (params.defaultHeaders) {
for (const [key, value] of Object.entries(params.defaultHeaders)) {
if (!headers.has(key)) {
headers.set(key, value);
}
}
}
const attributionHeaders = resolveProviderRequestAttributionHeaders({
provider: params.provider,
api: params.api,
@ -38,6 +54,53 @@ export function applyProviderRequestHeaders(params: {
return headers;
}
/**
 * Resolve the shared request shape for a provider media HTTP call:
 * normalized base URL, private-network (SSRF) policy, and the merged
 * header set (caller headers > provider defaults > attribution headers).
 *
 * Precedence notes:
 * - `baseUrl` falls back to `defaultBaseUrl` via `normalizeBaseUrl`.
 * - `allowPrivateNetwork` defaults to true only when the caller supplied a
 *   non-blank `baseUrl` (custom endpoints may point at private hosts).
 * - `defaultHeaders` are routed through `resolveProviderRequestConfig` as
 *   `providerHeaders` before being applied beneath caller headers.
 */
export function resolveProviderHttpRequestConfig(params: {
  baseUrl?: string;
  defaultBaseUrl: string;
  allowPrivateNetwork?: boolean;
  headers?: HeadersInit;
  defaultHeaders?: Record<string, string>;
  provider?: string;
  api?: string;
  capability?: ProviderRequestCapability;
  transport?: ProviderRequestTransport;
}): {
  baseUrl: string;
  allowPrivateNetwork: boolean;
  headers: Headers;
  requestConfig: ResolvedProviderRequestConfig;
} {
  const baseUrl = normalizeBaseUrl(params.baseUrl, params.defaultBaseUrl);

  // Build the request-config input, omitting optional keys entirely when
  // absent so downstream defaulting is not short-circuited by `undefined`.
  const configInput: Parameters<typeof resolveProviderRequestConfig>[0] = {
    provider: params.provider ?? "",
    baseUrl,
    capability: params.capability ?? "other",
    transport: params.transport ?? "http",
  };
  if (params.api !== undefined) {
    configInput.api = params.api;
  }
  if (params.defaultHeaders !== undefined) {
    configInput.providerHeaders = params.defaultHeaders;
  }
  const requestConfig = resolveProviderRequestConfig(configInput);

  // Caller headers take precedence; resolved provider headers fill the gaps.
  const headers = applyProviderRequestHeaders({
    headers: params.headers,
    defaultHeaders: requestConfig.headers,
    provider: params.provider,
    api: params.api,
    baseUrl,
    capability: params.capability,
    transport: params.transport,
  });

  // A non-blank explicit base URL implies a custom (possibly private) host
  // unless the caller pinned the policy themselves.
  const allowPrivateNetwork =
    params.allowPrivateNetwork ?? Boolean(params.baseUrl?.trim());

  return { baseUrl, allowPrivateNetwork, headers, requestConfig };
}
export async function fetchWithTimeoutGuarded(
url: string,
init: RequestInit,

View File

@ -8,5 +8,6 @@ export {
normalizeBaseUrl,
postJsonRequest,
postTranscriptionRequest,
resolveProviderHttpRequestConfig,
requireTranscriptionText,
} from "../media-understanding/shared.js";