mirror of https://github.com/openclaw/openclaw.git
Gateway: normalize HEIC input_image sources (#38122)
* Media: normalize HEIC input images
* Gateway: accept HEIC image input schema
* Media: add HEIC input normalization tests
* Gateway: cover HEIC input schema parity
* Docs: document HEIC input image support
* Changelog: note HEIC input image fix
This commit is contained in:
parent
81f22ae109
commit
9aceb51379
|
|
@ -137,6 +137,7 @@ Docs: https://docs.openclaw.ai
|
||||||
- Discord/voice messages: request upload slots with JSON fetch calls so voice message uploads no longer fail with content-type errors. Thanks @thewilloftheshadow.
|
- Discord/voice messages: request upload slots with JSON fetch calls so voice message uploads no longer fail with content-type errors. Thanks @thewilloftheshadow.
|
||||||
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
|
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
|
||||||
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
|
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
|
||||||
|
- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc.
|
||||||
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
|
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
|
||||||
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
|
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
|
||||||
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.
|
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.
|
||||||
|
|
|
||||||
|
|
@ -242,7 +242,14 @@ Defaults can be tuned under `gateway.http.endpoints.responses`:
|
||||||
images: {
|
images: {
|
||||||
allowUrl: true,
|
allowUrl: true,
|
||||||
urlAllowlist: ["images.example.com"],
|
urlAllowlist: ["images.example.com"],
|
||||||
allowedMimes: ["image/jpeg", "image/png", "image/gif", "image/webp"],
|
allowedMimes: [
|
||||||
|
"image/jpeg",
|
||||||
|
"image/png",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
"image/heic",
|
||||||
|
"image/heif",
|
||||||
|
],
|
||||||
maxBytes: 10485760,
|
maxBytes: 10485760,
|
||||||
maxRedirects: 3,
|
maxRedirects: 3,
|
||||||
timeoutMs: 10000,
|
timeoutMs: 10000,
|
||||||
|
|
@ -268,6 +275,7 @@ Defaults when omitted:
|
||||||
- `images.maxBytes`: 10MB
|
- `images.maxBytes`: 10MB
|
||||||
- `images.maxRedirects`: 3
|
- `images.maxRedirects`: 3
|
||||||
- `images.timeoutMs`: 10s
|
- `images.timeoutMs`: 10s
|
||||||
|
- HEIC/HEIF `input_image` sources are accepted and normalized to JPEG before provider delivery.
|
||||||
|
|
||||||
Security note:
|
Security note:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,14 @@ export const InputImageSourceSchema = z.discriminatedUnion("type", [
|
||||||
}),
|
}),
|
||||||
z.object({
|
z.object({
|
||||||
type: z.literal("base64"),
|
type: z.literal("base64"),
|
||||||
media_type: z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
media_type: z.enum([
|
||||||
|
"image/jpeg",
|
||||||
|
"image/png",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
"image/heic",
|
||||||
|
"image/heif",
|
||||||
|
]),
|
||||||
data: z.string().min(1), // base64-encoded
|
data: z.string().min(1), // base64-encoded
|
||||||
}),
|
}),
|
||||||
]);
|
]);
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,20 @@ describe("OpenResponses Feature Parity", () => {
|
||||||
expect(result.success).toBe(true);
|
expect(result.success).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should validate input_image with HEIC base64 source", async () => {
  // An input_image part whose base64 source declares the HEIC media type
  // is expected to parse successfully.
  const { success } = InputImageContentPartSchema.safeParse({
    type: "input_image" as const,
    source: {
      type: "base64" as const,
      media_type: "image/heic" as const,
      data: "aGVpYy1pbWFnZQ==",
    },
  });

  expect(success).toBe(true);
});
|
||||||
|
|
||||||
it("should reject input_image with invalid mime type", async () => {
|
it("should reject input_image with invalid mime type", async () => {
|
||||||
const invalidImage = {
|
const invalidImage = {
|
||||||
type: "input_image" as const,
|
type: "input_image" as const,
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,16 @@
|
||||||
import { beforeAll, describe, expect, it, vi } from "vitest";
|
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
const fetchWithSsrFGuardMock = vi.fn();
|
const fetchWithSsrFGuardMock = vi.fn();
|
||||||
|
const convertHeicToJpegMock = vi.fn();
|
||||||
|
|
||||||
vi.mock("../infra/net/fetch-guard.js", () => ({
|
vi.mock("../infra/net/fetch-guard.js", () => ({
|
||||||
fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
|
fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
vi.mock("./image-ops.js", () => ({
|
||||||
|
convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args),
|
||||||
|
}));
|
||||||
|
|
||||||
async function waitForMicrotaskTurn(): Promise<void> {
|
async function waitForMicrotaskTurn(): Promise<void> {
|
||||||
await new Promise<void>((resolve) => queueMicrotask(resolve));
|
await new Promise<void>((resolve) => queueMicrotask(resolve));
|
||||||
}
|
}
|
||||||
|
|
@ -19,6 +24,75 @@ beforeAll(async () => {
|
||||||
await import("./input-files.js"));
|
await import("./input-files.js"));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("HEIC input image normalization", () => {
  // Both cases allow HEIC and JPEG, cap payloads at 1 MiB and forbid redirects;
  // only URL access and the timeout differ. A fresh object (and Set) is built per call.
  const buildLimits = (allowUrl: boolean, timeoutMs: number) => ({
    allowUrl,
    allowedMimes: new Set(["image/heic", "image/jpeg"]),
    maxBytes: 1024 * 1024,
    maxRedirects: 0,
    timeoutMs,
  });

  // Asserts a single conversion call and a JPEG result carrying the converter's bytes.
  const expectSingleJpegConversion = (image: unknown, jpegBytes: Buffer) => {
    expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1);
    expect(image).toEqual({
      type: "image",
      data: jpegBytes.toString("base64"),
      mimeType: "image/jpeg",
    });
  };

  it("converts base64 HEIC images to JPEG before returning them", async () => {
    const jpegBytes = Buffer.from("jpeg-normalized");
    convertHeicToJpegMock.mockResolvedValueOnce(jpegBytes);

    const image = await extractImageContentFromSource(
      {
        type: "base64",
        data: Buffer.from("heic-source").toString("base64"),
        mediaType: "image/heic",
      },
      buildLimits(false, 1),
    );

    expectSingleJpegConversion(image, jpegBytes);
  });

  it("converts URL HEIC images to JPEG before returning them", async () => {
    const jpegBytes = Buffer.from("jpeg-url-normalized");
    convertHeicToJpegMock.mockResolvedValueOnce(jpegBytes);

    // The guarded fetch is stubbed to return a HEIC response plus a release hook.
    const release = vi.fn(async () => {});
    const heicResponse = new Response(Buffer.from("heic-url-source"), {
      status: 200,
      headers: { "content-type": "image/heic" },
    });
    fetchWithSsrFGuardMock.mockResolvedValueOnce({
      response: heicResponse,
      release,
      finalUrl: "https://example.com/photo.heic",
    });

    const image = await extractImageContentFromSource(
      { type: "url", url: "https://example.com/photo.heic" },
      buildLimits(true, 1000),
    );

    expectSingleJpegConversion(image, jpegBytes);
    // The fetch guard's release hook must run exactly once.
    expect(release).toHaveBeenCalledTimes(1);
  });
});
|
||||||
|
|
||||||
describe("fetchWithGuard", () => {
|
describe("fetchWithGuard", () => {
|
||||||
it("rejects oversized streamed payloads and cancels the stream", async () => {
|
it("rejects oversized streamed payloads and cancels the stream", async () => {
|
||||||
let canceled = false;
|
let canceled = false;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
|
||||||
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
||||||
import { logWarn } from "../logger.js";
|
import { logWarn } from "../logger.js";
|
||||||
import { canonicalizeBase64, estimateBase64DecodedBytes } from "./base64.js";
|
import { canonicalizeBase64, estimateBase64DecodedBytes } from "./base64.js";
|
||||||
|
import { convertHeicToJpeg } from "./image-ops.js";
|
||||||
|
import { detectMime } from "./mime.js";
|
||||||
import { extractPdfContent, type PdfExtractedImage } from "./pdf-extract.js";
|
import { extractPdfContent, type PdfExtractedImage } from "./pdf-extract.js";
|
||||||
import { readResponseWithLimit } from "./read-response-with-limit.js";
|
import { readResponseWithLimit } from "./read-response-with-limit.js";
|
||||||
|
|
||||||
|
|
@ -85,7 +87,14 @@ export type InputFetchResult = {
|
||||||
contentType?: string;
|
contentType?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const DEFAULT_INPUT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"];
|
export const DEFAULT_INPUT_IMAGE_MIMES = [
|
||||||
|
"image/jpeg",
|
||||||
|
"image/png",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
"image/heic",
|
||||||
|
"image/heif",
|
||||||
|
];
|
||||||
export const DEFAULT_INPUT_FILE_MIMES = [
|
export const DEFAULT_INPUT_FILE_MIMES = [
|
||||||
"text/plain",
|
"text/plain",
|
||||||
"text/markdown",
|
"text/markdown",
|
||||||
|
|
@ -102,6 +111,8 @@ export const DEFAULT_INPUT_TIMEOUT_MS = 10_000;
|
||||||
export const DEFAULT_INPUT_PDF_MAX_PAGES = 4;
|
export const DEFAULT_INPUT_PDF_MAX_PAGES = 4;
|
||||||
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000;
|
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000;
|
||||||
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200;
|
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200;
|
||||||
|
const NORMALIZED_INPUT_IMAGE_MIME = "image/jpeg";
|
||||||
|
const HEIC_INPUT_IMAGE_MIMES = new Set(["image/heic", "image/heif"]);
|
||||||
|
|
||||||
function rejectOversizedBase64Payload(params: {
|
function rejectOversizedBase64Payload(params: {
|
||||||
data: string;
|
data: string;
|
||||||
|
|
@ -218,6 +229,40 @@ function clampText(text: string, maxChars: number): string {
|
||||||
return text.slice(0, maxChars);
|
return text.slice(0, maxChars);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the effective MIME type of an input image, enforces the MIME
 * allowlist, and converts HEIC/HEIF payloads via `convertHeicToJpeg`.
 *
 * The MIME type is whatever `detectMime` reports for the buffer (with the
 * declared type passed as `headerMime`), falling back to the declared type
 * and finally to "application/octet-stream".
 *
 * @param params.buffer - Raw image bytes.
 * @param params.mimeType - Declared MIME type of the source, if any.
 * @param params.limits - Input limits; `allowedMimes` and `maxBytes` are read here.
 * @returns Base64 image content. Sources whose MIME is in HEIC_INPUT_IMAGE_MIMES
 *   are returned with the NORMALIZED_INPUT_IMAGE_MIME type; all others keep
 *   their resolved MIME type and original bytes.
 * @throws Error when the resolved MIME type is not in `allowedMimes`, or when
 *   the converted buffer exceeds `maxBytes`.
 */
async function normalizeInputImage(params: {
  buffer: Buffer;
  mimeType?: string;
  limits: InputImageLimits;
}): Promise<InputImageContent> {
  const { buffer, mimeType: declaredMime, limits } = params;

  // Prefer the detected type; only consult the declared type when detection yields nothing.
  const detectedMime = normalizeMimeType(await detectMime({ buffer, headerMime: declaredMime }));
  const sourceMime = detectedMime ?? normalizeMimeType(declaredMime) ?? "application/octet-stream";

  if (!limits.allowedMimes.has(sourceMime)) {
    throw new Error(`Unsupported image MIME type: ${sourceMime}`);
  }

  if (HEIC_INPUT_IMAGE_MIMES.has(sourceMime)) {
    const jpegBuffer = await convertHeicToJpeg(buffer);
    const jpegSize = jpegBuffer.byteLength;
    // The converted output is checked against the byte limit before it is returned.
    if (jpegSize > limits.maxBytes) {
      throw new Error(
        `Image too large after HEIC conversion: ${jpegSize} bytes (limit: ${limits.maxBytes} bytes)`,
      );
    }
    return {
      type: "image",
      data: jpegBuffer.toString("base64"),
      mimeType: NORMALIZED_INPUT_IMAGE_MIME,
    };
  }

  // Non-HEIC images pass through unchanged, labelled with the resolved MIME type.
  return {
    type: "image",
    data: buffer.toString("base64"),
    mimeType: sourceMime,
  };
}
|
||||||
|
|
||||||
export async function extractImageContentFromSource(
|
export async function extractImageContentFromSource(
|
||||||
source: InputImageSource,
|
source: InputImageSource,
|
||||||
limits: InputImageLimits,
|
limits: InputImageLimits,
|
||||||
|
|
@ -228,17 +273,17 @@ export async function extractImageContentFromSource(
|
||||||
if (!canonicalData) {
|
if (!canonicalData) {
|
||||||
throw new Error("input_image base64 source has invalid 'data' field");
|
throw new Error("input_image base64 source has invalid 'data' field");
|
||||||
}
|
}
|
||||||
const mimeType = normalizeMimeType(source.mediaType) ?? "image/png";
|
|
||||||
if (!limits.allowedMimes.has(mimeType)) {
|
|
||||||
throw new Error(`Unsupported image MIME type: ${mimeType}`);
|
|
||||||
}
|
|
||||||
const buffer = Buffer.from(canonicalData, "base64");
|
const buffer = Buffer.from(canonicalData, "base64");
|
||||||
if (buffer.byteLength > limits.maxBytes) {
|
if (buffer.byteLength > limits.maxBytes) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`,
|
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return { type: "image", data: canonicalData, mimeType };
|
return await normalizeInputImage({
|
||||||
|
buffer,
|
||||||
|
mimeType: normalizeMimeType(source.mediaType) ?? "image/png",
|
||||||
|
limits,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (source.type === "url") {
|
if (source.type === "url") {
|
||||||
|
|
@ -256,10 +301,11 @@ export async function extractImageContentFromSource(
|
||||||
},
|
},
|
||||||
auditContext: "openresponses.input_image",
|
auditContext: "openresponses.input_image",
|
||||||
});
|
});
|
||||||
if (!limits.allowedMimes.has(result.mimeType)) {
|
return await normalizeInputImage({
|
||||||
throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`);
|
buffer: result.buffer,
|
||||||
}
|
mimeType: result.mimeType,
|
||||||
return { type: "image", data: result.buffer.toString("base64"), mimeType: result.mimeType };
|
limits,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error(`Unsupported input_image source type: ${(source as { type: string }).type}`);
|
throw new Error(`Unsupported input_image source type: ${(source as { type: string }).type}`);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue