openclaw/extensions/google/image-generation-provider.ts

217 lines
6.2 KiB
TypeScript

import type { ImageGenerationProviderPlugin } from "openclaw/plugin-sdk/image-generation-core";
import {
normalizeGoogleModelId,
parseGeminiAuth,
resolveApiKeyForProvider,
} from "openclaw/plugin-sdk/image-generation-core";
import {
assertOkOrThrowHttpError,
normalizeBaseUrl,
postJsonRequest,
} from "openclaw/plugin-sdk/media-understanding";
const DEFAULT_GOOGLE_IMAGE_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
const DEFAULT_GOOGLE_IMAGE_MODEL = "gemini-3.1-flash-image-preview";
const DEFAULT_OUTPUT_MIME = "image/png";
const GOOGLE_SUPPORTED_SIZES = [
"1024x1024",
"1024x1536",
"1536x1024",
"1024x1792",
"1792x1024",
] as const;
const GOOGLE_SUPPORTED_ASPECT_RATIOS = [
"1:1",
"2:3",
"3:2",
"3:4",
"4:3",
"4:5",
"5:4",
"9:16",
"16:9",
"21:9",
] as const;
type GoogleInlineDataPart = {
mimeType?: string;
mime_type?: string;
data?: string;
};
type GoogleGenerateImageResponse = {
candidates?: Array<{
content?: {
parts?: Array<{
text?: string;
inlineData?: GoogleInlineDataPart;
inline_data?: GoogleInlineDataPart;
}>;
};
}>;
};
function resolveGoogleBaseUrl(cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"]): string {
const direct = cfg?.models?.providers?.google?.baseUrl?.trim();
return direct || DEFAULT_GOOGLE_IMAGE_BASE_URL;
}
function normalizeGoogleImageModel(model: string | undefined): string {
const trimmed = model?.trim();
return normalizeGoogleModelId(trimmed || DEFAULT_GOOGLE_IMAGE_MODEL);
}
function mapSizeToImageConfig(
size: string | undefined,
): { aspectRatio?: string; imageSize?: "2K" | "4K" } | undefined {
const trimmed = size?.trim();
if (!trimmed) {
return undefined;
}
const normalized = trimmed.toLowerCase();
const mapping = new Map<string, string>([
["1024x1024", "1:1"],
["1024x1536", "2:3"],
["1536x1024", "3:2"],
["1024x1792", "9:16"],
["1792x1024", "16:9"],
]);
const aspectRatio = mapping.get(normalized);
const [widthRaw, heightRaw] = normalized.split("x");
const width = Number.parseInt(widthRaw ?? "", 10);
const height = Number.parseInt(heightRaw ?? "", 10);
const longestEdge = Math.max(width, height);
const imageSize = longestEdge >= 3072 ? "4K" : longestEdge >= 1536 ? "2K" : undefined;
if (!aspectRatio && !imageSize) {
return undefined;
}
return {
...(aspectRatio ? { aspectRatio } : {}),
...(imageSize ? { imageSize } : {}),
};
}
export function buildGoogleImageGenerationProvider(): ImageGenerationProviderPlugin {
return {
id: "google",
label: "Google",
defaultModel: DEFAULT_GOOGLE_IMAGE_MODEL,
models: [DEFAULT_GOOGLE_IMAGE_MODEL, "gemini-3-pro-image-preview"],
capabilities: {
generate: {
maxCount: 4,
supportsSize: true,
supportsAspectRatio: true,
supportsResolution: true,
},
edit: {
enabled: true,
maxCount: 4,
maxInputImages: 5,
supportsSize: true,
supportsAspectRatio: true,
supportsResolution: true,
},
geometry: {
sizes: [...GOOGLE_SUPPORTED_SIZES],
aspectRatios: [...GOOGLE_SUPPORTED_ASPECT_RATIOS],
resolutions: ["1K", "2K", "4K"],
},
},
async generateImage(req) {
const auth = await resolveApiKeyForProvider({
provider: "google",
cfg: req.cfg,
agentDir: req.agentDir,
store: req.authStore,
});
if (!auth.apiKey) {
throw new Error("Google API key missing");
}
const model = normalizeGoogleImageModel(req.model);
const baseUrl = normalizeBaseUrl(
resolveGoogleBaseUrl(req.cfg),
DEFAULT_GOOGLE_IMAGE_BASE_URL,
);
const allowPrivate = Boolean(req.cfg?.models?.providers?.google?.baseUrl?.trim());
const authHeaders = parseGeminiAuth(auth.apiKey);
const headers = new Headers(authHeaders.headers);
const imageConfig = mapSizeToImageConfig(req.size);
const inputParts = (req.inputImages ?? []).map((image) => ({
inlineData: {
mimeType: image.mimeType,
data: image.buffer.toString("base64"),
},
}));
const resolvedImageConfig = {
...imageConfig,
...(req.aspectRatio?.trim() ? { aspectRatio: req.aspectRatio.trim() } : {}),
...(req.resolution ? { imageSize: req.resolution } : {}),
};
const { response: res, release } = await postJsonRequest({
url: `${baseUrl}/models/${model}:generateContent`,
headers,
body: {
contents: [
{
role: "user",
parts: [...inputParts, { text: req.prompt }],
},
],
generationConfig: {
responseModalities: ["TEXT", "IMAGE"],
...(Object.keys(resolvedImageConfig).length > 0
? { imageConfig: resolvedImageConfig }
: {}),
},
},
timeoutMs: 60_000,
fetchFn: fetch,
allowPrivateNetwork: allowPrivate,
});
try {
await assertOkOrThrowHttpError(res, "Google image generation failed");
const payload = (await res.json()) as GoogleGenerateImageResponse;
let imageIndex = 0;
const images = (payload.candidates ?? [])
.flatMap((candidate) => candidate.content?.parts ?? [])
.map((part) => {
const inline = part.inlineData ?? part.inline_data;
const data = inline?.data?.trim();
if (!data) {
return null;
}
const mimeType = inline?.mimeType ?? inline?.mime_type ?? DEFAULT_OUTPUT_MIME;
const extension = mimeType.includes("jpeg") ? "jpg" : (mimeType.split("/")[1] ?? "png");
imageIndex += 1;
return {
buffer: Buffer.from(data, "base64"),
mimeType,
fileName: `image-${imageIndex}.${extension}`,
};
})
.filter((entry): entry is NonNullable<typeof entry> => entry !== null);
if (images.length === 0) {
throw new Error("Google image generation response missing image data");
}
return {
images,
model,
};
} finally {
await release();
}
},
};
}