mirror of https://github.com/openclaw/openclaw.git
refactor: move media generation runtimes into core
This commit is contained in:
parent
5da21bc2f7
commit
9f2b760d33
|
|
@ -2,6 +2,7 @@ import type {
|
|||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
} from "openclaw/plugin-sdk/image-generation";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
|
|
@ -294,6 +295,11 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider {
|
|||
label: "fal",
|
||||
defaultModel: DEFAULT_FAL_IMAGE_MODEL,
|
||||
models: [DEFAULT_FAL_IMAGE_MODEL, `${DEFAULT_FAL_IMAGE_MODEL}/${DEFAULT_FAL_EDIT_SUBPATH}`],
|
||||
isConfigured: ({ agentDir }) =>
|
||||
isProviderApiKeyConfigured({
|
||||
provider: "fal",
|
||||
agentDir,
|
||||
}),
|
||||
capabilities: {
|
||||
generate: {
|
||||
maxCount: 4,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import { assertOkOrThrowHttpError, postJsonRequest } from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeGoogleModelId, resolveGoogleGenerativeAiHttpRequestConfig } from "./api.js";
|
||||
|
|
@ -88,6 +89,11 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider {
|
|||
label: "Google",
|
||||
defaultModel: DEFAULT_GOOGLE_IMAGE_MODEL,
|
||||
models: [DEFAULT_GOOGLE_IMAGE_MODEL, "gemini-3-pro-image-preview"],
|
||||
isConfigured: ({ agentDir }) =>
|
||||
isProviderApiKeyConfigured({
|
||||
provider: "google",
|
||||
agentDir,
|
||||
}),
|
||||
capabilities: {
|
||||
generate: {
|
||||
maxCount: 4,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
|
|
@ -57,6 +58,11 @@ function buildMinimaxImageProvider(providerId: string): ImageGenerationProvider
|
|||
label: "MiniMax",
|
||||
defaultModel: DEFAULT_MODEL,
|
||||
models: [DEFAULT_MODEL],
|
||||
isConfigured: ({ agentDir }) =>
|
||||
isProviderApiKeyConfigured({
|
||||
provider: providerId,
|
||||
agentDir,
|
||||
}),
|
||||
capabilities: {
|
||||
generate: {
|
||||
maxCount: 9,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
|
|
@ -49,6 +50,11 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
|
|||
label: "OpenAI",
|
||||
defaultModel: DEFAULT_OPENAI_IMAGE_MODEL,
|
||||
models: [DEFAULT_OPENAI_IMAGE_MODEL],
|
||||
isConfigured: ({ agentDir }) =>
|
||||
isProviderApiKeyConfigured({
|
||||
provider: "openai",
|
||||
agentDir,
|
||||
}),
|
||||
capabilities: {
|
||||
generate: {
|
||||
maxCount: 4,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
|
|
@ -204,6 +205,11 @@ export function buildQwenVideoGenerationProvider(): VideoGenerationProvider {
|
|||
label: "Qwen Cloud",
|
||||
defaultModel: DEFAULT_QWEN_VIDEO_MODEL,
|
||||
models: ["wan2.6-t2v", "wan2.6-i2v", "wan2.6-r2v", "wan2.6-r2v-flash", "wan2.7-r2v"],
|
||||
isConfigured: ({ agentDir }) =>
|
||||
isProviderApiKeyConfigured({
|
||||
provider: "qwen",
|
||||
agentDir,
|
||||
}),
|
||||
capabilities: {
|
||||
maxVideos: 1,
|
||||
maxInputImages: 1,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import {
|
|||
import {
|
||||
buildToolModelConfigFromCandidates,
|
||||
coerceToolModelConfig,
|
||||
hasAuthForProvider,
|
||||
hasToolModelConfig,
|
||||
resolveDefaultModelRef,
|
||||
type ToolModelConfig,
|
||||
|
|
@ -113,20 +114,30 @@ function getImageGenerationProviderAuthEnvVars(providerId: string): string[] {
|
|||
return getProviderEnvVars(providerId);
|
||||
}
|
||||
|
||||
function resolveImageGenerationModelCandidates(
|
||||
cfg: OpenClawConfig | undefined,
|
||||
): Array<string | undefined> {
|
||||
function resolveImageGenerationModelCandidates(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
}): Array<string | undefined> {
|
||||
const providerDefaults = new Map<string, string>();
|
||||
for (const provider of listRuntimeImageGenerationProviders({ config: cfg })) {
|
||||
for (const provider of listRuntimeImageGenerationProviders({ config: params.cfg })) {
|
||||
const providerId = provider.id.trim();
|
||||
const modelId = provider.defaultModel?.trim();
|
||||
if (!providerId || !modelId || providerDefaults.has(providerId)) {
|
||||
if (
|
||||
!providerId ||
|
||||
!modelId ||
|
||||
providerDefaults.has(providerId) ||
|
||||
!isImageGenerationProviderConfigured({
|
||||
provider,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
})
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
providerDefaults.set(providerId, `${providerId}/${modelId}`);
|
||||
}
|
||||
|
||||
const primaryProvider = resolveDefaultModelRef(cfg).provider;
|
||||
const primaryProvider = resolveDefaultModelRef(params.cfg).provider;
|
||||
const orderedProviders = [
|
||||
primaryProvider,
|
||||
...[...providerDefaults.keys()]
|
||||
|
|
@ -157,10 +168,45 @@ export function resolveImageGenerationModelConfigForTool(params: {
|
|||
return buildToolModelConfigFromCandidates({
|
||||
explicit,
|
||||
agentDir: params.agentDir,
|
||||
candidates: resolveImageGenerationModelCandidates(params.cfg),
|
||||
candidates: resolveImageGenerationModelCandidates(params),
|
||||
isProviderConfigured: (providerId) =>
|
||||
isImageGenerationProviderConfigured({
|
||||
providerId,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reports whether an image-generation provider is ready to be used.
 *
 * Resolution order:
 * 1. Use `params.provider` when the caller already holds the provider object.
 * 2. Otherwise look `params.providerId` up among the runtime providers,
 *    comparing the normalized id against each provider's id and aliases.
 * 3. If no provider can be resolved, fall back to a plain auth check for
 *    `params.providerId`, or `false` when no id was supplied.
 * 4. When the provider exposes an `isConfigured` hook, its answer is returned
 *    as-is; otherwise fall back to `hasAuthForProvider` for the provider's id.
 *
 * @param params.provider - Already-resolved provider; skips the lookup.
 * @param params.providerId - Provider id (or alias) to look up when `provider` is absent.
 * @param params.cfg - Config forwarded to the provider listing and to `isConfigured`.
 * @param params.agentDir - Forwarded to `isConfigured` / `hasAuthForProvider`.
 * @returns `true` when the provider (or the bare provider id) is considered configured.
 */
function isImageGenerationProviderConfigured(params: {
  provider?: ImageGenerationProvider;
  providerId?: string;
  cfg?: OpenClawConfig;
  agentDir?: string;
}): boolean {
  const { cfg, agentDir, providerId } = params;

  let provider = params.provider;
  if (!provider) {
    // Normalize the requested id once rather than once per registered provider.
    // NOTE(review): assumes normalizeProviderId is a pure function — confirm.
    const normalizedId = normalizeProviderId(providerId ?? "");
    provider = listRuntimeImageGenerationProviders({ config: cfg }).find(
      (candidate) =>
        normalizeProviderId(candidate.id) === normalizedId ||
        (candidate.aliases ?? []).some((alias) => normalizeProviderId(alias) === normalizedId),
    );
  }

  if (!provider) {
    // Unknown provider: the only signal left is whether auth exists for the raw id.
    return providerId ? hasAuthForProvider({ provider: providerId, agentDir }) : false;
  }

  if (provider.isConfigured) {
    return provider.isConfigured({ cfg, agentDir });
  }

  return hasAuthForProvider({ provider: provider.id, agentDir });
}
|
||||
|
||||
function resolveAction(args: Record<string, unknown>): "generate" | "list" {
|
||||
const raw = readStringParam(args, "action");
|
||||
if (!raw) {
|
||||
|
|
@ -497,7 +543,7 @@ export function createImageGenerateTool(options?: {
|
|||
label: "Image Generation",
|
||||
name: "image_generate",
|
||||
description:
|
||||
'Generate new images or edit reference images with the configured or inferred image-generation model. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. If you want openai/*, google/*, fal/*, or another provider, configure that provider auth/API key first. Use action="list" to inspect available providers, models, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.',
|
||||
'Generate new images or edit reference images with the configured or inferred image-generation model. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. Providers declare their own auth/readiness; use action="list" to inspect registered providers, models, readiness, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.',
|
||||
parameters: ImageGenerateToolSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const params = args as Record<string, unknown>;
|
||||
|
|
@ -509,6 +555,11 @@ export function createImageGenerateTool(options?: {
|
|||
...(provider.label ? { label: provider.label } : {}),
|
||||
...(provider.defaultModel ? { defaultModel: provider.defaultModel } : {}),
|
||||
models: provider.models ?? (provider.defaultModel ? [provider.defaultModel] : []),
|
||||
configured: isImageGenerationProviderConfigured({
|
||||
provider,
|
||||
cfg: effectiveCfg,
|
||||
agentDir: options?.agentDir,
|
||||
}),
|
||||
authEnvVars: getImageGenerationProviderAuthEnvVars(provider.id),
|
||||
capabilities: provider.capabilities,
|
||||
}),
|
||||
|
|
@ -537,6 +588,7 @@ export function createImageGenerateTool(options?: {
|
|||
return [
|
||||
`${provider.id}${provider.defaultModel ? ` (default ${provider.defaultModel})` : ""}`,
|
||||
` ${modelLine}`,
|
||||
` configured: ${provider.configured ? "yes" : "no"}`,
|
||||
...(provider.authEnvVars.length > 0
|
||||
? [` auth: set ${provider.authEnvVars.join(" / ")} to use ${provider.id}/*`]
|
||||
: []),
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ export function buildToolModelConfigFromCandidates(params: {
|
|||
explicit: ToolModelConfig;
|
||||
agentDir?: string;
|
||||
candidates: Array<string | null | undefined>;
|
||||
isProviderConfigured?: (provider: string) => boolean;
|
||||
}): ToolModelConfig | null {
|
||||
if (hasToolModelConfig(params.explicit)) {
|
||||
return params.explicit;
|
||||
|
|
@ -68,7 +69,10 @@ export function buildToolModelConfigFromCandidates(params: {
|
|||
continue;
|
||||
}
|
||||
const provider = trimmed.slice(0, trimmed.indexOf("/")).trim();
|
||||
if (!provider || !hasAuthForProvider({ provider, agentDir: params.agentDir })) {
|
||||
const providerConfigured =
|
||||
params.isProviderConfigured?.(provider) ??
|
||||
hasAuthForProvider({ provider, agentDir: params.agentDir });
|
||||
if (!provider || !providerConfigured) {
|
||||
continue;
|
||||
}
|
||||
if (!deduped.includes(trimmed)) {
|
||||
|
|
|
|||
|
|
@ -1,37 +1,111 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { ImageGenerationProvider } from "../image-generation/types.js";
|
||||
import {
|
||||
generateImage,
|
||||
listRuntimeImageGenerationProviders,
|
||||
type GenerateImageRuntimeResult,
|
||||
} from "./runtime.js";
|
||||
import { generateImage, listRuntimeImageGenerationProviders } from "./runtime.js";
|
||||
import type { ImageGenerationProvider } from "./types.js";
|
||||
|
||||
const mocks = vi.hoisted(() => ({
|
||||
generateImage: vi.fn<typeof generateImage>(),
|
||||
listRuntimeImageGenerationProviders: vi.fn<typeof listRuntimeImageGenerationProviders>(),
|
||||
const mocks = vi.hoisted(() => {
|
||||
const debug = vi.fn();
|
||||
return {
|
||||
createSubsystemLogger: vi.fn(() => ({ debug })),
|
||||
describeFailoverError: vi.fn(),
|
||||
getImageGenerationProvider: vi.fn<
|
||||
(providerId: string, config?: OpenClawConfig) => ImageGenerationProvider | undefined
|
||||
>(() => undefined),
|
||||
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
|
||||
isFailoverError: vi.fn<(err: unknown) => boolean>(() => false),
|
||||
listImageGenerationProviders: vi.fn<(config?: OpenClawConfig) => ImageGenerationProvider[]>(
|
||||
() => [],
|
||||
),
|
||||
parseImageGenerationModelRef: vi.fn<
|
||||
(raw?: string) => { provider: string; model: string } | undefined
|
||||
>((raw?: string) => {
|
||||
const trimmed = raw?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
const slash = trimmed.indexOf("/");
|
||||
if (slash <= 0 || slash === trimmed.length - 1) {
|
||||
return undefined;
|
||||
}
|
||||
return {
|
||||
provider: trimmed.slice(0, slash),
|
||||
model: trimmed.slice(slash + 1),
|
||||
};
|
||||
}),
|
||||
resolveAgentModelFallbackValues: vi.fn<(value: unknown) => string[]>(() => []),
|
||||
resolveAgentModelPrimaryValue: vi.fn<(value: unknown) => string | undefined>(() => undefined),
|
||||
debug,
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../agents/failover-error.js", () => ({
|
||||
describeFailoverError: mocks.describeFailoverError,
|
||||
isFailoverError: mocks.isFailoverError,
|
||||
}));
|
||||
vi.mock("../config/model-input.js", () => ({
|
||||
resolveAgentModelFallbackValues: mocks.resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue: mocks.resolveAgentModelPrimaryValue,
|
||||
}));
|
||||
vi.mock("../logging/subsystem.js", () => ({
|
||||
createSubsystemLogger: mocks.createSubsystemLogger,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", () => ({
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
}));
|
||||
vi.mock("./model-ref.js", () => ({
|
||||
parseImageGenerationModelRef: mocks.parseImageGenerationModelRef,
|
||||
}));
|
||||
vi.mock("./provider-registry.js", () => ({
|
||||
getImageGenerationProvider: mocks.getImageGenerationProvider,
|
||||
listImageGenerationProviders: mocks.listImageGenerationProviders,
|
||||
}));
|
||||
|
||||
vi.mock("../../extensions/image-generation-core/runtime-api.js", () => ({
|
||||
generateImage: mocks.generateImage,
|
||||
listRuntimeImageGenerationProviders: mocks.listRuntimeImageGenerationProviders,
|
||||
}));
|
||||
|
||||
describe("image-generation runtime facade", () => {
|
||||
afterEach(() => {
|
||||
mocks.generateImage.mockReset();
|
||||
mocks.listRuntimeImageGenerationProviders.mockReset();
|
||||
describe("image-generation runtime", () => {
|
||||
beforeEach(() => {
|
||||
mocks.createSubsystemLogger.mockClear();
|
||||
mocks.describeFailoverError.mockReset();
|
||||
mocks.getImageGenerationProvider.mockReset();
|
||||
mocks.getProviderEnvVars.mockReset();
|
||||
mocks.getProviderEnvVars.mockReturnValue([]);
|
||||
mocks.isFailoverError.mockReset();
|
||||
mocks.isFailoverError.mockReturnValue(false);
|
||||
mocks.listImageGenerationProviders.mockReset();
|
||||
mocks.listImageGenerationProviders.mockReturnValue([]);
|
||||
mocks.parseImageGenerationModelRef.mockClear();
|
||||
mocks.resolveAgentModelFallbackValues.mockReset();
|
||||
mocks.resolveAgentModelFallbackValues.mockReturnValue([]);
|
||||
mocks.resolveAgentModelPrimaryValue.mockReset();
|
||||
mocks.resolveAgentModelPrimaryValue.mockReturnValue(undefined);
|
||||
mocks.debug.mockReset();
|
||||
});
|
||||
|
||||
it("delegates image generation to the image runtime", async () => {
|
||||
const result: GenerateImageRuntimeResult = {
|
||||
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png", fileName: "sample.png" }],
|
||||
provider: "image-plugin",
|
||||
model: "img-v1",
|
||||
attempts: [],
|
||||
it("generates images through the active image-generation provider", async () => {
|
||||
const authStore = { version: 1, profiles: {} } as const;
|
||||
let seenAuthStore: unknown;
|
||||
mocks.resolveAgentModelPrimaryValue.mockReturnValue("image-plugin/img-v1");
|
||||
const provider: ImageGenerationProvider = {
|
||||
id: "image-plugin",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: false },
|
||||
},
|
||||
async generateImage(req: { authStore?: unknown }) {
|
||||
seenAuthStore = req.authStore;
|
||||
return {
|
||||
images: [
|
||||
{
|
||||
buffer: Buffer.from("png-bytes"),
|
||||
mimeType: "image/png",
|
||||
fileName: "sample.png",
|
||||
},
|
||||
],
|
||||
model: "img-v1",
|
||||
};
|
||||
},
|
||||
};
|
||||
mocks.generateImage.mockResolvedValue(result);
|
||||
const params = {
|
||||
mocks.getImageGenerationProvider.mockReturnValue(provider);
|
||||
|
||||
const result = await generateImage({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
|
|
@ -41,19 +115,58 @@ describe("image-generation runtime facade", () => {
|
|||
} as OpenClawConfig,
|
||||
prompt: "draw a cat",
|
||||
agentDir: "/tmp/agent",
|
||||
authStore: { version: 1, profiles: {} },
|
||||
};
|
||||
authStore,
|
||||
});
|
||||
|
||||
await expect(generateImage(params)).resolves.toBe(result);
|
||||
expect(mocks.generateImage).toHaveBeenCalledWith(params);
|
||||
expect(result.provider).toBe("image-plugin");
|
||||
expect(result.model).toBe("img-v1");
|
||||
expect(result.attempts).toEqual([]);
|
||||
expect(seenAuthStore).toEqual(authStore);
|
||||
expect(result.images).toEqual([
|
||||
{
|
||||
buffer: Buffer.from("png-bytes"),
|
||||
mimeType: "image/png",
|
||||
fileName: "sample.png",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("delegates provider listing to the image runtime", () => {
|
||||
it("lists runtime image-generation providers through the provider registry", () => {
|
||||
const providers: ImageGenerationProvider[] = [
|
||||
{
|
||||
id: "image-plugin",
|
||||
defaultModel: "img-v1",
|
||||
models: ["img-v1", "img-v2"],
|
||||
capabilities: {
|
||||
generate: {
|
||||
supportsResolution: true,
|
||||
},
|
||||
edit: {
|
||||
enabled: true,
|
||||
maxInputImages: 3,
|
||||
},
|
||||
geometry: {
|
||||
resolutions: ["1K", "2K"],
|
||||
},
|
||||
},
|
||||
generateImage: async () => ({
|
||||
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
|
||||
}),
|
||||
},
|
||||
];
|
||||
mocks.listImageGenerationProviders.mockReturnValue(providers);
|
||||
|
||||
expect(listRuntimeImageGenerationProviders({ config: {} as OpenClawConfig })).toEqual(
|
||||
providers,
|
||||
);
|
||||
expect(mocks.listImageGenerationProviders).toHaveBeenCalledWith({} as OpenClawConfig);
|
||||
});
|
||||
|
||||
it("builds a generic config hint without hardcoded provider ids", async () => {
|
||||
mocks.listImageGenerationProviders.mockReturnValue([
|
||||
{
|
||||
id: "vision-one",
|
||||
defaultModel: "paint-v1",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: false },
|
||||
|
|
@ -62,11 +175,35 @@ describe("image-generation runtime facade", () => {
|
|||
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
|
||||
}),
|
||||
},
|
||||
];
|
||||
mocks.listRuntimeImageGenerationProviders.mockReturnValue(providers);
|
||||
const params = { config: {} as OpenClawConfig };
|
||||
{
|
||||
id: "vision-two",
|
||||
defaultModel: "paint-v2",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: false },
|
||||
},
|
||||
generateImage: async () => ({
|
||||
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
|
||||
}),
|
||||
},
|
||||
]);
|
||||
mocks.getProviderEnvVars.mockImplementation((providerId: string) => {
|
||||
if (providerId === "vision-one") {
|
||||
return ["VISION_ONE_API_KEY"];
|
||||
}
|
||||
if (providerId === "vision-two") {
|
||||
return ["VISION_TWO_API_KEY"];
|
||||
}
|
||||
return [];
|
||||
});
|
||||
|
||||
expect(listRuntimeImageGenerationProviders(params)).toBe(providers);
|
||||
expect(mocks.listRuntimeImageGenerationProviders).toHaveBeenCalledWith(params);
|
||||
const promise = generateImage({ cfg: {} as OpenClawConfig, prompt: "draw a cat" });
|
||||
|
||||
await expect(promise).rejects.toThrow("No image-generation model configured.");
|
||||
await expect(promise).rejects.toThrow(
|
||||
'Set agents.defaults.imageGenerationModel.primary to a provider/model like "vision-one/paint-v1".',
|
||||
);
|
||||
await expect(promise).rejects.toThrow("vision-one: VISION_ONE_API_KEY");
|
||||
await expect(promise).rejects.toThrow("vision-two: VISION_TWO_API_KEY");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,186 @@
|
|||
export {
|
||||
generateImage,
|
||||
listRuntimeImageGenerationProviders,
|
||||
type GenerateImageParams,
|
||||
type GenerateImageRuntimeResult,
|
||||
} from "../../extensions/image-generation-core/runtime-api.js";
|
||||
import type { AuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import { describeFailoverError, isFailoverError } from "../agents/failover-error.js";
|
||||
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../config/model-input.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { getProviderEnvVars } from "../secrets/provider-env-vars.js";
|
||||
import { parseImageGenerationModelRef } from "./model-ref.js";
|
||||
import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js";
|
||||
import type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationResolution,
|
||||
ImageGenerationResult,
|
||||
ImageGenerationSourceImage,
|
||||
} from "./types.js";
|
||||
|
||||
const log = createSubsystemLogger("image-generation");
|
||||
|
||||
/** Input accepted by `generateImage`. */
export type GenerateImageParams = {
  /** Config used to resolve model candidates and providers; also forwarded to the provider. */
  cfg: OpenClawConfig;
  /** Text prompt forwarded to the provider. */
  prompt: string;
  /** Forwarded unchanged to the provider request. */
  agentDir?: string;
  /** Forwarded unchanged to the provider request. */
  authStore?: AuthProfileStore;
  /** Optional `provider/model` reference tried before the configured primary and fallbacks. */
  modelOverride?: string;
  /** Forwarded unchanged to the provider request. */
  count?: number;
  /** Forwarded unchanged to the provider request. */
  size?: string;
  /** Forwarded unchanged to the provider request. */
  aspectRatio?: string;
  /** Forwarded unchanged to the provider request. */
  resolution?: ImageGenerationResolution;
  /** Forwarded unchanged to the provider request. */
  inputImages?: ImageGenerationSourceImage[];
};
|
||||
|
||||
/** Outcome of a successful `generateImage` call. */
export type GenerateImageRuntimeResult = {
  /** Images returned by the provider that succeeded. */
  images: GeneratedImageAsset[];
  /** Provider id of the candidate that succeeded. */
  provider: string;
  /** Model reported by the provider result, or the candidate's model when none was reported. */
  model: string;
  /** Failed attempts recorded before the successful candidate. */
  attempts: FallbackAttempt[];
  /** Metadata passed through from the provider result, if any. */
  metadata?: Record<string, unknown>;
};
|
||||
|
||||
/**
 * Builds the ordered, de-duplicated list of provider/model candidates to try.
 *
 * Order: the explicit `modelOverride`, then the configured primary
 * image-generation model, then each configured fallback. References that
 * `parseImageGenerationModelRef` rejects are skipped, and a `provider/model`
 * pair is kept only the first time it appears.
 */
function resolveImageGenerationCandidates(params: {
  cfg: OpenClawConfig;
  modelOverride?: string;
}): Array<{ provider: string; model: string }> {
  // Map iteration follows insertion order, so it doubles as the ordered result.
  const byRef = new Map<string, { provider: string; model: string }>();

  const consider = (raw: string | undefined): void => {
    const ref = parseImageGenerationModelRef(raw);
    if (!ref) {
      return;
    }
    const key = `${ref.provider}/${ref.model}`;
    if (!byRef.has(key)) {
      byRef.set(key, ref);
    }
  };

  consider(params.modelOverride);

  const imageModelConfig = params.cfg.agents?.defaults?.imageGenerationModel;
  consider(resolveAgentModelPrimaryValue(imageModelConfig));
  for (const fallback of resolveAgentModelFallbackValues(imageModelConfig)) {
    consider(fallback);
  }

  return [...byRef.values()];
}
|
||||
|
||||
/**
 * Raises the terminal error after every image-generation candidate has failed.
 *
 * - With at most one recorded attempt and a truthy `lastError`, that error is
 *   rethrown untouched so callers see the original failure.
 * - Otherwise an aggregated `Error` is thrown whose message lists every attempt
 *   as `provider/model: error`, joined by ` | ` (or `unknown` when there were
 *   no attempts).
 *
 * The aggregated error carries `lastError` as its `cause` whatever its type,
 * so a provider that throws a string or plain object is still traceable.
 *
 * @param params.attempts - Failed attempts in the order they were tried.
 * @param params.lastError - The most recent thrown value, if any.
 * @throws Always; this function never returns.
 */
function throwImageGenerationFailure(params: {
  attempts: FallbackAttempt[];
  lastError: unknown;
}): never {
  const { attempts, lastError } = params;

  if (attempts.length <= 1 && lastError) {
    throw lastError;
  }

  const summary =
    attempts.length > 0
      ? attempts.map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`).join(" | ")
      : "unknown";

  throw new Error(`All image generation models failed (${attempts.length}): ${summary}`, {
    // Keep non-Error throwables too; `cause` may hold any value.
    cause: lastError,
  });
}
|
||||
|
||||
/**
 * Builds the error text shown when no image-generation model can be resolved.
 *
 * The message suggests a sample `provider/model` reference taken from the first
 * registered provider that has both a non-blank id and a non-blank default
 * model (falling back to the `<provider>/<model>` placeholder), and lists the
 * auth environment variables of up to three providers that declare any.
 *
 * The sample reference is built from the trimmed id and model, so stray
 * whitespace in a provider declaration does not leak into the suggestion.
 *
 * @param cfg - Config used to list the registered image-generation providers.
 * @returns A single-line, human-readable hint.
 */
function buildNoImageGenerationModelConfiguredMessage(cfg: OpenClawConfig): string {
  const providers = listImageGenerationProviders(cfg);

  let sampleRef = "<provider>/<model>";
  for (const provider of providers) {
    const id = provider.id.trim();
    const model = provider.defaultModel?.trim();
    if (id && model) {
      sampleRef = `${id}/${model}`;
      break;
    }
  }

  const authHints = providers
    .flatMap((provider) => {
      const envVars = getProviderEnvVars(provider.id);
      return envVars.length === 0 ? [] : [`${provider.id}: ${envVars.join(" / ")}`];
    })
    // Cap the hint list so the message stays readable.
    .slice(0, 3);

  const authSentence =
    authHints.length > 0
      ? `If you want a specific provider, also configure that provider's auth/API key first (${authHints.join("; ")}).`
      : "If you want a specific provider, also configure that provider's auth/API key first.";

  return [
    `No image-generation model configured. Set agents.defaults.imageGenerationModel.primary to a provider/model like "${sampleRef}".`,
    authSentence,
  ].join(" ");
}
|
||||
|
||||
/**
 * Lists the image-generation providers available at runtime by delegating to
 * the provider registry with the optional config.
 */
export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) {
  const config = params?.config;
  return listImageGenerationProviders(config);
}
|
||||
|
||||
/**
 * Generates images by trying each resolved provider/model candidate in order
 * and returning the first successful result.
 *
 * Candidates come from `resolveImageGenerationCandidates`. A candidate whose
 * provider is not registered, whose provider throws, or whose provider returns
 * no images is recorded as a failed attempt and the next candidate is tried.
 *
 * @param params - Prompt, config and optional generation settings.
 * @returns The generated images plus the provider/model used and the failed
 *   attempts that preceded the success.
 * @throws Error when no candidate can be resolved from the override or config.
 * @throws The underlying failure (single attempt) or an aggregated error
 *   (several attempts) when every candidate fails.
 */
export async function generateImage(
  params: GenerateImageParams,
): Promise<GenerateImageRuntimeResult> {
  const { cfg, modelOverride } = params;

  const candidates = resolveImageGenerationCandidates({ cfg, modelOverride });
  if (candidates.length === 0) {
    throw new Error(buildNoImageGenerationModelConfiguredMessage(cfg));
  }

  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  for (const { provider: providerId, model } of candidates) {
    const provider = getImageGenerationProvider(providerId, cfg);
    if (!provider) {
      // Unregistered providers count as a failed attempt, not a hard stop.
      const error = `No image-generation provider registered for ${providerId}`;
      attempts.push({ provider: providerId, model, error });
      lastError = new Error(error);
      continue;
    }

    try {
      const result: ImageGenerationResult = await provider.generateImage({
        provider: providerId,
        model,
        prompt: params.prompt,
        cfg,
        agentDir: params.agentDir,
        authStore: params.authStore,
        count: params.count,
        size: params.size,
        aspectRatio: params.aspectRatio,
        resolution: params.resolution,
        inputImages: params.inputImages,
      });

      const hasImages = Array.isArray(result.images) && result.images.length > 0;
      if (!hasImages) {
        // Thrown inside the try so an empty result falls through to the next candidate.
        throw new Error("Image generation provider returned no images.");
      }

      return {
        images: result.images,
        provider: providerId,
        model: result.model ?? model,
        attempts,
        metadata: result.metadata,
      };
    } catch (err) {
      lastError = err;
      const described = isFailoverError(err) ? describeFailoverError(err) : undefined;
      const fallbackMessage = err instanceof Error ? err.message : String(err);
      attempts.push({
        provider: providerId,
        model,
        error: described?.message ?? fallbackMessage,
        reason: described?.reason,
        status: described?.status,
        code: described?.code,
      });
      log.debug(`image-generation candidate failed: ${providerId}/${model}`);
    }
  }

  throwImageGenerationFailure({ attempts, lastError });
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,11 @@ export type ImageGenerationSourceImage = {
|
|||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
/** Context handed to a provider's optional `isConfigured` hook. */
export type ImageGenerationProviderConfiguredContext = {
  /** Active config, when the caller has one. */
  cfg?: OpenClawConfig;
  /** Agent directory, when known. NOTE(review): presumably used for per-agent auth lookup — confirm. */
  agentDir?: string;
};
|
||||
|
||||
export type ImageGenerationRequest = {
|
||||
provider: string;
|
||||
model: string;
|
||||
|
|
@ -70,5 +75,6 @@ export type ImageGenerationProvider = {
|
|||
defaultModel?: string;
|
||||
models?: string[];
|
||||
capabilities: ImageGenerationProviderCapabilities;
|
||||
isConfigured?: (ctx: ImageGenerationProviderConfiguredContext) => boolean;
|
||||
generateImage: (req: ImageGenerationRequest) => Promise<ImageGenerationResult>;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,90 +1,97 @@
|
|||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { MediaUnderstandingOutput } from "../media-understanding/types.js";
|
||||
import { describeImageFile, runMediaUnderstandingFile } from "./runtime.js";
|
||||
|
||||
const hoisted = vi.hoisted(() => ({
|
||||
describeImageFile: vi.fn(),
|
||||
runMediaUnderstandingFile: vi.fn(),
|
||||
const mocks = vi.hoisted(() => {
|
||||
const cleanup = vi.fn(async () => {});
|
||||
return {
|
||||
buildProviderRegistry: vi.fn(() => new Map()),
|
||||
createMediaAttachmentCache: vi.fn(() => ({ cleanup })),
|
||||
normalizeMediaAttachments: vi.fn(() => []),
|
||||
normalizeMediaProviderId: vi.fn((provider: string) => provider.trim().toLowerCase()),
|
||||
runCapability: vi.fn(),
|
||||
cleanup,
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../plugin-sdk/media-runtime.js", () => ({
|
||||
buildProviderRegistry: mocks.buildProviderRegistry,
|
||||
createMediaAttachmentCache: mocks.createMediaAttachmentCache,
|
||||
normalizeMediaAttachments: mocks.normalizeMediaAttachments,
|
||||
normalizeMediaProviderId: mocks.normalizeMediaProviderId,
|
||||
runCapability: mocks.runCapability,
|
||||
}));
|
||||
|
||||
vi.mock("../../extensions/media-understanding-core/runtime-api.js", () => ({
|
||||
describeImageFile: hoisted.describeImageFile,
|
||||
describeImageFileWithModel: vi.fn(),
|
||||
describeVideoFile: vi.fn(),
|
||||
runMediaUnderstandingFile: hoisted.runMediaUnderstandingFile,
|
||||
transcribeAudioFile: vi.fn(),
|
||||
}));
|
||||
|
||||
let describeImageFile: typeof import("./runtime.js").describeImageFile;
|
||||
let runMediaUnderstandingFile: typeof import("./runtime.js").runMediaUnderstandingFile;
|
||||
|
||||
describe("media-understanding runtime facade", () => {
|
||||
beforeAll(async () => {
|
||||
({ describeImageFile, runMediaUnderstandingFile } = await import("./runtime.js"));
|
||||
});
|
||||
|
||||
describe("media-understanding runtime", () => {
|
||||
afterEach(() => {
|
||||
hoisted.describeImageFile.mockReset();
|
||||
hoisted.runMediaUnderstandingFile.mockReset();
|
||||
mocks.buildProviderRegistry.mockReset();
|
||||
mocks.createMediaAttachmentCache.mockReset();
|
||||
mocks.normalizeMediaAttachments.mockReset();
|
||||
mocks.normalizeMediaProviderId.mockReset();
|
||||
mocks.runCapability.mockReset();
|
||||
mocks.cleanup.mockReset();
|
||||
mocks.cleanup.mockResolvedValue(undefined);
|
||||
});
|
||||
|
||||
it("delegates describeImageFile to the shared media-understanding runtime", async () => {
|
||||
const params = {
|
||||
filePath: "/tmp/sample.jpg",
|
||||
mime: "image/jpeg",
|
||||
cfg: {
|
||||
tools: {
|
||||
media: {
|
||||
image: {
|
||||
models: [{ provider: "vision-plugin", model: "vision-v1" }],
|
||||
it("returns disabled state without loading providers", async () => {
|
||||
mocks.normalizeMediaAttachments.mockReturnValue([{ kind: "image" }]);
|
||||
|
||||
await expect(
|
||||
runMediaUnderstandingFile({
|
||||
capability: "image",
|
||||
filePath: "/tmp/sample.jpg",
|
||||
mime: "image/jpeg",
|
||||
cfg: {
|
||||
tools: {
|
||||
media: {
|
||||
image: {
|
||||
enabled: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
agentDir: "/tmp/agent",
|
||||
};
|
||||
const result = {
|
||||
text: "image ok",
|
||||
provider: "vision-plugin",
|
||||
model: "vision-v1",
|
||||
output: {
|
||||
kind: "image.description" as const,
|
||||
attachmentIndex: 0,
|
||||
text: "image ok",
|
||||
provider: "vision-plugin",
|
||||
model: "vision-v1",
|
||||
},
|
||||
};
|
||||
hoisted.describeImageFile.mockResolvedValue(result);
|
||||
|
||||
await expect(describeImageFile(params)).resolves.toEqual(result);
|
||||
expect(hoisted.describeImageFile).toHaveBeenCalledWith(params);
|
||||
});
|
||||
|
||||
it("delegates runMediaUnderstandingFile to the shared media-understanding runtime", async () => {
|
||||
const params = {
|
||||
capability: "image" as const,
|
||||
filePath: "/tmp/sample.jpg",
|
||||
mime: "image/jpeg",
|
||||
cfg: {
|
||||
tools: {
|
||||
media: {
|
||||
image: {
|
||||
enabled: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
agentDir: "/tmp/agent",
|
||||
};
|
||||
const result = {
|
||||
} as OpenClawConfig,
|
||||
agentDir: "/tmp/agent",
|
||||
}),
|
||||
).resolves.toEqual({
|
||||
text: undefined,
|
||||
provider: undefined,
|
||||
model: undefined,
|
||||
output: undefined,
|
||||
};
|
||||
hoisted.runMediaUnderstandingFile.mockResolvedValue(result);
|
||||
});
|
||||
|
||||
await expect(runMediaUnderstandingFile(params)).resolves.toEqual(result);
|
||||
expect(hoisted.runMediaUnderstandingFile).toHaveBeenCalledWith(params);
|
||||
expect(mocks.buildProviderRegistry).not.toHaveBeenCalled();
|
||||
expect(mocks.runCapability).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("returns the matching capability output", async () => {
|
||||
const output: MediaUnderstandingOutput = {
|
||||
kind: "image.description",
|
||||
attachmentIndex: 0,
|
||||
provider: "vision-plugin",
|
||||
model: "vision-v1",
|
||||
text: "image ok",
|
||||
};
|
||||
mocks.normalizeMediaAttachments.mockReturnValue([{ kind: "image" }]);
|
||||
mocks.runCapability.mockResolvedValue({
|
||||
outputs: [output],
|
||||
});
|
||||
|
||||
await expect(
|
||||
describeImageFile({
|
||||
filePath: "/tmp/sample.jpg",
|
||||
mime: "image/jpeg",
|
||||
cfg: {} as OpenClawConfig,
|
||||
agentDir: "/tmp/agent",
|
||||
}),
|
||||
).resolves.toEqual({
|
||||
text: "image ok",
|
||||
provider: "vision-plugin",
|
||||
model: "vision-v1",
|
||||
output,
|
||||
});
|
||||
|
||||
expect(mocks.runCapability).toHaveBeenCalledTimes(1);
|
||||
expect(mocks.cleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,9 +1,156 @@
|
|||
export {
|
||||
describeImageFile,
|
||||
describeImageFileWithModel,
|
||||
describeVideoFile,
|
||||
runMediaUnderstandingFile,
|
||||
transcribeAudioFile,
|
||||
type RunMediaUnderstandingFileParams,
|
||||
type RunMediaUnderstandingFileResult,
|
||||
} from "../../extensions/media-understanding-core/runtime-api.js";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
normalizeMediaProviderId,
|
||||
runCapability,
|
||||
type ActiveMediaModel,
|
||||
} from "../plugin-sdk/media-runtime.js";
|
||||
|
||||
/** The media capabilities this runtime can be asked to run against a single file. */
type MediaUnderstandingCapability = "image" | "audio" | "video";

/** One entry of the `outputs` array resolved by `runCapability`. */
type MediaUnderstandingOutput = Awaited<ReturnType<typeof runCapability>>["outputs"][number];

/**
 * Output `kind` expected for each capability. Used to pick the matching entry
 * out of the outputs returned by `runCapability`.
 */
const KIND_BY_CAPABILITY: Record<MediaUnderstandingCapability, MediaUnderstandingOutput["kind"]> = {
  audio: "audio.transcription",
  image: "image.description",
  video: "video.description",
};
|
||||
|
||||
/** Input accepted by `runMediaUnderstandingFile`. */
export type RunMediaUnderstandingFileParams = {
  /** Which capability to run; also selects the `cfg.tools.media.<capability>` section. */
  capability: MediaUnderstandingCapability;
  /** Path of the file to analyse; its directory is used as the attachment cache's local path root. */
  filePath: string;
  /** Configuration forwarded to the provider registry and to `runCapability`. */
  cfg: OpenClawConfig;
  /** Forwarded unchanged to `runCapability`. */
  agentDir?: string;
  /** Media type placed on the file context as `MediaType`. */
  mime?: string;
  /** Forwarded unchanged to `runCapability`. */
  activeModel?: ActiveMediaModel;
};
|
||||
|
||||
/** Result produced by `runMediaUnderstandingFile` and its capability-specific wrappers. */
export type RunMediaUnderstandingFileResult = {
  /** Trimmed text of the matching output, or `undefined` when there is none or it is empty. */
  text: string | undefined;
  /** Provider reported on the matching output, when there is one. */
  provider?: string;
  /** Model reported on the matching output, when there is one. */
  model?: string;
  /** The matching output entry itself, when there is one. */
  output?: MediaUnderstandingOutput;
};
|
||||
|
||||
/**
 * Builds the minimal media context describing one local file.
 *
 * @param params - `filePath` is exposed as `MediaPath`, `mime` (possibly undefined) as `MediaType`.
 * @returns An object holding exactly the `MediaPath` and `MediaType` keys.
 */
function buildFileContext(params: { filePath: string; mime?: string }) {
  const { filePath, mime } = params;
  return { MediaPath: filePath, MediaType: mime };
}
|
||||
|
||||
/**
 * Runs one media-understanding capability against a single local file.
 *
 * Returns early with `{ text: undefined }` when the file normalizes to no
 * attachments, and with an all-`undefined` result when the capability's
 * config section has `enabled: false`. In both cases no provider registry or
 * attachment cache is created.
 *
 * @param params - See `RunMediaUnderstandingFileParams`.
 * @returns The output whose kind matches the capability, with its text trimmed
 *   (empty text is reported as `undefined`).
 */
export async function runMediaUnderstandingFile(
  params: RunMediaUnderstandingFileParams,
): Promise<RunMediaUnderstandingFileResult> {
  const ctx = buildFileContext(params);
  const attachments = normalizeMediaAttachments(ctx);
  if (attachments.length === 0) {
    return { text: undefined };
  }

  const { capability } = params;
  const config = params.cfg.tools?.media?.[capability];
  if (config?.enabled === false) {
    return { text: undefined, provider: undefined, model: undefined, output: undefined };
  }

  const providerRegistry = buildProviderRegistry(undefined, params.cfg);
  const localPathRoots = [path.dirname(params.filePath)];
  const cache = createMediaAttachmentCache(attachments, { localPathRoots });

  try {
    const result = await runCapability({
      capability,
      cfg: params.cfg,
      ctx,
      attachments: cache,
      media: attachments,
      agentDir: params.agentDir,
      providerRegistry,
      config,
      activeModel: params.activeModel,
    });
    const wantedKind = KIND_BY_CAPABILITY[capability];
    const output = result.outputs.find((entry) => entry.kind === wantedKind);
    const trimmed = output?.text?.trim();
    return {
      text: trimmed ? trimmed : undefined,
      provider: output?.provider,
      model: output?.model,
      output,
    };
  } finally {
    // The cache is cleaned up whether the capability run succeeded or threw.
    await cache.cleanup();
  }
}
|
||||
|
||||
/**
 * Describes an image file by running the `"image"` capability.
 *
 * @param params - Same fields as `RunMediaUnderstandingFileParams` minus `capability`.
 * @returns The result of `runMediaUnderstandingFile` for the image capability.
 */
export async function describeImageFile(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
  activeModel?: ActiveMediaModel;
}): Promise<RunMediaUnderstandingFileResult> {
  const request: RunMediaUnderstandingFileParams = { ...params, capability: "image" };
  return await runMediaUnderstandingFile(request);
}
|
||||
|
||||
/**
 * Describes an image file with an explicitly chosen provider and model.
 *
 * The provider is looked up in the registry under its normalized id. The file
 * is read into memory only after the provider is known to implement
 * `describeImage`.
 *
 * @param params - File, config, provider/model selection and prompt.
 *   `timeoutMs` defaults to 30 000 and `agentDir` to an empty string.
 * @returns Whatever the provider's `describeImage` resolves to.
 * @throws Error when the provider is missing or has no `describeImage`.
 */
export async function describeImageFileWithModel(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
  provider: string;
  model: string;
  prompt: string;
  maxTokens?: number;
  timeoutMs?: number;
}) {
  const timeoutMs = params.timeoutMs ?? 30_000;
  const registry = buildProviderRegistry(undefined, params.cfg);
  const providerId = normalizeMediaProviderId(params.provider);
  const resolved = registry.get(providerId);
  if (!resolved?.describeImage) {
    throw new Error(`Provider does not support image analysis: ${params.provider}`);
  }

  const buffer = await fs.readFile(params.filePath);
  const fileName = path.basename(params.filePath);
  return await resolved.describeImage({
    buffer,
    fileName,
    mime: params.mime,
    // The provider id is passed through as given by the caller, not the normalized form.
    provider: params.provider,
    model: params.model,
    prompt: params.prompt,
    maxTokens: params.maxTokens,
    timeoutMs,
    cfg: params.cfg,
    agentDir: params.agentDir ?? "",
  });
}
|
||||
|
||||
/**
 * Describes a video file by running the `"video"` capability.
 *
 * @param params - Same fields as `RunMediaUnderstandingFileParams` minus `capability`.
 * @returns The result of `runMediaUnderstandingFile` for the video capability.
 */
export async function describeVideoFile(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
  activeModel?: ActiveMediaModel;
}): Promise<RunMediaUnderstandingFileResult> {
  const request: RunMediaUnderstandingFileParams = { ...params, capability: "video" };
  return await runMediaUnderstandingFile(request);
}
|
||||
|
||||
/**
 * Transcribes an audio file by running the `"audio"` capability.
 *
 * @param params - Same fields as `RunMediaUnderstandingFileParams` minus `capability`.
 * @returns Only the `text` of the underlying result; provider, model and output are dropped.
 */
export async function transcribeAudioFile(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
  activeModel?: ActiveMediaModel;
}): Promise<{ text: string | undefined }> {
  const { text } = await runMediaUnderstandingFile({ ...params, capability: "audio" });
  return { text };
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ export type { ImageGenerationProviderPlugin } from "../plugins/types.js";
|
|||
export type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
ImageGenerationProviderConfiguredContext,
|
||||
ImageGenerationResolution,
|
||||
ImageGenerationRequest,
|
||||
ImageGenerationResult,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
export type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
ImageGenerationProviderConfiguredContext,
|
||||
ImageGenerationResolution,
|
||||
ImageGenerationRequest,
|
||||
ImageGenerationResult,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
// Public auth/onboarding helpers for provider plugins.
|
||||
|
||||
import { listProfilesForProvider } from "../agents/auth-profiles/profiles.js";
|
||||
import { ensureAuthProfileStore } from "../agents/auth-profiles/store.js";
|
||||
import { resolveEnvApiKey } from "../agents/model-auth-env.js";
|
||||
|
||||
export type { OpenClawConfig } from "../config/config.js";
|
||||
export type { SecretInput } from "../config/types.secrets.js";
|
||||
export type { ProviderAuthResult } from "../plugins/types.js";
|
||||
|
|
@ -13,6 +17,7 @@ export {
|
|||
upsertAuthProfile,
|
||||
upsertAuthProfileWithLock,
|
||||
} from "../agents/auth-profiles/profiles.js";
|
||||
export { resolveEnvApiKey } from "../agents/model-auth-env.js";
|
||||
export { readClaudeCliCredentialsCached } from "../agents/cli-credentials.js";
|
||||
export { suggestOAuthProfileIdForLegacyDefault } from "../agents/auth-profiles/repair.js";
|
||||
export {
|
||||
|
|
@ -51,3 +56,20 @@ export {
|
|||
} from "../secrets/provider-env-vars.js";
|
||||
export { buildOauthProviderAuthResult } from "./provider-auth-result.js";
|
||||
export { generatePkceVerifierChallenge, toFormUrlEncoded } from "./oauth-utils.js";
|
||||
|
||||
/**
 * Reports whether a provider appears to have credentials available.
 *
 * The environment is checked first via `resolveEnvApiKey`. Otherwise, when a
 * non-blank `agentDir` is given, the auth profile store for that directory is
 * opened with keychain prompts disabled and searched for profiles of the provider.
 *
 * @param params - `provider` id to check and optional `agentDir` to look up stored profiles.
 * @returns `true` when an env API key resolves or at least one profile is listed for the provider.
 */
export function isProviderApiKeyConfigured(params: {
  provider: string;
  agentDir?: string;
}): boolean {
  const { provider } = params;
  const envApiKey = resolveEnvApiKey(provider)?.apiKey;
  if (envApiKey) {
    return true;
  }

  const agentDir = params.agentDir?.trim();
  if (!agentDir) {
    // Without an agent directory there is no profile store to consult.
    return false;
  }

  const store = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
  const profiles = listProfilesForProvider(store, provider);
  return profiles.length > 0;
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ export type { VideoGenerationProviderPlugin } from "../plugins/types.js";
|
|||
export type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationProviderConfiguredContext,
|
||||
VideoGenerationRequest,
|
||||
VideoGenerationResolution,
|
||||
VideoGenerationResult,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
export type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationProviderConfiguredContext,
|
||||
VideoGenerationRequest,
|
||||
VideoGenerationResolution,
|
||||
VideoGenerationResult,
|
||||
|
|
|
|||
|
|
@ -1,37 +1,108 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { VideoGenerationProvider } from "../video-generation/types.js";
|
||||
import {
|
||||
generateVideo,
|
||||
listRuntimeVideoGenerationProviders,
|
||||
type GenerateVideoRuntimeResult,
|
||||
} from "./runtime.js";
|
||||
import { generateVideo, listRuntimeVideoGenerationProviders } from "./runtime.js";
|
||||
import type { VideoGenerationProvider } from "./types.js";
|
||||
|
||||
const mocks = vi.hoisted(() => ({
|
||||
generateVideo: vi.fn<typeof generateVideo>(),
|
||||
listRuntimeVideoGenerationProviders: vi.fn<typeof listRuntimeVideoGenerationProviders>(),
|
||||
const mocks = vi.hoisted(() => {
|
||||
const debug = vi.fn();
|
||||
return {
|
||||
createSubsystemLogger: vi.fn(() => ({ debug })),
|
||||
describeFailoverError: vi.fn(),
|
||||
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
|
||||
getVideoGenerationProvider: vi.fn<
|
||||
(providerId: string, config?: OpenClawConfig) => VideoGenerationProvider | undefined
|
||||
>(() => undefined),
|
||||
isFailoverError: vi.fn<(err: unknown) => boolean>(() => false),
|
||||
listVideoGenerationProviders: vi.fn<(config?: OpenClawConfig) => VideoGenerationProvider[]>(
|
||||
() => [],
|
||||
),
|
||||
parseVideoGenerationModelRef: vi.fn<
|
||||
(raw?: string) => { provider: string; model: string } | undefined
|
||||
>((raw?: string) => {
|
||||
const trimmed = raw?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
const slash = trimmed.indexOf("/");
|
||||
if (slash <= 0 || slash === trimmed.length - 1) {
|
||||
return undefined;
|
||||
}
|
||||
return {
|
||||
provider: trimmed.slice(0, slash),
|
||||
model: trimmed.slice(slash + 1),
|
||||
};
|
||||
}),
|
||||
resolveAgentModelFallbackValues: vi.fn<(value: unknown) => string[]>(() => []),
|
||||
resolveAgentModelPrimaryValue: vi.fn<(value: unknown) => string | undefined>(() => undefined),
|
||||
debug,
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../agents/failover-error.js", () => ({
|
||||
describeFailoverError: mocks.describeFailoverError,
|
||||
isFailoverError: mocks.isFailoverError,
|
||||
}));
|
||||
vi.mock("../config/model-input.js", () => ({
|
||||
resolveAgentModelFallbackValues: mocks.resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue: mocks.resolveAgentModelPrimaryValue,
|
||||
}));
|
||||
vi.mock("../logging/subsystem.js", () => ({
|
||||
createSubsystemLogger: mocks.createSubsystemLogger,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", () => ({
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
}));
|
||||
vi.mock("./model-ref.js", () => ({
|
||||
parseVideoGenerationModelRef: mocks.parseVideoGenerationModelRef,
|
||||
}));
|
||||
vi.mock("./provider-registry.js", () => ({
|
||||
getVideoGenerationProvider: mocks.getVideoGenerationProvider,
|
||||
listVideoGenerationProviders: mocks.listVideoGenerationProviders,
|
||||
}));
|
||||
|
||||
vi.mock("../../extensions/video-generation-core/runtime-api.js", () => ({
|
||||
generateVideo: mocks.generateVideo,
|
||||
listRuntimeVideoGenerationProviders: mocks.listRuntimeVideoGenerationProviders,
|
||||
}));
|
||||
|
||||
describe("video-generation runtime facade", () => {
|
||||
afterEach(() => {
|
||||
mocks.generateVideo.mockReset();
|
||||
mocks.listRuntimeVideoGenerationProviders.mockReset();
|
||||
describe("video-generation runtime", () => {
|
||||
beforeEach(() => {
|
||||
mocks.createSubsystemLogger.mockClear();
|
||||
mocks.describeFailoverError.mockReset();
|
||||
mocks.getProviderEnvVars.mockReset();
|
||||
mocks.getProviderEnvVars.mockReturnValue([]);
|
||||
mocks.getVideoGenerationProvider.mockReset();
|
||||
mocks.isFailoverError.mockReset();
|
||||
mocks.isFailoverError.mockReturnValue(false);
|
||||
mocks.listVideoGenerationProviders.mockReset();
|
||||
mocks.listVideoGenerationProviders.mockReturnValue([]);
|
||||
mocks.parseVideoGenerationModelRef.mockClear();
|
||||
mocks.resolveAgentModelFallbackValues.mockReset();
|
||||
mocks.resolveAgentModelFallbackValues.mockReturnValue([]);
|
||||
mocks.resolveAgentModelPrimaryValue.mockReset();
|
||||
mocks.resolveAgentModelPrimaryValue.mockReturnValue(undefined);
|
||||
mocks.debug.mockReset();
|
||||
});
|
||||
|
||||
it("delegates video generation to the shared video-generation runtime", async () => {
|
||||
const result: GenerateVideoRuntimeResult = {
|
||||
videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4", fileName: "sample.mp4" }],
|
||||
provider: "video-plugin",
|
||||
model: "vid-v1",
|
||||
attempts: [],
|
||||
it("generates videos through the active video-generation provider", async () => {
|
||||
const authStore = { version: 1, profiles: {} } as const;
|
||||
let seenAuthStore: unknown;
|
||||
mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1");
|
||||
const provider: VideoGenerationProvider = {
|
||||
id: "video-plugin",
|
||||
capabilities: {},
|
||||
async generateVideo(req: { authStore?: unknown }) {
|
||||
seenAuthStore = req.authStore;
|
||||
return {
|
||||
videos: [
|
||||
{
|
||||
buffer: Buffer.from("mp4-bytes"),
|
||||
mimeType: "video/mp4",
|
||||
fileName: "sample.mp4",
|
||||
},
|
||||
],
|
||||
model: "vid-v1",
|
||||
};
|
||||
},
|
||||
};
|
||||
mocks.generateVideo.mockResolvedValue(result);
|
||||
const params = {
|
||||
mocks.getVideoGenerationProvider.mockReturnValue(provider);
|
||||
|
||||
const result = await generateVideo({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
|
|
@ -41,21 +112,29 @@ describe("video-generation runtime facade", () => {
|
|||
} as OpenClawConfig,
|
||||
prompt: "animate a cat",
|
||||
agentDir: "/tmp/agent",
|
||||
authStore: { version: 1, profiles: {} },
|
||||
};
|
||||
authStore,
|
||||
});
|
||||
|
||||
await expect(generateVideo(params)).resolves.toBe(result);
|
||||
expect(mocks.generateVideo).toHaveBeenCalledWith(params);
|
||||
expect(result.provider).toBe("video-plugin");
|
||||
expect(result.model).toBe("vid-v1");
|
||||
expect(result.attempts).toEqual([]);
|
||||
expect(seenAuthStore).toEqual(authStore);
|
||||
expect(result.videos).toEqual([
|
||||
{
|
||||
buffer: Buffer.from("mp4-bytes"),
|
||||
mimeType: "video/mp4",
|
||||
fileName: "sample.mp4",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("delegates provider listing to the shared video-generation runtime", () => {
|
||||
it("lists runtime video-generation providers through the provider registry", () => {
|
||||
const providers: VideoGenerationProvider[] = [
|
||||
{
|
||||
id: "video-plugin",
|
||||
defaultModel: "vid-v1",
|
||||
models: ["vid-v1", "vid-v2"],
|
||||
models: ["vid-v1"],
|
||||
capabilities: {
|
||||
maxDurationSeconds: 10,
|
||||
supportsAudio: true,
|
||||
},
|
||||
generateVideo: async () => ({
|
||||
|
|
@ -63,10 +142,33 @@ describe("video-generation runtime facade", () => {
|
|||
}),
|
||||
},
|
||||
];
|
||||
mocks.listRuntimeVideoGenerationProviders.mockReturnValue(providers);
|
||||
const params = { config: {} as OpenClawConfig };
|
||||
mocks.listVideoGenerationProviders.mockReturnValue(providers);
|
||||
|
||||
expect(listRuntimeVideoGenerationProviders(params)).toBe(providers);
|
||||
expect(mocks.listRuntimeVideoGenerationProviders).toHaveBeenCalledWith(params);
|
||||
expect(listRuntimeVideoGenerationProviders({ config: {} as OpenClawConfig })).toEqual(
|
||||
providers,
|
||||
);
|
||||
expect(mocks.listVideoGenerationProviders).toHaveBeenCalledWith({} as OpenClawConfig);
|
||||
});
|
||||
|
||||
it("builds a generic config hint without hardcoded provider ids", async () => {
|
||||
mocks.listVideoGenerationProviders.mockReturnValue([
|
||||
{
|
||||
id: "motion-one",
|
||||
defaultModel: "animate-v1",
|
||||
capabilities: {},
|
||||
generateVideo: async () => ({
|
||||
videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
|
||||
}),
|
||||
},
|
||||
]);
|
||||
mocks.getProviderEnvVars.mockReturnValue(["MOTION_ONE_API_KEY"]);
|
||||
|
||||
const promise = generateVideo({ cfg: {} as OpenClawConfig, prompt: "animate a cat" });
|
||||
|
||||
await expect(promise).rejects.toThrow("No video-generation model configured.");
|
||||
await expect(promise).rejects.toThrow(
|
||||
'Set agents.defaults.videoGenerationModel.primary to a provider/model like "motion-one/animate-v1".',
|
||||
);
|
||||
await expect(promise).rejects.toThrow("motion-one: MOTION_ONE_API_KEY");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,192 @@
|
|||
export {
|
||||
generateVideo,
|
||||
listRuntimeVideoGenerationProviders,
|
||||
type GenerateVideoParams,
|
||||
type GenerateVideoRuntimeResult,
|
||||
} from "../../extensions/video-generation-core/runtime-api.js";
|
||||
import type { AuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import { describeFailoverError, isFailoverError } from "../agents/failover-error.js";
|
||||
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../config/model-input.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { getProviderEnvVars } from "../secrets/provider-env-vars.js";
|
||||
import { parseVideoGenerationModelRef } from "./model-ref.js";
|
||||
import { getVideoGenerationProvider, listVideoGenerationProviders } from "./provider-registry.js";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationResolution,
|
||||
VideoGenerationResult,
|
||||
VideoGenerationSourceAsset,
|
||||
} from "./types.js";
|
||||
|
||||
const log = createSubsystemLogger("video-generation");
|
||||
|
||||
/**
 * Input accepted by `generateVideo`. Apart from `modelOverride`, every field
 * is forwarded unchanged to the selected provider's `generateVideo` request.
 */
export type GenerateVideoParams = {
  /** Configuration used to resolve candidate models and look up providers. */
  cfg: OpenClawConfig;
  /** Text prompt for the generation. */
  prompt: string;
  agentDir?: string;
  authStore?: AuthProfileStore;
  /** `provider/model` reference tried before the configured primary and fallbacks. */
  modelOverride?: string;
  size?: string;
  aspectRatio?: string;
  resolution?: VideoGenerationResolution;
  durationSeconds?: number;
  audio?: boolean;
  watermark?: boolean;
  inputImages?: VideoGenerationSourceAsset[];
  inputVideos?: VideoGenerationSourceAsset[];
};
|
||||
|
||||
/** Successful outcome of `generateVideo`. */
export type GenerateVideoRuntimeResult = {
  /** Non-empty list of videos returned by the provider. */
  videos: GeneratedVideoAsset[];
  /** Provider id of the candidate that succeeded. */
  provider: string;
  /** Model reported by the provider, falling back to the candidate's model. */
  model: string;
  /** Failed attempts recorded for candidates tried before the successful one. */
  attempts: FallbackAttempt[];
  /** Metadata passed through from the provider result. */
  metadata?: Record<string, unknown>;
};
|
||||
|
||||
/**
 * Builds the ordered, de-duplicated list of provider/model candidates to try.
 *
 * Order: the explicit override, then the configured primary, then each
 * configured fallback. References that `parseVideoGenerationModelRef` rejects
 * are skipped; duplicates (same `provider/model`) keep their first position.
 *
 * @param params - Config plus an optional `modelOverride` reference.
 * @returns Parsed candidates in the order they should be attempted.
 */
function resolveVideoGenerationCandidates(params: {
  cfg: OpenClawConfig;
  modelOverride?: string;
}): Array<{ provider: string; model: string }> {
  // A Map keeps insertion order, so it doubles as the "seen" set and the result list.
  const byKey = new Map<string, { provider: string; model: string }>();
  const consider = (raw: string | undefined): void => {
    const parsed = parseVideoGenerationModelRef(raw);
    if (!parsed) {
      return;
    }
    const key = `${parsed.provider}/${parsed.model}`;
    if (!byKey.has(key)) {
      byKey.set(key, parsed);
    }
  };

  consider(params.modelOverride);
  consider(resolveAgentModelPrimaryValue(params.cfg.agents?.defaults?.videoGenerationModel));
  const fallbacks = resolveAgentModelFallbackValues(
    params.cfg.agents?.defaults?.videoGenerationModel,
  );
  for (const fallback of fallbacks) {
    consider(fallback);
  }

  return [...byKey.values()];
}
|
||||
|
||||
/**
 * Raises the final error once every candidate has failed.
 *
 * With at most one attempt and a truthy `lastError`, that error is rethrown
 * as-is. Otherwise a summary error is thrown listing each attempt as
 * `provider/model: error`, with `lastError` attached as `cause` when it is an `Error`.
 *
 * @param params - Recorded attempts and the last error seen.
 * @throws Always; this function never returns.
 */
function throwVideoGenerationFailure(params: {
  attempts: FallbackAttempt[];
  lastError: unknown;
}): never {
  const { attempts, lastError } = params;
  if (lastError && attempts.length <= 1) {
    throw lastError;
  }

  let summary = "unknown";
  if (attempts.length > 0) {
    const parts = attempts.map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`);
    summary = parts.join(" | ");
  }
  const cause = lastError instanceof Error ? lastError : undefined;
  throw new Error(`All video generation models failed (${attempts.length}): ${summary}`, {
    cause,
  });
}
|
||||
|
||||
/**
 * Builds the error text shown when no video-generation model is configured.
 *
 * The sample reference comes from the first registered provider that has a
 * non-blank id and a non-blank default model, or a `<provider>/<model>`
 * placeholder when none qualifies. Up to three providers that declare env
 * vars are listed as auth hints.
 *
 * @param cfg - Config used to list the registered video-generation providers.
 * @returns A two-sentence message joined by a single space.
 */
function buildNoVideoGenerationModelConfiguredMessage(cfg: OpenClawConfig): string {
  const providers = listVideoGenerationProviders(cfg);

  const sampleProvider = providers.find(
    (provider) => provider.id.trim().length > 0 && provider.defaultModel?.trim(),
  );
  let sampleRef = "<provider>/<model>";
  if (sampleProvider) {
    sampleRef = `${sampleProvider.id}/${sampleProvider.defaultModel}`;
  }

  // Env vars are looked up for every provider; only the first three hints are kept.
  const allHints: string[] = [];
  for (const provider of providers) {
    const envVars = getProviderEnvVars(provider.id);
    if (envVars.length > 0) {
      allHints.push(`${provider.id}: ${envVars.join(" / ")}`);
    }
  }
  const authHints = allHints.slice(0, 3);

  const configSentence = `No video-generation model configured. Set agents.defaults.videoGenerationModel.primary to a provider/model like "${sampleRef}".`;
  const authSentence =
    authHints.length > 0
      ? `If you want a specific provider, also configure that provider's auth/API key first (${authHints.join("; ")}).`
      : "If you want a specific provider, also configure that provider's auth/API key first.";
  return `${configSentence} ${authSentence}`;
}
|
||||
|
||||
/**
 * Lists the video-generation providers from the provider registry.
 *
 * @param params - Optional wrapper whose `config` is handed to the registry lookup.
 * @returns Whatever `listVideoGenerationProviders` returns for that config.
 */
export function listRuntimeVideoGenerationProviders(params?: { config?: OpenClawConfig }) {
  const config = params?.config;
  return listVideoGenerationProviders(config);
}
|
||||
|
||||
/**
 * Generates videos by trying each resolved provider/model candidate in order.
 *
 * A candidate whose provider is not registered, whose provider throws, or
 * whose result has no videos is recorded as a failed attempt, and the next
 * candidate is tried. The first candidate that returns at least one video wins.
 *
 * @param params - See `GenerateVideoParams`.
 * @returns The videos plus the winning provider/model and the attempts that failed before it.
 * @throws Error when no candidate model is configured, or (via
 *   `throwVideoGenerationFailure`) when every candidate fails.
 */
export async function generateVideo(
  params: GenerateVideoParams,
): Promise<GenerateVideoRuntimeResult> {
  const candidates = resolveVideoGenerationCandidates({
    cfg: params.cfg,
    modelOverride: params.modelOverride,
  });
  if (candidates.length === 0) {
    throw new Error(buildNoVideoGenerationModelConfiguredMessage(params.cfg));
  }

  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  for (const { provider: providerId, model: modelId } of candidates) {
    const provider = getVideoGenerationProvider(providerId, params.cfg);
    if (!provider) {
      const message = `No video-generation provider registered for ${providerId}`;
      attempts.push({ provider: providerId, model: modelId, error: message });
      lastError = new Error(message);
      continue;
    }

    try {
      const result: VideoGenerationResult = await provider.generateVideo({
        provider: providerId,
        model: modelId,
        prompt: params.prompt,
        cfg: params.cfg,
        agentDir: params.agentDir,
        authStore: params.authStore,
        size: params.size,
        aspectRatio: params.aspectRatio,
        resolution: params.resolution,
        durationSeconds: params.durationSeconds,
        audio: params.audio,
        watermark: params.watermark,
        inputImages: params.inputImages,
        inputVideos: params.inputVideos,
      });
      // An empty or malformed result counts as a failure of this candidate.
      if (!Array.isArray(result.videos) || result.videos.length === 0) {
        throw new Error("Video generation provider returned no videos.");
      }
      return {
        videos: result.videos,
        provider: providerId,
        model: result.model ?? modelId,
        attempts,
        metadata: result.metadata,
      };
    } catch (err) {
      lastError = err;
      // Failover errors carry structured details; anything else only contributes its message.
      const described = isFailoverError(err) ? describeFailoverError(err) : undefined;
      attempts.push({
        provider: providerId,
        model: modelId,
        error: described?.message ?? (err instanceof Error ? err.message : String(err)),
        reason: described?.reason,
        status: described?.status,
        code: described?.code,
      });
      log.debug(`video-generation candidate failed: ${providerId}/${modelId}`);
    }
  }

  throwVideoGenerationFailure({ attempts, lastError });
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,11 @@ export type VideoGenerationSourceAsset = {
|
|||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
/** Context handed to a video-generation provider's optional `isConfigured` check. */
export type VideoGenerationProviderConfiguredContext = {
  cfg?: OpenClawConfig;
  agentDir?: string;
};
|
||||
|
||||
export type VideoGenerationRequest = {
|
||||
provider: string;
|
||||
model: string;
|
||||
|
|
@ -61,5 +66,6 @@ export type VideoGenerationProvider = {
|
|||
defaultModel?: string;
|
||||
models?: string[];
|
||||
capabilities: VideoGenerationProviderCapabilities;
|
||||
isConfigured?: (ctx: VideoGenerationProviderConfiguredContext) => boolean;
|
||||
generateVideo: (req: VideoGenerationRequest) => Promise<VideoGenerationResult>;
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue