From 9f2b760d336269dee9c0e71fdb1211ae7b439ec9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 15:13:08 +0100 Subject: [PATCH] refactor: move media generation runtimes into core --- extensions/fal/image-generation-provider.ts | 6 + .../google/image-generation-provider.ts | 6 + .../minimax/image-generation-provider.ts | 6 + .../openai/image-generation-provider.ts | 6 + extensions/qwen/video-generation-provider.ts | 6 + src/agents/tools/image-generate-tool.ts | 68 +++++- src/agents/tools/model-config.helpers.ts | 6 +- src/image-generation/runtime.test.ts | 211 +++++++++++++++--- src/image-generation/runtime.ts | 192 +++++++++++++++- src/image-generation/types.ts | 6 + src/media-understanding/runtime.test.ts | 155 +++++++------ src/media-understanding/runtime.ts | 165 +++++++++++++- src/plugin-sdk/image-generation-core.ts | 1 + src/plugin-sdk/image-generation.ts | 1 + src/plugin-sdk/provider-auth.ts | 22 ++ src/plugin-sdk/video-generation-core.ts | 1 + src/plugin-sdk/video-generation.ts | 1 + src/video-generation/runtime.test.ts | 178 +++++++++++---- src/video-generation/runtime.ts | 198 +++++++++++++++- src/video-generation/types.ts | 6 + 20 files changed, 1062 insertions(+), 179 deletions(-) diff --git a/extensions/fal/image-generation-provider.ts b/extensions/fal/image-generation-provider.ts index a27c3006bef..707b28eea94 100644 --- a/extensions/fal/image-generation-provider.ts +++ b/extensions/fal/image-generation-provider.ts @@ -2,6 +2,7 @@ import type { GeneratedImageAsset, ImageGenerationProvider, } from "openclaw/plugin-sdk/image-generation"; +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, @@ -294,6 +295,11 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider { label: "fal", defaultModel: DEFAULT_FAL_IMAGE_MODEL, models: [DEFAULT_FAL_IMAGE_MODEL, `${DEFAULT_FAL_IMAGE_MODEL}/${DEFAULT_FAL_EDIT_SUBPATH}`], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "fal", + agentDir, + }), capabilities: { generate: { maxCount: 4, diff --git a/extensions/google/image-generation-provider.ts b/extensions/google/image-generation-provider.ts index 80cefa77d5f..07bb7046da8 100644 --- a/extensions/google/image-generation-provider.ts +++ b/extensions/google/image-generation-provider.ts @@ -1,4 +1,5 @@ import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, postJsonRequest } from "openclaw/plugin-sdk/provider-http"; import { normalizeGoogleModelId, resolveGoogleGenerativeAiHttpRequestConfig } from "./api.js"; @@ -88,6 +89,11 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider { label: "Google", defaultModel: DEFAULT_GOOGLE_IMAGE_MODEL, models: [DEFAULT_GOOGLE_IMAGE_MODEL, "gemini-3-pro-image-preview"], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "google", + agentDir, + }), capabilities: { generate: { maxCount: 4, diff --git a/extensions/minimax/image-generation-provider.ts b/extensions/minimax/image-generation-provider.ts index 416cc3d31dd..24d80c712f9 100644 --- a/extensions/minimax/image-generation-provider.ts +++ b/extensions/minimax/image-generation-provider.ts @@ -1,4 +1,5 @@ import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, @@ -57,6 +58,11 @@ function buildMinimaxImageProvider(providerId: string): ImageGenerationProvider label: "MiniMax", defaultModel: DEFAULT_MODEL, models: [DEFAULT_MODEL], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: providerId, + agentDir, + }), capabilities: { generate: { maxCount: 9, diff --git a/extensions/openai/image-generation-provider.ts b/extensions/openai/image-generation-provider.ts index e0286d20337..2f8a4035fe6 100644 --- a/extensions/openai/image-generation-provider.ts +++ b/extensions/openai/image-generation-provider.ts @@ -1,4 +1,5 @@ import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, @@ -49,6 +50,11 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { label: "OpenAI", defaultModel: DEFAULT_OPENAI_IMAGE_MODEL, models: [DEFAULT_OPENAI_IMAGE_MODEL], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "openai", + agentDir, + }), capabilities: { generate: { maxCount: 4, diff --git a/extensions/qwen/video-generation-provider.ts b/extensions/qwen/video-generation-provider.ts index 5ed249be535..8db83a4604f 100644 --- a/extensions/qwen/video-generation-provider.ts +++ b/extensions/qwen/video-generation-provider.ts @@ -1,3 +1,4 @@ +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, @@ -204,6 +205,11 @@ export function buildQwenVideoGenerationProvider(): VideoGenerationProvider { label: "Qwen Cloud", defaultModel: DEFAULT_QWEN_VIDEO_MODEL, models: ["wan2.6-t2v", "wan2.6-i2v", "wan2.6-r2v", "wan2.6-r2v-flash", "wan2.7-r2v"], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "qwen", + agentDir, + }), capabilities: { maxVideos: 1, maxInputImages: 1, diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index d84d6207a42..0b07032b7a4 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -26,6 +26,7 @@ import { import { buildToolModelConfigFromCandidates, coerceToolModelConfig, + hasAuthForProvider, hasToolModelConfig, resolveDefaultModelRef, type ToolModelConfig, @@ -113,20 +114,30 @@ function getImageGenerationProviderAuthEnvVars(providerId: string): string[] { return getProviderEnvVars(providerId); } -function resolveImageGenerationModelCandidates( - cfg: OpenClawConfig | undefined, -): Array { +function resolveImageGenerationModelCandidates(params: { + cfg?: OpenClawConfig; + agentDir?: string; +}): Array { const providerDefaults = new Map(); - for (const provider of listRuntimeImageGenerationProviders({ config: cfg })) { + for (const provider of listRuntimeImageGenerationProviders({ config: params.cfg })) { const providerId = provider.id.trim(); const modelId = provider.defaultModel?.trim(); - if (!providerId || !modelId || providerDefaults.has(providerId)) { + if ( + !providerId || + !modelId || + providerDefaults.has(providerId) || + !isImageGenerationProviderConfigured({ + provider, + cfg: params.cfg, + agentDir: params.agentDir, + }) + ) { continue; } providerDefaults.set(providerId, `${providerId}/${modelId}`); } - const primaryProvider = resolveDefaultModelRef(cfg).provider; + const primaryProvider = resolveDefaultModelRef(params.cfg).provider; const orderedProviders = [ primaryProvider, ...[...providerDefaults.keys()] @@ -157,10 +168,45 @@ export function resolveImageGenerationModelConfigForTool(params: { return buildToolModelConfigFromCandidates({ explicit, agentDir: params.agentDir, - candidates: resolveImageGenerationModelCandidates(params.cfg), + candidates: resolveImageGenerationModelCandidates(params), + isProviderConfigured: (providerId) => + isImageGenerationProviderConfigured({ + providerId, + cfg: params.cfg, + agentDir: params.agentDir, + }), }); } +function isImageGenerationProviderConfigured(params: { + provider?: ImageGenerationProvider; + providerId?: string; + cfg?: OpenClawConfig; + agentDir?: string; +}): boolean { + const provider = + params.provider ?? + listRuntimeImageGenerationProviders({ config: params.cfg }).find((candidate) => { + const normalizedId = normalizeProviderId(params.providerId ?? ""); + return ( + normalizeProviderId(candidate.id) === normalizedId || + (candidate.aliases ?? []).some((alias) => normalizeProviderId(alias) === normalizedId) + ); + }); + if (!provider) { + return params.providerId + ? hasAuthForProvider({ provider: params.providerId, agentDir: params.agentDir }) + : false; + } + if (provider.isConfigured) { + return provider.isConfigured({ + cfg: params.cfg, + agentDir: params.agentDir, + }); + } + return hasAuthForProvider({ provider: provider.id, agentDir: params.agentDir }); +} + function resolveAction(args: Record): "generate" | "list" { const raw = readStringParam(args, "action"); if (!raw) { @@ -497,7 +543,7 @@ export function createImageGenerateTool(options?: { label: "Image Generation", name: "image_generate", description: - 'Generate new images or edit reference images with the configured or inferred image-generation model. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. If you want openai/*, google/*, fal/*, or another provider, configure that provider auth/API key first. Use action="list" to inspect available providers, models, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.', + 'Generate new images or edit reference images with the configured or inferred image-generation model. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. Providers declare their own auth/readiness; use action="list" to inspect registered providers, models, readiness, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.', parameters: ImageGenerateToolSchema, execute: async (_toolCallId, args) => { const params = args as Record; @@ -509,6 +555,11 @@ export function createImageGenerateTool(options?: { ...(provider.label ? { label: provider.label } : {}), ...(provider.defaultModel ? { defaultModel: provider.defaultModel } : {}), models: provider.models ?? (provider.defaultModel ? [provider.defaultModel] : []), + configured: isImageGenerationProviderConfigured({ + provider, + cfg: effectiveCfg, + agentDir: options?.agentDir, + }), authEnvVars: getImageGenerationProviderAuthEnvVars(provider.id), capabilities: provider.capabilities, }), @@ -537,6 +588,7 @@ export function createImageGenerateTool(options?: { return [ `${provider.id}${provider.defaultModel ? ` (default ${provider.defaultModel})` : ""}`, ` ${modelLine}`, + ` configured: ${provider.configured ? "yes" : "no"}`, ...(provider.authEnvVars.length > 0 ? [` auth: set ${provider.authEnvVars.join(" / ")} to use ${provider.id}/*`] : []), diff --git a/src/agents/tools/model-config.helpers.ts b/src/agents/tools/model-config.helpers.ts index 3d6700c90f7..8ba5b8ea283 100644 --- a/src/agents/tools/model-config.helpers.ts +++ b/src/agents/tools/model-config.helpers.ts @@ -56,6 +56,7 @@ export function buildToolModelConfigFromCandidates(params: { explicit: ToolModelConfig; agentDir?: string; candidates: Array; + isProviderConfigured?: (provider: string) => boolean; }): ToolModelConfig | null { if (hasToolModelConfig(params.explicit)) { return params.explicit; @@ -68,7 +69,10 @@ export function buildToolModelConfigFromCandidates(params: { continue; } const provider = trimmed.slice(0, trimmed.indexOf("/")).trim(); - if (!provider || !hasAuthForProvider({ provider, agentDir: params.agentDir })) { + const providerConfigured = + params.isProviderConfigured?.(provider) ?? + hasAuthForProvider({ provider, agentDir: params.agentDir }); + if (!provider || !providerConfigured) { continue; } if (!deduped.includes(trimmed)) { diff --git a/src/image-generation/runtime.test.ts b/src/image-generation/runtime.test.ts index 6ff58c4833d..5423b864dc6 100644 --- a/src/image-generation/runtime.test.ts +++ b/src/image-generation/runtime.test.ts @@ -1,37 +1,111 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; -import type { ImageGenerationProvider } from "../image-generation/types.js"; -import { - generateImage, - listRuntimeImageGenerationProviders, - type GenerateImageRuntimeResult, -} from "./runtime.js"; +import { generateImage, listRuntimeImageGenerationProviders } from "./runtime.js"; +import type { ImageGenerationProvider } from "./types.js"; -const mocks = vi.hoisted(() => ({ - generateImage: vi.fn(), - listRuntimeImageGenerationProviders: vi.fn(), +const mocks = vi.hoisted(() => { + const debug = vi.fn(); + return { + createSubsystemLogger: vi.fn(() => ({ debug })), + describeFailoverError: vi.fn(), + getImageGenerationProvider: vi.fn< + (providerId: string, config?: OpenClawConfig) => ImageGenerationProvider | undefined + >(() => undefined), + getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []), + isFailoverError: vi.fn<(err: unknown) => boolean>(() => false), + listImageGenerationProviders: vi.fn<(config?: OpenClawConfig) => ImageGenerationProvider[]>( + () => [], + ), + parseImageGenerationModelRef: vi.fn< + (raw?: string) => { provider: string; model: string } | undefined + >((raw?: string) => { + const trimmed = raw?.trim(); + if (!trimmed) { + return undefined; + } + const slash = trimmed.indexOf("/"); + if (slash <= 0 || slash === trimmed.length - 1) { + return undefined; + } + return { + provider: trimmed.slice(0, slash), + model: trimmed.slice(slash + 1), + }; + }), + resolveAgentModelFallbackValues: vi.fn<(value: unknown) => string[]>(() => []), + resolveAgentModelPrimaryValue: vi.fn<(value: unknown) => string | undefined>(() => undefined), + debug, + }; +}); + +vi.mock("../agents/failover-error.js", () => ({ + describeFailoverError: mocks.describeFailoverError, + isFailoverError: mocks.isFailoverError, +})); +vi.mock("../config/model-input.js", () => ({ + resolveAgentModelFallbackValues: mocks.resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue: mocks.resolveAgentModelPrimaryValue, +})); +vi.mock("../logging/subsystem.js", () => ({ + createSubsystemLogger: mocks.createSubsystemLogger, +})); +vi.mock("../secrets/provider-env-vars.js", () => ({ + getProviderEnvVars: mocks.getProviderEnvVars, +})); +vi.mock("./model-ref.js", () => ({ + parseImageGenerationModelRef: mocks.parseImageGenerationModelRef, +})); +vi.mock("./provider-registry.js", () => ({ + getImageGenerationProvider: mocks.getImageGenerationProvider, + listImageGenerationProviders: mocks.listImageGenerationProviders, })); -vi.mock("../../extensions/image-generation-core/runtime-api.js", () => ({ - generateImage: mocks.generateImage, - listRuntimeImageGenerationProviders: mocks.listRuntimeImageGenerationProviders, -})); - -describe("image-generation runtime facade", () => { - afterEach(() => { - mocks.generateImage.mockReset(); - mocks.listRuntimeImageGenerationProviders.mockReset(); +describe("image-generation runtime", () => { + beforeEach(() => { + mocks.createSubsystemLogger.mockClear(); + mocks.describeFailoverError.mockReset(); + mocks.getImageGenerationProvider.mockReset(); + mocks.getProviderEnvVars.mockReset(); + mocks.getProviderEnvVars.mockReturnValue([]); + mocks.isFailoverError.mockReset(); + mocks.isFailoverError.mockReturnValue(false); + mocks.listImageGenerationProviders.mockReset(); + mocks.listImageGenerationProviders.mockReturnValue([]); + mocks.parseImageGenerationModelRef.mockClear(); + mocks.resolveAgentModelFallbackValues.mockReset(); + mocks.resolveAgentModelFallbackValues.mockReturnValue([]); + mocks.resolveAgentModelPrimaryValue.mockReset(); + mocks.resolveAgentModelPrimaryValue.mockReturnValue(undefined); + mocks.debug.mockReset(); }); - it("delegates image generation to the image runtime", async () => { - const result: GenerateImageRuntimeResult = { - images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png", fileName: "sample.png" }], - provider: "image-plugin", - model: "img-v1", - attempts: [], + it("generates images through the active image-generation provider", async () => { + const authStore = { version: 1, profiles: {} } as const; + let seenAuthStore: unknown; + mocks.resolveAgentModelPrimaryValue.mockReturnValue("image-plugin/img-v1"); + const provider: ImageGenerationProvider = { + id: "image-plugin", + capabilities: { + generate: {}, + edit: { enabled: false }, + }, + async generateImage(req: { authStore?: unknown }) { + seenAuthStore = req.authStore; + return { + images: [ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "sample.png", + }, + ], + model: "img-v1", + }; + }, }; - mocks.generateImage.mockResolvedValue(result); - const params = { + mocks.getImageGenerationProvider.mockReturnValue(provider); + + const result = await generateImage({ cfg: { agents: { defaults: { @@ -41,19 +115,58 @@ describe("image-generation runtime facade", () => { } as OpenClawConfig, prompt: "draw a cat", agentDir: "/tmp/agent", - authStore: { version: 1, profiles: {} }, - }; + authStore, + }); - await expect(generateImage(params)).resolves.toBe(result); - expect(mocks.generateImage).toHaveBeenCalledWith(params); + expect(result.provider).toBe("image-plugin"); + expect(result.model).toBe("img-v1"); + expect(result.attempts).toEqual([]); + expect(seenAuthStore).toEqual(authStore); + expect(result.images).toEqual([ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "sample.png", + }, + ]); }); - it("delegates provider listing to the image runtime", () => { + it("lists runtime image-generation providers through the provider registry", () => { const providers: ImageGenerationProvider[] = [ { id: "image-plugin", defaultModel: "img-v1", models: ["img-v1", "img-v2"], + capabilities: { + generate: { + supportsResolution: true, + }, + edit: { + enabled: true, + maxInputImages: 3, + }, + geometry: { + resolutions: ["1K", "2K"], + }, + }, + generateImage: async () => ({ + images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], + }), + }, + ]; + mocks.listImageGenerationProviders.mockReturnValue(providers); + + expect(listRuntimeImageGenerationProviders({ config: {} as OpenClawConfig })).toEqual( + providers, + ); + expect(mocks.listImageGenerationProviders).toHaveBeenCalledWith({} as OpenClawConfig); + }); + + it("builds a generic config hint without hardcoded provider ids", async () => { + mocks.listImageGenerationProviders.mockReturnValue([ + { + id: "vision-one", + defaultModel: "paint-v1", capabilities: { generate: {}, edit: { enabled: false }, @@ -62,11 +175,35 @@ describe("image-generation runtime facade", () => { images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], }), }, - ]; - mocks.listRuntimeImageGenerationProviders.mockReturnValue(providers); - const params = { config: {} as OpenClawConfig }; + { + id: "vision-two", + defaultModel: "paint-v2", + capabilities: { + generate: {}, + edit: { enabled: false }, + }, + generateImage: async () => ({ + images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], + }), + }, + ]); + mocks.getProviderEnvVars.mockImplementation((providerId: string) => { + if (providerId === "vision-one") { + return ["VISION_ONE_API_KEY"]; + } + if (providerId === "vision-two") { + return ["VISION_TWO_API_KEY"]; + } + return []; + }); - expect(listRuntimeImageGenerationProviders(params)).toBe(providers); - expect(mocks.listRuntimeImageGenerationProviders).toHaveBeenCalledWith(params); + const promise = generateImage({ cfg: {} as OpenClawConfig, prompt: "draw a cat" }); + + await expect(promise).rejects.toThrow("No image-generation model configured."); + await expect(promise).rejects.toThrow( + 'Set agents.defaults.imageGenerationModel.primary to a provider/model like "vision-one/paint-v1".', + ); + await expect(promise).rejects.toThrow("vision-one: VISION_ONE_API_KEY"); + await expect(promise).rejects.toThrow("vision-two: VISION_TWO_API_KEY"); }); }); diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts index 460a50f6efb..c8056b756e8 100644 --- a/src/image-generation/runtime.ts +++ b/src/image-generation/runtime.ts @@ -1,6 +1,186 @@ -export { - generateImage, - listRuntimeImageGenerationProviders, - type GenerateImageParams, - type GenerateImageRuntimeResult, -} from "../../extensions/image-generation-core/runtime-api.js"; +import type { AuthProfileStore } from "../agents/auth-profiles.js"; +import { describeFailoverError, isFailoverError } from "../agents/failover-error.js"; +import type { FallbackAttempt } from "../agents/model-fallback.types.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { + resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue, +} from "../config/model-input.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; +import { getProviderEnvVars } from "../secrets/provider-env-vars.js"; +import { parseImageGenerationModelRef } from "./model-ref.js"; +import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js"; +import type { + GeneratedImageAsset, + ImageGenerationResolution, + ImageGenerationResult, + ImageGenerationSourceImage, +} from "./types.js"; + +const log = createSubsystemLogger("image-generation"); + +export type GenerateImageParams = { + cfg: OpenClawConfig; + prompt: string; + agentDir?: string; + authStore?: AuthProfileStore; + modelOverride?: string; + count?: number; + size?: string; + aspectRatio?: string; + resolution?: ImageGenerationResolution; + inputImages?: ImageGenerationSourceImage[]; +}; + +export type GenerateImageRuntimeResult = { + images: GeneratedImageAsset[]; + provider: string; + model: string; + attempts: FallbackAttempt[]; + metadata?: Record; +}; + +function resolveImageGenerationCandidates(params: { + cfg: OpenClawConfig; + modelOverride?: string; +}): Array<{ provider: string; model: string }> { + const candidates: Array<{ provider: string; model: string }> = []; + const seen = new Set(); + const add = (raw: string | undefined) => { + const parsed = parseImageGenerationModelRef(raw); + if (!parsed) { + return; + } + const key = `${parsed.provider}/${parsed.model}`; + if (seen.has(key)) { + return; + } + seen.add(key); + candidates.push(parsed); + }; + + add(params.modelOverride); + add(resolveAgentModelPrimaryValue(params.cfg.agents?.defaults?.imageGenerationModel)); + for (const fallback of resolveAgentModelFallbackValues( + params.cfg.agents?.defaults?.imageGenerationModel, + )) { + add(fallback); + } + return candidates; +} + +function throwImageGenerationFailure(params: { + attempts: FallbackAttempt[]; + lastError: unknown; +}): never { + if (params.attempts.length <= 1 && params.lastError) { + throw params.lastError; + } + const summary = + params.attempts.length > 0 + ? params.attempts + .map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`) + .join(" | ") + : "unknown"; + throw new Error(`All image generation models failed (${params.attempts.length}): ${summary}`, { + cause: params.lastError instanceof Error ? params.lastError : undefined, + }); +} + +function buildNoImageGenerationModelConfiguredMessage(cfg: OpenClawConfig): string { + const providers = listImageGenerationProviders(cfg); + const sampleModel = providers.find( + (provider) => provider.id.trim().length > 0 && provider.defaultModel?.trim(), + ); + const sampleRef = sampleModel + ? `${sampleModel.id}/${sampleModel.defaultModel}` + : "/"; + const authHints = providers + .flatMap((provider) => { + const envVars = getProviderEnvVars(provider.id); + if (envVars.length === 0) { + return []; + } + return [`${provider.id}: ${envVars.join(" / ")}`]; + }) + .slice(0, 3); + return [ + `No image-generation model configured. Set agents.defaults.imageGenerationModel.primary to a provider/model like "${sampleRef}".`, + authHints.length > 0 + ? `If you want a specific provider, also configure that provider's auth/API key first (${authHints.join("; ")}).` + : "If you want a specific provider, also configure that provider's auth/API key first.", + ].join(" "); +} + +export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) { + return listImageGenerationProviders(params?.config); +} + +export async function generateImage( + params: GenerateImageParams, +): Promise { + const candidates = resolveImageGenerationCandidates({ + cfg: params.cfg, + modelOverride: params.modelOverride, + }); + if (candidates.length === 0) { + throw new Error(buildNoImageGenerationModelConfiguredMessage(params.cfg)); + } + + const attempts: FallbackAttempt[] = []; + let lastError: unknown; + + for (const candidate of candidates) { + const provider = getImageGenerationProvider(candidate.provider, params.cfg); + if (!provider) { + const error = `No image-generation provider registered for ${candidate.provider}`; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error, + }); + lastError = new Error(error); + continue; + } + + try { + const result: ImageGenerationResult = await provider.generateImage({ + provider: candidate.provider, + model: candidate.model, + prompt: params.prompt, + cfg: params.cfg, + agentDir: params.agentDir, + authStore: params.authStore, + count: params.count, + size: params.size, + aspectRatio: params.aspectRatio, + resolution: params.resolution, + inputImages: params.inputImages, + }); + if (!Array.isArray(result.images) || result.images.length === 0) { + throw new Error("Image generation provider returned no images."); + } + return { + images: result.images, + provider: candidate.provider, + model: result.model ?? candidate.model, + attempts, + metadata: result.metadata, + }; + } catch (err) { + lastError = err; + const described = isFailoverError(err) ? describeFailoverError(err) : undefined; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error: described?.message ?? (err instanceof Error ? err.message : String(err)), + reason: described?.reason, + status: described?.status, + code: described?.code, + }); + log.debug(`image-generation candidate failed: ${candidate.provider}/${candidate.model}`); + } + } + + throwImageGenerationFailure({ attempts, lastError }); +} diff --git a/src/image-generation/types.ts b/src/image-generation/types.ts index 8e1a8fa0136..7a97deb5e95 100644 --- a/src/image-generation/types.ts +++ b/src/image-generation/types.ts @@ -18,6 +18,11 @@ export type ImageGenerationSourceImage = { metadata?: Record; }; +export type ImageGenerationProviderConfiguredContext = { + cfg?: OpenClawConfig; + agentDir?: string; +}; + export type ImageGenerationRequest = { provider: string; model: string; @@ -70,5 +75,6 @@ export type ImageGenerationProvider = { defaultModel?: string; models?: string[]; capabilities: ImageGenerationProviderCapabilities; + isConfigured?: (ctx: ImageGenerationProviderConfiguredContext) => boolean; generateImage: (req: ImageGenerationRequest) => Promise; }; diff --git a/src/media-understanding/runtime.test.ts b/src/media-understanding/runtime.test.ts index 2a4abc7479f..c6bb1cf2167 100644 --- a/src/media-understanding/runtime.test.ts +++ b/src/media-understanding/runtime.test.ts @@ -1,90 +1,97 @@ -import { afterEach, beforeAll, describe, expect, it, vi } from "vitest"; +import { afterEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; +import type { MediaUnderstandingOutput } from "../media-understanding/types.js"; +import { describeImageFile, runMediaUnderstandingFile } from "./runtime.js"; -const hoisted = vi.hoisted(() => ({ - describeImageFile: vi.fn(), - runMediaUnderstandingFile: vi.fn(), +const mocks = vi.hoisted(() => { + const cleanup = vi.fn(async () => {}); + return { + buildProviderRegistry: vi.fn(() => new Map()), + createMediaAttachmentCache: vi.fn(() => ({ cleanup })), + normalizeMediaAttachments: vi.fn(() => []), + normalizeMediaProviderId: vi.fn((provider: string) => provider.trim().toLowerCase()), + runCapability: vi.fn(), + cleanup, + }; +}); + +vi.mock("../plugin-sdk/media-runtime.js", () => ({ + buildProviderRegistry: mocks.buildProviderRegistry, + createMediaAttachmentCache: mocks.createMediaAttachmentCache, + normalizeMediaAttachments: mocks.normalizeMediaAttachments, + normalizeMediaProviderId: mocks.normalizeMediaProviderId, + runCapability: mocks.runCapability, })); -vi.mock("../../extensions/media-understanding-core/runtime-api.js", () => ({ - describeImageFile: hoisted.describeImageFile, - describeImageFileWithModel: vi.fn(), - describeVideoFile: vi.fn(), - runMediaUnderstandingFile: hoisted.runMediaUnderstandingFile, - transcribeAudioFile: vi.fn(), -})); - -let describeImageFile: typeof import("./runtime.js").describeImageFile; -let runMediaUnderstandingFile: typeof import("./runtime.js").runMediaUnderstandingFile; - -describe("media-understanding runtime facade", () => { - beforeAll(async () => { - ({ describeImageFile, runMediaUnderstandingFile } = await import("./runtime.js")); - }); - +describe("media-understanding runtime", () => { afterEach(() => { - hoisted.describeImageFile.mockReset(); - hoisted.runMediaUnderstandingFile.mockReset(); + mocks.buildProviderRegistry.mockReset(); + mocks.createMediaAttachmentCache.mockReset(); + mocks.normalizeMediaAttachments.mockReset(); + mocks.normalizeMediaProviderId.mockReset(); + mocks.runCapability.mockReset(); + mocks.cleanup.mockReset(); + mocks.cleanup.mockResolvedValue(undefined); }); - it("delegates describeImageFile to the shared media-understanding runtime", async () => { - const params = { - filePath: "/tmp/sample.jpg", - mime: "image/jpeg", - cfg: { - tools: { - media: { - image: { - models: [{ provider: "vision-plugin", model: "vision-v1" }], + it("returns disabled state without loading providers", async () => { + mocks.normalizeMediaAttachments.mockReturnValue([{ kind: "image" }]); + + await expect( + runMediaUnderstandingFile({ + capability: "image", + filePath: "/tmp/sample.jpg", + mime: "image/jpeg", + cfg: { + tools: { + media: { + image: { + enabled: false, + }, }, }, - }, - } as OpenClawConfig, - agentDir: "/tmp/agent", - }; - const result = { - text: "image ok", - provider: "vision-plugin", - model: "vision-v1", - output: { - kind: "image.description" as const, - attachmentIndex: 0, - text: "image ok", - provider: "vision-plugin", - model: "vision-v1", - }, - }; - hoisted.describeImageFile.mockResolvedValue(result); - - await expect(describeImageFile(params)).resolves.toEqual(result); - expect(hoisted.describeImageFile).toHaveBeenCalledWith(params); - }); - - it("delegates runMediaUnderstandingFile to the shared media-understanding runtime", async () => { - const params = { - capability: "image" as const, - filePath: "/tmp/sample.jpg", - mime: "image/jpeg", - cfg: { - tools: { - media: { - image: { - enabled: false, - }, - }, - }, - } as OpenClawConfig, - agentDir: "/tmp/agent", - }; - const result = { + } as OpenClawConfig, + agentDir: "/tmp/agent", + }), + ).resolves.toEqual({ text: undefined, provider: undefined, model: undefined, output: undefined, - }; - hoisted.runMediaUnderstandingFile.mockResolvedValue(result); + }); - await expect(runMediaUnderstandingFile(params)).resolves.toEqual(result); - expect(hoisted.runMediaUnderstandingFile).toHaveBeenCalledWith(params); + expect(mocks.buildProviderRegistry).not.toHaveBeenCalled(); + expect(mocks.runCapability).not.toHaveBeenCalled(); + }); + + it("returns the matching capability output", async () => { + const output: MediaUnderstandingOutput = { + kind: "image.description", + attachmentIndex: 0, + provider: "vision-plugin", + model: "vision-v1", + text: "image ok", + }; + mocks.normalizeMediaAttachments.mockReturnValue([{ kind: "image" }]); + mocks.runCapability.mockResolvedValue({ + outputs: [output], + }); + + await expect( + describeImageFile({ + filePath: "/tmp/sample.jpg", + mime: "image/jpeg", + cfg: {} as OpenClawConfig, + agentDir: "/tmp/agent", + }), + ).resolves.toEqual({ + text: "image ok", + provider: "vision-plugin", + model: "vision-v1", + output, + }); + + expect(mocks.runCapability).toHaveBeenCalledTimes(1); + expect(mocks.cleanup).toHaveBeenCalledTimes(1); }); }); diff --git a/src/media-understanding/runtime.ts b/src/media-understanding/runtime.ts index e5d3bc653ae..9c7b647957f 100644 --- a/src/media-understanding/runtime.ts +++ b/src/media-understanding/runtime.ts @@ -1,9 +1,156 @@ -export { - describeImageFile, - describeImageFileWithModel, - describeVideoFile, - runMediaUnderstandingFile, - transcribeAudioFile, - type RunMediaUnderstandingFileParams, - type RunMediaUnderstandingFileResult, -} from "../../extensions/media-understanding-core/runtime-api.js"; +import fs from "node:fs/promises"; +import path from "node:path"; +import type { OpenClawConfig } from "../config/config.js"; +import { + buildProviderRegistry, + createMediaAttachmentCache, + normalizeMediaAttachments, + normalizeMediaProviderId, + runCapability, + type ActiveMediaModel, +} from "../plugin-sdk/media-runtime.js"; + +type MediaUnderstandingCapability = "image" | "audio" | "video"; +type MediaUnderstandingOutput = Awaited>["outputs"][number]; + +const KIND_BY_CAPABILITY: Record = { + audio: "audio.transcription", + image: "image.description", + video: "video.description", +}; + +export type RunMediaUnderstandingFileParams = { + capability: MediaUnderstandingCapability; + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}; + +export type RunMediaUnderstandingFileResult = { + text: string | undefined; + provider?: string; + model?: string; + output?: MediaUnderstandingOutput; +}; + +function buildFileContext(params: { filePath: string; mime?: string }) { + return { + MediaPath: params.filePath, + MediaType: params.mime, + }; +} + +export async function runMediaUnderstandingFile( + params: RunMediaUnderstandingFileParams, +): Promise { + const ctx = buildFileContext(params); + const attachments = normalizeMediaAttachments(ctx); + if (attachments.length === 0) { + return { text: undefined }; + } + const config = params.cfg.tools?.media?.[params.capability]; + if (config?.enabled === false) { + return { + text: undefined, + provider: undefined, + model: undefined, + output: undefined, + }; + } + + const providerRegistry = buildProviderRegistry(undefined, params.cfg); + const cache = createMediaAttachmentCache(attachments, { + localPathRoots: [path.dirname(params.filePath)], + }); + + try { + const result = await runCapability({ + capability: params.capability, + cfg: params.cfg, + ctx, + attachments: cache, + media: attachments, + agentDir: params.agentDir, + providerRegistry, + config, + activeModel: params.activeModel, + }); + const output = result.outputs.find( + (entry) => entry.kind === KIND_BY_CAPABILITY[params.capability], + ); + const text = output?.text?.trim(); + return { + text: text || undefined, + provider: output?.provider, + model: output?.model, + output, + }; + } finally { + await cache.cleanup(); + } +} + +export async function describeImageFile(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}): Promise { + return await runMediaUnderstandingFile({ ...params, capability: "image" }); +} + +export async function describeImageFileWithModel(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + provider: string; + model: string; + prompt: string; + maxTokens?: number; + timeoutMs?: number; +}) { + const timeoutMs = params.timeoutMs ?? 30_000; + const providerRegistry = buildProviderRegistry(undefined, params.cfg); + const provider = providerRegistry.get(normalizeMediaProviderId(params.provider)); + if (!provider?.describeImage) { + throw new Error(`Provider does not support image analysis: ${params.provider}`); + } + const buffer = await fs.readFile(params.filePath); + return await provider.describeImage({ + buffer, + fileName: path.basename(params.filePath), + mime: params.mime, + provider: params.provider, + model: params.model, + prompt: params.prompt, + maxTokens: params.maxTokens, + timeoutMs, + cfg: params.cfg, + agentDir: params.agentDir ?? "", + }); +} + +export async function describeVideoFile(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}): Promise { + return await runMediaUnderstandingFile({ ...params, capability: "video" }); +} + +export async function transcribeAudioFile(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}): Promise<{ text: string | undefined }> { + const result = await runMediaUnderstandingFile({ ...params, capability: "audio" }); + return { text: result.text }; +} diff --git a/src/plugin-sdk/image-generation-core.ts b/src/plugin-sdk/image-generation-core.ts index 63ab55718a7..65670f92408 100644 --- a/src/plugin-sdk/image-generation-core.ts +++ b/src/plugin-sdk/image-generation-core.ts @@ -6,6 +6,7 @@ export type { ImageGenerationProviderPlugin } from "../plugins/types.js"; export type { GeneratedImageAsset, ImageGenerationProvider, + ImageGenerationProviderConfiguredContext, ImageGenerationResolution, ImageGenerationRequest, ImageGenerationResult, diff --git a/src/plugin-sdk/image-generation.ts b/src/plugin-sdk/image-generation.ts index 0c37a9ece7f..4c0f01c30c5 100644 --- a/src/plugin-sdk/image-generation.ts +++ b/src/plugin-sdk/image-generation.ts @@ -3,6 +3,7 @@ export type { GeneratedImageAsset, ImageGenerationProvider, + ImageGenerationProviderConfiguredContext, ImageGenerationResolution, ImageGenerationRequest, ImageGenerationResult, diff --git a/src/plugin-sdk/provider-auth.ts b/src/plugin-sdk/provider-auth.ts index ede72ff6660..43b60a613ec 100644 --- a/src/plugin-sdk/provider-auth.ts +++ b/src/plugin-sdk/provider-auth.ts @@ -1,5 +1,9 @@ // Public auth/onboarding helpers for provider plugins. +import { listProfilesForProvider } from "../agents/auth-profiles/profiles.js"; +import { ensureAuthProfileStore } from "../agents/auth-profiles/store.js"; +import { resolveEnvApiKey } from "../agents/model-auth-env.js"; + export type { OpenClawConfig } from "../config/config.js"; export type { SecretInput } from "../config/types.secrets.js"; export type { ProviderAuthResult } from "../plugins/types.js"; @@ -13,6 +17,7 @@ export { upsertAuthProfile, upsertAuthProfileWithLock, } from "../agents/auth-profiles/profiles.js"; +export { resolveEnvApiKey } from "../agents/model-auth-env.js"; export { readClaudeCliCredentialsCached } from "../agents/cli-credentials.js"; export { suggestOAuthProfileIdForLegacyDefault } from "../agents/auth-profiles/repair.js"; export { @@ -51,3 +56,20 @@ export { } from "../secrets/provider-env-vars.js"; export { buildOauthProviderAuthResult } from "./provider-auth-result.js"; export { generatePkceVerifierChallenge, toFormUrlEncoded } from "./oauth-utils.js"; + +export function isProviderApiKeyConfigured(params: { + provider: string; + agentDir?: string; +}): boolean { + if (resolveEnvApiKey(params.provider)?.apiKey) { + return true; + } + const agentDir = params.agentDir?.trim(); + if (!agentDir) { + return false; + } + const store = ensureAuthProfileStore(agentDir, { + allowKeychainPrompt: false, + }); + return listProfilesForProvider(store, params.provider).length > 0; +} diff --git a/src/plugin-sdk/video-generation-core.ts b/src/plugin-sdk/video-generation-core.ts index cb735cc7ceb..65c9a153388 100644 --- a/src/plugin-sdk/video-generation-core.ts +++ b/src/plugin-sdk/video-generation-core.ts @@ -6,6 +6,7 @@ export type { VideoGenerationProviderPlugin } from "../plugins/types.js"; export type { GeneratedVideoAsset, VideoGenerationProvider, + VideoGenerationProviderConfiguredContext, VideoGenerationRequest, VideoGenerationResolution, VideoGenerationResult, diff --git a/src/plugin-sdk/video-generation.ts b/src/plugin-sdk/video-generation.ts index aaef20c7e5d..ae166e6052e 100644 --- a/src/plugin-sdk/video-generation.ts +++ b/src/plugin-sdk/video-generation.ts @@ -3,6 +3,7 @@ export type { GeneratedVideoAsset, VideoGenerationProvider, + VideoGenerationProviderConfiguredContext, VideoGenerationRequest, VideoGenerationResolution, VideoGenerationResult, diff --git a/src/video-generation/runtime.test.ts b/src/video-generation/runtime.test.ts index 5f579fb6c83..74120ac6eca 100644 --- a/src/video-generation/runtime.test.ts +++ b/src/video-generation/runtime.test.ts @@ -1,37 +1,108 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; -import type { VideoGenerationProvider } from "../video-generation/types.js"; -import { - generateVideo, - listRuntimeVideoGenerationProviders, - type GenerateVideoRuntimeResult, -} from "./runtime.js"; +import { generateVideo, listRuntimeVideoGenerationProviders } from "./runtime.js"; +import type { VideoGenerationProvider } from "./types.js"; -const mocks = vi.hoisted(() => ({ - generateVideo: vi.fn(), - listRuntimeVideoGenerationProviders: vi.fn(), +const mocks = vi.hoisted(() => { + const debug = vi.fn(); + return { + createSubsystemLogger: vi.fn(() => ({ debug })), + describeFailoverError: vi.fn(), + getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []), + getVideoGenerationProvider: vi.fn< + (providerId: string, config?: OpenClawConfig) => VideoGenerationProvider | undefined + >(() => undefined), + isFailoverError: vi.fn<(err: unknown) => boolean>(() => false), + listVideoGenerationProviders: vi.fn<(config?: OpenClawConfig) => VideoGenerationProvider[]>( + () => [], + ), + parseVideoGenerationModelRef: vi.fn< + (raw?: string) => { provider: string; model: string } | undefined + >((raw?: string) => { + const trimmed = raw?.trim(); + if (!trimmed) { + return undefined; + } + const slash = trimmed.indexOf("/"); + if (slash <= 0 || slash === trimmed.length - 1) { + return undefined; + } + return { + provider: trimmed.slice(0, slash), + model: trimmed.slice(slash + 1), + }; + }), + resolveAgentModelFallbackValues: vi.fn<(value: unknown) => string[]>(() => []), + resolveAgentModelPrimaryValue: vi.fn<(value: unknown) => string | undefined>(() => undefined), + debug, + }; +}); + +vi.mock("../agents/failover-error.js", () => ({ + describeFailoverError: mocks.describeFailoverError, + isFailoverError: mocks.isFailoverError, +})); +vi.mock("../config/model-input.js", () => ({ + resolveAgentModelFallbackValues: mocks.resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue: mocks.resolveAgentModelPrimaryValue, +})); +vi.mock("../logging/subsystem.js", () => ({ + createSubsystemLogger: mocks.createSubsystemLogger, +})); +vi.mock("../secrets/provider-env-vars.js", () => ({ + getProviderEnvVars: mocks.getProviderEnvVars, +})); +vi.mock("./model-ref.js", () => ({ + parseVideoGenerationModelRef: mocks.parseVideoGenerationModelRef, +})); +vi.mock("./provider-registry.js", () => ({ + getVideoGenerationProvider: mocks.getVideoGenerationProvider, + listVideoGenerationProviders: mocks.listVideoGenerationProviders, })); -vi.mock("../../extensions/video-generation-core/runtime-api.js", () => ({ - generateVideo: mocks.generateVideo, - listRuntimeVideoGenerationProviders: mocks.listRuntimeVideoGenerationProviders, -})); - -describe("video-generation runtime facade", () => { - afterEach(() => { - mocks.generateVideo.mockReset(); - mocks.listRuntimeVideoGenerationProviders.mockReset(); +describe("video-generation runtime", () => { + beforeEach(() => { + mocks.createSubsystemLogger.mockClear(); + mocks.describeFailoverError.mockReset(); + mocks.getProviderEnvVars.mockReset(); + mocks.getProviderEnvVars.mockReturnValue([]); + mocks.getVideoGenerationProvider.mockReset(); + mocks.isFailoverError.mockReset(); + mocks.isFailoverError.mockReturnValue(false); + mocks.listVideoGenerationProviders.mockReset(); + mocks.listVideoGenerationProviders.mockReturnValue([]); + mocks.parseVideoGenerationModelRef.mockClear(); + mocks.resolveAgentModelFallbackValues.mockReset(); + mocks.resolveAgentModelFallbackValues.mockReturnValue([]); + mocks.resolveAgentModelPrimaryValue.mockReset(); + mocks.resolveAgentModelPrimaryValue.mockReturnValue(undefined); + mocks.debug.mockReset(); }); - it("delegates video generation to the shared video-generation runtime", async () => { - const result: GenerateVideoRuntimeResult = { - videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4", fileName: "sample.mp4" }], - provider: "video-plugin", - model: "vid-v1", - attempts: [], + it("generates videos through the active video-generation provider", async () => { + const authStore = { version: 1, profiles: {} } as const; + let seenAuthStore: unknown; + mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1"); + const provider: VideoGenerationProvider = { + id: "video-plugin", + capabilities: {}, + async generateVideo(req: { authStore?: unknown }) { + seenAuthStore = req.authStore; + return { + videos: [ + { + buffer: Buffer.from("mp4-bytes"), + mimeType: "video/mp4", + fileName: "sample.mp4", + }, + ], + model: "vid-v1", + }; + }, }; - mocks.generateVideo.mockResolvedValue(result); - const params = { + mocks.getVideoGenerationProvider.mockReturnValue(provider); + + const result = await generateVideo({ cfg: { agents: { defaults: { @@ -41,21 +112,29 @@ describe("video-generation runtime facade", () => { } as OpenClawConfig, prompt: "animate a cat", agentDir: "/tmp/agent", - authStore: { version: 1, profiles: {} }, - }; + authStore, + }); - await expect(generateVideo(params)).resolves.toBe(result); - expect(mocks.generateVideo).toHaveBeenCalledWith(params); + expect(result.provider).toBe("video-plugin"); + expect(result.model).toBe("vid-v1"); + expect(result.attempts).toEqual([]); + expect(seenAuthStore).toEqual(authStore); + expect(result.videos).toEqual([ + { + buffer: Buffer.from("mp4-bytes"), + mimeType: "video/mp4", + fileName: "sample.mp4", + }, + ]); }); - it("delegates provider listing to the shared video-generation runtime", () => { + it("lists runtime video-generation providers through the provider registry", () => { const providers: VideoGenerationProvider[] = [ { id: "video-plugin", defaultModel: "vid-v1", - models: ["vid-v1", "vid-v2"], + models: ["vid-v1"], capabilities: { - maxDurationSeconds: 10, supportsAudio: true, }, generateVideo: async () => ({ @@ -63,10 +142,33 @@ describe("video-generation runtime facade", () => { }), }, ]; - mocks.listRuntimeVideoGenerationProviders.mockReturnValue(providers); - const params = { config: {} as OpenClawConfig }; + mocks.listVideoGenerationProviders.mockReturnValue(providers); - expect(listRuntimeVideoGenerationProviders(params)).toBe(providers); - expect(mocks.listRuntimeVideoGenerationProviders).toHaveBeenCalledWith(params); + expect(listRuntimeVideoGenerationProviders({ config: {} as OpenClawConfig })).toEqual( + providers, + ); + expect(mocks.listVideoGenerationProviders).toHaveBeenCalledWith({} as OpenClawConfig); + }); + + it("builds a generic config hint without hardcoded provider ids", async () => { + mocks.listVideoGenerationProviders.mockReturnValue([ + { + id: "motion-one", + defaultModel: "animate-v1", + capabilities: {}, + generateVideo: async () => ({ + videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }], + }), + }, + ]); + mocks.getProviderEnvVars.mockReturnValue(["MOTION_ONE_API_KEY"]); + + const promise = generateVideo({ cfg: {} as OpenClawConfig, prompt: "animate a cat" }); + + await expect(promise).rejects.toThrow("No video-generation model configured."); + await expect(promise).rejects.toThrow( + 'Set agents.defaults.videoGenerationModel.primary to a provider/model like "motion-one/animate-v1".', + ); + await expect(promise).rejects.toThrow("motion-one: MOTION_ONE_API_KEY"); }); }); diff --git a/src/video-generation/runtime.ts b/src/video-generation/runtime.ts index c44d19906ba..1d606f9df40 100644 --- a/src/video-generation/runtime.ts +++ b/src/video-generation/runtime.ts @@ -1,6 +1,192 @@ -export { - generateVideo, - listRuntimeVideoGenerationProviders, - type GenerateVideoParams, - type GenerateVideoRuntimeResult, -} from "../../extensions/video-generation-core/runtime-api.js"; +import type { AuthProfileStore } from "../agents/auth-profiles.js"; +import { describeFailoverError, isFailoverError } from "../agents/failover-error.js"; +import type { FallbackAttempt } from "../agents/model-fallback.types.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { + resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue, +} from "../config/model-input.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; +import { getProviderEnvVars } from "../secrets/provider-env-vars.js"; +import { parseVideoGenerationModelRef } from "./model-ref.js"; +import { getVideoGenerationProvider, listVideoGenerationProviders } from "./provider-registry.js"; +import type { + GeneratedVideoAsset, + VideoGenerationResolution, + VideoGenerationResult, + VideoGenerationSourceAsset, +} from "./types.js"; + +const log = createSubsystemLogger("video-generation"); + +export type GenerateVideoParams = { + cfg: OpenClawConfig; + prompt: string; + agentDir?: string; + authStore?: AuthProfileStore; + modelOverride?: string; + size?: string; + aspectRatio?: string; + resolution?: VideoGenerationResolution; + durationSeconds?: number; + audio?: boolean; + watermark?: boolean; + inputImages?: VideoGenerationSourceAsset[]; + inputVideos?: VideoGenerationSourceAsset[]; +}; + +export type GenerateVideoRuntimeResult = { + videos: GeneratedVideoAsset[]; + provider: string; + model: string; + attempts: FallbackAttempt[]; + metadata?: Record; +}; + +function resolveVideoGenerationCandidates(params: { + cfg: OpenClawConfig; + modelOverride?: string; +}): Array<{ provider: string; model: string }> { + const candidates: Array<{ provider: string; model: string }> = []; + const seen = new Set(); + const add = (raw: string | undefined) => { + const parsed = parseVideoGenerationModelRef(raw); + if (!parsed) { + return; + } + const key = `${parsed.provider}/${parsed.model}`; + if (seen.has(key)) { + return; + } + seen.add(key); + candidates.push(parsed); + }; + + add(params.modelOverride); + add(resolveAgentModelPrimaryValue(params.cfg.agents?.defaults?.videoGenerationModel)); + for (const fallback of resolveAgentModelFallbackValues( + params.cfg.agents?.defaults?.videoGenerationModel, + )) { + add(fallback); + } + return candidates; +} + +function throwVideoGenerationFailure(params: { + attempts: FallbackAttempt[]; + lastError: unknown; +}): never { + if (params.attempts.length <= 1 && params.lastError) { + throw params.lastError; + } + const summary = + params.attempts.length > 0 + ? params.attempts + .map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`) + .join(" | ") + : "unknown"; + throw new Error(`All video generation models failed (${params.attempts.length}): ${summary}`, { + cause: params.lastError instanceof Error ? params.lastError : undefined, + }); +} + +function buildNoVideoGenerationModelConfiguredMessage(cfg: OpenClawConfig): string { + const providers = listVideoGenerationProviders(cfg); + const sampleModel = providers.find( + (provider) => provider.id.trim().length > 0 && provider.defaultModel?.trim(), + ); + const sampleRef = sampleModel + ? `${sampleModel.id}/${sampleModel.defaultModel}` + : "/"; + const authHints = providers + .flatMap((provider) => { + const envVars = getProviderEnvVars(provider.id); + if (envVars.length === 0) { + return []; + } + return [`${provider.id}: ${envVars.join(" / ")}`]; + }) + .slice(0, 3); + return [ + `No video-generation model configured. Set agents.defaults.videoGenerationModel.primary to a provider/model like "${sampleRef}".`, + authHints.length > 0 + ? `If you want a specific provider, also configure that provider's auth/API key first (${authHints.join("; ")}).` + : "If you want a specific provider, also configure that provider's auth/API key first.", + ].join(" "); +} + +export function listRuntimeVideoGenerationProviders(params?: { config?: OpenClawConfig }) { + return listVideoGenerationProviders(params?.config); +} + +export async function generateVideo( + params: GenerateVideoParams, +): Promise { + const candidates = resolveVideoGenerationCandidates({ + cfg: params.cfg, + modelOverride: params.modelOverride, + }); + if (candidates.length === 0) { + throw new Error(buildNoVideoGenerationModelConfiguredMessage(params.cfg)); + } + + const attempts: FallbackAttempt[] = []; + let lastError: unknown; + + for (const candidate of candidates) { + const provider = getVideoGenerationProvider(candidate.provider, params.cfg); + if (!provider) { + const error = `No video-generation provider registered for ${candidate.provider}`; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error, + }); + lastError = new Error(error); + continue; + } + + try { + const result: VideoGenerationResult = await provider.generateVideo({ + provider: candidate.provider, + model: candidate.model, + prompt: params.prompt, + cfg: params.cfg, + agentDir: params.agentDir, + authStore: params.authStore, + size: params.size, + aspectRatio: params.aspectRatio, + resolution: params.resolution, + durationSeconds: params.durationSeconds, + audio: params.audio, + watermark: params.watermark, + inputImages: params.inputImages, + inputVideos: params.inputVideos, + }); + if (!Array.isArray(result.videos) || result.videos.length === 0) { + throw new Error("Video generation provider returned no videos."); + } + return { + videos: result.videos, + provider: candidate.provider, + model: result.model ?? candidate.model, + attempts, + metadata: result.metadata, + }; + } catch (err) { + lastError = err; + const described = isFailoverError(err) ? describeFailoverError(err) : undefined; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error: described?.message ?? (err instanceof Error ? err.message : String(err)), + reason: described?.reason, + status: described?.status, + code: described?.code, + }); + log.debug(`video-generation candidate failed: ${candidate.provider}/${candidate.model}`); + } + } + + throwVideoGenerationFailure({ attempts, lastError }); +} diff --git a/src/video-generation/types.ts b/src/video-generation/types.ts index fc538ac0946..179865b7d0d 100644 --- a/src/video-generation/types.ts +++ b/src/video-generation/types.ts @@ -18,6 +18,11 @@ export type VideoGenerationSourceAsset = { metadata?: Record; }; +export type VideoGenerationProviderConfiguredContext = { + cfg?: OpenClawConfig; + agentDir?: string; +}; + export type VideoGenerationRequest = { provider: string; model: string; @@ -61,5 +66,6 @@ export type VideoGenerationProvider = { defaultModel?: string; models?: string[]; capabilities: VideoGenerationProviderCapabilities; + isConfigured?: (ctx: VideoGenerationProviderConfiguredContext) => boolean; generateVideo: (req: VideoGenerationRequest) => Promise; };