feat: add vydra media provider

This commit is contained in:
Peter Steinberger 2026-04-06 02:15:51 +01:00
parent 7d2dc7a9fb
commit 9b2b22f350
No known key found for this signature in database
21 changed files with 1358 additions and 11 deletions

View File

@ -1272,6 +1272,7 @@
"providers/together",
"providers/venice",
"providers/vercel-ai-gateway",
"providers/vydra",
"providers/vllm",
"providers/volcengine",
"providers/xai",

View File

@ -62,6 +62,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi
- [Together AI](/providers/together)
- [Venice (Venice AI, privacy-focused)](/providers/venice)
- [Vercel AI Gateway](/providers/vercel-ai-gateway)
- [Vydra](/providers/vydra)
- [vLLM (local models)](/providers/vllm)
- [Volcengine (Doubao)](/providers/volcengine)
- [xAI](/providers/xai)

123
docs/providers/vydra.md Normal file
View File

@ -0,0 +1,123 @@
---
summary: "Use Vydra image, video, and speech in OpenClaw"
read_when:
- You want Vydra media generation in OpenClaw
- You need Vydra API key setup guidance
title: "Vydra"
---
# Vydra
The bundled Vydra plugin adds:
- image generation via `vydra/grok-imagine`
- video generation via `vydra/veo3` and `vydra/kling`
- speech synthesis via Vydra's ElevenLabs-backed TTS route
OpenClaw uses the same `VYDRA_API_KEY` for all three capabilities.
## Important base URL
Use `https://www.vydra.ai/api/v1`.
Vydra's apex host (`https://vydra.ai/api/v1`) currently redirects to `www`. Some HTTP clients drop `Authorization` on that cross-host redirect, which turns a valid API key into a misleading auth failure. The bundled plugin uses the `www` base URL directly to avoid that.
## Setup
Interactive onboarding:
```bash
openclaw onboard --auth-choice vydra-api-key
```
Or set the env var directly:
```bash
export VYDRA_API_KEY="vydra_live_..."
```
## Image generation
Default image model:
- `vydra/grok-imagine`
Set it as the default image provider:
```json5
{
agents: {
defaults: {
imageGenerationModel: {
primary: "vydra/grok-imagine",
},
},
},
}
```
Current bundled support is text-to-image only. Vydra's hosted edit routes expect remote image URLs, and OpenClaw does not add a Vydra-specific upload bridge in the bundled plugin yet.
See [Image Generation](/tools/image-generation) for shared tool behavior.
## Video generation
Registered video models:
- `vydra/veo3` for text-to-video
- `vydra/kling` for image-to-video
Set Vydra as the default video provider:
```json5
{
agents: {
defaults: {
videoGenerationModel: {
primary: "vydra/veo3",
},
},
},
}
```
Notes:
- `vydra/veo3` is bundled as text-to-video only.
- `vydra/kling` currently requires a remote image URL reference. Local file uploads are rejected up front.
- The bundled plugin stays conservative and does not forward undocumented style knobs such as aspect ratio, resolution, watermark, or generated audio.
See [Video Generation](/tools/video-generation) for shared tool behavior.
## Speech synthesis
Set Vydra as the speech provider:
```json5
{
messages: {
tts: {
provider: "vydra",
providers: {
vydra: {
apiKey: "${VYDRA_API_KEY}",
voiceId: "21m00Tcm4TlvDq8ikWAM",
},
},
},
},
}
```
Defaults:
- model: `elevenlabs/tts`
- voice id: `21m00Tcm4TlvDq8ikWAM`
The bundled plugin currently exposes one known-good default voice and returns MP3 audio files.
## Related
- [Provider Directory](/providers/index)
- [Image Generation](/tools/image-generation)
- [Video Generation](/tools/video-generation)

View File

@ -1,5 +1,5 @@
---
summary: "Generate and edit images using configured providers (OpenAI, Google Gemini, fal, MiniMax, ComfyUI)"
summary: "Generate and edit images using configured providers (OpenAI, Google Gemini, fal, MiniMax, ComfyUI, Vydra)"
read_when:
- Generating images via the agent
- Configuring image generation providers and models
@ -45,6 +45,7 @@ The agent calls `image_generate` automatically. No tool allow-listing needed —
| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` |
| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) |
| ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud |
| Vydra | `grok-imagine` | No | `VYDRA_API_KEY` |
Use `action: "list"` to inspect available providers and models at runtime:
@ -123,13 +124,13 @@ MiniMax image generation is available through both bundled MiniMax auth paths:
## Provider capabilities
| Capability | OpenAI | Google | fal | MiniMax | ComfyUI |
| --------------------- | -------------------- | -------------------- | ------------------- | -------------------------- | ---------------------------------- |
| Generate | Yes (up to 4) | Yes (up to 4) | Yes (up to 4) | Yes (up to 9) | Yes (workflow-defined outputs) |
| Edit/reference | Yes (up to 5 images) | Yes (up to 5 images) | Yes (1 image) | Yes (1 image, subject ref) | Yes (1 image, workflow-configured) |
| Size control | Yes | Yes | Yes | No | No |
| Aspect ratio | No | Yes | Yes (generate only) | Yes | No |
| Resolution (1K/2K/4K) | No | Yes | Yes | No | No |
| Capability | OpenAI | Google | fal | MiniMax | ComfyUI | Vydra |
| --------------------- | -------------------- | -------------------- | ------------------- | -------------------------- | ---------------------------------- | ----- |
| Generate | Yes (up to 4) | Yes (up to 4) | Yes (up to 4) | Yes (up to 9) | Yes (workflow-defined outputs) | Yes (1 image) |
| Edit/reference | Yes (up to 5 images) | Yes (up to 5 images) | Yes (1 image) | Yes (1 image, subject ref) | Yes (1 image, workflow-configured) | No |
| Size control | Yes | Yes | Yes | No | No | No |
| Aspect ratio | No | Yes | Yes (generate only) | Yes | No | No |
| Resolution (1K/2K/4K) | No | Yes | Yes | No | No | No |
## Related
@ -139,5 +140,6 @@ MiniMax image generation is available through both bundled MiniMax auth paths:
- [Google (Gemini)](/providers/google) — Gemini image provider setup
- [MiniMax](/providers/minimax) — MiniMax image provider setup
- [OpenAI](/providers/openai) — OpenAI Images provider setup
- [Vydra](/providers/vydra) — Vydra image, video, and speech setup
- [Configuration Reference](/gateway/configuration-reference#agent-defaults) — `imageGenerationModel` config
- [Models](/concepts/models) — model configuration and failover

View File

@ -1,5 +1,5 @@
---
summary: "Generate videos from text, images, or existing videos using 11 provider backends"
summary: "Generate videos from text, images, or existing videos using 12 provider backends"
read_when:
- Generating videos via the agent
- Configuring video generation providers and models
@ -9,7 +9,7 @@ title: "Video Generation"
# Video Generation
OpenClaw agents can generate videos from text prompts, reference images, or existing videos. Eleven provider backends are supported, each with different model options, input modes, and feature sets. The agent picks the right provider automatically based on your configuration and available API keys.
OpenClaw agents can generate videos from text prompts, reference images, or existing videos. Twelve provider backends are supported, each with different model options, input modes, and feature sets. The agent picks the right provider automatically based on your configuration and available API keys.
<Note>
The `video_generate` tool only appears when at least one video-generation provider is available. If you do not see it in your agent tools, set a provider API key or configure `agents.defaults.videoGenerationModel`.
@ -62,6 +62,7 @@ Outside of session-backed agent runs (for example, direct tool invocations), the
| Qwen | `wan2.6-t2v` | Yes | Yes (remote URL) | Yes (remote URL) | `QWEN_API_KEY` |
| Runway | `gen4.5` | Yes | 1 image | 1 video | `RUNWAYML_API_SECRET` |
| Together | `Wan-AI/Wan2.2-T2V-A14B` | Yes | 1 image | No | `TOGETHER_API_KEY` |
| Vydra | `veo3` | Yes | 1 image (`kling`) | No | `VYDRA_API_KEY` |
| xAI | `grok-imagine-video` | Yes | 1 image | 1 video | `XAI_API_KEY` |
Some providers accept additional or alternate API key env vars. See individual [provider pages](#related) for details.
@ -109,7 +110,7 @@ Not all providers support all parameters. Unsupported overrides are ignored on a
## Actions
- **generate** (default) -- create a video from the given prompt and optional reference inputs.
- **status** -- check the state of the in-flight video task for the current session without starting a new one.
- **status** -- check the state of the in-flight video task for the current session without starting another generation.
- **list** -- show available providers, models, and their capabilities.
## Model selection
@ -150,6 +151,7 @@ If a provider fails, the next candidate is tried automatically. If all candidate
| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. |
| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. |
| Together | Single image reference only. |
| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. |
| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. |
## Configuration
@ -189,6 +191,7 @@ openclaw config set agents.defaults.videoGenerationModel.primary "qwen/wan2.6-t2
- [Qwen](/providers/qwen)
- [Runway](/providers/runway)
- [Together AI](/providers/together)
- [Vydra](/providers/vydra)
- [xAI](/providers/xai)
- [Configuration Reference](/gateway/configuration-reference#agent-defaults)
- [Models](/concepts/models)

View File

@ -22,6 +22,7 @@ import openaiPlugin from "./openai/index.js";
import qwenPlugin from "./qwen/index.js";
import runwayPlugin from "./runway/index.js";
import togetherPlugin from "./together/index.js";
import vydraPlugin from "./vydra/index.js";
import xaiPlugin from "./xai/index.js";
const LIVE = isLiveTestEnabled();
@ -65,6 +66,7 @@ const CASES: LiveProviderCase[] = [
pluginName: "Together Provider",
providerId: "together",
},
{ plugin: vydraPlugin, pluginId: "vydra", pluginName: "Vydra Provider", providerId: "vydra" },
{ plugin: xaiPlugin, pluginId: "xai", pluginName: "xAI Plugin", providerId: "xai" },
]
.filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true))

View File

@ -0,0 +1,128 @@
import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime";
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildVydraImageGenerationProvider } from "./image-generation-provider.js";
// Unit tests for the Vydra image-generation provider. Auth resolution is
// stubbed and global fetch is mocked, so no network traffic occurs.
describe("vydra image-generation provider", () => {
  afterEach(() => {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });
  it("posts to the www api and downloads the generated image", async () => {
    vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "vydra-test-key",
      source: "env",
      mode: "api-key",
    });
    // Call order: create request (completed synchronously) -> image download.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(
        new Response(
          JSON.stringify({
            jobId: "job-123",
            status: "completed",
            imageUrl: "https://cdn.vydra.ai/generated/test.png",
          }),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          },
        ),
      )
      .mockResolvedValueOnce(
        new Response(Buffer.from("png-data"), {
          status: 200,
          headers: { "Content-Type": "image/png" },
        }),
      );
    vi.stubGlobal("fetch", fetchMock);
    const provider = buildVydraImageGenerationProvider();
    const result = await provider.generateImage({
      provider: "vydra",
      model: "grok-imagine",
      prompt: "draw a cat",
      cfg: {},
    });
    // The create request must target the www host (redirect-safe base URL).
    expect(fetchMock).toHaveBeenNthCalledWith(
      1,
      "https://www.vydra.ai/api/v1/models/grok-imagine",
      expect.objectContaining({
        method: "POST",
        body: JSON.stringify({
          prompt: "draw a cat",
          model: "text-to-image",
        }),
      }),
    );
    const [, init] = fetchMock.mock.calls[0] as [string, RequestInit];
    const headers = new Headers(init.headers);
    expect(headers.get("authorization")).toBe("Bearer vydra-test-key");
    expect(result).toEqual({
      images: [
        {
          buffer: Buffer.from("png-data"),
          mimeType: "image/png",
          fileName: "image-1.png",
        },
      ],
      model: "grok-imagine",
      metadata: {
        jobId: "job-123",
        imageUrl: "https://cdn.vydra.ai/generated/test.png",
        status: "completed",
      },
    });
  });
  it("polls jobs when the create response is not completed yet", async () => {
    vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "vydra-test-key",
      source: "env",
      mode: "api-key",
    });
    // Call order: create (queued) -> job status poll (completed) -> download.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(
        new Response(JSON.stringify({ jobId: "job-456", status: "queued" }), {
          status: 200,
          headers: { "Content-Type": "application/json" },
        }),
      )
      .mockResolvedValueOnce(
        new Response(
          JSON.stringify({
            jobId: "job-456",
            status: "completed",
            resultUrls: ["https://cdn.vydra.ai/generated/polled.png"],
          }),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          },
        ),
      )
      .mockResolvedValueOnce(
        new Response(Buffer.from("png-data"), {
          status: 200,
          headers: { "Content-Type": "image/png" },
        }),
      );
    vi.stubGlobal("fetch", fetchMock);
    const provider = buildVydraImageGenerationProvider();
    await provider.generateImage({
      provider: "vydra",
      model: "grok-imagine",
      prompt: "draw a cat",
      cfg: {},
    });
    // The second fetch must be the job status poll against /jobs/<id>.
    expect(fetchMock).toHaveBeenNthCalledWith(
      2,
      "https://www.vydra.ai/api/v1/jobs/job-456",
      expect.objectContaining({ method: "GET" }),
    );
  });
});

View File

@ -0,0 +1,152 @@
import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
assertOkOrThrowHttpError,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import {
DEFAULT_VYDRA_BASE_URL,
DEFAULT_VYDRA_IMAGE_MODEL,
downloadVydraAsset,
extractVydraResultUrls,
resolveVydraBaseUrlFromConfig,
resolveVydraErrorMessage,
resolveVydraResponseJobId,
resolveVydraResponseStatus,
waitForVydraJob,
} from "./shared.js";
/**
 * Build the bundled Vydra image-generation provider.
 *
 * Capabilities: text-to-image only, one image per request, no size/aspect/
 * resolution controls. Submits the prompt to the Vydra `models/<model>`
 * endpoint, polls the job if the create response is not already completed,
 * then downloads the resulting image asset.
 *
 * Change from the original: the conditional `await (() => {...})()` IIFE is
 * replaced with explicit control flow — same behavior, but the sync-throw
 * path for a missing job id is now obvious.
 */
export function buildVydraImageGenerationProvider(): ImageGenerationProvider {
  return {
    id: "vydra",
    label: "Vydra",
    defaultModel: DEFAULT_VYDRA_IMAGE_MODEL,
    models: [DEFAULT_VYDRA_IMAGE_MODEL],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "vydra",
        agentDir,
      }),
    capabilities: {
      generate: {
        maxCount: 1,
        supportsSize: false,
        supportsAspectRatio: false,
        supportsResolution: false,
      },
      edit: {
        enabled: false,
        maxCount: 1,
        maxInputImages: 0,
        supportsSize: false,
        supportsAspectRatio: false,
        supportsResolution: false,
      },
    },
    async generateImage(req) {
      // Reject unsupported modes up front so callers get a clear error
      // instead of a confusing remote API failure.
      if ((req.inputImages?.length ?? 0) > 0) {
        throw new Error(
          "Vydra image generation currently supports text-to-image only in the bundled plugin.",
        );
      }
      if ((req.count ?? 1) > 1) {
        throw new Error("Vydra image generation supports at most one image per request.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "vydra",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("Vydra API key missing");
      }
      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          // Always resolves to the www host; see shared.ts for the
          // auth-dropping-redirect rationale.
          baseUrl: resolveVydraBaseUrlFromConfig(req.cfg),
          defaultBaseUrl: DEFAULT_VYDRA_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "vydra",
          capability: "image",
          transport: "http",
        });
      const model = req.model?.trim() || DEFAULT_VYDRA_IMAGE_MODEL;
      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/models/${model}`,
        headers,
        body: {
          prompt: req.prompt,
          model: "text-to-image",
        },
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "Vydra image generation failed");
        const submitted = await response.json();
        // Some creates finish synchronously; otherwise poll the job until done.
        let completedPayload: unknown = submitted;
        const completedOnCreate =
          resolveVydraResponseStatus(submitted) === "completed" ||
          extractVydraResultUrls(submitted, "image").length > 0;
        if (!completedOnCreate) {
          const jobId = resolveVydraResponseJobId(submitted);
          if (!jobId) {
            throw new Error(
              resolveVydraErrorMessage(submitted) ??
                "Vydra image generation response missing job id",
            );
          }
          completedPayload = await waitForVydraJob({
            baseUrl,
            jobId,
            headers,
            timeoutMs: req.timeoutMs,
            fetchFn,
            kind: "image",
          });
        }
        const imageUrl = extractVydraResultUrls(completedPayload, "image")[0];
        if (!imageUrl) {
          throw new Error("Vydra image generation completed without an image URL");
        }
        const image = await downloadVydraAsset({
          url: imageUrl,
          kind: "image",
          timeoutMs: req.timeoutMs,
          fetchFn,
        });
        return {
          images: [
            {
              buffer: image.buffer,
              mimeType: image.mimeType,
              fileName: image.fileName,
            },
          ],
          model,
          metadata: {
            jobId:
              resolveVydraResponseJobId(completedPayload) ?? resolveVydraResponseJobId(submitted),
            imageUrl,
            status: resolveVydraResponseStatus(completedPayload) ?? "completed",
          },
        };
      } finally {
        // Always release pooled HTTP resources, even on failure.
        await release();
      }
    },
  };
}

49
extensions/vydra/index.ts Normal file
View File

@ -0,0 +1,49 @@
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth-api-key";
import { buildVydraImageGenerationProvider } from "./image-generation-provider.js";
import { applyVydraConfig, VYDRA_DEFAULT_IMAGE_MODEL_REF } from "./onboard.js";
import { buildVydraSpeechProvider } from "./speech-provider.js";
import { buildVydraVideoGenerationProvider } from "./video-generation-provider.js";
const PROVIDER_ID = "vydra";

/**
 * Bundled Vydra plugin entry: registers the provider with API-key auth
 * (including an onboarding wizard choice) plus its speech, image, and video
 * generation capabilities. A single VYDRA_API_KEY powers all three.
 */
export default definePluginEntry({
  id: PROVIDER_ID,
  name: "Vydra Provider",
  description: "Bundled Vydra image, video, and speech provider",
  register(api) {
    api.registerProvider({
      id: PROVIDER_ID,
      label: "Vydra",
      docsPath: "/providers/vydra",
      envVars: ["VYDRA_API_KEY"],
      auth: [
        createProviderApiKeyAuthMethod({
          providerId: PROVIDER_ID,
          methodId: "api-key",
          label: "Vydra API key",
          hint: "Image, video, and speech API key",
          optionKey: "vydraApiKey",
          flagName: "--vydra-api-key",
          envVar: "VYDRA_API_KEY",
          promptMessage: "Enter Vydra API key",
          // Onboarding sets this image model as default when none is picked;
          // see applyVydraConfig in onboard.ts.
          defaultModel: VYDRA_DEFAULT_IMAGE_MODEL_REF,
          expectedProviders: [PROVIDER_ID],
          applyConfig: (cfg) => applyVydraConfig(cfg),
          wizard: {
            choiceId: "vydra-api-key",
            choiceLabel: "Vydra API key",
            choiceHint: "Image, video, and speech API key",
            groupId: "vydra",
            groupLabel: "Vydra",
            groupHint: "Image, video, and speech",
            onboardingScopes: ["image-generation"],
          },
        }),
      ],
    });
    // Register all three media capabilities for this provider id.
    api.registerSpeechProvider(buildVydraSpeechProvider());
    api.registerImageGenerationProvider(buildVydraImageGenerationProvider());
    api.registerVideoGenerationProvider(buildVydraVideoGenerationProvider());
  },
});

View File

@ -0,0 +1,21 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/provider-onboard";
export const VYDRA_DEFAULT_IMAGE_MODEL_REF = "vydra/grok-imagine";

/**
 * Onboarding hook: default the image-generation model to Vydra's bundled
 * image model, unless the user already configured one. Returns the input
 * config unchanged when a default image model is present.
 */
export function applyVydraConfig(cfg: OpenClawConfig): OpenClawConfig {
  const existingModel = cfg.agents?.defaults?.imageGenerationModel;
  if (existingModel) {
    return cfg;
  }
  const defaults = {
    ...cfg.agents?.defaults,
    imageGenerationModel: { primary: VYDRA_DEFAULT_IMAGE_MODEL_REF },
  };
  return {
    ...cfg,
    agents: { ...cfg.agents, defaults },
  };
}

View File

@ -0,0 +1,34 @@
{
"id": "vydra",
"enabledByDefault": true,
"providers": ["vydra"],
"providerAuthEnvVars": {
"vydra": ["VYDRA_API_KEY"]
},
"providerAuthChoices": [
{
"provider": "vydra",
"method": "api-key",
"choiceId": "vydra-api-key",
"choiceLabel": "Vydra API key",
"groupId": "vydra",
"groupLabel": "Vydra",
"groupHint": "Image, video, and speech",
"onboardingScopes": ["image-generation"],
"optionKey": "vydraApiKey",
"cliFlag": "--vydra-api-key",
"cliOption": "--vydra-api-key <key>",
"cliDescription": "Vydra API key"
}
],
"contracts": {
"speechProviders": ["vydra"],
"imageGenerationProviders": ["vydra"],
"videoGenerationProviders": ["vydra"]
},
"configSchema": {
"type": "object",
"additionalProperties": false,
"properties": {}
}
}

View File

@ -0,0 +1,12 @@
{
"name": "@openclaw/vydra-provider",
"version": "2026.4.6",
"private": true,
"description": "OpenClaw Vydra media provider plugin",
"type": "module",
"openclaw": {
"extensions": [
"./index.ts"
]
}
}

View File

@ -0,0 +1,20 @@
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
// Contract test: keeps the vydra plugin manifest and its runtime
// registrations (provider, speech, image, video) in sync with the shared
// registration contract used by all bundled provider plugins.
describePluginRegistrationContract({
  pluginId: "vydra",
  providerIds: ["vydra"],
  speechProviderIds: ["vydra"],
  imageGenerationProviderIds: ["vydra"],
  videoGenerationProviderIds: ["vydra"],
  requireSpeechVoices: true,
  requireGenerateImage: true,
  requireGenerateVideo: true,
  manifestAuthChoice: {
    pluginId: "vydra",
    choiceId: "vydra-api-key",
    choiceLabel: "Vydra API key",
    groupId: "vydra",
    groupLabel: "Vydra",
    groupHint: "Image, video, and speech",
  },
});

218
extensions/vydra/shared.ts Normal file
View File

@ -0,0 +1,218 @@
import { assertOkOrThrowHttpError, fetchWithTimeout } from "openclaw/plugin-sdk/provider-http";
export const DEFAULT_VYDRA_BASE_URL = "https://www.vydra.ai/api/v1";
export const DEFAULT_VYDRA_IMAGE_MODEL = "grok-imagine";
export const DEFAULT_VYDRA_VIDEO_MODEL = "veo3";
export const DEFAULT_VYDRA_SPEECH_MODEL = "elevenlabs/tts";
export const DEFAULT_VYDRA_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
export const DEFAULT_HTTP_TIMEOUT_MS = 120_000;
const POLL_INTERVAL_MS = 2_500;
const MAX_POLL_ATTEMPTS = 120;
type VydraMediaKind = "audio" | "image" | "video";
type VydraJobPayload = {
id?: string;
jobId?: string;
status?: string;
message?: string;
error?: string | { message?: string; detail?: string } | null;
};
/** Narrow an unknown value to a plain string-keyed record; arrays and null are excluded. */
function asObject(value: unknown): Record<string, unknown> | undefined {
  if (value === null || Array.isArray(value) || typeof value !== "object") {
    return undefined;
  }
  return value as Record<string, unknown>;
}
/** Collect http(s) URL strings from a value (string or arbitrarily nested array) into `urls`. */
function addUrlValue(value: unknown, urls: Set<string>): void {
  if (Array.isArray(value)) {
    value.forEach((entry) => addUrlValue(entry, urls));
    return;
  }
  if (typeof value !== "string") {
    return;
  }
  const candidate = value.trim();
  // Only absolute http/https URLs are accepted.
  if (/^https?:\/\//iu.test(candidate)) {
    urls.add(candidate);
  }
}
/** Trim a value if it is a string; return undefined for blanks and non-strings. */
export function trimToUndefined(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
/**
 * Normalize a configured Vydra base URL: force the `www` host (the apex host
 * redirects and some clients drop Authorization across that redirect), strip
 * trailing slashes, and default the path to `/api/v1`. Invalid or blank
 * input falls back to DEFAULT_VYDRA_BASE_URL.
 */
export function normalizeVydraBaseUrl(value: string | undefined): string {
  const candidate = trimToUndefined(value);
  if (!candidate) {
    return DEFAULT_VYDRA_BASE_URL;
  }
  let url: URL;
  try {
    url = new URL(candidate);
  } catch {
    return DEFAULT_VYDRA_BASE_URL;
  }
  if (url.hostname === "vydra.ai") {
    url.hostname = "www.vydra.ai";
  }
  const strippedPath = url.pathname.replace(/\/+$/u, "");
  url.pathname = strippedPath || "/api/v1";
  return url.toString().replace(/\/$/u, "");
}
/** Read `models.providers.vydra.baseUrl` from an untyped config and normalize it. */
export function resolveVydraBaseUrlFromConfig(cfg: unknown): string {
  const vydraSection = asObject(asObject(asObject(asObject(cfg)?.models)?.providers)?.vydra);
  return normalizeVydraBaseUrl(trimToUndefined(vydraSection?.baseUrl));
}
export function resolveVydraResponseJobId(payload: unknown): string | undefined {
const object = asObject(payload) as VydraJobPayload | undefined;
return trimToUndefined(object?.jobId) ?? trimToUndefined(object?.id);
}
/** Extract a lowercase status string from a Vydra response payload, if present. */
export function resolveVydraResponseStatus(payload: unknown): string | undefined {
  const status = trimToUndefined(asObject(payload)?.status);
  return status?.toLowerCase();
}
export function resolveVydraErrorMessage(payload: unknown): string | undefined {
const object = asObject(payload) as VydraJobPayload | undefined;
const error = object?.error;
if (typeof error === "string" && error.trim()) {
return error.trim();
}
const errorObject = asObject(error);
return (
trimToUndefined(errorObject?.message) ??
trimToUndefined(errorObject?.detail) ??
trimToUndefined(object?.message)
);
}
export function extractVydraResultUrls(payload: unknown, kind: VydraMediaKind): string[] {
const urls = new Set<string>();
const preferredKeys =
kind === "audio"
? ["audioUrl", "audioUrls"]
: kind === "image"
? ["imageUrl", "imageUrls"]
: ["videoUrl", "videoUrls"];
const sharedKeys = ["resultUrl", "resultUrls", "outputUrl", "outputUrls", "url", "urls"];
const recurseKeys = ["output", "outputs", "result", "results", "data", "asset", "assets"];
const visit = (value: unknown, depth = 0) => {
if (depth > 5) {
return;
}
if (Array.isArray(value)) {
for (const entry of value) {
visit(entry, depth + 1);
}
return;
}
const object = asObject(value);
if (!object) {
return;
}
for (const key of [...preferredKeys, ...sharedKeys]) {
addUrlValue(object[key], urls);
}
for (const key of recurseKeys) {
if (key in object) {
visit(object[key], depth + 1);
}
}
};
visit(payload);
return [...urls];
}
/**
 * Map a downloaded asset's MIME type to a file extension, falling back to a
 * per-kind default (png/mp3/mp4) when the MIME type is unrecognized.
 *
 * Fix: also match the non-standard but common "image/jpg", which previously
 * fell through to the "png" default.
 */
function inferExtension(kind: VydraMediaKind, mimeType: string): string {
  const normalized = mimeType.toLowerCase();
  if (normalized.includes("jpeg") || normalized.includes("jpg")) {
    return "jpg";
  }
  if (normalized.includes("webp")) {
    return "webp";
  }
  if (normalized.includes("wav")) {
    return "wav";
  }
  // "mpeg" covers audio/mpeg; check after wav so audio types resolve correctly.
  if (normalized.includes("mpeg") || normalized.includes("mp3")) {
    return "mp3";
  }
  if (normalized.includes("webm")) {
    return "webm";
  }
  if (normalized.includes("quicktime")) {
    return "mov";
  }
  return kind === "image" ? "png" : kind === "audio" ? "mp3" : "mp4";
}
/**
 * Download a generated Vydra asset and package it with a MIME type and file
 * name. The MIME type comes from the response Content-Type header, falling
 * back to a per-kind default; the extension is inferred from the MIME type.
 */
export async function downloadVydraAsset(params: {
  url: string;
  kind: VydraMediaKind;
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<{ buffer: Buffer; mimeType: string; fileName: string }> {
  const { url, kind, timeoutMs, fetchFn } = params;
  const response = await fetchWithTimeout(
    url,
    { method: "GET" },
    timeoutMs ?? DEFAULT_HTTP_TIMEOUT_MS,
    fetchFn,
  );
  await assertOkOrThrowHttpError(response, `Vydra ${kind} download failed`);
  const fallbackMime =
    kind === "image" ? "image/png" : kind === "audio" ? "audio/mpeg" : "video/mp4";
  const mimeType = response.headers.get("content-type")?.trim() || fallbackMime;
  const bytes = Buffer.from(await response.arrayBuffer());
  const fileStem = kind === "image" ? "image" : kind === "audio" ? "audio" : "video";
  return {
    buffer: bytes,
    mimeType,
    fileName: `${fileStem}-1.${inferExtension(kind, mimeType)}`,
  };
}
/**
 * Poll a Vydra job until it completes, fails, or the attempt budget runs out.
 * A job counts as complete when its status is "completed" or any result URL
 * for the requested media kind is present. Terminal failure statuses
 * ("failed"/"error"/"cancelled") raise with the payload's error message.
 */
export async function waitForVydraJob(params: {
  baseUrl: string;
  jobId: string;
  headers: Headers;
  timeoutMs?: number;
  fetchFn: typeof fetch;
  kind: VydraMediaKind;
}): Promise<unknown> {
  const requestTimeout = params.timeoutMs ?? DEFAULT_HTTP_TIMEOUT_MS;
  const statusUrl = `${params.baseUrl}/jobs/${params.jobId}`;
  let remaining = MAX_POLL_ATTEMPTS;
  while (remaining > 0) {
    remaining -= 1;
    const response = await fetchWithTimeout(
      statusUrl,
      { method: "GET", headers: params.headers },
      requestTimeout,
      params.fetchFn,
    );
    await assertOkOrThrowHttpError(response, "Vydra job status request failed");
    const payload = await response.json();
    const status = resolveVydraResponseStatus(payload);
    const hasResults = extractVydraResultUrls(payload, params.kind).length > 0;
    if (status === "completed" || hasResults) {
      return payload;
    }
    if (status === "failed" || status === "error" || status === "cancelled") {
      throw new Error(resolveVydraErrorMessage(payload) ?? `Vydra job ${params.jobId} failed`);
    }
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }
  throw new Error(`Vydra job ${params.jobId} did not finish in time`);
}

View File

@ -0,0 +1,71 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildVydraSpeechProvider } from "./speech-provider.js";
// Unit tests for the Vydra speech provider. Global fetch is mocked so no
// network traffic occurs; the API key is supplied via providerConfig.
describe("vydra speech provider", () => {
  const provider = buildVydraSpeechProvider();
  afterEach(() => {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });
  it("exposes the default voice and model", async () => {
    expect(provider.models).toEqual(["elevenlabs/tts"]);
    const voices = await provider.listVoices?.({});
    expect(voices).toEqual([
      {
        id: "21m00Tcm4TlvDq8ikWAM",
        name: "Rachel",
      },
    ]);
  });
  it("posts to the tts endpoint and downloads the audio", async () => {
    // Call order: TTS request returning the audio URL -> audio download.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(
        new Response(
          JSON.stringify({
            audioUrl: "https://cdn.vydra.ai/generated/test.mp3",
          }),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          },
        ),
      )
      .mockResolvedValueOnce(
        new Response(Buffer.from("mp3-data"), {
          status: 200,
          headers: { "Content-Type": "audio/mpeg" },
        }),
      );
    vi.stubGlobal("fetch", fetchMock);
    const result = await provider.synthesize({
      text: "OpenClaw test",
      cfg: {} as never,
      providerConfig: { apiKey: "vydra-test-key" },
      target: "audio-file",
      timeoutMs: 30_000,
    });
    // The request must hit the www host and carry the default voice id.
    expect(fetchMock).toHaveBeenNthCalledWith(
      1,
      "https://www.vydra.ai/api/v1/models/elevenlabs/tts",
      expect.objectContaining({
        method: "POST",
        body: JSON.stringify({
          text: "OpenClaw test",
          voice_id: "21m00Tcm4TlvDq8ikWAM",
        }),
      }),
    );
    const [, init] = fetchMock.mock.calls[0] as [string, RequestInit];
    const headers = new Headers(init.headers);
    expect(headers.get("authorization")).toBe("Bearer vydra-test-key");
    expect(result.outputFormat).toBe("mp3");
    expect(result.fileExtension).toBe(".mp3");
    expect(result.audioBuffer).toEqual(Buffer.from("mp3-data"));
  });
});

View File

@ -0,0 +1,157 @@
import {
assertOkOrThrowHttpError,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
import type {
SpeechProviderConfig,
SpeechProviderOverrides,
SpeechProviderPlugin,
} from "openclaw/plugin-sdk/speech-core";
import {
DEFAULT_VYDRA_BASE_URL,
DEFAULT_VYDRA_SPEECH_MODEL,
DEFAULT_VYDRA_VOICE_ID,
downloadVydraAsset,
extractVydraResultUrls,
normalizeVydraBaseUrl,
trimToUndefined,
} from "./shared.js";
type VydraSpeechConfig = {
apiKey?: string;
baseUrl: string;
model: string;
voiceId: string;
};
const VYDRA_SPEECH_VOICES = [
{
id: DEFAULT_VYDRA_VOICE_ID,
name: "Rachel",
},
] as const;
/** Return the value as a string-keyed record, or undefined for arrays, null, and primitives. */
function asObject(value: unknown): Record<string, unknown> | undefined {
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  return isPlainObject ? (value as Record<string, unknown>) : undefined;
}
/**
 * Build a speech config from raw TTS settings. Reads from
 * `providers.vydra` (preferred) or a top-level `vydra` key, falling back to
 * VYDRA_BASE_URL / VYDRA_TTS_MODEL / VYDRA_TTS_VOICE_ID env vars and then
 * to the bundled defaults.
 */
function normalizeVydraSpeechConfig(rawConfig: Record<string, unknown>): VydraSpeechConfig {
  const nestedProviders = asObject(rawConfig.providers);
  const source = asObject(nestedProviders?.vydra) ?? asObject(rawConfig.vydra);
  const apiKey = normalizeResolvedSecretInputString({
    value: source?.apiKey,
    path: "messages.tts.providers.vydra.apiKey",
  });
  const baseUrl = normalizeVydraBaseUrl(
    trimToUndefined(source?.baseUrl) ?? trimToUndefined(process.env.VYDRA_BASE_URL),
  );
  const model =
    trimToUndefined(source?.model) ??
    trimToUndefined(process.env.VYDRA_TTS_MODEL) ??
    DEFAULT_VYDRA_SPEECH_MODEL;
  const voiceId =
    trimToUndefined(source?.voiceId) ??
    trimToUndefined(process.env.VYDRA_TTS_VOICE_ID) ??
    DEFAULT_VYDRA_VOICE_ID;
  return { apiKey, baseUrl, model, voiceId };
}
/** Merge a resolved provider config with env-derived defaults; explicit config wins. */
function readVydraSpeechConfig(config: SpeechProviderConfig): VydraSpeechConfig {
  // Empty raw config yields the env/default-derived fallback values.
  const fallback = normalizeVydraSpeechConfig({});
  const apiKey = trimToUndefined(config.apiKey) ?? fallback.apiKey;
  const baseUrl = normalizeVydraBaseUrl(trimToUndefined(config.baseUrl) ?? fallback.baseUrl);
  const model = trimToUndefined(config.model) ?? fallback.model;
  const voiceId = trimToUndefined(config.voiceId) ?? fallback.voiceId;
  return { apiKey, baseUrl, model, voiceId };
}
/** Extract per-request model/voice overrides; absent overrides yield an empty object. */
function readVydraOverrides(overrides: SpeechProviderOverrides | undefined): {
  model?: string;
  voiceId?: string;
} {
  return overrides
    ? {
        model: trimToUndefined(overrides.model),
        voiceId: trimToUndefined(overrides.voiceId),
      }
    : {};
}
/**
 * Build the bundled Vydra speech (TTS) provider.
 *
 * Posts text to the Vydra `models/<model>` endpoint (ElevenLabs-backed
 * default model), then downloads the audio URL from the response payload.
 * Output is MP3 unless the downloaded asset reports a WAV MIME type.
 */
export function buildVydraSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "vydra",
    label: "Vydra",
    models: [DEFAULT_VYDRA_SPEECH_MODEL],
    voices: VYDRA_SPEECH_VOICES.map((voice) => voice.id),
    resolveConfig: ({ rawConfig }) => normalizeVydraSpeechConfig(rawConfig),
    // Copy the voice entries so callers cannot mutate the module constant.
    listVoices: async () => VYDRA_SPEECH_VOICES.map((voice) => ({ ...voice })),
    isConfigured: ({ providerConfig }) =>
      Boolean(readVydraSpeechConfig(providerConfig).apiKey || process.env.VYDRA_API_KEY),
    synthesize: async (req) => {
      const config = readVydraSpeechConfig(req.providerConfig);
      const overrides = readVydraOverrides(req.providerOverrides);
      // Explicit provider config wins; the env var is the fallback.
      const apiKey = config.apiKey || process.env.VYDRA_API_KEY;
      if (!apiKey) {
        throw new Error("Vydra API key missing");
      }
      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: config.baseUrl,
          defaultBaseUrl: DEFAULT_VYDRA_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "vydra",
          capability: "audio",
          transport: "http",
        });
      const { response, release } = await postJsonRequest({
        // Per-request overrides take precedence over the resolved config.
        url: `${baseUrl}/models/${overrides.model ?? config.model}`,
        headers,
        body: {
          text: req.text,
          voice_id: overrides.voiceId ?? config.voiceId,
        },
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "Vydra speech synthesis failed");
        const payload = await response.json();
        const audioUrl = extractVydraResultUrls(payload, "audio")[0];
        if (!audioUrl) {
          throw new Error("Vydra speech synthesis response missing audio URL");
        }
        const audio = await downloadVydraAsset({
          url: audioUrl,
          kind: "audio",
          timeoutMs: req.timeoutMs,
          fetchFn,
        });
        return {
          audioBuffer: audio.buffer,
          // The downloaded asset's MIME type decides wav vs mp3 output.
          outputFormat: audio.mimeType.includes("wav") ? "wav" : "mp3",
          fileExtension: audio.fileName.endsWith(".wav") ? ".wav" : ".mp3",
          voiceCompatible: false,
        };
      } finally {
        // Always release pooled HTTP resources, even on failure.
        await release();
      }
    },
  };
}

View File

@ -0,0 +1,94 @@
import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime";
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildVydraVideoGenerationProvider } from "./video-generation-provider.js";
describe("vydra video-generation provider", () => {
  afterEach(() => {
    // Undo the fetch stub and auth spy so tests stay isolated.
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });
  it("submits veo3 jobs and downloads the completed video", async () => {
    // Bypass real credential resolution with a canned API key.
    vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "vydra-test-key",
      source: "env",
      mode: "api-key",
    });
    // Three-call fetch script: submit -> poll (completed) -> asset download.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(
        new Response(JSON.stringify({ jobId: "job-123", status: "processing" }), {
          status: 200,
          headers: { "Content-Type": "application/json" },
        }),
      )
      .mockResolvedValueOnce(
        new Response(
          JSON.stringify({
            jobId: "job-123",
            status: "completed",
            videoUrl: "https://cdn.vydra.ai/generated/test.mp4",
          }),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          },
        ),
      )
      .mockResolvedValueOnce(
        new Response(Buffer.from("mp4-data"), {
          status: 200,
          headers: { "Content-Type": "video/mp4" },
        }),
      );
    vi.stubGlobal("fetch", fetchMock);
    const provider = buildVydraVideoGenerationProvider();
    const result = await provider.generateVideo({
      provider: "vydra",
      model: "veo3",
      prompt: "tiny city at sunrise",
      cfg: {},
    });
    // Submit goes to the www base URL (apex redirect drops auth headers).
    expect(fetchMock).toHaveBeenNthCalledWith(
      1,
      "https://www.vydra.ai/api/v1/models/veo3",
      expect.objectContaining({
        method: "POST",
        body: JSON.stringify({ prompt: "tiny city at sunrise" }),
      }),
    );
    // Second call polls the job endpoint returned by the submit response.
    expect(fetchMock).toHaveBeenNthCalledWith(
      2,
      "https://www.vydra.ai/api/v1/jobs/job-123",
      expect.objectContaining({ method: "GET" }),
    );
    expect(result.videos[0]?.mimeType).toBe("video/mp4");
    // Metadata echoes the job id, final URL, and terminal status.
    expect(result.metadata).toEqual({
      jobId: "job-123",
      videoUrl: "https://cdn.vydra.ai/generated/test.mp4",
      status: "completed",
    });
  });
  it("requires a remote image url for kling", async () => {
    vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "vydra-test-key",
      source: "env",
      mode: "api-key",
    });
    // fetch must never be hit: validation rejects before any HTTP call.
    vi.stubGlobal("fetch", vi.fn());
    const provider = buildVydraVideoGenerationProvider();
    // Buffer-only image inputs (no url) are rejected for kling.
    await expect(
      provider.generateVideo({
        provider: "vydra",
        model: "kling",
        prompt: "animate this image",
        cfg: {},
        inputImages: [{ buffer: Buffer.from("png"), mimeType: "image/png" }],
      }),
    ).rejects.toThrow("Vydra kling currently requires a remote image URL reference.");
  });
});

View File

@ -0,0 +1,165 @@
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
assertOkOrThrowHttpError,
postJsonRequest,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import type { VideoGenerationProvider } from "openclaw/plugin-sdk/video-generation";
import {
DEFAULT_VYDRA_BASE_URL,
DEFAULT_VYDRA_VIDEO_MODEL,
downloadVydraAsset,
extractVydraResultUrls,
resolveVydraBaseUrlFromConfig,
resolveVydraErrorMessage,
resolveVydraResponseJobId,
resolveVydraResponseStatus,
waitForVydraJob,
} from "./shared.js";
// Model id for Vydra's image-to-video route, which needs special input handling.
const VYDRA_KLING_MODEL = "kling";
/**
 * Builds the `{ model, body }` pair for a Vydra video submission.
 *
 * - kling: requires exactly one remote image URL reference (buffers are
 *   rejected); extra reference images beyond the first are ignored.
 * - all other models: text-to-video only; any image reference is an error.
 */
function resolveVydraVideoRequestBody(
  req: Parameters<VideoGenerationProvider["generateVideo"]>[0],
) {
  const model = req.model?.trim() || DEFAULT_VYDRA_VIDEO_MODEL;
  if (model !== VYDRA_KLING_MODEL) {
    // Text-to-video path: reject reference images outright.
    if ((req.inputImages?.length ?? 0) > 0) {
      throw new Error(
        `Vydra ${model} does not support image reference inputs in the bundled plugin.`,
      );
    }
    return {
      model,
      body: {
        prompt: req.prompt,
      },
    };
  }
  // Kling path: the first reference must carry a usable remote URL.
  const imageUrl = req.inputImages?.[0]?.url?.trim();
  if (!imageUrl) {
    throw new Error("Vydra kling currently requires a remote image URL reference.");
  }
  return {
    model,
    body: {
      prompt: req.prompt,
      image_url: imageUrl,
    },
  };
}
/**
 * Builds the Vydra video-generation provider.
 *
 * Supports the default text-to-video model plus the kling image-to-video
 * route. A job is submitted via `POST {baseUrl}/models/{model}`; if the
 * submit response does not already contain a finished result, the job is
 * polled (waitForVydraJob) until completion, then the video asset is
 * downloaded and returned as a buffer.
 */
export function buildVydraVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "vydra",
    label: "Vydra",
    defaultModel: DEFAULT_VYDRA_VIDEO_MODEL,
    models: [DEFAULT_VYDRA_VIDEO_MODEL, VYDRA_KLING_MODEL],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "vydra",
        agentDir,
      }),
    // One output video; at most one reference image; video references unsupported.
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 0,
    },
    async generateVideo(req) {
      if ((req.inputVideos?.length ?? 0) > 0) {
        throw new Error("Vydra video generation does not support video reference inputs.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "vydra",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("Vydra API key missing");
      }
      const fetchImpl = fetch;
      // Centralized HTTP policy: base URL, auth header, SSRF guard, dispatcher.
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolveVydraBaseUrlFromConfig(req.cfg),
          defaultBaseUrl: DEFAULT_VYDRA_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "vydra",
          capability: "video",
          transport: "http",
        });
      const { model, body } = resolveVydraVideoRequestBody(req);
      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/models/${model}`,
        headers,
        body,
        timeoutMs: req.timeoutMs,
        fetchFn: fetchImpl,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "Vydra video generation failed");
        const submitted = await response.json();
        // Some submissions finish inline; only poll when no result is present yet.
        const finishedOnSubmit =
          resolveVydraResponseStatus(submitted) === "completed" ||
          extractVydraResultUrls(submitted, "video").length > 0;
        let completedPayload = submitted;
        if (!finishedOnSubmit) {
          const jobId = resolveVydraResponseJobId(submitted);
          if (!jobId) {
            throw new Error(
              resolveVydraErrorMessage(submitted) ??
                "Vydra video generation response missing job id",
            );
          }
          completedPayload = await waitForVydraJob({
            baseUrl,
            jobId,
            headers,
            timeoutMs: req.timeoutMs,
            fetchFn: fetchImpl,
            kind: "video",
          });
        }
        const videoUrl = extractVydraResultUrls(completedPayload, "video")[0];
        if (!videoUrl) {
          throw new Error("Vydra video generation completed without a video URL");
        }
        const video = await downloadVydraAsset({
          url: videoUrl,
          kind: "video",
          timeoutMs: req.timeoutMs,
          fetchFn: fetchImpl,
        });
        return {
          videos: [
            {
              buffer: video.buffer,
              mimeType: video.mimeType,
              fileName: video.fileName,
            },
          ],
          model,
          metadata: {
            // Prefer the job id from the completed payload, falling back to submit.
            jobId:
              resolveVydraResponseJobId(completedPayload) ?? resolveVydraResponseJobId(submitted),
            videoUrl,
            status: resolveVydraResponseStatus(completedPayload) ?? "completed",
          },
        };
      } finally {
        // Release pooled request resources regardless of outcome.
        await release();
      }
    },
  };
}

View File

@ -0,0 +1,81 @@
import { describe, expect, it } from "vitest";
import { isLiveTestEnabled } from "../../src/agents/live-test-helpers.js";
import {
registerProviderPlugin,
requireRegisteredProvider,
} from "../../test/helpers/plugins/provider-registration.js";
import plugin from "./index.js";
// Live tests only run when the global live-test switch is on AND a real key exists.
const LIVE = isLiveTestEnabled();
const VYDRA_API_KEY = process.env.VYDRA_API_KEY?.trim() ?? "";
// Video live tests are opt-in separately (they are slow and billable).
const ENABLE_VYDRA_VIDEO_LIVE = process.env.OPENCLAW_LIVE_VYDRA_VIDEO === "1";
// Model ids can be overridden per-run via env; defaults match the plugin's.
const LIVE_IMAGE_MODEL = process.env.OPENCLAW_LIVE_VYDRA_IMAGE_MODEL?.trim() || "grok-imagine";
const LIVE_VIDEO_MODEL = process.env.OPENCLAW_LIVE_VYDRA_VIDEO_MODEL?.trim() || "veo3";
// Registers the bundled Vydra plugin and returns the provider registries.
const registerVydraPlugin = () =>
  registerProviderPlugin({
    plugin,
    id: "vydra",
    name: "Vydra Provider",
  });
// Live integration tests against the real Vydra API (skipped without a key).
describe.skipIf(!LIVE || !VYDRA_API_KEY)("vydra live", () => {
  it("generates an image through the registered provider", async () => {
    const { imageProviders } = await registerVydraPlugin();
    const provider = requireRegisteredProvider(imageProviders, "vydra");
    const result = await provider.generateImage({
      provider: "vydra",
      model: LIVE_IMAGE_MODEL,
      prompt: "Create a minimal flat orange square centered on a white background.",
      cfg: { plugins: { enabled: true } } as never,
      agentDir: "/tmp/openclaw-live-vydra-image",
    });
    // Loose checks only: real output varies, but it must be a non-trivial image.
    expect(result.images.length).toBeGreaterThan(0);
    expect(result.images[0]?.mimeType.startsWith("image/")).toBe(true);
    expect(result.images[0]?.buffer.byteLength).toBeGreaterThan(512);
  }, 60_000);
  it("synthesizes speech through the registered provider", async () => {
    const { speechProviders } = await registerVydraPlugin();
    const provider = requireRegisteredProvider(speechProviders, "vydra");
    const voices = await provider.listVoices?.({});
    // Sanity-check the static voice catalog includes the default voice id.
    expect(voices).toEqual(
      expect.arrayContaining([expect.objectContaining({ id: "21m00Tcm4TlvDq8ikWAM" })]),
    );
    const result = await provider.synthesize({
      text: "OpenClaw integration test OK.",
      cfg: { plugins: { enabled: true } } as never,
      providerConfig: { apiKey: VYDRA_API_KEY },
      target: "audio-file",
      timeoutMs: 45_000,
    });
    expect(result.outputFormat).toBe("mp3");
    expect(result.audioBuffer.byteLength).toBeGreaterThan(512);
  }, 60_000);
  // Video is extra-gated: slow (polling) and more expensive than image/speech.
  it.skipIf(!ENABLE_VYDRA_VIDEO_LIVE)(
    "generates a short video through the registered provider",
    async () => {
      const { videoProviders } = await registerVydraPlugin();
      const provider = requireRegisteredProvider(videoProviders, "vydra");
      const result = await provider.generateVideo({
        provider: "vydra",
        model: LIVE_VIDEO_MODEL,
        prompt:
          "A tiny paper diorama city at sunrise with slow cinematic camera motion and no text.",
        cfg: { plugins: { enabled: true } } as never,
        agentDir: "/tmp/openclaw-live-vydra-video",
      });
      expect(result.videos.length).toBeGreaterThan(0);
      expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true);
      expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024);
    },
    // Generous timeout: video jobs can take minutes to complete.
    8 * 60_000,
  );
});

View File

@ -4,6 +4,7 @@ import type { OpenClawConfig } from "../config/config.js";
// Default model refs used by the live image-generation sweep when neither an
// env override nor a configured model is provided for a given provider id.
export const DEFAULT_LIVE_IMAGE_MODELS: Record<string, string> = {
  google: "google/gemini-3.1-flash-image-preview",
  openai: "openai/gpt-image-1",
  vydra: "vydra/grok-imagine",
};
export function parseCaseFilter(raw?: string): Set<string> | null {

View File

@ -175,6 +175,18 @@ describeLive("image generation live (provider sweep)", () => {
size: "1024x1024",
});
}
if (availableProviders.includes("vydra")) {
liveCases.push({
id: "vydra:default-generate",
providerId: "vydra",
modelRef:
envModelMap.get("vydra") ??
configuredModels.get("vydra") ??
DEFAULT_LIVE_IMAGE_MODELS.vydra,
prompt:
"Create a minimal flat illustration of an orange cat face sticker on a white background.",
});
}
const selectedCases = liveCases.filter((entry) =>
caseFilter ? caseFilter.has(entry.id.toLowerCase()) : true,