mirror of https://github.com/openclaw/openclaw.git
feat(video): add xai and alibaba providers
This commit is contained in:
parent
5e0b58fbc6
commit
a62193c09e
|
|
@ -29,6 +29,10 @@ Docs: https://docs.openclaw.ai
|
|||
- Agents/Claude CLI: expose OpenClaw tools to background Claude CLI runs through a loopback MCP bridge and switch bundled runs to stdin + `stream-json` partial-message streaming so prompts stop riding argv, long replies show live progress, and final session/usage metadata still land cleanly. (#35676) Thanks @mylukin.
|
||||
- ACPX/runtime: embed the ACP runtime directly in the bundled `acpx` plugin, remove the extra external ACP CLI hop, harden live ACP session binding and reuse, and add a generic `reply_dispatch` hook so bundled plugins like ACPX can own reply interception without hardcoded ACP paths in core auto-reply routing. (#61319)
|
||||
- Config/schema: enrich the exported `openclaw config schema` JSON Schema with field titles and descriptions so editors, agents, and other schema consumers receive the same config help metadata. (#60067) Thanks @solavrc.
|
||||
- Agents/cache: add prompt-cache break diagnostics, trace live cache scenarios through embedded runner paths, and show cache reuse explicitly in `openclaw status --verbose`. Thanks @vincentkoc.
|
||||
- Agents/cache: stabilize cache-relevant system prompt fingerprints by normalizing equivalent structured prompt whitespace, line endings, hook-added system context, and runtime capability ordering so semantically unchanged prompts reuse KV/cache more reliably. Thanks @vincentkoc.
|
||||
- Agents/tool prompts: remove the duplicate in-band tool inventory from agent system prompts so tool-calling models rely on the structured tool definitions as the single source of truth, improving prompt stability and reducing stale tool guidance.
|
||||
- Tools/video generation: add bundled xAI (`grok-imagine-video`) and Alibaba Model Studio Wan video providers, plus live-test/default model wiring for both.
|
||||
- Providers/CLI: remove bundled CLI text-provider backends and the `agents.defaults.cliBackends` surface, while keeping ACP harness sessions and Gemini media understanding on the native bundled providers.
|
||||
- Matrix/exec approvals: clarify unavailable-approval replies so Matrix no longer claims chat approvals are unsupported when native exec approvals are merely unconfigured. (#61424) Thanks @gumadeiras.
|
||||
- Docs/IRC: replace public IRC hostname examples with `irc.example.com` and recommend private servers for bot coordination while listing common public networks for intentional use.
|
||||
|
|
@ -101,6 +105,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Agents/errors: surface an explicit disk-full message when local session or transcript writes fail with `ENOSPC`/`disk full`, so those runs stop degrading into opaque `NO_REPLY`-style failures. Thanks @vincentkoc.
|
||||
- Exec approvals: remove heuristic command-obfuscation gating from host exec so gateway and node runs rely on explicit policy, allowlist, and strict inline-eval rules only.
|
||||
- Agents/tool results: cap live tool-result persistence and overflow-recovery truncation at 40k characters so oversized tool output stays bounded without discarding recent context entirely.
|
||||
- Discord/video replies: split text-plus-video deliveries into a text reply followed by a media-only send, and let live provider auth checks honor manifest-declared API key env vars like `MODELSTUDIO_API_KEY`.
|
||||
- Config/All Settings: keep the raw config view intact when sensitive fields are blank instead of corrupting or dropping the rendered snapshot. (#28214) Thanks @solodmd.
|
||||
- Plugin SDK/facades: back-fill bundled plugin facade sentinels before plugin-id tracking re-enters config loading, so CLI/provider startup no longer crashes with `shouldNormalizeGoogleProviderConfig is not a function` or other empty-facade reads during bundled plugin re-entry. Thanks @adam91holt.
|
||||
- Plugins/facades: back-fill facade sentinels before tracked-plugin resolution re-enters config loading, so facade exports stay defined during circular provider normalization. (#61180) Thanks @adam91holt.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
summary: "Generate videos using configured providers such as OpenAI, Google, Qwen, and MiniMax"
|
||||
summary: "Generate videos using configured providers such as Alibaba, OpenAI, Google, Qwen, and MiniMax"
|
||||
read_when:
|
||||
- Generating videos via the agent
|
||||
- Configuring video generation providers and models
|
||||
|
|
@ -17,7 +17,7 @@ The tool only appears when at least one video-generation provider is available.
|
|||
|
||||
## Quick start
|
||||
|
||||
1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `QWEN_API_KEY`).
|
||||
1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, `MODELSTUDIO_API_KEY`, or `QWEN_API_KEY`).
|
||||
2. Optionally set your preferred model:
|
||||
|
||||
```json5
|
||||
|
|
@ -38,6 +38,7 @@ The agent calls `video_generate` automatically. No tool allow-listing needed —
|
|||
|
||||
| Provider | Default model | Reference inputs | API key |
|
||||
| -------- | ------------------------------- | ------------------ | ---------------------------------------------------------- |
|
||||
| Alibaba | `wan2.6-t2v` | Yes, remote URLs | `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY`, `QWEN_API_KEY` |
|
||||
| BytePlus | `seedance-1-0-lite-t2v-250428` | 1 image | `BYTEPLUS_API_KEY` |
|
||||
| fal | `fal-ai/minimax/video-01-live` | 1 image | `FAL_KEY` |
|
||||
| Google | `veo-3.1-fast-generate-preview` | 1 image or 1 video | `GEMINI_API_KEY`, `GOOGLE_API_KEY` |
|
||||
|
|
@ -45,6 +46,7 @@ The agent calls `video_generate` automatically. No tool allow-listing needed —
|
|||
| OpenAI | `sora-2` | 1 image or 1 video | `OPENAI_API_KEY` |
|
||||
| Qwen | `wan2.6-t2v` | Yes, remote URLs | `QWEN_API_KEY`, `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY` |
|
||||
| Together | `Wan-AI/Wan2.2-T2V-A14B` | 1 image | `TOGETHER_API_KEY` |
|
||||
| xAI | `grok-imagine-video` | 1 image or 1 video | `XAI_API_KEY` |
|
||||
|
||||
Use `action: "list"` to inspect available providers and models at runtime:
|
||||
|
||||
|
|
@ -105,10 +107,12 @@ If a provider fails, the next candidate is tried automatically. If all fail, the
|
|||
|
||||
## Provider notes
|
||||
|
||||
- OpenAI uses the native video endpoint and currently defaults to `sora-2`.
|
||||
- Alibaba uses the DashScope / Model Studio async video endpoint and currently requires remote `http(s)` URLs for reference assets.
|
||||
- Google uses Gemini/Veo and supports a single image or video reference input.
|
||||
- MiniMax, Together, BytePlus, and fal currently support a single image reference input.
|
||||
- OpenAI uses the native video endpoint and currently defaults to `sora-2`.
|
||||
- Qwen supports image/video references, but the upstream DashScope video endpoint currently requires remote `http(s)` URLs for those references.
|
||||
- xAI uses the native xAI video API and supports text-to-video, image-to-video, and remote video edit/extend flows.
|
||||
|
||||
## Qwen reference inputs
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { buildAlibabaVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
/**
 * Plugin entry for the bundled Alibaba Model Studio plugin.
 * Registration contributes a single video-generation provider.
 */
export default definePluginEntry({
  id: "alibaba",
  name: "Alibaba Model Studio Plugin",
  description: "Bundled Alibaba Model Studio video provider plugin",
  register: (api) => {
    const videoProvider = buildAlibabaVideoGenerationProvider();
    api.registerVideoGenerationProvider(videoProvider);
  },
});
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"id": "alibaba",
|
||||
"enabledByDefault": true,
|
||||
"providerAuthEnvVars": {
|
||||
"alibaba": ["MODELSTUDIO_API_KEY", "DASHSCOPE_API_KEY", "QWEN_API_KEY"]
|
||||
},
|
||||
"providerAuthChoices": [
|
||||
{
|
||||
"provider": "alibaba",
|
||||
"method": "api-key",
|
||||
"choiceId": "alibaba-model-studio-api-key",
|
||||
"choiceLabel": "Alibaba Model Studio API key",
|
||||
"groupId": "alibaba",
|
||||
"groupLabel": "Alibaba Model Studio",
|
||||
"groupHint": "DashScope / Model Studio API key",
|
||||
"optionKey": "alibabaModelStudioApiKey",
|
||||
"cliFlag": "--alibaba-model-studio-api-key",
|
||||
"cliOption": "--alibaba-model-studio-api-key <key>",
|
||||
"cliDescription": "Alibaba Model Studio API key"
|
||||
}
|
||||
],
|
||||
"contracts": {
|
||||
"videoGenerationProviders": ["alibaba"]
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"name": "@openclaw/alibaba-provider",
|
||||
"version": "2026.4.5",
|
||||
"private": true,
|
||||
"description": "OpenClaw Alibaba Model Studio video provider plugin",
|
||||
"type": "module",
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Registration contract: the "alibaba" plugin must register the "alibaba"
// video-generation provider, and that provider must implement generateVideo.
const ALIBABA_PLUGIN_ID = "alibaba";

describePluginRegistrationContract({
  pluginId: ALIBABA_PLUGIN_ID,
  requireGenerateVideo: true,
  videoGenerationProviderIds: [ALIBABA_PLUGIN_ID],
});
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildAlibabaVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
// Mocks are created inside vi.hoisted so they already exist when the vi.mock
// factories below are evaluated.
const {
  resolveApiKeyForProviderMock,
  postJsonRequestMock,
  fetchWithTimeoutMock,
  assertOkOrThrowHttpErrorMock,
  resolveProviderHttpRequestConfigMock,
} = vi.hoisted(() => {
  // Echoes the requested base URL (or the default) and wraps the default headers.
  const resolveProviderHttpRequestConfigMock = vi.fn((params) => ({
    baseUrl: params.baseUrl ?? params.defaultBaseUrl,
    allowPrivateNetwork: false,
    headers: new Headers(params.defaultHeaders),
    dispatcherPolicy: undefined,
  }));
  // Always resolves a fixed API key so auth never blocks the request path.
  const resolveApiKeyForProviderMock = vi.fn(async () => ({ apiKey: "alibaba-key" }));
  // Treats every response as OK.
  const assertOkOrThrowHttpErrorMock = vi.fn(async () => {});

  return {
    resolveApiKeyForProviderMock,
    postJsonRequestMock: vi.fn(),
    fetchWithTimeoutMock: vi.fn(),
    assertOkOrThrowHttpErrorMock,
    resolveProviderHttpRequestConfigMock,
  };
});

vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => {
  return { resolveApiKeyForProvider: resolveApiKeyForProviderMock };
});

vi.mock("openclaw/plugin-sdk/provider-http", () => {
  return {
    assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
    fetchWithTimeout: fetchWithTimeoutMock,
    postJsonRequest: postJsonRequestMock,
    resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
  };
});
|
||||
|
||||
describe("alibaba video generation provider", () => {
  afterEach(() => {
    // Per-test response queues are dropped entirely; mocks with default
    // implementations only have their call history cleared.
    postJsonRequestMock.mockReset();
    fetchWithTimeoutMock.mockReset();
    resolveApiKeyForProviderMock.mockClear();
    assertOkOrThrowHttpErrorMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("submits async Wan generation, polls task status, and downloads the resulting video", async () => {
    // Submit call: returns the task id that the provider polls afterwards.
    const submitPayload = {
      request_id: "req-1",
      output: {
        task_id: "task-1",
      },
    };
    postJsonRequestMock.mockResolvedValue({
      response: {
        json: async () => submitPayload,
      },
      release: vi.fn(async () => {}),
    });

    // First fetch is the status poll, second is the video download.
    const pollResponse = {
      json: async () => ({
        output: {
          task_status: "SUCCEEDED",
          results: [{ video_url: "https://example.com/out.mp4" }],
        },
      }),
      headers: new Headers(),
    };
    const downloadResponse = {
      arrayBuffer: async () => Buffer.from("mp4-bytes"),
      headers: new Headers({ "content-type": "video/mp4" }),
    };
    fetchWithTimeoutMock.mockResolvedValueOnce(pollResponse);
    fetchWithTimeoutMock.mockResolvedValueOnce(downloadResponse);

    const provider = buildAlibabaVideoGenerationProvider();
    const result = await provider.generateVideo({
      provider: "alibaba",
      model: "wan2.6-r2v-flash",
      prompt: "animate this shot",
      cfg: {},
      inputImages: [{ url: "https://example.com/ref.png" }],
      durationSeconds: 6,
      audio: true,
      watermark: false,
    });

    const expectedSubmitBody = expect.objectContaining({
      model: "wan2.6-r2v-flash",
      input: expect.objectContaining({
        prompt: "animate this shot",
        img_url: "https://example.com/ref.png",
      }),
      parameters: expect.objectContaining({
        duration: 6,
        enable_audio: true,
        watermark: false,
      }),
    });
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis",
        body: expectedSubmitBody,
      }),
    );
    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
      1,
      "https://dashscope-intl.aliyuncs.com/api/v1/tasks/task-1",
      expect.objectContaining({ method: "GET" }),
      120000,
      fetch,
    );
    expect(result.videos).toHaveLength(1);
    expect(result.videos[0]?.mimeType).toBe("video/mp4");
    expect(result.metadata).toEqual(
      expect.objectContaining({
        requestId: "req-1",
        taskId: "task-1",
        taskStatus: "SUCCEEDED",
      }),
    );
  });

  it("fails fast when reference inputs are local buffers instead of remote URLs", async () => {
    const provider = buildAlibabaVideoGenerationProvider();
    const localOnlyImage = { buffer: Buffer.from("png-bytes"), mimeType: "image/png" };

    const pending = provider.generateVideo({
      provider: "alibaba",
      model: "wan2.6-i2v",
      prompt: "animate this local frame",
      cfg: {},
      inputImages: [localOnlyImage],
    });

    await expect(pending).rejects.toThrow(
      "Alibaba Wan video generation currently requires remote http(s) URLs for reference images/videos.",
    );
    // The request must be rejected before anything is submitted upstream.
    expect(postJsonRequestMock).not.toHaveBeenCalled();
  });
});
|
||||
|
|
@ -0,0 +1,293 @@
|
|||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
VideoGenerationResult,
|
||||
VideoGenerationSourceAsset,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
|
||||
/** Base URL used when the request config does not supply one for the provider. */
const DEFAULT_ALIBABA_VIDEO_BASE_URL = "https://dashscope-intl.aliyuncs.com";

/** Model used when the request does not name one. */
const DEFAULT_ALIBABA_VIDEO_MODEL = "wan2.6-t2v";

/** Duration (seconds) submitted when the request leaves `durationSeconds` unset. */
const DEFAULT_DURATION_SECONDS = 5;

/** Fallback per-request timeout (ms) for task polls and video downloads. */
const DEFAULT_TIMEOUT_MS = 120_000;

/** Delay (ms) between task-status polls. */
const POLL_INTERVAL_MS = 2_500;

/** Maximum number of task-status polls before the task is reported as not finished. */
const MAX_POLL_ATTEMPTS = 120;

/** Maps a requested resolution label to the `width*height` size sent in request parameters. */
const RESOLUTION_TO_SIZE: Record<string, string> = {
  "480P": "832*480",
  "720P": "1280*720",
  "1080P": "1920*1080",
};
|
||||
|
||||
/** One entry of a task's `output.results` list. */
type AlibabaVideoGenerationResultEntry = {
  video_url?: string;
  orig_prompt?: string;
  actual_prompt?: string;
};

/**
 * Loose shape of the JSON bodies read from both the submit call and the
 * task-status poll. Every field is optional because the payload is cast from
 * unvalidated JSON; readers must guard each access.
 */
type AlibabaVideoGenerationResponse = {
  request_id?: string;
  code?: string;
  message?: string;
  output?: {
    task_id?: string;
    task_status?: string;
    submit_time?: string;
    results?: AlibabaVideoGenerationResultEntry[];
    video_url?: string;
    code?: string;
    message?: string;
  };
};
|
||||
|
||||
/**
 * Chooses the base URL for Alibaba video requests.
 *
 * @param req - Generation request whose config may carry `models.providers.alibaba.baseUrl`.
 * @returns The trimmed configured base URL when it is non-empty, otherwise the default base URL.
 */
function resolveAlibabaVideoBaseUrl(req: VideoGenerationRequest): string {
  const configured = req.cfg?.models?.providers?.alibaba?.baseUrl?.trim();
  if (configured) {
    return configured;
  }
  return DEFAULT_ALIBABA_VIDEO_BASE_URL;
}
|
||||
|
||||
/**
 * Strips every trailing `/` from a base URL so API paths can be appended with
 * a single leading slash.
 *
 * @param baseUrl - Base URL, possibly ending in one or more slashes.
 * @returns The base URL without trailing slashes.
 */
function resolveDashscopeAigcApiBaseUrl(baseUrl: string): string {
  let end = baseUrl.length;
  while (end > 0 && baseUrl.charAt(end - 1) === "/") {
    end -= 1;
  }
  return baseUrl.slice(0, end);
}
|
||||
|
||||
/**
 * Collects the usable reference URLs from the request's source assets.
 *
 * @param inputImages - Optional image reference assets.
 * @param inputVideos - Optional video reference assets.
 * @returns Trimmed, non-empty `url` values, images first and then videos, in input order.
 *   Assets without a usable URL are left out.
 */
function resolveReferenceUrls(
  inputImages: VideoGenerationSourceAsset[] | undefined,
  inputVideos: VideoGenerationSourceAsset[] | undefined,
): string[] {
  const urls: string[] = [];
  for (const asset of [...(inputImages ?? []), ...(inputVideos ?? [])]) {
    const url = asset.url?.trim();
    if (url) {
      urls.push(url);
    }
  }
  return urls;
}
|
||||
|
||||
/**
 * Rejects reference assets that only exist as in-memory buffers.
 *
 * @param inputImages - Optional image reference assets.
 * @param inputVideos - Optional video reference assets.
 * @throws Error when any asset has a buffer but no non-blank `url`.
 */
function assertAlibabaReferenceInputsSupported(
  inputImages: VideoGenerationSourceAsset[] | undefined,
  inputVideos: VideoGenerationSourceAsset[] | undefined,
): void {
  for (const asset of [...(inputImages ?? []), ...(inputVideos ?? [])]) {
    const hasUrl = Boolean(asset.url?.trim());
    // Assets with neither URL nor buffer are tolerated here.
    if (!hasUrl && asset.buffer) {
      throw new Error(
        "Alibaba Wan video generation currently requires remote http(s) URLs for reference images/videos.",
      );
    }
  }
}
|
||||
|
||||
/**
 * Builds the `input` section of the video-synthesis request body.
 *
 * The prompt is always included. A request with exactly one image, no videos,
 * and one usable URL sends it as `img_url`; any other non-empty set of
 * reference URLs is sent as `reference_urls`.
 *
 * @param req - Generation request.
 * @returns The `input` object for the request body.
 * @throws Error when a reference asset is buffer-only (no URL).
 */
function buildAlibabaVideoGenerationInput(req: VideoGenerationRequest): Record<string, unknown> {
  assertAlibabaReferenceInputsSupported(req.inputImages, req.inputVideos);

  const referenceUrls = resolveReferenceUrls(req.inputImages, req.inputVideos);
  if (referenceUrls.length === 0) {
    return { prompt: req.prompt };
  }

  const imageCount = req.inputImages?.length ?? 0;
  const videoCount = req.inputVideos?.length ?? 0;
  const isSingleImageReference = referenceUrls.length === 1 && imageCount === 1 && videoCount === 0;

  return isSingleImageReference
    ? { prompt: req.prompt, img_url: referenceUrls[0] }
    : { prompt: req.prompt, reference_urls: referenceUrls };
}
|
||||
|
||||
/**
 * Builds the optional `parameters` section of the video-synthesis request body.
 *
 * @param req - Generation request.
 * @returns An object holding only the parameters the request actually set, or
 *   `undefined` when none were set.
 */
function buildAlibabaVideoGenerationParameters(
  req: VideoGenerationRequest,
): Record<string, unknown> | undefined {
  const { durationSeconds, audio, watermark } = req;
  // An explicit size wins; otherwise the resolution label is mapped to a size.
  const explicitSize = req.size?.trim();
  const size = explicitSize || (req.resolution ? RESOLUTION_TO_SIZE[req.resolution] : undefined);
  const aspectRatio = req.aspectRatio?.trim();

  const parameters: Record<string, unknown> = {};
  if (size) {
    parameters.size = size;
  }
  if (aspectRatio) {
    parameters.aspect_ratio = aspectRatio;
  }
  if (typeof durationSeconds === "number" && Number.isFinite(durationSeconds)) {
    // Whole seconds, never below one.
    parameters.duration = Math.max(1, Math.round(durationSeconds));
  }
  if (typeof audio === "boolean") {
    parameters.enable_audio = audio;
  }
  if (typeof watermark === "boolean") {
    parameters.watermark = watermark;
  }

  return Object.keys(parameters).length === 0 ? undefined : parameters;
}
|
||||
|
||||
/**
 * Collects the distinct output video URLs from a completed task payload.
 *
 * URLs are read from every `output.results[].video_url` and then from
 * `output.video_url`. Each candidate is trimmed before it is validated and
 * de-duplicated, so whitespace variants of the same URL yield one entry.
 *
 * @param payload - Task payload returned by the status poll.
 * @returns Unique, trimmed, non-empty URLs in first-seen order.
 */
function extractVideoUrls(payload: AlibabaVideoGenerationResponse): string[] {
  const candidates: unknown[] = [
    // Optional chaining on the entry guards against null items in unvalidated JSON.
    ...(payload.output?.results ?? []).map((entry) => entry?.video_url),
    payload.output?.video_url,
  ];
  const urls = new Set<string>();
  for (const candidate of candidates) {
    if (typeof candidate !== "string") {
      continue;
    }
    const url = candidate.trim();
    if (url) {
      urls.add(url);
    }
  }
  return [...urls];
}
|
||||
|
||||
/**
 * Polls the task-status endpoint until the task reaches a terminal state.
 *
 * @param params.taskId - Task id returned by the submit call; URL-encoded before use in the path.
 * @param params.headers - Headers sent with every poll request.
 * @param params.timeoutMs - Per-request timeout; falls back to `DEFAULT_TIMEOUT_MS`.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @param params.baseUrl - API base URL without a trailing slash.
 * @returns The payload of the poll that reported `SUCCEEDED`.
 * @throws Error when the task reports `FAILED` or `CANCELED`, when a poll response is rejected
 *   by `assertOkOrThrowHttpError`, or when `MAX_POLL_ATTEMPTS` polls pass without a terminal state.
 */
async function pollTaskUntilComplete(params: {
  taskId: string;
  headers: Headers;
  timeoutMs?: number;
  fetchFn: typeof fetch;
  baseUrl: string;
}): Promise<AlibabaVideoGenerationResponse> {
  // Encode the id so an unexpected character cannot alter the request path.
  const taskUrl = `${params.baseUrl}/api/v1/tasks/${encodeURIComponent(params.taskId)}`;
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;

  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) {
    // Wait between polls only, so the final attempt is not followed by a
    // pointless delay before the timeout error is raised.
    if (attempt > 0) {
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }

    const response = await fetchWithTimeout(
      taskUrl,
      {
        method: "GET",
        headers: params.headers,
      },
      requestTimeoutMs,
      params.fetchFn,
    );
    await assertOkOrThrowHttpError(response, "Alibaba Wan video-generation task poll failed");

    const payload = (await response.json()) as AlibabaVideoGenerationResponse;
    const status = payload.output?.task_status?.trim().toUpperCase();
    if (status === "SUCCEEDED") {
      return payload;
    }
    if (status === "FAILED" || status === "CANCELED") {
      // Prefer the task-level message, then the top-level one, then a generic fallback.
      throw new Error(
        payload.output?.message?.trim() ||
          payload.message?.trim() ||
          `Alibaba Wan video generation task ${params.taskId} ${status.toLowerCase()}`,
      );
    }
    // Any other status (or a missing one) is treated as still in progress.
  }

  throw new Error(`Alibaba Wan video generation task ${params.taskId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads each generated video into memory, one after another.
 *
 * @param params.urls - Output video URLs, in the order they should be returned.
 * @param params.timeoutMs - Per-request timeout; falls back to `DEFAULT_TIMEOUT_MS`.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @returns One asset per URL. The MIME type comes from the response `content-type`
 *   header (default `video/mp4`) and files are named `video-<n>.mp4`, counting from 1.
 * @throws Whatever `assertOkOrThrowHttpError` raises for a rejected download response.
 */
async function downloadGeneratedVideos(params: {
  urls: string[];
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset[]> {
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const assets: GeneratedVideoAsset[] = [];
  let ordinal = 0;

  for (const url of params.urls) {
    ordinal += 1;
    const response = await fetchWithTimeout(url, { method: "GET" }, requestTimeoutMs, params.fetchFn);
    await assertOkOrThrowHttpError(response, "Alibaba Wan generated video download failed");

    const bytes = Buffer.from(await response.arrayBuffer());
    const contentType = response.headers.get("content-type")?.trim();
    assets.push({
      buffer: bytes,
      mimeType: contentType || "video/mp4",
      fileName: `video-${ordinal}.mp4`,
      metadata: { sourceUrl: url },
    });
  }

  return assets;
}
|
||||
|
||||
/**
 * Creates the Alibaba Model Studio video-generation provider.
 *
 * `generateVideo` resolves the API key, submits an asynchronous video-synthesis
 * task, polls the task until it finishes, and downloads the resulting videos.
 *
 * @returns The provider descriptor (id, models, capabilities) with its `generateVideo` implementation.
 */
export function buildAlibabaVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "alibaba",
    label: "Alibaba Model Studio",
    defaultModel: DEFAULT_ALIBABA_VIDEO_MODEL,
    models: ["wan2.6-t2v", "wan2.6-i2v", "wan2.6-r2v", "wan2.6-r2v-flash", "wan2.7-r2v"],
    isConfigured: ({ agentDir }) => isProviderApiKeyConfigured({ provider: "alibaba", agentDir }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 4,
      maxDurationSeconds: 10,
      supportsSize: true,
      supportsAspectRatio: true,
      supportsResolution: true,
      supportsAudio: true,
      supportsWatermark: true,
    },
    async generateVideo(req): Promise<VideoGenerationResult> {
      const fetchFn = fetch;

      const { apiKey } = await resolveApiKeyForProvider({
        provider: "alibaba",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!apiKey) {
        throw new Error("Alibaba Model Studio API key missing");
      }

      const httpConfig = resolveProviderHttpRequestConfig({
        baseUrl: resolveAlibabaVideoBaseUrl(req),
        defaultBaseUrl: DEFAULT_ALIBABA_VIDEO_BASE_URL,
        defaultHeaders: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
          // Submit as an asynchronous task; the result is fetched by polling below.
          "X-DashScope-Async": "enable",
        },
        provider: "alibaba",
        capability: "video",
        transport: "http",
      });
      // Normalised once and shared by the submit URL and the poll URL.
      const apiBaseUrl = resolveDashscopeAigcApiBaseUrl(httpConfig.baseUrl);
      const model = req.model?.trim() || DEFAULT_ALIBABA_VIDEO_MODEL;

      // Building the input validates reference assets, so an unsupported
      // request throws here, before anything is submitted.
      const input = buildAlibabaVideoGenerationInput(req);
      const parameters = buildAlibabaVideoGenerationParameters({
        ...req,
        durationSeconds: req.durationSeconds ?? DEFAULT_DURATION_SECONDS,
      });

      const { response: submitResponse, release } = await postJsonRequest({
        url: `${apiBaseUrl}/api/v1/services/aigc/video-generation/video-synthesis`,
        headers: httpConfig.headers,
        body: { model, input, parameters },
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork: httpConfig.allowPrivateNetwork,
        dispatcherPolicy: httpConfig.dispatcherPolicy,
      });

      try {
        await assertOkOrThrowHttpError(submitResponse, "Alibaba Wan video generation failed");
        const submitted = (await submitResponse.json()) as AlibabaVideoGenerationResponse;
        const taskId = submitted.output?.task_id?.trim();
        if (!taskId) {
          throw new Error("Alibaba Wan video generation response missing task_id");
        }

        const completed = await pollTaskUntilComplete({
          taskId,
          headers: httpConfig.headers,
          timeoutMs: req.timeoutMs,
          fetchFn,
          baseUrl: apiBaseUrl,
        });

        const urls = extractVideoUrls(completed);
        if (urls.length === 0) {
          throw new Error("Alibaba Wan video generation completed without output video URLs");
        }

        const videos = await downloadGeneratedVideos({ urls, timeoutMs: req.timeoutMs, fetchFn });
        return {
          videos,
          model,
          metadata: {
            requestId: submitted.request_id,
            taskId,
            taskStatus: completed.output?.task_status,
          },
        };
      } finally {
        // Always hand back whatever the submit request is holding, on success or failure.
        await release();
      }
    },
  };
}
|
||||
|
|
@ -138,8 +138,9 @@ describe("discordPlugin outbound", () => {
|
|||
expect(resolveReplyToMode({ cfg, accountId: "default" })).toBe("all");
|
||||
});
|
||||
|
||||
it("forwards mediaLocalRoots to sendMessageDiscord", async () => {
|
||||
it("forwards full media send context to sendMessageDiscord", async () => {
|
||||
const sendMessageDiscord = vi.fn(async () => ({ messageId: "m1" }));
|
||||
const mediaReadFile = vi.fn(async () => Buffer.from("media"));
|
||||
|
||||
const result = await discordPlugin.outbound!.sendMedia!({
|
||||
cfg: {} as OpenClawConfig,
|
||||
|
|
@ -147,23 +148,102 @@ describe("discordPlugin outbound", () => {
|
|||
text: "hi",
|
||||
mediaUrl: "/tmp/image.png",
|
||||
mediaLocalRoots: ["/tmp/agent-root"],
|
||||
mediaReadFile,
|
||||
accountId: "work",
|
||||
threadId: "thread-123",
|
||||
replyToId: "reply-123",
|
||||
deps: {
|
||||
discord: sendMessageDiscord,
|
||||
},
|
||||
});
|
||||
|
||||
expect(sendMessageDiscord).toHaveBeenCalledWith(
|
||||
"channel:123",
|
||||
"channel:thread-123",
|
||||
"hi",
|
||||
expect.objectContaining({
|
||||
mediaUrl: "/tmp/image.png",
|
||||
mediaLocalRoots: ["/tmp/agent-root"],
|
||||
mediaReadFile,
|
||||
replyTo: "reply-123",
|
||||
}),
|
||||
);
|
||||
expect(result).toMatchObject({ channel: "discord", messageId: "m1" });
|
||||
});
|
||||
|
||||
// A text-plus-video delivery must become two sends: the text as a reply first,
// then a media-only message carrying the video.
it("splits text and video into separate sends for attached outbound delivery", async () => {
  const caption = "done - tiny cyber-lobster clip incoming";
  const videoPath = "/tmp/molty.mp4";
  const threadTarget = "channel:thread-123";

  const sendMessageDiscord = vi.fn();
  sendMessageDiscord.mockResolvedValueOnce({ messageId: "text-1" });
  sendMessageDiscord.mockResolvedValueOnce({ messageId: "video-1" });

  const result = await discordPlugin.outbound!.sendMedia!({
    cfg: {} as OpenClawConfig,
    to: "channel:123",
    text: caption,
    mediaUrl: videoPath,
    accountId: "work",
    replyToId: "reply-123",
    threadId: "thread-123",
    deps: {
      discord: sendMessageDiscord,
    },
  });

  expect(sendMessageDiscord).toHaveBeenCalledTimes(2);
  expect(sendMessageDiscord).toHaveBeenNthCalledWith(
    1,
    threadTarget,
    caption,
    expect.objectContaining({ replyTo: "reply-123" }),
  );
  expect(sendMessageDiscord).toHaveBeenNthCalledWith(
    2,
    threadTarget,
    "",
    expect.objectContaining({ mediaUrl: videoPath }),
  );
  // The media send's result is what the caller gets back.
  expect(result).toMatchObject({ channel: "discord", messageId: "video-1" });
});
|
||||
|
||||
// When a thread id is supplied, polls must target the thread rather than the parent channel.
it("threads poll sends through the thread target", async () => {
  const poll = {
    question: "Best shell?",
    options: ["molty", "molter"],
  };
  const sendPollDiscord = vi.fn(async () => ({
    channelId: "channel:thread-123",
    messageId: "poll-1",
  }));
  const sendModule = await import("./send.js");
  const sendPollSpy = vi.spyOn(sendModule, "sendPollDiscord").mockImplementation(sendPollDiscord);

  try {
    const result = await discordPlugin.outbound!.sendPoll!({
      cfg: {} as OpenClawConfig,
      to: "channel:123",
      poll: {
        question: "Best shell?",
        options: ["molty", "molter"],
      },
      accountId: "work",
      threadId: "thread-123",
    });

    expect(sendPollDiscord).toHaveBeenCalledWith(
      "channel:thread-123",
      poll,
      expect.objectContaining({ accountId: "work" }),
    );
    expect(result).toMatchObject({ channel: "discord", messageId: "poll-1" });
  } finally {
    // Restore the real implementation even if an assertion above fails.
    sendPollSpy.mockRestore();
  }
});
|
||||
|
||||
it("uses direct Discord probe helpers for status probes", async () => {
|
||||
const runtimeProbeDiscord = vi.fn(async () => {
|
||||
throw new Error("runtime Discord probe should not be used");
|
||||
|
|
|
|||
|
|
@ -134,6 +134,43 @@ const meta = {
|
|||
};
|
||||
const REQUIRED_DISCORD_PERMISSIONS = ["ViewChannel", "SendMessages"] as const;
|
||||
const DISCORD_ACCOUNT_STARTUP_STAGGER_MS = 10_000;
|
||||
const DISCORD_VIDEO_MEDIA_EXTENSIONS = new Set([".avi", ".m4v", ".mkv", ".mov", ".mp4", ".webm"]);
|
||||
|
||||
/**
 * Reduces a media reference to a lowercase path suitable for extension checks.
 *
 * Input that parses as an absolute URL yields its `pathname`. Anything else is
 * treated as a plain path and cut at the first `#`, then at the first `?`.
 *
 * @param mediaUrl - URL or path of the media.
 * @returns The lowercase path, or `""` for blank input.
 */
function normalizeMediaPathForExtension(mediaUrl: string): string {
  const candidate = mediaUrl.trim();
  if (candidate.length === 0) {
    return "";
  }

  let pathPart: string;
  try {
    pathPart = new URL(candidate).pathname;
  } catch {
    // Not an absolute URL: drop the fragment first, then the query string.
    const hashAt = candidate.indexOf("#");
    const beforeHash = hashAt === -1 ? candidate : candidate.slice(0, hashAt);
    const queryAt = beforeHash.indexOf("?");
    pathPart = queryAt === -1 ? beforeHash : beforeHash.slice(0, queryAt);
  }
  return pathPart.toLowerCase();
}
|
||||
|
||||
/**
 * Guesses from the file extension alone whether a media reference is a video.
 *
 * @param mediaUrl - URL or path of the media.
 * @returns `true` when the normalized path ends with one of the known video extensions.
 */
function isLikelyDiscordVideoMedia(mediaUrl: string): boolean {
  const mediaPath = normalizeMediaPathForExtension(mediaUrl);
  return [...DISCORD_VIDEO_MEDIA_EXTENSIONS].some((extension) => mediaPath.endsWith(extension));
}
|
||||
|
||||
/**
 * Picks the send target for attached outbound deliveries.
 *
 * @param params.to - Default target.
 * @param params.threadId - Optional thread id; when it is non-blank after trimming,
 *   the target becomes `channel:<threadId>`.
 * @returns The thread target when a usable thread id is given, otherwise `params.to`.
 */
function resolveDiscordAttachedOutboundTarget(params: {
  to: string;
  threadId?: string | number | null;
}): string {
  const { to, threadId } = params;
  // null/undefined and blank ids both fall back to the default target.
  const normalizedThreadId = threadId == null ? "" : String(threadId).trim();
  return normalizedThreadId ? `channel:${normalizedThreadId}` : to;
}
|
||||
|
||||
function resolveRuntimeDiscordMessageActions() {
|
||||
try {
|
||||
|
|
@ -823,9 +860,9 @@ export const discordPlugin: ChannelPlugin<ResolvedDiscordAccount, DiscordProbe>
|
|||
},
|
||||
attachedResults: {
|
||||
channel: "discord",
|
||||
sendText: async ({ cfg, to, text, accountId, deps, replyToId, silent }) => {
|
||||
sendText: async ({ cfg, to, text, accountId, deps, replyToId, threadId, silent }) => {
|
||||
const send = await resolveDiscordSend(deps);
|
||||
return await send(to, text, {
|
||||
return await send(resolveDiscordAttachedOutboundTarget({ to, threadId }), text, {
|
||||
verbose: false,
|
||||
cfg,
|
||||
replyTo: replyToId ?? undefined,
|
||||
|
|
@ -839,26 +876,48 @@ export const discordPlugin: ChannelPlugin<ResolvedDiscordAccount, DiscordProbe>
|
|||
text,
|
||||
mediaUrl,
|
||||
mediaLocalRoots,
|
||||
mediaReadFile,
|
||||
accountId,
|
||||
deps,
|
||||
replyToId,
|
||||
threadId,
|
||||
silent,
|
||||
}) => {
|
||||
const send = await resolveDiscordSend(deps);
|
||||
return await send(to, text, {
|
||||
const target = resolveDiscordAttachedOutboundTarget({ to, threadId });
|
||||
if (text.trim() && mediaUrl && isLikelyDiscordVideoMedia(mediaUrl)) {
|
||||
await send(target, text, {
|
||||
verbose: false,
|
||||
cfg,
|
||||
replyTo: replyToId ?? undefined,
|
||||
accountId: accountId ?? undefined,
|
||||
silent: silent ?? undefined,
|
||||
});
|
||||
return await send(target, "", {
|
||||
verbose: false,
|
||||
cfg,
|
||||
mediaUrl,
|
||||
mediaLocalRoots,
|
||||
mediaReadFile,
|
||||
accountId: accountId ?? undefined,
|
||||
silent: silent ?? undefined,
|
||||
});
|
||||
}
|
||||
return await send(target, text, {
|
||||
verbose: false,
|
||||
cfg,
|
||||
mediaUrl,
|
||||
mediaLocalRoots,
|
||||
mediaReadFile,
|
||||
replyTo: replyToId ?? undefined,
|
||||
accountId: accountId ?? undefined,
|
||||
silent: silent ?? undefined,
|
||||
});
|
||||
},
|
||||
sendPoll: async ({ cfg, to, poll, accountId, silent }) =>
|
||||
sendPoll: async ({ cfg, to, poll, accountId, threadId, silent }) =>
|
||||
await (
|
||||
await loadDiscordSendModule()
|
||||
).sendPollDiscord(to, poll, {
|
||||
).sendPollDiscord(resolveDiscordAttachedOutboundTarget({ to, threadId }), poll, {
|
||||
cfg,
|
||||
accountId: accountId ?? undefined,
|
||||
silent: silent ?? undefined,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import {
|
|||
registerProviderPlugin,
|
||||
requireRegisteredProvider,
|
||||
} from "../test/helpers/plugins/provider-registration.js";
|
||||
import alibabaPlugin from "./alibaba/index.js";
|
||||
import byteplusPlugin from "./byteplus/index.js";
|
||||
import falPlugin from "./fal/index.js";
|
||||
import googlePlugin from "./google/index.js";
|
||||
|
|
@ -20,6 +21,7 @@ import minimaxPlugin from "./minimax/index.js";
|
|||
import openaiPlugin from "./openai/index.js";
|
||||
import qwenPlugin from "./qwen/index.js";
|
||||
import togetherPlugin from "./together/index.js";
|
||||
import xaiPlugin from "./xai/index.js";
|
||||
|
||||
const LIVE = isLiveTestEnabled();
|
||||
const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS);
|
||||
|
|
@ -33,6 +35,12 @@ type LiveProviderCase = {
|
|||
};
|
||||
|
||||
const CASES: LiveProviderCase[] = [
|
||||
{
|
||||
plugin: alibabaPlugin,
|
||||
pluginId: "alibaba",
|
||||
pluginName: "Alibaba Model Studio Plugin",
|
||||
providerId: "alibaba",
|
||||
},
|
||||
{
|
||||
plugin: byteplusPlugin,
|
||||
pluginId: "byteplus",
|
||||
|
|
@ -55,6 +63,7 @@ const CASES: LiveProviderCase[] = [
|
|||
pluginName: "Together Provider",
|
||||
providerId: "together",
|
||||
},
|
||||
{ plugin: xaiPlugin, pluginId: "xai", pluginName: "xAI Plugin", providerId: "xai" },
|
||||
]
|
||||
.filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true))
|
||||
.toSorted((left, right) => left.providerId.localeCompare(right.providerId));
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import { isModernXaiModel, resolveXaiForwardCompatModel } from "./provider-model
|
|||
import { resolveFallbackXaiAuth } from "./src/tool-auth-shared.js";
|
||||
import { resolveEffectiveXSearchConfig } from "./src/x-search-config.js";
|
||||
import { wrapXaiProviderStream } from "./stream.js";
|
||||
import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
import { createXaiWebSearchProvider } from "./web-search.js";
|
||||
|
||||
const PROVIDER_ID = "xai";
|
||||
|
|
@ -230,6 +231,7 @@ export default defineSingleProviderPluginEntry({
|
|||
},
|
||||
register(api) {
|
||||
api.registerWebSearchProvider(createXaiWebSearchProvider());
|
||||
api.registerVideoGenerationProvider(buildXaiVideoGenerationProvider());
|
||||
api.registerTool((ctx) => createLazyCodeExecutionTool(ctx), { name: "code_execution" });
|
||||
api.registerTool((ctx) => createLazyXSearchTool(ctx), { name: "x_search" });
|
||||
},
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@
|
|||
},
|
||||
"contracts": {
|
||||
"webSearchProviders": ["grok"],
|
||||
"videoGenerationProviders": ["xai"],
|
||||
"tools": ["code_execution", "x_search"]
|
||||
},
|
||||
"configSchema": {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Declares the registration surface expected of the bundled `xai` plugin: one model
// provider ("xai"), one web-search provider ("grok"), one video-generation provider
// ("xai"), and the `code_execution` / `x_search` tools.
// NOTE(review): the actual checks live in describePluginRegistrationContract, which is
// not visible here; `requireGenerateVideo` presumably asserts that the registered video
// provider exposes `generateVideo` - confirm against the helper.
describePluginRegistrationContract({
|
||||
pluginId: "xai",
|
||||
providerIds: ["xai"],
|
||||
webSearchProviderIds: ["grok"],
|
||||
videoGenerationProviderIds: ["xai"],
|
||||
toolNames: ["code_execution", "x_search"],
|
||||
requireGenerateVideo: true,
|
||||
});
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
// The mock functions are built inside `vi.hoisted` so that they already exist when the
// `vi.mock` factories below (which vitest hoists above the imports) are evaluated.
const {
  resolveApiKeyForProviderMock,
  postJsonRequestMock,
  fetchWithTimeoutMock,
  assertOkOrThrowHttpErrorMock,
  resolveProviderHttpRequestConfigMock,
} = vi.hoisted(() => {
  // Always resolves to a fixed API key so `generateVideo` gets past its auth guard.
  const apiKeyResolver = vi.fn(async () => ({ apiKey: "xai-key" }));
  // Behavior for these two is supplied per test.
  const postJson = vi.fn();
  const timedFetch = vi.fn();
  // Never throws, i.e. every mocked HTTP response is treated as OK.
  const okAssertion = vi.fn(async () => {});
  // Echoes the requested base URL (or the default) and wraps the default headers.
  const httpConfigResolver = vi.fn((params) => ({
    baseUrl: params.baseUrl ?? params.defaultBaseUrl,
    allowPrivateNetwork: false,
    headers: new Headers(params.defaultHeaders),
    dispatcherPolicy: undefined,
  }));

  return {
    resolveApiKeyForProviderMock: apiKeyResolver,
    postJsonRequestMock: postJson,
    fetchWithTimeoutMock: timedFetch,
    assertOkOrThrowHttpErrorMock: okAssertion,
    resolveProviderHttpRequestConfigMock: httpConfigResolver,
  };
});

// Replace the auth runtime used by the provider with the fixed-key resolver.
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => {
  return { resolveApiKeyForProvider: resolveApiKeyForProviderMock };
});

// Replace every HTTP helper the provider imports so no network traffic happens.
vi.mock("openclaw/plugin-sdk/provider-http", () => {
  return {
    assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
    fetchWithTimeout: fetchWithTimeoutMock,
    postJsonRequest: postJsonRequestMock,
    resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
  };
});
|
||||
|
||||
describe("xai video generation provider", () => {
  /**
   * Queues the three mocked HTTP exchanges of a successful job: the create call
   * answering with `requestId`, one status poll that is already "done" and points at
   * `videoUrl`, and the mp4 download returning `bytes`.
   */
  function queueSuccessfulJob(job: { requestId: string; videoUrl: string; bytes: string }): void {
    postJsonRequestMock.mockResolvedValue({
      response: {
        json: async () => ({ request_id: job.requestId }),
      },
      release: vi.fn(async () => {}),
    });
    fetchWithTimeoutMock
      .mockResolvedValueOnce({
        json: async () => ({
          request_id: job.requestId,
          status: "done",
          video: { url: job.videoUrl },
        }),
      })
      .mockResolvedValueOnce({
        headers: new Headers({ "content-type": "video/mp4" }),
        arrayBuffer: async () => Buffer.from(job.bytes),
      });
  }

  afterEach(() => {
    // Per-test behavior is wiped completely for the HTTP mocks ...
    postJsonRequestMock.mockReset();
    fetchWithTimeoutMock.mockReset();
    // ... while mocks with a fixed implementation only lose their call history.
    resolveApiKeyForProviderMock.mockClear();
    assertOkOrThrowHttpErrorMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("creates, polls, and downloads a generated video", async () => {
    queueSuccessfulJob({
      requestId: "req_123",
      videoUrl: "https://cdn.x.ai/video.mp4",
      bytes: "mp4-bytes",
    });
    const prompt = "A tiny robot crab crossing a moonlit tide pool";

    const result = await buildXaiVideoGenerationProvider().generateVideo({
      provider: "xai",
      model: "grok-imagine-video",
      prompt,
      cfg: {},
      durationSeconds: 6,
      aspectRatio: "16:9",
      resolution: "720P",
    });

    // The create call goes to the text/image generation endpoint with xAI field names.
    const expectedBody = expect.objectContaining({
      model: "grok-imagine-video",
      prompt,
      duration: 6,
      aspect_ratio: "16:9",
      resolution: "720p",
    });
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.x.ai/v1/videos/generations",
        body: expectedBody,
      }),
    );
    // The first fetch is the status poll, using the default timeout and global fetch.
    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
      1,
      "https://api.x.ai/v1/videos/req_123",
      expect.objectContaining({ method: "GET" }),
      120000,
      fetch,
    );
    expect(result.videos[0]?.mimeType).toBe("video/mp4");
    expect(result.metadata).toEqual(
      expect.objectContaining({
        requestId: "req_123",
        mode: "generate",
      }),
    );
  });

  it("routes video inputs to the extension endpoint when duration is set", async () => {
    queueSuccessfulJob({
      requestId: "req_extend",
      videoUrl: "https://cdn.x.ai/extended.mp4",
      bytes: "extended-bytes",
    });
    const sourceVideo = { url: "https://example.com/input.mp4" };

    await buildXaiVideoGenerationProvider().generateVideo({
      provider: "xai",
      model: "grok-imagine-video",
      prompt: "Continue the shot into a neon alleyway",
      cfg: {},
      durationSeconds: 8,
      inputVideos: [sourceVideo],
    });

    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.x.ai/v1/videos/extensions",
        body: expect.objectContaining({
          video: { url: "https://example.com/input.mp4" },
          duration: 8,
        }),
      }),
    );
  });
});
|
||||
|
|
@ -0,0 +1,338 @@
|
|||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
VideoGenerationSourceAsset,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
|
||||
/** Base URL used when the config does not override `models.providers.xai.baseUrl`. */
const DEFAULT_XAI_VIDEO_BASE_URL = "https://api.x.ai/v1";

/** Model id sent when the request does not name one; also the only advertised model. */
const DEFAULT_XAI_VIDEO_MODEL = "grok-imagine-video";

/** Fallback per-request timeout (2 minutes) for status polls and the video download. */
const DEFAULT_TIMEOUT_MS = 2 * 60 * 1_000;

/** Pause between two status polls (5 seconds). */
const POLL_INTERVAL_MS = 5 * 1_000;

/** Upper bound on status polls; with the interval above that is roughly 10 minutes of waiting. */
const MAX_POLL_ATTEMPTS = 120;

/** Aspect ratios forwarded to xAI; any other value is dropped from the request body. */
const XAI_VIDEO_ASPECT_RATIOS = new Set<string>([
  "1:1",
  "16:9",
  "9:16",
  "4:3",
  "3:4",
  "3:2",
  "2:3",
]);
|
||||
|
||||
/** Error object as read from xAI video responses; only `message` is consumed by this module. */
type XaiVideoApiError = {
  code?: string;
  message?: string;
};

/**
 * JSON body of a create (generate / edit / extend) call, as consumed here.
 * `request_id` identifies the asynchronous job that is polled afterwards.
 */
type XaiVideoCreateResponse = {
  request_id?: string;
  error?: XaiVideoApiError | null;
};

/**
 * JSON body of a status poll, as consumed here. `video.url` is read once `status`
 * is "done"; `error.message` is surfaced when the job is "failed" or "expired".
 */
type XaiVideoStatusResponse = {
  request_id?: string;
  status?: "queued" | "processing" | "done" | "failed" | "expired";
  video?: { url?: string } | null;
  error?: XaiVideoApiError | null;
};
|
||||
|
||||
/**
 * Picks the base URL for xAI video requests.
 *
 * @param req - Video generation request whose config may override the xAI base URL.
 * @returns The trimmed `models.providers.xai.baseUrl` when it is non-empty, otherwise
 *   `DEFAULT_XAI_VIDEO_BASE_URL`.
 */
function resolveXaiVideoBaseUrl(req: VideoGenerationRequest): string {
  const configured = req.cfg?.models?.providers?.xai?.baseUrl?.trim();
  if (configured) {
    return configured;
  }
  return DEFAULT_XAI_VIDEO_BASE_URL;
}
|
||||
|
||||
/**
 * Encodes raw bytes as a base64 `data:` URL.
 *
 * @param buffer - Bytes to embed.
 * @param mimeType - MIME type placed in the URL header.
 * @returns `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(buffer: Buffer, mimeType: string): string {
  const payload = buffer.toString("base64");
  return ["data:", mimeType, ";base64,", payload].join("");
}
|
||||
|
||||
/**
 * Converts an optional reference image into the URL string sent to xAI.
 *
 * @param input - Reference image, if any.
 * @returns `undefined` when no image was supplied; the trimmed remote URL when one is
 *   set; otherwise a base64 data URL built from the image bytes (MIME type defaults to
 *   `image/png` when none is given).
 * @throws Error when the asset has neither a usable URL nor a buffer.
 */
function resolveImageUrl(input: VideoGenerationSourceAsset | undefined): string | undefined {
  if (!input) {
    return undefined;
  }

  // A remote URL wins over inline bytes.
  const remoteUrl = input.url?.trim();
  if (remoteUrl) {
    return remoteUrl;
  }

  const bytes = input.buffer;
  if (!bytes) {
    throw new Error("xAI image-to-video input is missing image data.");
  }

  const mimeType = input.mimeType?.trim() || "image/png";
  return toDataUrl(bytes, mimeType);
}
|
||||
|
||||
/**
 * Extracts the remote URL of an optional source video for edit / extend requests.
 *
 * @param input - Source video, if any.
 * @returns `undefined` when no video was supplied, otherwise its trimmed URL.
 * @throws Error when the asset has no usable URL. Inline bytes are rejected with a
 *   dedicated message because only URL inputs are handled here.
 */
function resolveInputVideoUrl(input: VideoGenerationSourceAsset | undefined): string | undefined {
  if (!input) {
    return undefined;
  }

  const remoteUrl = input.url?.trim();
  if (remoteUrl) {
    return remoteUrl;
  }

  // No URL: tell the caller whether they passed bytes (unsupported) or nothing at all.
  const reason = input.buffer
    ? "xAI video editing currently requires a remote mp4 URL input."
    : "xAI video editing input is missing video data.";
  throw new Error(reason);
}
|
||||
|
||||
/**
 * Normalizes a requested clip duration to a whole number of seconds within bounds.
 *
 * @param params.durationSeconds - Requested duration; may be fractional or absent.
 * @param params.min - Lower bound, defaults to 1.
 * @param params.max - Upper bound, defaults to 15.
 * @returns The rounded duration clamped to `[min, max]`, or `undefined` when the input
 *   is not a finite number.
 */
function resolveDurationSeconds(params: {
  durationSeconds?: number;
  min?: number;
  max?: number;
}): number | undefined {
  const requested = params.durationSeconds;
  const isUsable = typeof requested === "number" && Number.isFinite(requested);
  if (!isUsable) {
    return undefined;
  }

  const lowerBound = params.min ?? 1;
  const upperBound = params.max ?? 15;
  // Round first, then cap from above, then raise to the lower bound.
  const capped = Math.min(upperBound, Math.round(requested));
  return Math.max(lowerBound, capped);
}
|
||||
|
||||
/**
 * Validates a requested aspect ratio against `XAI_VIDEO_ASPECT_RATIOS`.
 *
 * @param value - Requested ratio such as `"16:9"`; surrounding whitespace is ignored.
 * @returns The trimmed ratio when it is in the allowed set, otherwise `undefined`.
 */
function resolveAspectRatio(value: string | undefined): string | undefined {
  const candidate = value?.trim();
  return candidate && XAI_VIDEO_ASPECT_RATIOS.has(candidate) ? candidate : undefined;
}
|
||||
|
||||
/**
 * Maps a requested resolution label onto the values this provider sends to xAI.
 *
 * Matching ignores letter case and surrounding whitespace, so `"720p"` and `" 720P "`
 * are accepted alongside the canonical `"720P"` instead of being silently dropped.
 *
 * @param value - Requested resolution label (`"480P"`, `"720P"` or `"1080P"`).
 * @returns `"480p"` or `"720p"`, or `undefined` for a missing or unrecognized label.
 *   `"1080P"` is downgraded to `"720p"`, the highest value this provider emits.
 */
function resolveResolution(value: string | undefined): "480p" | "720p" | undefined {
  switch (value?.trim().toUpperCase()) {
    case "480P":
      return "480p";
    case "720P":
    case "1080P":
      return "720p";
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Decides which kind of xAI video job a request describes.
 *
 * @param req - Video generation request.
 * @returns `"generate"` when there is no input video; `"extend"` when there is an input
 *   video and a usable duration; `"edit"` when there is an input video but no usable
 *   duration.
 */
function resolveXaiVideoMode(req: VideoGenerationRequest): "generate" | "edit" | "extend" {
  const videoCount = req.inputVideos?.length ?? 0;
  if (!(videoCount > 0)) {
    return "generate";
  }

  // A finite duration alongside a source video signals an extension request.
  const extensionDuration = resolveDurationSeconds({
    durationSeconds: req.durationSeconds,
    min: 2,
    max: 10,
  });
  if (typeof extensionDuration === "number") {
    return "extend";
  }
  return "edit";
}
|
||||
|
||||
/**
 * Builds the JSON body for the xAI create call matching the request's mode.
 *
 * - generate: `model`, `prompt`, plus optional `image`, `duration` (1-15 s),
 *   `aspect_ratio` and `resolution`.
 * - edit: `model`, `prompt`, `video`.
 * - extend: as edit, plus `duration` (2-10 s).
 *
 * @param req - Video generation request.
 * @returns The request body; optional fields are omitted when they cannot be resolved.
 * @throws Error when more than one image or video is supplied, when images and videos
 *   are combined, or when an input asset carries no usable data.
 */
function buildCreateBody(req: VideoGenerationRequest): Record<string, unknown> {
  const imageCount = req.inputImages?.length ?? 0;
  const videoCount = req.inputVideos?.length ?? 0;

  if (imageCount > 1) {
    throw new Error("xAI video generation supports at most one reference image.");
  }
  if (videoCount > 1) {
    throw new Error("xAI video generation supports at most one input video.");
  }
  if (imageCount > 0 && videoCount > 0) {
    throw new Error("xAI video generation does not support image and video inputs together.");
  }

  const mode = resolveXaiVideoMode(req);
  const body: Record<string, unknown> = {
    model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL,
    prompt: req.prompt,
  };

  if (mode !== "generate") {
    // edit / extend: the source video is mandatory, a duration only applies to extend.
    body.video = { url: resolveInputVideoUrl(req.inputVideos?.[0]) };
    if (mode === "extend") {
      const extensionSeconds = resolveDurationSeconds({
        durationSeconds: req.durationSeconds,
        min: 2,
        max: 10,
      });
      if (typeof extensionSeconds === "number") {
        body.duration = extensionSeconds;
      }
    }
    return body;
  }

  // generate: every extra field is optional and only attached when it resolves.
  const referenceImageUrl = resolveImageUrl(req.inputImages?.[0]);
  if (referenceImageUrl) {
    body.image = { url: referenceImageUrl };
  }

  const clipSeconds = resolveDurationSeconds({
    durationSeconds: req.durationSeconds,
    min: 1,
    max: 15,
  });
  if (typeof clipSeconds === "number") {
    body.duration = clipSeconds;
  }

  const ratio = resolveAspectRatio(req.aspectRatio);
  if (ratio) {
    body.aspect_ratio = ratio;
  }

  const outputResolution = resolveResolution(req.resolution);
  if (outputResolution) {
    body.resolution = outputResolution;
  }

  return body;
}
|
||||
|
||||
/**
 * Chooses the create endpoint path (relative to the base URL) for a request.
 *
 * @param req - Video generation request.
 * @returns `/videos/edits` for edit jobs, `/videos/extensions` for extend jobs and
 *   `/videos/generations` for everything else.
 */
function resolveCreateEndpoint(req: VideoGenerationRequest): string {
  const mode = resolveXaiVideoMode(req);
  if (mode === "edit") {
    return "/videos/edits";
  }
  if (mode === "extend") {
    return "/videos/extensions";
  }
  return "/videos/generations";
}
|
||||
|
||||
/**
 * Polls `GET {baseUrl}/videos/{requestId}` until the job reaches a terminal state.
 *
 * @param params.requestId - Job id returned by the create call.
 * @param params.headers - Headers (including authorization) sent with every poll.
 * @param params.timeoutMs - Per-request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
 * @param params.baseUrl - API base URL without the `/videos/...` suffix.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @returns The status payload once `status` is `"done"`.
 * @throws Error when the job reports `"failed"` or `"expired"` (using the API's error
 *   message when present), when a poll is rejected by `assertOkOrThrowHttpError`, or
 *   when the job is still unfinished after `MAX_POLL_ATTEMPTS` polls.
 */
async function pollXaiVideo(params: {
  requestId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<XaiVideoStatusResponse> {
  const statusUrl = `${params.baseUrl}/videos/${params.requestId}`;
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const lastAttempt = MAX_POLL_ATTEMPTS - 1;

  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) {
    const response = await fetchWithTimeout(
      statusUrl,
      {
        method: "GET",
        headers: params.headers,
      },
      requestTimeoutMs,
      params.fetchFn,
    );
    await assertOkOrThrowHttpError(response, "xAI video status request failed");
    const payload = (await response.json()) as XaiVideoStatusResponse;

    switch (payload.status) {
      case "done":
        return payload;
      case "failed":
      case "expired":
        throw new Error(payload.error?.message?.trim() || `xAI video generation ${payload.status}`);
      case "queued":
      case "processing":
      default:
        // Still running (or an unknown/missing status): wait before the next poll.
        // After the final attempt there is nothing left to wait for, so report the
        // timeout immediately instead of sleeping one more interval.
        if (attempt < lastAttempt) {
          await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
        }
        break;
    }
  }

  throw new Error(`xAI video generation task ${params.requestId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads a finished video and wraps it as a generated asset.
 *
 * The request carries no headers beyond the method; only the URL returned by the
 * status call is used.
 *
 * @param params.url - Download URL of the generated video.
 * @param params.timeoutMs - Request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @returns The video bytes, the MIME type from `content-type` (falling back to
 *   `video/mp4`) and a file name whose extension is `webm` for WebM content and `mp4`
 *   otherwise.
 * @throws Whatever `assertOkOrThrowHttpError` raises for a rejected response.
 */
async function downloadXaiVideo(params: {
  url: string;
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const response = await fetchWithTimeout(
    params.url,
    { method: "GET" },
    requestTimeoutMs,
    params.fetchFn,
  );
  await assertOkOrThrowHttpError(response, "xAI generated video download failed");

  const mimeType = response.headers.get("content-type")?.trim() || "video/mp4";
  const extension = mimeType.includes("webm") ? "webm" : "mp4";
  const bytes = Buffer.from(await response.arrayBuffer());

  return {
    buffer: bytes,
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
|
||||
|
||||
/**
 * Submits a create request and returns the job id from its response.
 *
 * The handle returned by `postJsonRequest` is released as soon as the response body has
 * been read (or reading failed), so it is not kept open while the job is polled.
 *
 * @param request - Arguments forwarded unchanged to `postJsonRequest`.
 * @returns The trimmed `request_id` of the submitted job.
 * @throws Error when the response is rejected by `assertOkOrThrowHttpError` or carries
 *   no `request_id` (the API's error message is used when present).
 */
async function submitXaiVideoRequest(
  request: Parameters<typeof postJsonRequest>[0],
): Promise<string> {
  const { response, release } = await postJsonRequest(request);
  try {
    await assertOkOrThrowHttpError(response, "xAI video generation failed");
    const submitted = (await response.json()) as XaiVideoCreateResponse;
    const requestId = submitted.request_id?.trim();
    if (!requestId) {
      throw new Error(
        submitted.error?.message?.trim() || "xAI video generation response missing request_id",
      );
    }
    return requestId;
  } finally {
    await release();
  }
}

/**
 * Creates the xAI video generation provider.
 *
 * `generateVideo` resolves the xAI API key, submits a generate / edit / extend job,
 * polls it until it finishes and downloads the resulting file.
 *
 * @returns Provider descriptor with id `"xai"`, its single model, capability limits
 *   and the `generateVideo` implementation.
 */
export function buildXaiVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "xai",
    label: "xAI",
    defaultModel: DEFAULT_XAI_VIDEO_MODEL,
    models: [DEFAULT_XAI_VIDEO_MODEL],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "xai",
        agentDir,
      }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 1,
      maxDurationSeconds: 15,
      supportsAspectRatio: true,
      supportsResolution: true,
    },
    async generateVideo(req) {
      const auth = await resolveApiKeyForProvider({
        provider: "xai",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("xAI API key missing");
      }

      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolveXaiVideoBaseUrl(req),
          defaultBaseUrl: DEFAULT_XAI_VIDEO_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "xai",
          capability: "video",
          transport: "http",
        });

      // Step 1: submit the job. The create response is released before polling starts.
      const requestId = await submitXaiVideoRequest({
        url: `${baseUrl}${resolveCreateEndpoint(req)}`,
        headers,
        body: buildCreateBody(req),
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });

      // Step 2: wait for the job to finish.
      const completed = await pollXaiVideo({
        requestId,
        headers,
        timeoutMs: req.timeoutMs,
        baseUrl,
        fetchFn,
      });
      const videoUrl = completed.video?.url?.trim();
      if (!videoUrl) {
        throw new Error("xAI video generation completed without an output URL");
      }

      // Step 3: fetch the generated file.
      const video = await downloadXaiVideo({
        url: videoUrl,
        timeoutMs: req.timeoutMs,
        fetchFn,
      });

      return {
        videos: [video],
        model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL,
        metadata: {
          requestId,
          status: completed.status,
          videoUrl,
          mode: resolveXaiVideoMode(req),
        },
      };
    },
  };
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { collectProviderApiKeys } from "./live-auth-keys.js";
|
||||
|
||||
// Environment variables mutated by the tests below, with the values they held when this
// module was loaded so each test can put them back.
const TRACKED_ENV_VARS = ["MODELSTUDIO_API_KEY", "XAI_API_KEY"] as const;
const originalEnvValues = new Map(
  TRACKED_ENV_VARS.map((name) => [name, process.env[name]] as const),
);

describe("collectProviderApiKeys", () => {
  afterEach(() => {
    // Restore every tracked variable: unset it if it was unset, else reinstate its value.
    for (const [name, original] of originalEnvValues) {
      if (original === undefined) {
        delete process.env[name];
      } else {
        process.env[name] = original;
      }
    }
  });

  it("honors manifest-declared provider auth env vars for nonstandard provider ids", () => {
    process.env.MODELSTUDIO_API_KEY = "modelstudio-live-key";

    const keys = collectProviderApiKeys("alibaba");

    expect(keys).toContain("modelstudio-live-key");
  });

  it("dedupes manifest env vars against direct provider env naming", () => {
    process.env.XAI_API_KEY = "xai-live-key";

    const keys = collectProviderApiKeys("xai");

    expect(keys).toEqual(["xai-live-key"]);
  });
});
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
import { getProviderEnvVars } from "../secrets/provider-env-vars.js";
|
||||
import { normalizeProviderId } from "./model-selection.js";
|
||||
|
||||
const KEY_SPLIT_RE = /[\s,;]+/g;
|
||||
|
|
@ -98,7 +99,8 @@ function resolveProviderApiKeyConfig(provider: string): ProviderApiKeyConfig {
|
|||
}
|
||||
|
||||
export function collectProviderApiKeys(provider: string): string[] {
|
||||
const config = resolveProviderApiKeyConfig(provider);
|
||||
const normalizedProvider = normalizeProviderId(provider);
|
||||
const config = resolveProviderApiKeyConfig(normalizedProvider);
|
||||
|
||||
const forcedSingle = config.liveSingle ? process.env[config.liveSingle]?.trim() : undefined;
|
||||
if (forcedSingle) {
|
||||
|
|
@ -112,6 +114,9 @@ export function collectProviderApiKeys(provider: string): string[] {
|
|||
const fallback = config.fallbackVars
|
||||
.map((envVar) => process.env[envVar]?.trim())
|
||||
.filter(Boolean) as string[];
|
||||
const manifestFallback = getProviderEnvVars(normalizedProvider)
|
||||
.map((envVar) => process.env[envVar]?.trim())
|
||||
.filter(Boolean) as string[];
|
||||
|
||||
const seen = new Set<string>();
|
||||
|
||||
|
|
@ -135,6 +140,9 @@ export function collectProviderApiKeys(provider: string): string[] {
|
|||
for (const value of fallback) {
|
||||
add(value);
|
||||
}
|
||||
for (const value of manifestFallback) {
|
||||
add(value);
|
||||
}
|
||||
|
||||
return Array.from(seen);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js";
|
|||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
export const DEFAULT_LIVE_VIDEO_MODELS: Record<string, string> = {
|
||||
alibaba: "alibaba/wan2.6-t2v",
|
||||
byteplus: "byteplus/seedance-1-0-lite-t2v-250428",
|
||||
fal: "fal/fal-ai/minimax/video-01-live",
|
||||
google: "google/veo-3.1-fast-generate-preview",
|
||||
|
|
@ -9,6 +10,7 @@ export const DEFAULT_LIVE_VIDEO_MODELS: Record<string, string> = {
|
|||
openai: "openai/sora-2",
|
||||
qwen: "qwen/wan2.6-t2v",
|
||||
together: "together/Wan-AI/Wan2.2-T2V-A14B",
|
||||
xai: "xai/grok-imagine-video",
|
||||
};
|
||||
|
||||
export function redactLiveApiKey(value: string | undefined): string {
|
||||
|
|
|
|||
Loading…
Reference in New Issue