diff --git a/CHANGELOG.md b/CHANGELOG.md index 947f80e3d33..249c8dbec27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,10 @@ Docs: https://docs.openclaw.ai - Agents/Claude CLI: expose OpenClaw tools to background Claude CLI runs through a loopback MCP bridge and switch bundled runs to stdin + `stream-json` partial-message streaming so prompts stop riding argv, long replies show live progress, and final session/usage metadata still land cleanly. (#35676) Thanks @mylukin. - ACPX/runtime: embed the ACP runtime directly in the bundled `acpx` plugin, remove the extra external ACP CLI hop, harden live ACP session binding and reuse, and add a generic `reply_dispatch` hook so bundled plugins like ACPX can own reply interception without hardcoded ACP paths in core auto-reply routing. (#61319) - Config/schema: enrich the exported `openclaw config schema` JSON Schema with field titles and descriptions so editors, agents, and other schema consumers receive the same config help metadata. (#60067) Thanks @solavrc. +- Agents/cache: diagnostics: add prompt-cache break diagnostics, trace live cache scenarios through embedded runner paths, and show cache reuse explicitly in `openclaw status --verbose`. Thanks @vincentkoc. +- Agents/cache: stabilize cache-relevant system prompt fingerprints by normalizing equivalent structured prompt whitespace, line endings, hook-added system context, and runtime capability ordering so semantically unchanged prompts reuse KV/cache more reliably. Thanks @vincentkoc. +- Agents/tool prompts: remove the duplicate in-band tool inventory from agent system prompts so tool-calling models rely on the structured tool definitions as the single source of truth, improving prompt stability and reducing stale tool guidance. +- Tools/video generation: add bundled xAI (`grok-imagine-video`) and Alibaba Model Studio Wan video providers, plus live-test/default model wiring for both. 
- Providers/CLI: remove bundled CLI text-provider backends and the `agents.defaults.cliBackends` surface, while keeping ACP harness sessions and Gemini media understanding on the native bundled providers. - Matrix/exec approvals: clarify unavailable-approval replies so Matrix no longer claims chat approvals are unsupported when native exec approvals are merely unconfigured. (#61424) Thanks @gumadeiras. - Docs/IRC: replace public IRC hostname examples with `irc.example.com` and recommend private servers for bot coordination while listing common public networks for intentional use. @@ -101,6 +105,7 @@ Docs: https://docs.openclaw.ai - Agents/errors: surface an explicit disk-full message when local session or transcript writes fail with `ENOSPC`/`disk full`, so those runs stop degrading into opaque `NO_REPLY`-style failures. Thanks @vincentkoc. - Exec approvals: remove heuristic command-obfuscation gating from host exec so gateway and node runs rely on explicit policy, allowlist, and strict inline-eval rules only. - Agents/tool results: cap live tool-result persistence and overflow-recovery truncation at 40k characters so oversized tool output stays bounded without discarding recent context entirely. +- Discord/video replies: split text-plus-video deliveries into a text reply followed by a media-only send, and let live provider auth checks honor manifest-declared API key env vars like `MODELSTUDIO_API_KEY`. - Config/All Settings: keep the raw config view intact when sensitive fields are blank instead of corrupting or dropping the rendered snapshot. (#28214) Thanks @solodmd. - Plugin SDK/facades: back-fill bundled plugin facade sentinels before plugin-id tracking re-enters config loading, so CLI/provider startup no longer crashes with `shouldNormalizeGoogleProviderConfig is not a function` or other empty-facade reads during bundled plugin re-entry. Thanks @adam91holt. 
- Plugins/facades: back-fill facade sentinels before tracked-plugin resolution re-enters config loading, so facade exports stay defined during circular provider normalization. (#61180) Thanks @adam91holt. diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index 8939c18a26d..1594a3e388e 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -1,5 +1,5 @@ --- -summary: "Generate videos using configured providers such as OpenAI, Google, Qwen, and MiniMax" +summary: "Generate videos using configured providers such as Alibaba, OpenAI, Google, Qwen, and MiniMax" read_when: - Generating videos via the agent - Configuring video generation providers and models @@ -17,7 +17,7 @@ The tool only appears when at least one video-generation provider is available. ## Quick start -1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `QWEN_API_KEY`). +1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, `MODELSTUDIO_API_KEY`, or `QWEN_API_KEY`). 2. Optionally set your preferred model: ```json5 @@ -38,6 +38,7 @@ The agent calls `video_generate` automatically. No tool allow-listing needed — | Provider | Default model | Reference inputs | API key | | -------- | ------------------------------- | ------------------ | ---------------------------------------------------------- | +| Alibaba | `wan2.6-t2v` | Yes, remote URLs | `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY`, `QWEN_API_KEY` | | BytePlus | `seedance-1-0-lite-t2v-250428` | 1 image | `BYTEPLUS_API_KEY` | | fal | `fal-ai/minimax/video-01-live` | 1 image | `FAL_KEY` | | Google | `veo-3.1-fast-generate-preview` | 1 image or 1 video | `GEMINI_API_KEY`, `GOOGLE_API_KEY` | @@ -45,6 +46,7 @@ The agent calls `video_generate` automatically. 
No tool allow-listing needed — | OpenAI | `sora-2` | 1 image or 1 video | `OPENAI_API_KEY` | | Qwen | `wan2.6-t2v` | Yes, remote URLs | `QWEN_API_KEY`, `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY` | | Together | `Wan-AI/Wan2.2-T2V-A14B` | 1 image | `TOGETHER_API_KEY` | +| xAI | `grok-imagine-video` | 1 image or 1 video | `XAI_API_KEY` | Use `action: "list"` to inspect available providers and models at runtime: @@ -105,10 +107,12 @@ If a provider fails, the next candidate is tried automatically. If all fail, the ## Provider notes -- OpenAI uses the native video endpoint and currently defaults to `sora-2`. +- Alibaba uses the DashScope / Model Studio async video endpoint and currently requires remote `http(s)` URLs for reference assets. - Google uses Gemini/Veo and supports a single image or video reference input. - MiniMax, Together, BytePlus, and fal currently support a single image reference input. +- OpenAI uses the native video endpoint and currently defaults to `sora-2`. - Qwen supports image/video references, but the upstream DashScope video endpoint currently requires remote `http(s)` URLs for those references. +- xAI uses the native xAI video API and supports text-to-video, image-to-video, and remote video edit/extend flows. 
## Qwen reference inputs diff --git a/extensions/alibaba/index.ts b/extensions/alibaba/index.ts new file mode 100644 index 00000000000..d8485bdfca5 --- /dev/null +++ b/extensions/alibaba/index.ts @@ -0,0 +1,11 @@ +import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; +import { buildAlibabaVideoGenerationProvider } from "./video-generation-provider.js"; + +export default definePluginEntry({ + id: "alibaba", + name: "Alibaba Model Studio Plugin", + description: "Bundled Alibaba Model Studio video provider plugin", + register(api) { + api.registerVideoGenerationProvider(buildAlibabaVideoGenerationProvider()); + }, +}); diff --git a/extensions/alibaba/openclaw.plugin.json b/extensions/alibaba/openclaw.plugin.json new file mode 100644 index 00000000000..8287eb71256 --- /dev/null +++ b/extensions/alibaba/openclaw.plugin.json @@ -0,0 +1,30 @@ +{ + "id": "alibaba", + "enabledByDefault": true, + "providerAuthEnvVars": { + "alibaba": ["MODELSTUDIO_API_KEY", "DASHSCOPE_API_KEY", "QWEN_API_KEY"] + }, + "providerAuthChoices": [ + { + "provider": "alibaba", + "method": "api-key", + "choiceId": "alibaba-model-studio-api-key", + "choiceLabel": "Alibaba Model Studio API key", + "groupId": "alibaba", + "groupLabel": "Alibaba Model Studio", + "groupHint": "DashScope / Model Studio API key", + "optionKey": "alibabaModelStudioApiKey", + "cliFlag": "--alibaba-model-studio-api-key", + "cliOption": "--alibaba-model-studio-api-key <key>", + "cliDescription": "Alibaba Model Studio API key" + } + ], + "contracts": { + "videoGenerationProviders": ["alibaba"] + }, + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": {} + } +} diff --git a/extensions/alibaba/package.json b/extensions/alibaba/package.json new file mode 100644 index 00000000000..fe37e71124e --- /dev/null +++ b/extensions/alibaba/package.json @@ -0,0 +1,12 @@ +{ + "name": "@openclaw/alibaba-provider", + "version": "2026.4.5", + "private": true, + "description": "OpenClaw Alibaba Model 
Studio video provider plugin", + "type": "module", + "openclaw": { + "extensions": [ + "./index.ts" + ] + } +} diff --git a/extensions/alibaba/plugin-registration.contract.test.ts b/extensions/alibaba/plugin-registration.contract.test.ts new file mode 100644 index 00000000000..2f0e360981b --- /dev/null +++ b/extensions/alibaba/plugin-registration.contract.test.ts @@ -0,0 +1,7 @@ +import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js"; + +describePluginRegistrationContract({ + pluginId: "alibaba", + videoGenerationProviderIds: ["alibaba"], + requireGenerateVideo: true, +}); diff --git a/extensions/alibaba/video-generation-provider.test.ts b/extensions/alibaba/video-generation-provider.test.ts new file mode 100644 index 00000000000..56d1bc36002 --- /dev/null +++ b/extensions/alibaba/video-generation-provider.test.ts @@ -0,0 +1,133 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { buildAlibabaVideoGenerationProvider } from "./video-generation-provider.js"; + +const { + resolveApiKeyForProviderMock, + postJsonRequestMock, + fetchWithTimeoutMock, + assertOkOrThrowHttpErrorMock, + resolveProviderHttpRequestConfigMock, +} = vi.hoisted(() => ({ + resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "alibaba-key" })), + postJsonRequestMock: vi.fn(), + fetchWithTimeoutMock: vi.fn(), + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + resolveProviderHttpRequestConfigMock: vi.fn((params) => ({ + baseUrl: params.baseUrl ?? 
params.defaultBaseUrl, + allowPrivateNetwork: false, + headers: new Headers(params.defaultHeaders), + dispatcherPolicy: undefined, + })), +})); + +vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ + resolveApiKeyForProvider: resolveApiKeyForProviderMock, +})); + +vi.mock("openclaw/plugin-sdk/provider-http", () => ({ + assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, + fetchWithTimeout: fetchWithTimeoutMock, + postJsonRequest: postJsonRequestMock, + resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, +})); + +describe("alibaba video generation provider", () => { + afterEach(() => { + resolveApiKeyForProviderMock.mockClear(); + postJsonRequestMock.mockReset(); + fetchWithTimeoutMock.mockReset(); + assertOkOrThrowHttpErrorMock.mockClear(); + resolveProviderHttpRequestConfigMock.mockClear(); + }); + + it("submits async Wan generation, polls task status, and downloads the resulting video", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + request_id: "req-1", + output: { + task_id: "task-1", + }, + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock + .mockResolvedValueOnce({ + json: async () => ({ + output: { + task_status: "SUCCEEDED", + results: [{ video_url: "https://example.com/out.mp4" }], + }, + }), + headers: new Headers(), + }) + .mockResolvedValueOnce({ + arrayBuffer: async () => Buffer.from("mp4-bytes"), + headers: new Headers({ "content-type": "video/mp4" }), + }); + + const provider = buildAlibabaVideoGenerationProvider(); + const result = await provider.generateVideo({ + provider: "alibaba", + model: "wan2.6-r2v-flash", + prompt: "animate this shot", + cfg: {}, + inputImages: [{ url: "https://example.com/ref.png" }], + durationSeconds: 6, + audio: true, + watermark: false, + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: 
"https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis", + body: expect.objectContaining({ + model: "wan2.6-r2v-flash", + input: expect.objectContaining({ + prompt: "animate this shot", + img_url: "https://example.com/ref.png", + }), + parameters: expect.objectContaining({ + duration: 6, + enable_audio: true, + watermark: false, + }), + }), + }), + ); + expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( + 1, + "https://dashscope-intl.aliyuncs.com/api/v1/tasks/task-1", + expect.objectContaining({ method: "GET" }), + 120000, + fetch, + ); + expect(result.videos).toHaveLength(1); + expect(result.videos[0]?.mimeType).toBe("video/mp4"); + expect(result.metadata).toEqual( + expect.objectContaining({ + requestId: "req-1", + taskId: "task-1", + taskStatus: "SUCCEEDED", + }), + ); + }); + + it("fails fast when reference inputs are local buffers instead of remote URLs", async () => { + const provider = buildAlibabaVideoGenerationProvider(); + + await expect( + provider.generateVideo({ + provider: "alibaba", + model: "wan2.6-i2v", + prompt: "animate this local frame", + cfg: {}, + inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], + }), + ).rejects.toThrow( + "Alibaba Wan video generation currently requires remote http(s) URLs for reference images/videos.", + ); + expect(postJsonRequestMock).not.toHaveBeenCalled(); + }); +}); diff --git a/extensions/alibaba/video-generation-provider.ts b/extensions/alibaba/video-generation-provider.ts new file mode 100644 index 00000000000..9f3cff78681 --- /dev/null +++ b/extensions/alibaba/video-generation-provider.ts @@ -0,0 +1,293 @@ +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; +import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { + assertOkOrThrowHttpError, + fetchWithTimeout, + postJsonRequest, + resolveProviderHttpRequestConfig, +} from "openclaw/plugin-sdk/provider-http"; +import type { + 
GeneratedVideoAsset, + VideoGenerationProvider, + VideoGenerationRequest, + VideoGenerationResult, + VideoGenerationSourceAsset, +} from "openclaw/plugin-sdk/video-generation"; + +const DEFAULT_ALIBABA_VIDEO_BASE_URL = "https://dashscope-intl.aliyuncs.com"; +const DEFAULT_ALIBABA_VIDEO_MODEL = "wan2.6-t2v"; +const DEFAULT_DURATION_SECONDS = 5; +const DEFAULT_TIMEOUT_MS = 120_000; +const POLL_INTERVAL_MS = 2_500; +const MAX_POLL_ATTEMPTS = 120; +const RESOLUTION_TO_SIZE: Record<string, string> = { + "480P": "832*480", + "720P": "1280*720", + "1080P": "1920*1080", +}; + +type AlibabaVideoGenerationResponse = { + output?: { + task_id?: string; + task_status?: string; + submit_time?: string; + results?: Array<{ + video_url?: string; + orig_prompt?: string; + actual_prompt?: string; + }>; + video_url?: string; + code?: string; + message?: string; + }; + request_id?: string; + code?: string; + message?: string; +}; + +function resolveAlibabaVideoBaseUrl(req: VideoGenerationRequest): string { + return req.cfg?.models?.providers?.alibaba?.baseUrl?.trim() || DEFAULT_ALIBABA_VIDEO_BASE_URL; +} + +function resolveDashscopeAigcApiBaseUrl(baseUrl: string): string { + return baseUrl.replace(/\/+$/u, ""); +} + +function resolveReferenceUrls( + inputImages: VideoGenerationSourceAsset[] | undefined, + inputVideos: VideoGenerationSourceAsset[] | undefined, +): string[] { + return [...(inputImages ?? []), ...(inputVideos ?? [])] + .map((asset) => asset.url?.trim()) + .filter((value): value is string => Boolean(value)); +} + +function assertAlibabaReferenceInputsSupported( + inputImages: VideoGenerationSourceAsset[] | undefined, + inputVideos: VideoGenerationSourceAsset[] | undefined, +): void { + const unsupported = [...(inputImages ?? []), ...(inputVideos ?? 
[])].some( + (asset) => !asset.url?.trim() && asset.buffer, + ); + if (unsupported) { + throw new Error( + "Alibaba Wan video generation currently requires remote http(s) URLs for reference images/videos.", + ); + } +} + +function buildAlibabaVideoGenerationInput(req: VideoGenerationRequest): Record<string, unknown> { + assertAlibabaReferenceInputsSupported(req.inputImages, req.inputVideos); + const input: Record<string, unknown> = { + prompt: req.prompt, + }; + const referenceUrls = resolveReferenceUrls(req.inputImages, req.inputVideos); + if ( + referenceUrls.length === 1 && + (req.inputImages?.length ?? 0) === 1 && + !req.inputVideos?.length + ) { + input.img_url = referenceUrls[0]; + } else if (referenceUrls.length > 0) { + input.reference_urls = referenceUrls; + } + return input; +} + +function buildAlibabaVideoGenerationParameters( + req: VideoGenerationRequest, +): Record<string, unknown> | undefined { + const parameters: Record<string, unknown> = {}; + const size = + req.size?.trim() || (req.resolution ? RESOLUTION_TO_SIZE[req.resolution] : undefined); + if (size) { + parameters.size = size; + } + if (req.aspectRatio?.trim()) { + parameters.aspect_ratio = req.aspectRatio.trim(); + } + if (typeof req.durationSeconds === "number" && Number.isFinite(req.durationSeconds)) { + parameters.duration = Math.max(1, Math.round(req.durationSeconds)); + } + if (typeof req.audio === "boolean") { + parameters.enable_audio = req.audio; + } + if (typeof req.watermark === "boolean") { + parameters.watermark = req.watermark; + } + return Object.keys(parameters).length > 0 ? parameters : undefined; +} + +function extractVideoUrls(payload: AlibabaVideoGenerationResponse): string[] { + const urls = [ + ...(payload.output?.results?.map((entry) => entry.video_url).filter(Boolean) ?? 
[]), + payload.output?.video_url, + ].filter((value): value is string => typeof value === "string" && value.trim().length > 0); + return [...new Set(urls)]; +} + +async function pollTaskUntilComplete(params: { + taskId: string; + headers: Headers; + timeoutMs?: number; + fetchFn: typeof fetch; + baseUrl: string; +}): Promise<AlibabaVideoGenerationResponse> { + for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) { + const response = await fetchWithTimeout( + `${params.baseUrl}/api/v1/tasks/${params.taskId}`, + { + method: "GET", + headers: params.headers, + }, + params.timeoutMs ?? DEFAULT_TIMEOUT_MS, + params.fetchFn, + ); + await assertOkOrThrowHttpError(response, "Alibaba Wan video-generation task poll failed"); + const payload = (await response.json()) as AlibabaVideoGenerationResponse; + const status = payload.output?.task_status?.trim().toUpperCase(); + if (status === "SUCCEEDED") { + return payload; + } + if (status === "FAILED" || status === "CANCELED") { + throw new Error( + payload.output?.message?.trim() || + payload.message?.trim() || + `Alibaba Wan video generation task ${params.taskId} ${status?.toLowerCase()}`, + ); + } + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)); + } + throw new Error(`Alibaba Wan video generation task ${params.taskId} did not finish in time`); +} + +async function downloadGeneratedVideos(params: { + urls: string[]; + timeoutMs?: number; + fetchFn: typeof fetch; +}): Promise<GeneratedVideoAsset[]> { + const videos: GeneratedVideoAsset[] = []; + for (const [index, url] of params.urls.entries()) { + const response = await fetchWithTimeout( + url, + { method: "GET" }, + params.timeoutMs ?? 
DEFAULT_TIMEOUT_MS, + params.fetchFn, + ); + await assertOkOrThrowHttpError(response, "Alibaba Wan generated video download failed"); + const arrayBuffer = await response.arrayBuffer(); + videos.push({ + buffer: Buffer.from(arrayBuffer), + mimeType: response.headers.get("content-type")?.trim() || "video/mp4", + fileName: `video-${index + 1}.mp4`, + metadata: { sourceUrl: url }, + }); + } + return videos; +} + +export function buildAlibabaVideoGenerationProvider(): VideoGenerationProvider { + return { + id: "alibaba", + label: "Alibaba Model Studio", + defaultModel: DEFAULT_ALIBABA_VIDEO_MODEL, + models: ["wan2.6-t2v", "wan2.6-i2v", "wan2.6-r2v", "wan2.6-r2v-flash", "wan2.7-r2v"], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "alibaba", + agentDir, + }), + capabilities: { + maxVideos: 1, + maxInputImages: 1, + maxInputVideos: 4, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + async generateVideo(req): Promise<VideoGenerationResult> { + const fetchFn = fetch; + const auth = await resolveApiKeyForProvider({ + provider: "alibaba", + cfg: req.cfg, + agentDir: req.agentDir, + store: req.authStore, + }); + if (!auth.apiKey) { + throw new Error("Alibaba Model Studio API key missing"); + } + + const requestBaseUrl = resolveAlibabaVideoBaseUrl(req); + const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = + resolveProviderHttpRequestConfig({ + baseUrl: requestBaseUrl, + defaultBaseUrl: DEFAULT_ALIBABA_VIDEO_BASE_URL, + defaultHeaders: { + Authorization: `Bearer ${auth.apiKey}`, + "Content-Type": "application/json", + "X-DashScope-Async": "enable", + }, + provider: "alibaba", + capability: "video", + transport: "http", + }); + + const model = req.model?.trim() || DEFAULT_ALIBABA_VIDEO_MODEL; + const { response, release } = await postJsonRequest({ + url: 
`${resolveDashscopeAigcApiBaseUrl(baseUrl)}/api/v1/services/aigc/video-generation/video-synthesis`, + headers, + body: { + model, + input: buildAlibabaVideoGenerationInput(req), + parameters: buildAlibabaVideoGenerationParameters({ + ...req, + durationSeconds: req.durationSeconds ?? DEFAULT_DURATION_SECONDS, + }), + }, + timeoutMs: req.timeoutMs, + fetchFn, + allowPrivateNetwork, + dispatcherPolicy, + }); + + try { + await assertOkOrThrowHttpError(response, "Alibaba Wan video generation failed"); + const submitted = (await response.json()) as AlibabaVideoGenerationResponse; + const taskId = submitted.output?.task_id?.trim(); + if (!taskId) { + throw new Error("Alibaba Wan video generation response missing task_id"); + } + const completed = await pollTaskUntilComplete({ + taskId, + headers, + timeoutMs: req.timeoutMs, + fetchFn, + baseUrl: resolveDashscopeAigcApiBaseUrl(baseUrl), + }); + const urls = extractVideoUrls(completed); + if (urls.length === 0) { + throw new Error("Alibaba Wan video generation completed without output video URLs"); + } + const videos = await downloadGeneratedVideos({ + urls, + timeoutMs: req.timeoutMs, + fetchFn, + }); + return { + videos, + model, + metadata: { + requestId: submitted.request_id, + taskId, + taskStatus: completed.output?.task_status, + }, + }; + } finally { + await release(); + } + }, + }; +} diff --git a/extensions/discord/src/channel.test.ts b/extensions/discord/src/channel.test.ts index bcea3b5d49a..5bce980c032 100644 --- a/extensions/discord/src/channel.test.ts +++ b/extensions/discord/src/channel.test.ts @@ -138,8 +138,9 @@ describe("discordPlugin outbound", () => { expect(resolveReplyToMode({ cfg, accountId: "default" })).toBe("all"); }); - it("forwards mediaLocalRoots to sendMessageDiscord", async () => { + it("forwards full media send context to sendMessageDiscord", async () => { const sendMessageDiscord = vi.fn(async () => ({ messageId: "m1" })); + const mediaReadFile = vi.fn(async () => Buffer.from("media")); 
const result = await discordPlugin.outbound!.sendMedia!({ cfg: {} as OpenClawConfig, @@ -147,23 +148,102 @@ describe("discordPlugin outbound", () => { text: "hi", mediaUrl: "/tmp/image.png", mediaLocalRoots: ["/tmp/agent-root"], + mediaReadFile, accountId: "work", + threadId: "thread-123", + replyToId: "reply-123", deps: { discord: sendMessageDiscord, }, }); expect(sendMessageDiscord).toHaveBeenCalledWith( - "channel:123", + "channel:thread-123", "hi", expect.objectContaining({ mediaUrl: "/tmp/image.png", mediaLocalRoots: ["/tmp/agent-root"], + mediaReadFile, + replyTo: "reply-123", }), ); expect(result).toMatchObject({ channel: "discord", messageId: "m1" }); }); + it("splits text and video into separate sends for attached outbound delivery", async () => { + const sendMessageDiscord = vi + .fn() + .mockResolvedValueOnce({ messageId: "text-1" }) + .mockResolvedValueOnce({ messageId: "video-1" }); + + const result = await discordPlugin.outbound!.sendMedia!({ + cfg: {} as OpenClawConfig, + to: "channel:123", + text: "done - tiny cyber-lobster clip incoming", + mediaUrl: "/tmp/molty.mp4", + accountId: "work", + replyToId: "reply-123", + threadId: "thread-123", + deps: { + discord: sendMessageDiscord, + }, + }); + + expect(sendMessageDiscord).toHaveBeenCalledTimes(2); + expect(sendMessageDiscord).toHaveBeenNthCalledWith( + 1, + "channel:thread-123", + "done - tiny cyber-lobster clip incoming", + expect.objectContaining({ + replyTo: "reply-123", + }), + ); + expect(sendMessageDiscord).toHaveBeenNthCalledWith( + 2, + "channel:thread-123", + "", + expect.objectContaining({ + mediaUrl: "/tmp/molty.mp4", + }), + ); + expect(result).toMatchObject({ channel: "discord", messageId: "video-1" }); + }); + + it("threads poll sends through the thread target", async () => { + const sendPollDiscord = vi.fn(async () => ({ + channelId: "channel:thread-123", + messageId: "poll-1", + })); + const sendModule = await import("./send.js"); + const sendPollSpy = vi.spyOn(sendModule, 
"sendPollDiscord").mockImplementation(sendPollDiscord); + try { + const result = await discordPlugin.outbound!.sendPoll!({ + cfg: {} as OpenClawConfig, + to: "channel:123", + poll: { + question: "Best shell?", + options: ["molty", "molter"], + }, + accountId: "work", + threadId: "thread-123", + }); + + expect(sendPollDiscord).toHaveBeenCalledWith( + "channel:thread-123", + { + question: "Best shell?", + options: ["molty", "molter"], + }, + expect.objectContaining({ + accountId: "work", + }), + ); + expect(result).toMatchObject({ channel: "discord", messageId: "poll-1" }); + } finally { + sendPollSpy.mockRestore(); + } + }); + it("uses direct Discord probe helpers for status probes", async () => { const runtimeProbeDiscord = vi.fn(async () => { throw new Error("runtime Discord probe should not be used"); diff --git a/extensions/discord/src/channel.ts b/extensions/discord/src/channel.ts index 349e9384432..758440f202f 100644 --- a/extensions/discord/src/channel.ts +++ b/extensions/discord/src/channel.ts @@ -134,6 +134,43 @@ const meta = { }; const REQUIRED_DISCORD_PERMISSIONS = ["ViewChannel", "SendMessages"] as const; const DISCORD_ACCOUNT_STARTUP_STAGGER_MS = 10_000; +const DISCORD_VIDEO_MEDIA_EXTENSIONS = new Set([".avi", ".m4v", ".mkv", ".mov", ".mp4", ".webm"]); + +function normalizeMediaPathForExtension(mediaUrl: string): string { + const trimmed = mediaUrl.trim(); + if (!trimmed) { + return ""; + } + try { + const parsed = new URL(trimmed); + return parsed.pathname.toLowerCase(); + } catch { + const withoutHash = trimmed.split("#", 1)[0] ?? trimmed; + const withoutQuery = withoutHash.split("?", 1)[0] ?? 
withoutHash; + return withoutQuery.toLowerCase(); + } +} + +function isLikelyDiscordVideoMedia(mediaUrl: string): boolean { + const normalized = normalizeMediaPathForExtension(mediaUrl); + for (const ext of DISCORD_VIDEO_MEDIA_EXTENSIONS) { + if (normalized.endsWith(ext)) { + return true; + } + } + return false; +} + +function resolveDiscordAttachedOutboundTarget(params: { + to: string; + threadId?: string | number | null; +}): string { + if (params.threadId == null) { + return params.to; + } + const threadId = String(params.threadId).trim(); + return threadId ? `channel:${threadId}` : params.to; +} function resolveRuntimeDiscordMessageActions() { try { @@ -823,9 +860,9 @@ export const discordPlugin: ChannelPlugin }, attachedResults: { channel: "discord", - sendText: async ({ cfg, to, text, accountId, deps, replyToId, silent }) => { + sendText: async ({ cfg, to, text, accountId, deps, replyToId, threadId, silent }) => { const send = await resolveDiscordSend(deps); - return await send(to, text, { + return await send(resolveDiscordAttachedOutboundTarget({ to, threadId }), text, { verbose: false, cfg, replyTo: replyToId ?? undefined, @@ -839,26 +876,48 @@ export const discordPlugin: ChannelPlugin text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, + threadId, silent, }) => { const send = await resolveDiscordSend(deps); - return await send(to, text, { + const target = resolveDiscordAttachedOutboundTarget({ to, threadId }); + if (text.trim() && mediaUrl && isLikelyDiscordVideoMedia(mediaUrl)) { + await send(target, text, { + verbose: false, + cfg, + replyTo: replyToId ?? undefined, + accountId: accountId ?? undefined, + silent: silent ?? undefined, + }); + return await send(target, "", { + verbose: false, + cfg, + mediaUrl, + mediaLocalRoots, + mediaReadFile, + accountId: accountId ?? undefined, + silent: silent ?? 
undefined, + }); + } + return await send(target, text, { verbose: false, cfg, mediaUrl, mediaLocalRoots, + mediaReadFile, replyTo: replyToId ?? undefined, accountId: accountId ?? undefined, silent: silent ?? undefined, }); }, - sendPoll: async ({ cfg, to, poll, accountId, silent }) => + sendPoll: async ({ cfg, to, poll, accountId, threadId, silent }) => await ( await loadDiscordSendModule() - ).sendPollDiscord(to, poll, { + ).sendPollDiscord(resolveDiscordAttachedOutboundTarget({ to, threadId }), poll, { cfg, accountId: accountId ?? undefined, silent: silent ?? undefined, diff --git a/extensions/video-generation-providers.live.test.ts b/extensions/video-generation-providers.live.test.ts index a7879148625..1630a64afc7 100644 --- a/extensions/video-generation-providers.live.test.ts +++ b/extensions/video-generation-providers.live.test.ts @@ -13,6 +13,7 @@ import { registerProviderPlugin, requireRegisteredProvider, } from "../test/helpers/plugins/provider-registration.js"; +import alibabaPlugin from "./alibaba/index.js"; import byteplusPlugin from "./byteplus/index.js"; import falPlugin from "./fal/index.js"; import googlePlugin from "./google/index.js"; @@ -20,6 +21,7 @@ import minimaxPlugin from "./minimax/index.js"; import openaiPlugin from "./openai/index.js"; import qwenPlugin from "./qwen/index.js"; import togetherPlugin from "./together/index.js"; +import xaiPlugin from "./xai/index.js"; const LIVE = isLiveTestEnabled(); const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS); @@ -33,6 +35,12 @@ type LiveProviderCase = { }; const CASES: LiveProviderCase[] = [ + { + plugin: alibabaPlugin, + pluginId: "alibaba", + pluginName: "Alibaba Model Studio Plugin", + providerId: "alibaba", + }, { plugin: byteplusPlugin, pluginId: "byteplus", @@ -55,6 +63,7 @@ const CASES: LiveProviderCase[] = [ pluginName: "Together Provider", providerId: "together", }, + { plugin: xaiPlugin, pluginId: "xai", pluginName: "xAI Plugin", providerId: "xai" 
}, ] .filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true)) .toSorted((left, right) => left.providerId.localeCompare(right.providerId)); diff --git a/extensions/xai/index.ts b/extensions/xai/index.ts index 3efae042fa1..30662e77347 100644 --- a/extensions/xai/index.ts +++ b/extensions/xai/index.ts @@ -16,6 +16,7 @@ import { isModernXaiModel, resolveXaiForwardCompatModel } from "./provider-model import { resolveFallbackXaiAuth } from "./src/tool-auth-shared.js"; import { resolveEffectiveXSearchConfig } from "./src/x-search-config.js"; import { wrapXaiProviderStream } from "./stream.js"; +import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js"; import { createXaiWebSearchProvider } from "./web-search.js"; const PROVIDER_ID = "xai"; @@ -230,6 +231,7 @@ export default defineSingleProviderPluginEntry({ }, register(api) { api.registerWebSearchProvider(createXaiWebSearchProvider()); + api.registerVideoGenerationProvider(buildXaiVideoGenerationProvider()); api.registerTool((ctx) => createLazyCodeExecutionTool(ctx), { name: "code_execution" }); api.registerTool((ctx) => createLazyXSearchTool(ctx), { name: "x_search" }); }, diff --git a/extensions/xai/openclaw.plugin.json b/extensions/xai/openclaw.plugin.json index 20e16eaba92..25eb9735e3a 100644 --- a/extensions/xai/openclaw.plugin.json +++ b/extensions/xai/openclaw.plugin.json @@ -77,6 +77,7 @@ }, "contracts": { "webSearchProviders": ["grok"], + "videoGenerationProviders": ["xai"], "tools": ["code_execution", "x_search"] }, "configSchema": { diff --git a/extensions/xai/plugin-registration.contract.test.ts b/extensions/xai/plugin-registration.contract.test.ts new file mode 100644 index 00000000000..1de5bd17bd2 --- /dev/null +++ b/extensions/xai/plugin-registration.contract.test.ts @@ -0,0 +1,10 @@ +import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js"; + +describePluginRegistrationContract({ + pluginId: "xai", + 
providerIds: ["xai"], + webSearchProviderIds: ["grok"], + videoGenerationProviderIds: ["xai"], + toolNames: ["code_execution", "x_search"], + requireGenerateVideo: true, +}); diff --git a/extensions/xai/video-generation-provider.test.ts b/extensions/xai/video-generation-provider.test.ts new file mode 100644 index 00000000000..314ee80ac0f --- /dev/null +++ b/extensions/xai/video-generation-provider.test.ts @@ -0,0 +1,146 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js"; + +const { + resolveApiKeyForProviderMock, + postJsonRequestMock, + fetchWithTimeoutMock, + assertOkOrThrowHttpErrorMock, + resolveProviderHttpRequestConfigMock, +} = vi.hoisted(() => ({ + resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "xai-key" })), + postJsonRequestMock: vi.fn(), + fetchWithTimeoutMock: vi.fn(), + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + resolveProviderHttpRequestConfigMock: vi.fn((params) => ({ + baseUrl: params.baseUrl ?? 
params.defaultBaseUrl, + allowPrivateNetwork: false, + headers: new Headers(params.defaultHeaders), + dispatcherPolicy: undefined, + })), +})); + +vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ + resolveApiKeyForProvider: resolveApiKeyForProviderMock, +})); + +vi.mock("openclaw/plugin-sdk/provider-http", () => ({ + assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, + fetchWithTimeout: fetchWithTimeoutMock, + postJsonRequest: postJsonRequestMock, + resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, +})); + +describe("xai video generation provider", () => { + afterEach(() => { + resolveApiKeyForProviderMock.mockClear(); + postJsonRequestMock.mockReset(); + fetchWithTimeoutMock.mockReset(); + assertOkOrThrowHttpErrorMock.mockClear(); + resolveProviderHttpRequestConfigMock.mockClear(); + }); + + it("creates, polls, and downloads a generated video", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + request_id: "req_123", + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock + .mockResolvedValueOnce({ + json: async () => ({ + request_id: "req_123", + status: "done", + video: { url: "https://cdn.x.ai/video.mp4" }, + }), + }) + .mockResolvedValueOnce({ + headers: new Headers({ "content-type": "video/mp4" }), + arrayBuffer: async () => Buffer.from("mp4-bytes"), + }); + + const provider = buildXaiVideoGenerationProvider(); + const result = await provider.generateVideo({ + provider: "xai", + model: "grok-imagine-video", + prompt: "A tiny robot crab crossing a moonlit tide pool", + cfg: {}, + durationSeconds: 6, + aspectRatio: "16:9", + resolution: "720P", + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://api.x.ai/v1/videos/generations", + body: expect.objectContaining({ + model: "grok-imagine-video", + prompt: "A tiny robot crab crossing a moonlit tide pool", + duration: 6, + aspect_ratio: "16:9", + resolution: "720p", 
+ }), + }), + ); + expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( + 1, + "https://api.x.ai/v1/videos/req_123", + expect.objectContaining({ method: "GET" }), + 120000, + fetch, + ); + expect(result.videos[0]?.mimeType).toBe("video/mp4"); + expect(result.metadata).toEqual( + expect.objectContaining({ + requestId: "req_123", + mode: "generate", + }), + ); + }); + + it("routes video inputs to the extension endpoint when duration is set", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + request_id: "req_extend", + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock + .mockResolvedValueOnce({ + json: async () => ({ + request_id: "req_extend", + status: "done", + video: { url: "https://cdn.x.ai/extended.mp4" }, + }), + }) + .mockResolvedValueOnce({ + headers: new Headers({ "content-type": "video/mp4" }), + arrayBuffer: async () => Buffer.from("extended-bytes"), + }); + + const provider = buildXaiVideoGenerationProvider(); + await provider.generateVideo({ + provider: "xai", + model: "grok-imagine-video", + prompt: "Continue the shot into a neon alleyway", + cfg: {}, + durationSeconds: 8, + inputVideos: [{ url: "https://example.com/input.mp4" }], + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://api.x.ai/v1/videos/extensions", + body: expect.objectContaining({ + video: { url: "https://example.com/input.mp4" }, + duration: 8, + }), + }), + ); + }); +}); diff --git a/extensions/xai/video-generation-provider.ts b/extensions/xai/video-generation-provider.ts new file mode 100644 index 00000000000..ed45b0bd8ff --- /dev/null +++ b/extensions/xai/video-generation-provider.ts @@ -0,0 +1,338 @@ +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; +import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { + assertOkOrThrowHttpError, + fetchWithTimeout, + postJsonRequest, + 
resolveProviderHttpRequestConfig, +} from "openclaw/plugin-sdk/provider-http"; +import type { + GeneratedVideoAsset, + VideoGenerationProvider, + VideoGenerationRequest, + VideoGenerationSourceAsset, +} from "openclaw/plugin-sdk/video-generation"; + +const DEFAULT_XAI_VIDEO_BASE_URL = "https://api.x.ai/v1"; +const DEFAULT_XAI_VIDEO_MODEL = "grok-imagine-video"; +const DEFAULT_TIMEOUT_MS = 120_000; +const POLL_INTERVAL_MS = 5_000; +const MAX_POLL_ATTEMPTS = 120; +const XAI_VIDEO_ASPECT_RATIOS = new Set(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"]); + +type XaiVideoCreateResponse = { + request_id?: string; + error?: { + code?: string; + message?: string; + } | null; +}; + +type XaiVideoStatusResponse = { + request_id?: string; + status?: "queued" | "processing" | "done" | "failed" | "expired"; + video?: { + url?: string; + } | null; + error?: { + code?: string; + message?: string; + } | null; +}; + +function resolveXaiVideoBaseUrl(req: VideoGenerationRequest): string { + return req.cfg?.models?.providers?.xai?.baseUrl?.trim() || DEFAULT_XAI_VIDEO_BASE_URL; +} + +function toDataUrl(buffer: Buffer, mimeType: string): string { + return `data:${mimeType};base64,${buffer.toString("base64")}`; +} + +function resolveImageUrl(input: VideoGenerationSourceAsset | undefined): string | undefined { + if (!input) { + return undefined; + } + if (input.url?.trim()) { + return input.url.trim(); + } + if (!input.buffer) { + throw new Error("xAI image-to-video input is missing image data."); + } + return toDataUrl(input.buffer, input.mimeType?.trim() || "image/png"); +} + +function resolveInputVideoUrl(input: VideoGenerationSourceAsset | undefined): string | undefined { + if (!input) { + return undefined; + } + const url = input.url?.trim(); + if (url) { + return url; + } + if (input.buffer) { + throw new Error("xAI video editing currently requires a remote mp4 URL input."); + } + throw new Error("xAI video editing input is missing video data."); +} + +function 
resolveDurationSeconds(params: { + durationSeconds?: number; + min?: number; + max?: number; +}): number | undefined { + if (typeof params.durationSeconds !== "number" || !Number.isFinite(params.durationSeconds)) { + return undefined; + } + const rounded = Math.round(params.durationSeconds); + return Math.max(params.min ?? 1, Math.min(params.max ?? 15, rounded)); +} + +function resolveAspectRatio(value: string | undefined): string | undefined { + const trimmed = value?.trim(); + if (!trimmed || !XAI_VIDEO_ASPECT_RATIOS.has(trimmed)) { + return undefined; + } + return trimmed; +} + +function resolveResolution(value: string | undefined): "480p" | "720p" | undefined { + if (value === "480P") { + return "480p"; + } + if (value === "720P" || value === "1080P") { + return "720p"; + } + return undefined; +} + +function resolveXaiVideoMode(req: VideoGenerationRequest): "generate" | "edit" | "extend" { + const hasVideoInput = (req.inputVideos?.length ?? 0) > 0; + if (!hasVideoInput) { + return "generate"; + } + return typeof resolveDurationSeconds({ + durationSeconds: req.durationSeconds, + min: 2, + max: 10, + }) === "number" + ? "extend" + : "edit"; +} + +function buildCreateBody(req: VideoGenerationRequest): Record { + if ((req.inputImages?.length ?? 0) > 1) { + throw new Error("xAI video generation supports at most one reference image."); + } + if ((req.inputVideos?.length ?? 0) > 1) { + throw new Error("xAI video generation supports at most one input video."); + } + if ((req.inputImages?.length ?? 0) > 0 && (req.inputVideos?.length ?? 
0) > 0) { + throw new Error("xAI video generation does not support image and video inputs together."); + } + + const mode = resolveXaiVideoMode(req); + const body: Record = { + model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL, + prompt: req.prompt, + }; + + if (mode === "generate") { + const imageUrl = resolveImageUrl(req.inputImages?.[0]); + if (imageUrl) { + body.image = { url: imageUrl }; + } + const duration = resolveDurationSeconds({ + durationSeconds: req.durationSeconds, + min: 1, + max: 15, + }); + if (typeof duration === "number") { + body.duration = duration; + } + const aspectRatio = resolveAspectRatio(req.aspectRatio); + if (aspectRatio) { + body.aspect_ratio = aspectRatio; + } + const resolution = resolveResolution(req.resolution); + if (resolution) { + body.resolution = resolution; + } + return body; + } + + body.video = { url: resolveInputVideoUrl(req.inputVideos?.[0]) }; + if (mode === "extend") { + const duration = resolveDurationSeconds({ + durationSeconds: req.durationSeconds, + min: 2, + max: 10, + }); + if (typeof duration === "number") { + body.duration = duration; + } + } + return body; +} + +function resolveCreateEndpoint(req: VideoGenerationRequest): string { + switch (resolveXaiVideoMode(req)) { + case "edit": + return "/videos/edits"; + case "extend": + return "/videos/extensions"; + case "generate": + default: + return "/videos/generations"; + } +} + +async function pollXaiVideo(params: { + requestId: string; + headers: Headers; + timeoutMs?: number; + baseUrl: string; + fetchFn: typeof fetch; +}): Promise { + for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) { + const response = await fetchWithTimeout( + `${params.baseUrl}/videos/${params.requestId}`, + { + method: "GET", + headers: params.headers, + }, + params.timeoutMs ?? 
DEFAULT_TIMEOUT_MS, + params.fetchFn, + ); + await assertOkOrThrowHttpError(response, "xAI video status request failed"); + const payload = (await response.json()) as XaiVideoStatusResponse; + switch (payload.status) { + case "done": + return payload; + case "failed": + case "expired": + throw new Error(payload.error?.message?.trim() || `xAI video generation ${payload.status}`); + case "queued": + case "processing": + default: + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)); + break; + } + } + throw new Error(`xAI video generation task ${params.requestId} did not finish in time`); +} + +async function downloadXaiVideo(params: { + url: string; + timeoutMs?: number; + fetchFn: typeof fetch; +}): Promise { + const response = await fetchWithTimeout( + params.url, + { method: "GET" }, + params.timeoutMs ?? DEFAULT_TIMEOUT_MS, + params.fetchFn, + ); + await assertOkOrThrowHttpError(response, "xAI generated video download failed"); + const mimeType = response.headers.get("content-type")?.trim() || "video/mp4"; + const arrayBuffer = await response.arrayBuffer(); + return { + buffer: Buffer.from(arrayBuffer), + mimeType, + fileName: `video-1.${mimeType.includes("webm") ? 
"webm" : "mp4"}`, + }; +} + +export function buildXaiVideoGenerationProvider(): VideoGenerationProvider { + return { + id: "xai", + label: "xAI", + defaultModel: DEFAULT_XAI_VIDEO_MODEL, + models: [DEFAULT_XAI_VIDEO_MODEL], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "xai", + agentDir, + }), + capabilities: { + maxVideos: 1, + maxInputImages: 1, + maxInputVideos: 1, + maxDurationSeconds: 15, + supportsAspectRatio: true, + supportsResolution: true, + }, + async generateVideo(req) { + const auth = await resolveApiKeyForProvider({ + provider: "xai", + cfg: req.cfg, + agentDir: req.agentDir, + store: req.authStore, + }); + if (!auth.apiKey) { + throw new Error("xAI API key missing"); + } + + const fetchFn = fetch; + const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = + resolveProviderHttpRequestConfig({ + baseUrl: resolveXaiVideoBaseUrl(req), + defaultBaseUrl: DEFAULT_XAI_VIDEO_BASE_URL, + allowPrivateNetwork: false, + defaultHeaders: { + Authorization: `Bearer ${auth.apiKey}`, + "Content-Type": "application/json", + }, + provider: "xai", + capability: "video", + transport: "http", + }); + const { response, release } = await postJsonRequest({ + url: `${baseUrl}${resolveCreateEndpoint(req)}`, + headers, + body: buildCreateBody(req), + timeoutMs: req.timeoutMs, + fetchFn, + allowPrivateNetwork, + dispatcherPolicy, + }); + try { + await assertOkOrThrowHttpError(response, "xAI video generation failed"); + const submitted = (await response.json()) as XaiVideoCreateResponse; + const requestId = submitted.request_id?.trim(); + if (!requestId) { + throw new Error( + submitted.error?.message?.trim() || "xAI video generation response missing request_id", + ); + } + const completed = await pollXaiVideo({ + requestId, + headers, + timeoutMs: req.timeoutMs, + baseUrl, + fetchFn, + }); + const videoUrl = completed.video?.url?.trim(); + if (!videoUrl) { + throw new Error("xAI video generation completed without an output URL"); + } + 
const video = await downloadXaiVideo({ + url: videoUrl, + timeoutMs: req.timeoutMs, + fetchFn, + }); + return { + videos: [video], + model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL, + metadata: { + requestId, + status: completed.status, + videoUrl, + mode: resolveXaiVideoMode(req), + }, + }; + } finally { + await release(); + } + }, + }; +} diff --git a/src/agents/live-auth-keys.test.ts b/src/agents/live-auth-keys.test.ts new file mode 100644 index 00000000000..5f532a31734 --- /dev/null +++ b/src/agents/live-auth-keys.test.ts @@ -0,0 +1,32 @@ +import { afterEach, describe, expect, it } from "vitest"; +import { collectProviderApiKeys } from "./live-auth-keys.js"; + +const ORIGINAL_MODELSTUDIO_API_KEY = process.env.MODELSTUDIO_API_KEY; +const ORIGINAL_XAI_API_KEY = process.env.XAI_API_KEY; + +describe("collectProviderApiKeys", () => { + afterEach(() => { + if (ORIGINAL_MODELSTUDIO_API_KEY === undefined) { + delete process.env.MODELSTUDIO_API_KEY; + } else { + process.env.MODELSTUDIO_API_KEY = ORIGINAL_MODELSTUDIO_API_KEY; + } + if (ORIGINAL_XAI_API_KEY === undefined) { + delete process.env.XAI_API_KEY; + } else { + process.env.XAI_API_KEY = ORIGINAL_XAI_API_KEY; + } + }); + + it("honors manifest-declared provider auth env vars for nonstandard provider ids", () => { + process.env.MODELSTUDIO_API_KEY = "modelstudio-live-key"; + + expect(collectProviderApiKeys("alibaba")).toContain("modelstudio-live-key"); + }); + + it("dedupes manifest env vars against direct provider env naming", () => { + process.env.XAI_API_KEY = "xai-live-key"; + + expect(collectProviderApiKeys("xai")).toEqual(["xai-live-key"]); + }); +}); diff --git a/src/agents/live-auth-keys.ts b/src/agents/live-auth-keys.ts index 7732053e791..f9fa59c87b3 100644 --- a/src/agents/live-auth-keys.ts +++ b/src/agents/live-auth-keys.ts @@ -1,3 +1,4 @@ +import { getProviderEnvVars } from "../secrets/provider-env-vars.js"; import { normalizeProviderId } from "./model-selection.js"; const KEY_SPLIT_RE = /[\s,;]+/g; 
@@ -98,7 +99,8 @@ function resolveProviderApiKeyConfig(provider: string): ProviderApiKeyConfig { } export function collectProviderApiKeys(provider: string): string[] { - const config = resolveProviderApiKeyConfig(provider); + const normalizedProvider = normalizeProviderId(provider); + const config = resolveProviderApiKeyConfig(normalizedProvider); const forcedSingle = config.liveSingle ? process.env[config.liveSingle]?.trim() : undefined; if (forcedSingle) { @@ -112,6 +114,9 @@ export function collectProviderApiKeys(provider: string): string[] { const fallback = config.fallbackVars .map((envVar) => process.env[envVar]?.trim()) .filter(Boolean) as string[]; + const manifestFallback = getProviderEnvVars(normalizedProvider) + .map((envVar) => process.env[envVar]?.trim()) + .filter(Boolean) as string[]; const seen = new Set(); @@ -135,6 +140,9 @@ export function collectProviderApiKeys(provider: string): string[] { for (const value of fallback) { add(value); } + for (const value of manifestFallback) { + add(value); + } return Array.from(seen); } diff --git a/src/video-generation/live-test-helpers.ts b/src/video-generation/live-test-helpers.ts index d32acfceb45..3730a403f99 100644 --- a/src/video-generation/live-test-helpers.ts +++ b/src/video-generation/live-test-helpers.ts @@ -2,6 +2,7 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js"; import type { OpenClawConfig } from "../config/config.js"; export const DEFAULT_LIVE_VIDEO_MODELS: Record = { + alibaba: "alibaba/wan2.6-t2v", byteplus: "byteplus/seedance-1-0-lite-t2v-250428", fal: "fal/fal-ai/minimax/video-01-live", google: "google/veo-3.1-fast-generate-preview", @@ -9,6 +10,7 @@ export const DEFAULT_LIVE_VIDEO_MODELS: Record = { openai: "openai/sora-2", qwen: "qwen/wan2.6-t2v", together: "together/Wan-AI/Wan2.2-T2V-A14B", + xai: "xai/grok-imagine-video", }; export function redactLiveApiKey(value: string | undefined): string {