feat(video): add xai and alibaba providers

2026-04-05 23:06:18 +01:00 · 2026-04-05 23:06:18 +01:00 · a62193c09e
parent 5e0b58fbc6
commit a62193c09e
19 changed files with 1193 additions and 11 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -29,6 +29,10 @@ Docs: https://docs.openclaw.ai
 - Agents/Claude CLI: expose OpenClaw tools to background Claude CLI runs through a loopback MCP bridge and switch bundled runs to stdin + `stream-json` partial-message streaming so prompts stop riding argv, long replies show live progress, and final session/usage metadata still land cleanly. (#35676) Thanks @mylukin.
 - ACPX/runtime: embed the ACP runtime directly in the bundled `acpx` plugin, remove the extra external ACP CLI hop, harden live ACP session binding and reuse, and add a generic `reply_dispatch` hook so bundled plugins like ACPX can own reply interception without hardcoded ACP paths in core auto-reply routing. (#61319)
 - Config/schema: enrich the exported `openclaw config schema` JSON Schema with field titles and descriptions so editors, agents, and other schema consumers receive the same config help metadata. (#60067) Thanks @solavrc.
+- Agents/cache: add prompt-cache break diagnostics, trace live cache scenarios through embedded runner paths, and show cache reuse explicitly in `openclaw status --verbose`. Thanks @vincentkoc.
+- Agents/cache: stabilize cache-relevant system prompt fingerprints by normalizing equivalent structured prompt whitespace, line endings, hook-added system context, and runtime capability ordering so semantically unchanged prompts reuse KV/cache more reliably. Thanks @vincentkoc.
+- Agents/tool prompts: remove the duplicate in-band tool inventory from agent system prompts so tool-calling models rely on the structured tool definitions as the single source of truth, improving prompt stability and reducing stale tool guidance.
+- Tools/video generation: add bundled xAI (`grok-imagine-video`) and Alibaba Model Studio Wan video providers, plus live-test/default model wiring for both.
 - Providers/CLI: remove bundled CLI text-provider backends and the `agents.defaults.cliBackends` surface, while keeping ACP harness sessions and Gemini media understanding on the native bundled providers.
 - Matrix/exec approvals: clarify unavailable-approval replies so Matrix no longer claims chat approvals are unsupported when native exec approvals are merely unconfigured. (#61424) Thanks @gumadeiras.
 - Docs/IRC: replace public IRC hostname examples with `irc.example.com` and recommend private servers for bot coordination while listing common public networks for intentional use.
@ -101,6 +105,7 @@ Docs: https://docs.openclaw.ai
 - Agents/errors: surface an explicit disk-full message when local session or transcript writes fail with `ENOSPC`/`disk full`, so those runs stop degrading into opaque `NO_REPLY`-style failures. Thanks @vincentkoc.
 - Exec approvals: remove heuristic command-obfuscation gating from host exec so gateway and node runs rely on explicit policy, allowlist, and strict inline-eval rules only.
 - Agents/tool results: cap live tool-result persistence and overflow-recovery truncation at 40k characters so oversized tool output stays bounded without discarding recent context entirely.
+- Discord/video replies: split text-plus-video deliveries into a text reply followed by a media-only send, and let live provider auth checks honor manifest-declared API key env vars like `MODELSTUDIO_API_KEY`.
 - Config/All Settings: keep the raw config view intact when sensitive fields are blank instead of corrupting or dropping the rendered snapshot. (#28214) Thanks @solodmd.
 - Plugin SDK/facades: back-fill bundled plugin facade sentinels before plugin-id tracking re-enters config loading, so CLI/provider startup no longer crashes with `shouldNormalizeGoogleProviderConfig is not a function` or other empty-facade reads during bundled plugin re-entry. Thanks @adam91holt.
 - Plugins/facades: back-fill facade sentinels before tracked-plugin resolution re-enters config loading, so facade exports stay defined during circular provider normalization. (#61180) Thanks @adam91holt.
--- a/docs/tools/video-generation.md
+++ b/docs/tools/video-generation.md
@ -1,5 +1,5 @@
 ---
-summary: "Generate videos using configured providers such as OpenAI, Google, Qwen, and MiniMax"
+summary: "Generate videos using configured providers such as Alibaba, OpenAI, Google, Qwen, MiniMax, and xAI"
 read_when:
  - Generating videos via the agent
  - Configuring video generation providers and models
@ -17,7 +17,7 @@ The tool only appears when at least one video-generation provider is available.

 ## Quick start

-1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `QWEN_API_KEY`).
+1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, `MODELSTUDIO_API_KEY`, or `QWEN_API_KEY`).
 2. Optionally set your preferred model:

 ```json5
@ -38,6 +38,7 @@ The agent calls `video_generate` automatically. No tool allow-listing needed —

 | Provider | Default model                   | Reference inputs   | API key                                                    |
 | -------- | ------------------------------- | ------------------ | ---------------------------------------------------------- |
+| Alibaba  | `wan2.6-t2v`                    | Yes, remote URLs   | `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY`, `QWEN_API_KEY` |
 | BytePlus | `seedance-1-0-lite-t2v-250428`  | 1 image            | `BYTEPLUS_API_KEY`                                         |
 | fal      | `fal-ai/minimax/video-01-live`  | 1 image            | `FAL_KEY`                                                  |
 | Google   | `veo-3.1-fast-generate-preview` | 1 image or 1 video | `GEMINI_API_KEY`, `GOOGLE_API_KEY`                         |
@ -45,6 +46,7 @@ The agent calls `video_generate` automatically. No tool allow-listing needed —
 | OpenAI   | `sora-2`                        | 1 image or 1 video | `OPENAI_API_KEY`                                           |
 | Qwen     | `wan2.6-t2v`                    | Yes, remote URLs   | `QWEN_API_KEY`, `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY` |
 | Together | `Wan-AI/Wan2.2-T2V-A14B`        | 1 image            | `TOGETHER_API_KEY`                                         |
+| xAI      | `grok-imagine-video`            | 1 image or 1 video | `XAI_API_KEY`                                              |

 Use `action: "list"` to inspect available providers and models at runtime:

@ -105,10 +107,12 @@ If a provider fails, the next candidate is tried automatically. If all fail, the

 ## Provider notes

- OpenAI uses the native video endpoint and currently defaults to `sora-2`.
+- Alibaba uses the DashScope / Model Studio async video endpoint and currently requires remote `http(s)` URLs for reference assets.
 - Google uses Gemini/Veo and supports a single image or video reference input.
 - MiniMax, Together, BytePlus, and fal currently support a single image reference input.
+- OpenAI uses the native video endpoint and currently defaults to `sora-2`.
 - Qwen supports image/video references, but the upstream DashScope video endpoint currently requires remote `http(s)` URLs for those references.
+- xAI uses the native xAI video API and supports text-to-video, image-to-video, and remote video edit/extend flows.

 ## Qwen reference inputs

--- a/extensions/alibaba/index.ts
+++ b/extensions/alibaba/index.ts
@ -0,0 +1,11 @@
+import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
+import { buildAlibabaVideoGenerationProvider } from "./video-generation-provider.js";
+
+/**
+ * Plugin entry for the bundled Alibaba Model Studio plugin.
+ *
+ * Registration wires up a single capability: the video-generation provider
+ * returned by `buildAlibabaVideoGenerationProvider()`.
+ */
+export default definePluginEntry({
+  id: "alibaba",
+  name: "Alibaba Model Studio Plugin",
+  description: "Bundled Alibaba Model Studio video provider plugin",
+  register(api) {
+    api.registerVideoGenerationProvider(buildAlibabaVideoGenerationProvider());
+  },
+});
--- a/extensions/alibaba/openclaw.plugin.json
+++ b/extensions/alibaba/openclaw.plugin.json
@ -0,0 +1,30 @@
+{
+  "id": "alibaba",
+  "enabledByDefault": true,
+  "providerAuthEnvVars": {
+    "alibaba": ["MODELSTUDIO_API_KEY", "DASHSCOPE_API_KEY", "QWEN_API_KEY"]
+  },
+  "providerAuthChoices": [
+    {
+      "provider": "alibaba",
+      "method": "api-key",
+      "choiceId": "alibaba-model-studio-api-key",
+      "choiceLabel": "Alibaba Model Studio API key",
+      "groupId": "alibaba",
+      "groupLabel": "Alibaba Model Studio",
+      "groupHint": "DashScope / Model Studio API key",
+      "optionKey": "alibabaModelStudioApiKey",
+      "cliFlag": "--alibaba-model-studio-api-key",
+      "cliOption": "--alibaba-model-studio-api-key <key>",
+      "cliDescription": "Alibaba Model Studio API key"
+    }
+  ],
+  "contracts": {
+    "videoGenerationProviders": ["alibaba"]
+  },
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {}
+  }
+}
--- a/extensions/alibaba/package.json
+++ b/extensions/alibaba/package.json
@ -0,0 +1,12 @@
+{
+  "name": "@openclaw/alibaba-provider",
+  "version": "2026.4.5",
+  "private": true,
+  "description": "OpenClaw Alibaba Model Studio video provider plugin",
+  "type": "module",
+  "openclaw": {
+    "extensions": [
+      "./index.ts"
+    ]
+  }
+}
--- a/extensions/alibaba/plugin-registration.contract.test.ts
+++ b/extensions/alibaba/plugin-registration.contract.test.ts
@ -0,0 +1,7 @@
+import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
+
+// Runs the shared plugin-registration contract helper for the "alibaba" plugin,
+// declaring "alibaba" as its only video-generation provider id and setting
+// `requireGenerateVideo`.
+describePluginRegistrationContract({
+  pluginId: "alibaba",
+  videoGenerationProviderIds: ["alibaba"],
+  requireGenerateVideo: true,
+});
--- a/extensions/alibaba/video-generation-provider.test.ts
+++ b/extensions/alibaba/video-generation-provider.test.ts
@ -0,0 +1,133 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { buildAlibabaVideoGenerationProvider } from "./video-generation-provider.js";
+
+// The mocks are created inside vi.hoisted so they already exist when the
+// vi.mock factories below reference them.
+const {
+  resolveApiKeyForProviderMock,
+  postJsonRequestMock,
+  fetchWithTimeoutMock,
+  assertOkOrThrowHttpErrorMock,
+  resolveProviderHttpRequestConfigMock,
+} = vi.hoisted(() => ({
+  // Auth lookup always resolves to a fixed API key.
+  resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "alibaba-key" })),
+  // Responses for these two are supplied per test.
+  postJsonRequestMock: vi.fn(),
+  fetchWithTimeoutMock: vi.fn(),
+  // HTTP status assertion is a no-op: every mocked response counts as OK.
+  assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
+  // Echoes the requested base URL (falling back to the default) and wraps the
+  // default headers in a Headers instance.
+  resolveProviderHttpRequestConfigMock: vi.fn((params) => ({
+    baseUrl: params.baseUrl ?? params.defaultBaseUrl,
+    allowPrivateNetwork: false,
+    headers: new Headers(params.defaultHeaders),
+    dispatcherPolicy: undefined,
+  })),
+}));
+
+// Replace the auth runtime module with the fixed-key mock.
+vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
+  resolveApiKeyForProvider: resolveApiKeyForProviderMock,
+}));
+
+// Replace the provider HTTP helpers so no real network request is made.
+vi.mock("openclaw/plugin-sdk/provider-http", () => ({
+  assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
+  fetchWithTimeout: fetchWithTimeoutMock,
+  postJsonRequest: postJsonRequestMock,
+  resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
+}));
+
+// Unit tests for the Alibaba video-generation provider; all HTTP and auth
+// helpers are mocked at module level.
+describe("alibaba video generation provider", () => {
+  afterEach(() => {
+    // Clear recorded calls on every mock; the two HTTP mocks are fully reset so
+    // queued per-test responses cannot leak into the next test.
+    resolveApiKeyForProviderMock.mockClear();
+    postJsonRequestMock.mockReset();
+    fetchWithTimeoutMock.mockReset();
+    assertOkOrThrowHttpErrorMock.mockClear();
+    resolveProviderHttpRequestConfigMock.mockClear();
+  });
+
+  it("submits async Wan generation, polls task status, and downloads the resulting video", async () => {
+    // Submission response: carries the task id that is polled next.
+    postJsonRequestMock.mockResolvedValue({
+      response: {
+        json: async () => ({
+          request_id: "req-1",
+          output: {
+            task_id: "task-1",
+          },
+        }),
+      },
+      release: vi.fn(async () => {}),
+    });
+    // First fetch is the task poll (already SUCCEEDED); the second is the
+    // download of the generated video.
+    fetchWithTimeoutMock
+      .mockResolvedValueOnce({
+        json: async () => ({
+          output: {
+            task_status: "SUCCEEDED",
+            results: [{ video_url: "https://example.com/out.mp4" }],
+          },
+        }),
+        headers: new Headers(),
+      })
+      .mockResolvedValueOnce({
+        arrayBuffer: async () => Buffer.from("mp4-bytes"),
+        headers: new Headers({ "content-type": "video/mp4" }),
+      });
+
+    const provider = buildAlibabaVideoGenerationProvider();
+    const result = await provider.generateVideo({
+      provider: "alibaba",
+      model: "wan2.6-r2v-flash",
+      prompt: "animate this shot",
+      cfg: {},
+      inputImages: [{ url: "https://example.com/ref.png" }],
+      durationSeconds: 6,
+      audio: true,
+      watermark: false,
+    });
+
+    // A single remote image reference is sent as `input.img_url`.
+    expect(postJsonRequestMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        url: "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis",
+        body: expect.objectContaining({
+          model: "wan2.6-r2v-flash",
+          input: expect.objectContaining({
+            prompt: "animate this shot",
+            img_url: "https://example.com/ref.png",
+          }),
+          parameters: expect.objectContaining({
+            duration: 6,
+            enable_audio: true,
+            watermark: false,
+          }),
+        }),
+      }),
+    );
+    // The poll hits the task endpoint with the default 120 s timeout, since the
+    // request sets no timeoutMs.
+    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
+      1,
+      "https://dashscope-intl.aliyuncs.com/api/v1/tasks/task-1",
+      expect.objectContaining({ method: "GET" }),
+      120000,
+      fetch,
+    );
+    expect(result.videos).toHaveLength(1);
+    expect(result.videos[0]?.mimeType).toBe("video/mp4");
+    expect(result.metadata).toEqual(
+      expect.objectContaining({
+        requestId: "req-1",
+        taskId: "task-1",
+        taskStatus: "SUCCEEDED",
+      }),
+    );
+  });
+
+  it("fails fast when reference inputs are local buffers instead of remote URLs", async () => {
+    const provider = buildAlibabaVideoGenerationProvider();
+
+    // A buffer-only reference asset must be rejected before any submission.
+    await expect(
+      provider.generateVideo({
+        provider: "alibaba",
+        model: "wan2.6-i2v",
+        prompt: "animate this local frame",
+        cfg: {},
+        inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
+      }),
+    ).rejects.toThrow(
+      "Alibaba Wan video generation currently requires remote http(s) URLs for reference images/videos.",
+    );
+    expect(postJsonRequestMock).not.toHaveBeenCalled();
+  });
+});
--- a/extensions/alibaba/video-generation-provider.ts
+++ b/extensions/alibaba/video-generation-provider.ts
@ -0,0 +1,293 @@
+import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
+import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
+import {
+  assertOkOrThrowHttpError,
+  fetchWithTimeout,
+  postJsonRequest,
+  resolveProviderHttpRequestConfig,
+} from "openclaw/plugin-sdk/provider-http";
+import type {
+  GeneratedVideoAsset,
+  VideoGenerationProvider,
+  VideoGenerationRequest,
+  VideoGenerationResult,
+  VideoGenerationSourceAsset,
+} from "openclaw/plugin-sdk/video-generation";
+
+// Base URL used when no Alibaba provider base URL is configured.
+const DEFAULT_ALIBABA_VIDEO_BASE_URL = "https://dashscope-intl.aliyuncs.com";
+// Model used when the request does not name one.
+const DEFAULT_ALIBABA_VIDEO_MODEL = "wan2.6-t2v";
+// Duration in seconds sent when the request leaves `durationSeconds` unset.
+const DEFAULT_DURATION_SECONDS = 5;
+// Per-request timeout (ms) for task polls and video downloads when the request
+// carries no `timeoutMs`.
+const DEFAULT_TIMEOUT_MS = 120_000;
+// Delay between task-status polls (ms) and the maximum number of polls:
+// 120 polls spaced 2.5 s apart is roughly five minutes of waiting.
+const POLL_INTERVAL_MS = 2_500;
+const MAX_POLL_ATTEMPTS = 120;
+// Maps a resolution label to the "width*height" string sent as `parameters.size`
+// when the request gives a resolution but no explicit size.
+const RESOLUTION_TO_SIZE: Record<string, string> = {
+  "480P": "832*480",
+  "720P": "1280*720",
+  "1080P": "1920*1080",
+};
+
+/**
+ * Shape assumed for the JSON bodies of both the task-submission response and
+ * the task-status poll response. Every field is optional; readers must guard
+ * each access.
+ */
+type AlibabaVideoGenerationResponse = {
+  output?: {
+    // Task id read from the submission response and used for polling.
+    task_id?: string;
+    // Status read while polling, e.g. "SUCCEEDED", "FAILED", "CANCELED".
+    task_status?: string;
+    submit_time?: string;
+    // Generated videos; each entry's `video_url` is collected for download.
+    results?: Array<{
+      video_url?: string;
+      orig_prompt?: string;
+      actual_prompt?: string;
+    }>;
+    // Alternative single-URL location that is also collected for download.
+    video_url?: string;
+    code?: string;
+    // Preferred error text when a task fails or is canceled.
+    message?: string;
+  };
+  request_id?: string;
+  code?: string;
+  // Fallback error text when `output.message` is absent.
+  message?: string;
+};
+
+/**
+ * Picks the base URL for Alibaba video requests.
+ *
+ * @returns the trimmed `cfg.models.providers.alibaba.baseUrl` when it is set
+ *   and non-blank, otherwise `DEFAULT_ALIBABA_VIDEO_BASE_URL`.
+ */
+function resolveAlibabaVideoBaseUrl(req: VideoGenerationRequest): string {
+  const configured = req.cfg?.models?.providers?.alibaba?.baseUrl?.trim();
+  return configured ? configured : DEFAULT_ALIBABA_VIDEO_BASE_URL;
+}
+
+/** Removes every trailing "/" from `baseUrl` so API paths can be appended directly. */
+function resolveDashscopeAigcApiBaseUrl(baseUrl: string): string {
+  const trailingSlashes = /\/+$/u;
+  return baseUrl.replace(trailingSlashes, "");
+}
+
+/**
+ * Collects the trimmed, non-empty URLs of all reference assets, images first
+ * and then videos. Assets without a usable URL are left out.
+ */
+function resolveReferenceUrls(
+  inputImages: VideoGenerationSourceAsset[] | undefined,
+  inputVideos: VideoGenerationSourceAsset[] | undefined,
+): string[] {
+  const urls: string[] = [];
+  for (const asset of [...(inputImages ?? []), ...(inputVideos ?? [])]) {
+    const url = asset.url?.trim();
+    if (url) {
+      urls.push(url);
+    }
+  }
+  return urls;
+}
+
+/**
+ * Rejects reference assets that only carry local bytes.
+ *
+ * @throws Error when any image or video asset has a buffer but no non-blank URL.
+ */
+function assertAlibabaReferenceInputsSupported(
+  inputImages: VideoGenerationSourceAsset[] | undefined,
+  inputVideos: VideoGenerationSourceAsset[] | undefined,
+): void {
+  const assets = [...(inputImages ?? []), ...(inputVideos ?? [])];
+  for (const asset of assets) {
+    const hasUrl = Boolean(asset.url?.trim());
+    if (!hasUrl && asset.buffer) {
+      throw new Error(
+        "Alibaba Wan video generation currently requires remote http(s) URLs for reference images/videos.",
+      );
+    }
+  }
+}
+
+/**
+ * Builds the `input` object of the submission body.
+ *
+ * Always contains `prompt`. Exactly one image reference and no video
+ * references yields `img_url`; any other non-empty set of reference URLs is
+ * sent as `reference_urls`.
+ *
+ * @throws Error when a reference asset is a local buffer without a URL.
+ */
+function buildAlibabaVideoGenerationInput(req: VideoGenerationRequest): Record<string, unknown> {
+  assertAlibabaReferenceInputsSupported(req.inputImages, req.inputVideos);
+  const referenceUrls = resolveReferenceUrls(req.inputImages, req.inputVideos);
+  const imageCount = req.inputImages?.length ?? 0;
+  const hasVideos = Boolean(req.inputVideos?.length);
+  const isSingleImageReference = referenceUrls.length === 1 && imageCount === 1 && !hasVideos;
+  if (isSingleImageReference) {
+    return { prompt: req.prompt, img_url: referenceUrls[0] };
+  }
+  if (referenceUrls.length > 0) {
+    return { prompt: req.prompt, reference_urls: referenceUrls };
+  }
+  return { prompt: req.prompt };
+}
+
+/**
+ * Builds the optional `parameters` object of the submission body.
+ *
+ * Only options present on the request are emitted: `size` (explicit size, or
+ * derived from the resolution label), `aspect_ratio`, `duration` (rounded,
+ * at least 1), `enable_audio` and `watermark`.
+ *
+ * @returns the parameters, or `undefined` when none apply.
+ */
+function buildAlibabaVideoGenerationParameters(
+  req: VideoGenerationRequest,
+): Record<string, unknown> | undefined {
+  const { size, resolution, aspectRatio, durationSeconds, audio, watermark } = req;
+  const resolvedSize = size?.trim() || (resolution ? RESOLUTION_TO_SIZE[resolution] : undefined);
+  const trimmedAspectRatio = aspectRatio?.trim();
+  const hasDuration = typeof durationSeconds === "number" && Number.isFinite(durationSeconds);
+  const parameters: Record<string, unknown> = {
+    ...(resolvedSize ? { size: resolvedSize } : {}),
+    ...(trimmedAspectRatio ? { aspect_ratio: trimmedAspectRatio } : {}),
+    ...(hasDuration ? { duration: Math.max(1, Math.round(durationSeconds)) } : {}),
+    ...(typeof audio === "boolean" ? { enable_audio: audio } : {}),
+    ...(typeof watermark === "boolean" ? { watermark } : {}),
+  };
+  if (Object.keys(parameters).length === 0) {
+    return undefined;
+  }
+  return parameters;
+}
+
+/**
+ * Collects the generated video URLs from a completed task payload.
+ *
+ * URLs are read from every `output.results[].video_url` first and then from
+ * `output.video_url`. Each candidate is trimmed before validation and
+ * de-duplication, so whitespace variants of the same URL collapse into one
+ * entry and callers never receive padded URLs.
+ *
+ * @returns unique, trimmed, non-empty URLs in first-seen order.
+ */
+function extractVideoUrls(payload: AlibabaVideoGenerationResponse): string[] {
+  const candidates: unknown[] = [
+    ...(payload.output?.results ?? []).map((entry) => entry.video_url),
+    payload.output?.video_url,
+  ];
+  // A Set keeps insertion order, which preserves the results-first ordering.
+  const urls = new Set<string>();
+  for (const candidate of candidates) {
+    const url = typeof candidate === "string" ? candidate.trim() : "";
+    if (url) {
+      urls.add(url);
+    }
+  }
+  return [...urls];
+}
+
+/**
+ * Polls the task-status endpoint until the task reaches a terminal state.
+ *
+ * Makes at most `MAX_POLL_ATTEMPTS` requests, waiting `POLL_INTERVAL_MS`
+ * between consecutive requests. No wait happens after the final attempt, so
+ * the timeout error is raised as soon as the last poll comes back unfinished.
+ *
+ * @param params.taskId task id returned by the submission call
+ * @param params.headers headers sent with every poll request
+ * @param params.timeoutMs per-request timeout; defaults to `DEFAULT_TIMEOUT_MS`
+ * @param params.fetchFn fetch implementation passed through to `fetchWithTimeout`
+ * @param params.baseUrl API base URL without a trailing slash
+ * @returns the payload of the poll that reported `SUCCEEDED`
+ * @throws Error when the task reports `FAILED` or `CANCELED`, when a poll
+ *   request fails the HTTP status check, or when the attempts are exhausted.
+ */
+async function pollTaskUntilComplete(params: {
+  taskId: string;
+  headers: Headers;
+  timeoutMs?: number;
+  fetchFn: typeof fetch;
+  baseUrl: string;
+}): Promise<AlibabaVideoGenerationResponse> {
+  // The task id originates from a remote response; encode it so it always
+  // stays a single path segment.
+  const taskUrl = `${params.baseUrl}/api/v1/tasks/${encodeURIComponent(params.taskId)}`;
+  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) {
+    if (attempt > 0) {
+      // Space out consecutive polls; the first poll runs immediately.
+      await new Promise<void>((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+    }
+    const response = await fetchWithTimeout(
+      taskUrl,
+      {
+        method: "GET",
+        headers: params.headers,
+      },
+      requestTimeoutMs,
+      params.fetchFn,
+    );
+    await assertOkOrThrowHttpError(response, "Alibaba Wan video-generation task poll failed");
+    const payload = (await response.json()) as AlibabaVideoGenerationResponse;
+    const status = payload.output?.task_status?.trim().toUpperCase();
+    if (status === "SUCCEEDED") {
+      return payload;
+    }
+    if (status === "FAILED" || status === "CANCELED") {
+      // Prefer the service-provided message; fall back to a generic one.
+      throw new Error(
+        payload.output?.message?.trim() ||
+          payload.message?.trim() ||
+          `Alibaba Wan video generation task ${params.taskId} ${status.toLowerCase()}`,
+      );
+    }
+    // Any other status (or none) is treated as still in progress.
+  }
+  throw new Error(`Alibaba Wan video generation task ${params.taskId} did not finish in time`);
+}
+
+/**
+ * Downloads each generated video, one after another, into memory.
+ *
+ * @param params.urls video URLs to fetch, in output order
+ * @param params.timeoutMs per-download timeout; defaults to `DEFAULT_TIMEOUT_MS`
+ * @param params.fetchFn fetch implementation passed through to `fetchWithTimeout`
+ * @returns one asset per URL, named `video-<n>.mp4` (1-based) with the
+ *   response content type, or "video/mp4" when the header is missing or blank.
+ * @throws Error when a download fails the HTTP status check.
+ */
+async function downloadGeneratedVideos(params: {
+  urls: string[];
+  timeoutMs?: number;
+  fetchFn: typeof fetch;
+}): Promise<GeneratedVideoAsset[]> {
+  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  const videos: GeneratedVideoAsset[] = [];
+  let position = 0;
+  for (const url of params.urls) {
+    position += 1;
+    const response = await fetchWithTimeout(url, { method: "GET" }, requestTimeoutMs, params.fetchFn);
+    await assertOkOrThrowHttpError(response, "Alibaba Wan generated video download failed");
+    const bytes = Buffer.from(await response.arrayBuffer());
+    const contentType = response.headers.get("content-type")?.trim();
+    videos.push({
+      buffer: bytes,
+      mimeType: contentType || "video/mp4",
+      fileName: `video-${position}.mp4`,
+      metadata: { sourceUrl: url },
+    });
+  }
+  return videos;
+}
+
+/**
+ * Builds the Alibaba Model Studio video-generation provider.
+ *
+ * The returned provider declares its id, label, default model, model list and
+ * capabilities. Its `generateVideo` submits an async generation task, polls
+ * the task until it finishes, downloads the resulting videos and returns them
+ * together with request/task metadata.
+ */
+export function buildAlibabaVideoGenerationProvider(): VideoGenerationProvider {
+  return {
+    id: "alibaba",
+    label: "Alibaba Model Studio",
+    defaultModel: DEFAULT_ALIBABA_VIDEO_MODEL,
+    models: ["wan2.6-t2v", "wan2.6-i2v", "wan2.6-r2v", "wan2.6-r2v-flash", "wan2.7-r2v"],
+    isConfigured: ({ agentDir }) =>
+      isProviderApiKeyConfigured({
+        provider: "alibaba",
+        agentDir,
+      }),
+    capabilities: {
+      maxVideos: 1,
+      maxInputImages: 1,
+      maxInputVideos: 4,
+      maxDurationSeconds: 10,
+      supportsSize: true,
+      supportsAspectRatio: true,
+      supportsResolution: true,
+      supportsAudio: true,
+      supportsWatermark: true,
+    },
+    async generateVideo(req): Promise<VideoGenerationResult> {
+      const fetchFn = fetch;
+      const auth = await resolveApiKeyForProvider({
+        provider: "alibaba",
+        cfg: req.cfg,
+        agentDir: req.agentDir,
+        store: req.authStore,
+      });
+      if (!auth.apiKey) {
+        throw new Error("Alibaba Model Studio API key missing");
+      }
+
+      // The headers resolved here are used for the submission and are reused
+      // unchanged for the task-status polls below.
+      const requestBaseUrl = resolveAlibabaVideoBaseUrl(req);
+      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
+        resolveProviderHttpRequestConfig({
+          baseUrl: requestBaseUrl,
+          defaultBaseUrl: DEFAULT_ALIBABA_VIDEO_BASE_URL,
+          defaultHeaders: {
+            Authorization: `Bearer ${auth.apiKey}`,
+            "Content-Type": "application/json",
+            "X-DashScope-Async": "enable",
+          },
+          provider: "alibaba",
+          capability: "video",
+          transport: "http",
+        });
+
+      const model = req.model?.trim() || DEFAULT_ALIBABA_VIDEO_MODEL;
+      // Submit the generation task; building `input` throws before any request
+      // is sent when a reference asset is a local buffer without a URL.
+      const { response, release } = await postJsonRequest({
+        url: `${resolveDashscopeAigcApiBaseUrl(baseUrl)}/api/v1/services/aigc/video-generation/video-synthesis`,
+        headers,
+        body: {
+          model,
+          input: buildAlibabaVideoGenerationInput(req),
+          parameters: buildAlibabaVideoGenerationParameters({
+            ...req,
+            // Fall back to the default duration when the request omits it.
+            durationSeconds: req.durationSeconds ?? DEFAULT_DURATION_SECONDS,
+          }),
+        },
+        timeoutMs: req.timeoutMs,
+        fetchFn,
+        allowPrivateNetwork,
+        dispatcherPolicy,
+      });
+
+      try {
+        await assertOkOrThrowHttpError(response, "Alibaba Wan video generation failed");
+        const submitted = (await response.json()) as AlibabaVideoGenerationResponse;
+        const taskId = submitted.output?.task_id?.trim();
+        if (!taskId) {
+          throw new Error("Alibaba Wan video generation response missing task_id");
+        }
+        // Wait for the task to reach a terminal state.
+        const completed = await pollTaskUntilComplete({
+          taskId,
+          headers,
+          timeoutMs: req.timeoutMs,
+          fetchFn,
+          baseUrl: resolveDashscopeAigcApiBaseUrl(baseUrl),
+        });
+        const urls = extractVideoUrls(completed);
+        if (urls.length === 0) {
+          throw new Error("Alibaba Wan video generation completed without output video URLs");
+        }
+        // Downloads go through plain GET requests without the provider headers.
+        const videos = await downloadGeneratedVideos({
+          urls,
+          timeoutMs: req.timeoutMs,
+          fetchFn,
+        });
+        return {
+          videos,
+          model,
+          metadata: {
+            requestId: submitted.request_id,
+            taskId,
+            taskStatus: completed.output?.task_status,
+          },
+        };
+      } finally {
+        // Runs on success and on failure, only after polling and downloads
+        // have finished (or thrown).
+        await release();
+      }
+    },
+  };
+}
--- a/extensions/discord/src/channel.test.ts
+++ b/extensions/discord/src/channel.test.ts
@ -138,8 +138,9 @@ describe("discordPlugin outbound", () => {
    expect(resolveReplyToMode({ cfg, accountId: "default" })).toBe("all");
  });

-  it("forwards mediaLocalRoots to sendMessageDiscord", async () => {
+  it("forwards full media send context to sendMessageDiscord", async () => {
    const sendMessageDiscord = vi.fn(async () => ({ messageId: "m1" }));
+    const mediaReadFile = vi.fn(async () => Buffer.from("media"));

    const result = await discordPlugin.outbound!.sendMedia!({
      cfg: {} as OpenClawConfig,
@ -147,23 +148,102 @@ describe("discordPlugin outbound", () => {
      text: "hi",
      mediaUrl: "/tmp/image.png",
      mediaLocalRoots: ["/tmp/agent-root"],
+      mediaReadFile,
      accountId: "work",
+      threadId: "thread-123",
+      replyToId: "reply-123",
      deps: {
        discord: sendMessageDiscord,
      },
    });

    expect(sendMessageDiscord).toHaveBeenCalledWith(
-      "channel:123",
+      "channel:thread-123",
      "hi",
      expect.objectContaining({
        mediaUrl: "/tmp/image.png",
        mediaLocalRoots: ["/tmp/agent-root"],
+        mediaReadFile,
+        replyTo: "reply-123",
      }),
    );
    expect(result).toMatchObject({ channel: "discord", messageId: "m1" });
  });

+  // Text accompanying a video attachment is delivered as two sends: the text
+  // (carrying the reply reference) first, then a media-only message.
+  it("splits text and video into separate sends for attached outbound delivery", async () => {
+    // First resolved value answers the text send, the second the video send.
+    const sendMessageDiscord = vi
+      .fn()
+      .mockResolvedValueOnce({ messageId: "text-1" })
+      .mockResolvedValueOnce({ messageId: "video-1" });
+
+    const result = await discordPlugin.outbound!.sendMedia!({
+      cfg: {} as OpenClawConfig,
+      to: "channel:123",
+      text: "done - tiny cyber-lobster clip incoming",
+      mediaUrl: "/tmp/molty.mp4",
+      accountId: "work",
+      replyToId: "reply-123",
+      threadId: "thread-123",
+      deps: {
+        discord: sendMessageDiscord,
+      },
+    });
+
+    // Both sends target the thread rather than the original channel.
+    expect(sendMessageDiscord).toHaveBeenCalledTimes(2);
+    expect(sendMessageDiscord).toHaveBeenNthCalledWith(
+      1,
+      "channel:thread-123",
+      "done - tiny cyber-lobster clip incoming",
+      expect.objectContaining({
+        replyTo: "reply-123",
+      }),
+    );
+    expect(sendMessageDiscord).toHaveBeenNthCalledWith(
+      2,
+      "channel:thread-123",
+      "",
+      expect.objectContaining({
+        mediaUrl: "/tmp/molty.mp4",
+      }),
+    );
+    // The reported result is the one from the media send.
+    expect(result).toMatchObject({ channel: "discord", messageId: "video-1" });
+  });
+
+  // A poll sent with a threadId must be addressed to the thread target.
+  it("threads poll sends through the thread target", async () => {
+    const sendPollDiscord = vi.fn(async () => ({
+      channelId: "channel:thread-123",
+      messageId: "poll-1",
+    }));
+    // Spy on the send module's export so the plugin's poll path hits the mock.
+    const sendModule = await import("./send.js");
+    const sendPollSpy = vi.spyOn(sendModule, "sendPollDiscord").mockImplementation(sendPollDiscord);
+    try {
+      const result = await discordPlugin.outbound!.sendPoll!({
+        cfg: {} as OpenClawConfig,
+        to: "channel:123",
+        poll: {
+          question: "Best shell?",
+          options: ["molty", "molter"],
+        },
+        accountId: "work",
+        threadId: "thread-123",
+      });
+
+      expect(sendPollDiscord).toHaveBeenCalledWith(
+        "channel:thread-123",
+        {
+          question: "Best shell?",
+          options: ["molty", "molter"],
+        },
+        expect.objectContaining({
+          accountId: "work",
+        }),
+      );
+      expect(result).toMatchObject({ channel: "discord", messageId: "poll-1" });
+    } finally {
+      // Always restore the real export so later tests are unaffected.
+      sendPollSpy.mockRestore();
+    }
+  });
+
  it("uses direct Discord probe helpers for status probes", async () => {
    const runtimeProbeDiscord = vi.fn(async () => {
      throw new Error("runtime Discord probe should not be used");
--- a/extensions/discord/src/channel.ts
+++ b/extensions/discord/src/channel.ts
@ -134,6 +134,43 @@ const meta = {
 };
 const REQUIRED_DISCORD_PERMISSIONS = ["ViewChannel", "SendMessages"] as const;
 const DISCORD_ACCOUNT_STARTUP_STAGGER_MS = 10_000;
+// Lowercase file extensions that isLikelyDiscordVideoMedia treats as video.
+const DISCORD_VIDEO_MEDIA_EXTENSIONS = new Set([".avi", ".m4v", ".mkv", ".mov", ".mp4", ".webm"]);
+
+/**
+ * Reduces a media URL or local path to a lowercase path suitable for an
+ * extension check.
+ *
+ * Input that parses as a URL yields its pathname; anything else has a
+ * "#fragment" and "?query" suffix stripped. Blank input yields "".
+ */
+function normalizeMediaPathForExtension(mediaUrl: string): string {
+  const candidate = mediaUrl.trim();
+  if (candidate.length === 0) {
+    return "";
+  }
+  let path: string;
+  try {
+    path = new URL(candidate).pathname;
+  } catch {
+    // Not an absolute URL (for example a local path): strip the suffixes by hand.
+    const [beforeHash = candidate] = candidate.split("#", 1);
+    const [beforeQuery = beforeHash] = beforeHash.split("?", 1);
+    path = beforeQuery;
+  }
+  return path.toLowerCase();
+}
+
+/**
+ * Reports whether the media URL/path ends with one of the known video file
+ * extensions. The check looks only at the extension, not at the content.
+ */
+function isLikelyDiscordVideoMedia(mediaUrl: string): boolean {
+  const path = normalizeMediaPathForExtension(mediaUrl);
+  return Array.from(DISCORD_VIDEO_MEDIA_EXTENSIONS).some((extension) => path.endsWith(extension));
+}
+
+/**
+ * Chooses the send target for attached outbound delivery.
+ *
+ * @returns `channel:<threadId>` when a non-blank thread id is given,
+ *   otherwise `params.to` unchanged.
+ */
+function resolveDiscordAttachedOutboundTarget(params: {
+  to: string;
+  threadId?: string | number | null;
+}): string {
+  const { to, threadId } = params;
+  const normalizedThreadId = threadId == null ? "" : String(threadId).trim();
+  if (normalizedThreadId === "") {
+    return to;
+  }
+  return `channel:${normalizedThreadId}`;
+}

 function resolveRuntimeDiscordMessageActions() {
  try {
@ -823,9 +860,9 @@ export const discordPlugin: ChannelPlugin<ResolvedDiscordAccount, DiscordProbe>
      },
      attachedResults: {
        channel: "discord",
-        sendText: async ({ cfg, to, text, accountId, deps, replyToId, silent }) => {
+        sendText: async ({ cfg, to, text, accountId, deps, replyToId, threadId, silent }) => {
          const send = await resolveDiscordSend(deps);
-          return await send(to, text, {
+          return await send(resolveDiscordAttachedOutboundTarget({ to, threadId }), text, {
            verbose: false,
            cfg,
            replyTo: replyToId ?? undefined,
@ -839,26 +876,48 @@ export const discordPlugin: ChannelPlugin<ResolvedDiscordAccount, DiscordProbe>
          text,
          mediaUrl,
          mediaLocalRoots,
+          mediaReadFile,
          accountId,
          deps,
          replyToId,
+          threadId,
          silent,
        }) => {
          const send = await resolveDiscordSend(deps);
-          return await send(to, text, {
+          const target = resolveDiscordAttachedOutboundTarget({ to, threadId });
+          if (text.trim() && mediaUrl && isLikelyDiscordVideoMedia(mediaUrl)) {
+            await send(target, text, {
+              verbose: false,
+              cfg,
+              replyTo: replyToId ?? undefined,
+              accountId: accountId ?? undefined,
+              silent: silent ?? undefined,
+            });
+            return await send(target, "", {
+              verbose: false,
+              cfg,
+              mediaUrl,
+              mediaLocalRoots,
+              mediaReadFile,
+              accountId: accountId ?? undefined,
+              silent: silent ?? undefined,
+            });
+          }
+          return await send(target, text, {
            verbose: false,
            cfg,
            mediaUrl,
            mediaLocalRoots,
+            mediaReadFile,
            replyTo: replyToId ?? undefined,
            accountId: accountId ?? undefined,
            silent: silent ?? undefined,
          });
        },
-        sendPoll: async ({ cfg, to, poll, accountId, silent }) =>
+        sendPoll: async ({ cfg, to, poll, accountId, threadId, silent }) =>
          await (
            await loadDiscordSendModule()
-          ).sendPollDiscord(to, poll, {
+          ).sendPollDiscord(resolveDiscordAttachedOutboundTarget({ to, threadId }), poll, {
            cfg,
            accountId: accountId ?? undefined,
            silent: silent ?? undefined,
--- a/extensions/video-generation-providers.live.test.ts
+++ b/extensions/video-generation-providers.live.test.ts
@ -13,6 +13,7 @@ import {
  registerProviderPlugin,
  requireRegisteredProvider,
 } from "../test/helpers/plugins/provider-registration.js";
+import alibabaPlugin from "./alibaba/index.js";
 import byteplusPlugin from "./byteplus/index.js";
 import falPlugin from "./fal/index.js";
 import googlePlugin from "./google/index.js";
@ -20,6 +21,7 @@ import minimaxPlugin from "./minimax/index.js";
 import openaiPlugin from "./openai/index.js";
 import qwenPlugin from "./qwen/index.js";
 import togetherPlugin from "./together/index.js";
+import xaiPlugin from "./xai/index.js";

 const LIVE = isLiveTestEnabled();
 const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS);
@ -33,6 +35,12 @@ type LiveProviderCase = {
 };

 const CASES: LiveProviderCase[] = [
+  {
+    plugin: alibabaPlugin,
+    pluginId: "alibaba",
+    pluginName: "Alibaba Model Studio Plugin",
+    providerId: "alibaba",
+  },
  {
    plugin: byteplusPlugin,
    pluginId: "byteplus",
@ -55,6 +63,7 @@ const CASES: LiveProviderCase[] = [
    pluginName: "Together Provider",
    providerId: "together",
  },
+  { plugin: xaiPlugin, pluginId: "xai", pluginName: "xAI Plugin", providerId: "xai" },
 ]
  .filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true))
  .toSorted((left, right) => left.providerId.localeCompare(right.providerId));
--- a/extensions/xai/index.ts
+++ b/extensions/xai/index.ts
@ -16,6 +16,7 @@ import { isModernXaiModel, resolveXaiForwardCompatModel } from "./provider-model
 import { resolveFallbackXaiAuth } from "./src/tool-auth-shared.js";
 import { resolveEffectiveXSearchConfig } from "./src/x-search-config.js";
 import { wrapXaiProviderStream } from "./stream.js";
+import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js";
 import { createXaiWebSearchProvider } from "./web-search.js";

 const PROVIDER_ID = "xai";
@ -230,6 +231,7 @@ export default defineSingleProviderPluginEntry({
  },
  register(api) {
    api.registerWebSearchProvider(createXaiWebSearchProvider());
+    api.registerVideoGenerationProvider(buildXaiVideoGenerationProvider());
    api.registerTool((ctx) => createLazyCodeExecutionTool(ctx), { name: "code_execution" });
    api.registerTool((ctx) => createLazyXSearchTool(ctx), { name: "x_search" });
  },
--- a/extensions/xai/openclaw.plugin.json
+++ b/extensions/xai/openclaw.plugin.json
@ -77,6 +77,7 @@
  },
  "contracts": {
    "webSearchProviders": ["grok"],
+    "videoGenerationProviders": ["xai"],
    "tools": ["code_execution", "x_search"]
  },
  "configSchema": {
--- a/extensions/xai/plugin-registration.contract.test.ts
+++ b/extensions/xai/plugin-registration.contract.test.ts
@ -0,0 +1,10 @@
+import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
+
+// Registration contract for the bundled xAI plugin. The identifiers listed here mirror
+// what the plugin manifest declares (web search provider "grok", video generation
+// provider "xai", and the two tools).
+// NOTE(review): `requireGenerateVideo` presumably makes the shared helper assert that the
+// registered video provider exposes `generateVideo` — confirm against the helper.
+describePluginRegistrationContract({
+  pluginId: "xai",
+  providerIds: ["xai"],
+  webSearchProviderIds: ["grok"],
+  videoGenerationProviderIds: ["xai"],
+  toolNames: ["code_execution", "x_search"],
+  requireGenerateVideo: true,
+});
--- a/extensions/xai/video-generation-provider.test.ts
+++ b/extensions/xai/video-generation-provider.test.ts
@ -0,0 +1,146 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js";
+
+// The mocks are created inside vi.hoisted so they already exist when the vi.mock
+// factories run (vitest hoists vi.mock calls above the module imports).
+const {
+  resolveApiKeyForProviderMock,
+  postJsonRequestMock,
+  fetchWithTimeoutMock,
+  assertOkOrThrowHttpErrorMock,
+  resolveProviderHttpRequestConfigMock,
+} = vi.hoisted(() => ({
+  // Always resolves to a fixed API key.
+  resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "xai-key" })),
+  // Configured per test: the create/extend POST response.
+  postJsonRequestMock: vi.fn(),
+  // Configured per test: queued responses for the status poll and the download.
+  fetchWithTimeoutMock: vi.fn(),
+  // Resolves without throwing, so every mocked response is treated as OK.
+  assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
+  // Echoes the requested base URL (falling back to the default) and wraps the default
+  // headers in a Headers instance.
+  resolveProviderHttpRequestConfigMock: vi.fn((params) => ({
+    baseUrl: params.baseUrl ?? params.defaultBaseUrl,
+    allowPrivateNetwork: false,
+    headers: new Headers(params.defaultHeaders),
+    dispatcherPolicy: undefined,
+  })),
+}));
+
+// Replace the SDK auth lookup with the fixed-key mock.
+vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
+  resolveApiKeyForProvider: resolveApiKeyForProviderMock,
+}));
+
+// Replace the SDK HTTP helpers with mocks so the provider never touches the network.
+vi.mock("openclaw/plugin-sdk/provider-http", () => ({
+  assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
+  fetchWithTimeout: fetchWithTimeoutMock,
+  postJsonRequest: postJsonRequestMock,
+  resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
+}));
+
+// Exercises the provider end to end against mocked SDK helpers: one POST to create the
+// job, one GET to poll its status, and one GET to download the finished video.
+describe("xai video generation provider", () => {
+  afterEach(() => {
+    // mockReset (not mockClear) on the per-test mocks also drops their queued
+    // resolved values so they cannot leak into the next test.
+    resolveApiKeyForProviderMock.mockClear();
+    postJsonRequestMock.mockReset();
+    fetchWithTimeoutMock.mockReset();
+    assertOkOrThrowHttpErrorMock.mockClear();
+    resolveProviderHttpRequestConfigMock.mockClear();
+  });
+
+  it("creates, polls, and downloads a generated video", async () => {
+    postJsonRequestMock.mockResolvedValue({
+      response: {
+        json: async () => ({
+          request_id: "req_123",
+        }),
+      },
+      release: vi.fn(async () => {}),
+    });
+    // First queued response answers the status poll ("done" on the first attempt, so
+    // no poll delay is hit); the second answers the video download.
+    fetchWithTimeoutMock
+      .mockResolvedValueOnce({
+        json: async () => ({
+          request_id: "req_123",
+          status: "done",
+          video: { url: "https://cdn.x.ai/video.mp4" },
+        }),
+      })
+      .mockResolvedValueOnce({
+        headers: new Headers({ "content-type": "video/mp4" }),
+        arrayBuffer: async () => Buffer.from("mp4-bytes"),
+      });
+
+    const provider = buildXaiVideoGenerationProvider();
+    const result = await provider.generateVideo({
+      provider: "xai",
+      model: "grok-imagine-video",
+      prompt: "A tiny robot crab crossing a moonlit tide pool",
+      cfg: {},
+      durationSeconds: 6,
+      aspectRatio: "16:9",
+      resolution: "720P",
+    });
+
+    // The request's "720P" is expected to reach the API as lowercase "720p".
+    expect(postJsonRequestMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        url: "https://api.x.ai/v1/videos/generations",
+        body: expect.objectContaining({
+          model: "grok-imagine-video",
+          prompt: "A tiny robot crab crossing a moonlit tide pool",
+          duration: 6,
+          aspect_ratio: "16:9",
+          resolution: "720p",
+        }),
+      }),
+    );
+    // No timeoutMs was passed, so the provider's 120s default applies, and the global
+    // fetch is what gets forwarded to the helper.
+    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
+      1,
+      "https://api.x.ai/v1/videos/req_123",
+      expect.objectContaining({ method: "GET" }),
+      120000,
+      fetch,
+    );
+    expect(result.videos[0]?.mimeType).toBe("video/mp4");
+    expect(result.metadata).toEqual(
+      expect.objectContaining({
+        requestId: "req_123",
+        mode: "generate",
+      }),
+    );
+  });
+
+  // An input video combined with a duration selects "extend" mode rather than "edit".
+  it("routes video inputs to the extension endpoint when duration is set", async () => {
+    postJsonRequestMock.mockResolvedValue({
+      response: {
+        json: async () => ({
+          request_id: "req_extend",
+        }),
+      },
+      release: vi.fn(async () => {}),
+    });
+    fetchWithTimeoutMock
+      .mockResolvedValueOnce({
+        json: async () => ({
+          request_id: "req_extend",
+          status: "done",
+          video: { url: "https://cdn.x.ai/extended.mp4" },
+        }),
+      })
+      .mockResolvedValueOnce({
+        headers: new Headers({ "content-type": "video/mp4" }),
+        arrayBuffer: async () => Buffer.from("extended-bytes"),
+      });
+
+    const provider = buildXaiVideoGenerationProvider();
+    await provider.generateVideo({
+      provider: "xai",
+      model: "grok-imagine-video",
+      prompt: "Continue the shot into a neon alleyway",
+      cfg: {},
+      durationSeconds: 8,
+      inputVideos: [{ url: "https://example.com/input.mp4" }],
+    });
+
+    expect(postJsonRequestMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        url: "https://api.x.ai/v1/videos/extensions",
+        body: expect.objectContaining({
+          video: { url: "https://example.com/input.mp4" },
+          duration: 8,
+        }),
+      }),
+    );
+  });
+});
--- a/extensions/xai/video-generation-provider.ts
+++ b/extensions/xai/video-generation-provider.ts
@ -0,0 +1,338 @@
+import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
+import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
+import {
+  assertOkOrThrowHttpError,
+  fetchWithTimeout,
+  postJsonRequest,
+  resolveProviderHttpRequestConfig,
+} from "openclaw/plugin-sdk/provider-http";
+import type {
+  GeneratedVideoAsset,
+  VideoGenerationProvider,
+  VideoGenerationRequest,
+  VideoGenerationSourceAsset,
+} from "openclaw/plugin-sdk/video-generation";
+
+// Base URL used when the config does not set `models.providers.xai.baseUrl`.
+const DEFAULT_XAI_VIDEO_BASE_URL = "https://api.x.ai/v1";
+// Model sent (and reported back) when the request does not name one.
+const DEFAULT_XAI_VIDEO_MODEL = "grok-imagine-video";
+// Per-request timeout for status polls and the download when the caller gives none.
+const DEFAULT_TIMEOUT_MS = 120_000;
+// Delay between consecutive status polls.
+const POLL_INTERVAL_MS = 5_000;
+// Upper bound on status polls before the task is reported as not finishing in time.
+const MAX_POLL_ATTEMPTS = 120;
+// Aspect ratios forwarded to the API; any other value is left out of the request body.
+const XAI_VIDEO_ASPECT_RATIOS = new Set(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"]);
+
+/**
+ * JSON body read from the create/edit/extend POST response. Every field is optional
+ * because the payload is cast, not validated; callers must check `request_id`.
+ */
+type XaiVideoCreateResponse = {
+  /** Identifier later used to poll `/videos/{request_id}`. */
+  request_id?: string;
+  /** Error details; `message` is surfaced when `request_id` is missing. */
+  error?: {
+    code?: string;
+    message?: string;
+  } | null;
+};
+
+/**
+ * JSON body read from the status poll (`GET /videos/{request_id}`). The payload is
+ * cast, not validated, so every field is optional.
+ */
+type XaiVideoStatusResponse = {
+  request_id?: string;
+  /** "done" ends polling successfully; "failed" and "expired" end it with an error. */
+  status?: "queued" | "processing" | "done" | "failed" | "expired";
+  /** Download location of the finished video; read once the status is "done". */
+  video?: {
+    url?: string;
+  } | null;
+  /** Error details; `message` is used as the thrown error text on failure. */
+  error?: {
+    code?: string;
+    message?: string;
+  } | null;
+};
+
+/**
+ * Picks the xAI base URL for a request.
+ *
+ * @param req - Video generation request whose config may override the base URL.
+ * @returns The trimmed `models.providers.xai.baseUrl` from the config, or the default
+ *   xAI base URL when that value is missing or blank.
+ */
+function resolveXaiVideoBaseUrl(req: VideoGenerationRequest): string {
+  const configured = req.cfg?.models?.providers?.xai?.baseUrl?.trim();
+  if (configured) {
+    return configured;
+  }
+  return DEFAULT_XAI_VIDEO_BASE_URL;
+}
+
+/**
+ * Encodes binary data as a base64 `data:` URL.
+ *
+ * @param buffer - Raw bytes to embed.
+ * @param mimeType - Media type placed in the URL header.
+ * @returns A string of the form `data:<mimeType>;base64,<payload>`.
+ */
+function toDataUrl(buffer: Buffer, mimeType: string): string {
+  const payload = buffer.toString("base64");
+  return "data:" + mimeType + ";base64," + payload;
+}
+
+/**
+ * Turns an optional reference image into the URL sent to xAI.
+ *
+ * @param input - Reference image, if any.
+ * @returns `undefined` when no image was given; the trimmed remote URL when one is
+ *   set; otherwise a base64 data URL built from the buffer (MIME type defaults to
+ *   `image/png`).
+ * @throws Error when the image has neither a usable URL nor a buffer.
+ */
+function resolveImageUrl(input: VideoGenerationSourceAsset | undefined): string | undefined {
+  if (!input) {
+    return undefined;
+  }
+  const remoteUrl = input.url?.trim();
+  if (remoteUrl) {
+    return remoteUrl;
+  }
+  if (input.buffer) {
+    const mimeType = input.mimeType?.trim() || "image/png";
+    return toDataUrl(input.buffer, mimeType);
+  }
+  throw new Error("xAI image-to-video input is missing image data.");
+}
+
+/**
+ * Turns an optional input video into the URL sent to xAI. Only remote URLs are
+ * accepted; in-memory buffers are rejected.
+ *
+ * @param input - Input video, if any.
+ * @returns `undefined` when no video was given, otherwise the trimmed remote URL.
+ * @throws Error when the video carries only a buffer, or no data at all.
+ */
+function resolveInputVideoUrl(input: VideoGenerationSourceAsset | undefined): string | undefined {
+  if (!input) {
+    return undefined;
+  }
+  const remoteUrl = input.url?.trim();
+  if (remoteUrl) {
+    return remoteUrl;
+  }
+  // No usable URL: tell "buffer supplied" apart from "nothing supplied" in the error.
+  const reason = input.buffer
+    ? "xAI video editing currently requires a remote mp4 URL input."
+    : "xAI video editing input is missing video data.";
+  throw new Error(reason);
+}
+
+/**
+ * Normalizes a requested duration to a whole number of seconds within bounds.
+ *
+ * @param params.durationSeconds - Requested duration; may be absent.
+ * @param params.min - Lower bound, defaults to 1.
+ * @param params.max - Upper bound, defaults to 15.
+ * @returns The rounded duration clamped to `[min, max]`, or `undefined` when the
+ *   requested value is not a finite number.
+ */
+function resolveDurationSeconds(params: {
+  durationSeconds?: number;
+  min?: number;
+  max?: number;
+}): number | undefined {
+  const requested = params.durationSeconds;
+  if (typeof requested !== "number" || !Number.isFinite(requested)) {
+    return undefined;
+  }
+  const lowerBound = params.min ?? 1;
+  const upperBound = params.max ?? 15;
+  // Cap at the upper bound first, then lift to the lower bound.
+  const capped = Math.min(upperBound, Math.round(requested));
+  return Math.max(lowerBound, capped);
+}
+
+/**
+ * Validates a requested aspect ratio against the set this provider forwards.
+ *
+ * @param value - Requested aspect ratio, e.g. `"16:9"`.
+ * @returns The trimmed value when it is in the allowed set, otherwise `undefined`.
+ */
+function resolveAspectRatio(value: string | undefined): string | undefined {
+  const candidate = value?.trim();
+  if (candidate && XAI_VIDEO_ASPECT_RATIOS.has(candidate)) {
+    return candidate;
+  }
+  return undefined;
+}
+
+/**
+ * Maps the request's resolution label to the value sent to xAI.
+ *
+ * @param value - Requested resolution (`"480P"`, `"720P"`, or `"1080P"`).
+ * @returns `"480p"` for `"480P"`; `"720p"` for both `"720P"` and `"1080P"` (1080P is
+ *   mapped down to 720p); `undefined` for anything else.
+ */
+function resolveResolution(value: string | undefined): "480p" | "720p" | undefined {
+  switch (value) {
+    case "480P":
+      return "480p";
+    case "720P":
+    case "1080P":
+      return "720p";
+    default:
+      return undefined;
+  }
+}
+
+/**
+ * Decides which xAI operation a request maps to.
+ *
+ * @param req - Video generation request.
+ * @returns `"generate"` when there is no input video; `"extend"` when there is an
+ *   input video and a usable duration; `"edit"` when there is an input video but no
+ *   usable duration.
+ */
+function resolveXaiVideoMode(req: VideoGenerationRequest): "generate" | "edit" | "extend" {
+  const videoCount = req.inputVideos?.length ?? 0;
+  if (videoCount <= 0) {
+    return "generate";
+  }
+  const extendDuration = resolveDurationSeconds({
+    durationSeconds: req.durationSeconds,
+    min: 2,
+    max: 10,
+  });
+  if (typeof extendDuration === "number") {
+    return "extend";
+  }
+  return "edit";
+}
+
+/**
+ * Builds the JSON body for the create request of the mode the request resolves to.
+ *
+ * Generate mode may carry a reference image, a duration (1-15s), an aspect ratio and
+ * a resolution. Edit and extend modes carry the input video URL; extend additionally
+ * carries a duration (2-10s).
+ *
+ * @param req - Video generation request.
+ * @returns The request body, always containing `model` and `prompt`.
+ * @throws Error when more than one image or video is supplied, when images and videos
+ *   are combined, or when an input asset has no usable data.
+ */
+function buildCreateBody(req: VideoGenerationRequest): Record<string, unknown> {
+  const imageCount = req.inputImages?.length ?? 0;
+  const videoCount = req.inputVideos?.length ?? 0;
+  if (imageCount > 1) {
+    throw new Error("xAI video generation supports at most one reference image.");
+  }
+  if (videoCount > 1) {
+    throw new Error("xAI video generation supports at most one input video.");
+  }
+  if (imageCount > 0 && videoCount > 0) {
+    throw new Error("xAI video generation does not support image and video inputs together.");
+  }
+
+  const mode = resolveXaiVideoMode(req);
+  const body: Record<string, unknown> = {
+    model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL,
+    prompt: req.prompt,
+  };
+
+  if (mode !== "generate") {
+    // Edit and extend both operate on the single input video.
+    body.video = { url: resolveInputVideoUrl(req.inputVideos?.[0]) };
+    if (mode === "extend") {
+      const extendDuration = resolveDurationSeconds({
+        durationSeconds: req.durationSeconds,
+        min: 2,
+        max: 10,
+      });
+      if (typeof extendDuration === "number") {
+        body.duration = extendDuration;
+      }
+    }
+    return body;
+  }
+
+  // Generate mode: each optional field is added only when it resolves to a value.
+  const imageUrl = resolveImageUrl(req.inputImages?.[0]);
+  if (imageUrl) {
+    body.image = { url: imageUrl };
+  }
+  const generateDuration = resolveDurationSeconds({
+    durationSeconds: req.durationSeconds,
+    min: 1,
+    max: 15,
+  });
+  if (typeof generateDuration === "number") {
+    body.duration = generateDuration;
+  }
+  const aspectRatio = resolveAspectRatio(req.aspectRatio);
+  if (aspectRatio) {
+    body.aspect_ratio = aspectRatio;
+  }
+  const resolution = resolveResolution(req.resolution);
+  if (resolution) {
+    body.resolution = resolution;
+  }
+  return body;
+}
+
+/**
+ * Selects the API path for the create request.
+ *
+ * @param req - Video generation request.
+ * @returns `/videos/edits` for edit mode, `/videos/extensions` for extend mode, and
+ *   `/videos/generations` otherwise.
+ */
+function resolveCreateEndpoint(req: VideoGenerationRequest): string {
+  const mode = resolveXaiVideoMode(req);
+  if (mode === "edit") {
+    return "/videos/edits";
+  }
+  if (mode === "extend") {
+    return "/videos/extensions";
+  }
+  return "/videos/generations";
+}
+
+/**
+ * Polls the xAI status endpoint until the video task reaches a terminal state.
+ *
+ * @param params.requestId - Identifier returned by the create request.
+ * @param params.headers - Headers (including auth) sent with every poll.
+ * @param params.timeoutMs - Per-request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
+ * @param params.baseUrl - Resolved xAI base URL.
+ * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
+ * @returns The status payload once its status is `"done"`.
+ * @throws Error when the task reports `"failed"` or `"expired"`, when a poll request
+ *   is rejected by `assertOkOrThrowHttpError`, or when the task is still unfinished
+ *   after `MAX_POLL_ATTEMPTS` polls.
+ */
+async function pollXaiVideo(params: {
+  requestId: string;
+  headers: Headers;
+  timeoutMs?: number;
+  baseUrl: string;
+  fetchFn: typeof fetch;
+}): Promise<XaiVideoStatusResponse> {
+  // The id comes from a response body; encode it so it can only ever be one path segment.
+  const statusUrl = `${params.baseUrl}/videos/${encodeURIComponent(params.requestId)}`;
+  const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) {
+    const response = await fetchWithTimeout(
+      statusUrl,
+      {
+        method: "GET",
+        headers: params.headers,
+      },
+      timeoutMs,
+      params.fetchFn,
+    );
+    await assertOkOrThrowHttpError(response, "xAI video status request failed");
+    const payload = (await response.json()) as XaiVideoStatusResponse;
+    switch (payload.status) {
+      case "done":
+        return payload;
+      case "failed":
+      case "expired":
+        throw new Error(payload.error?.message?.trim() || `xAI video generation ${payload.status}`);
+      case "queued":
+      case "processing":
+      default:
+        // Unknown or missing statuses are treated as "still running".
+        break;
+    }
+    // Wait only when another poll will follow; sleeping after the final attempt would
+    // just delay the timeout error by one interval.
+    if (attempt < MAX_POLL_ATTEMPTS - 1) {
+      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+    }
+  }
+  throw new Error(`xAI video generation task ${params.requestId} did not finish in time`);
+}
+
+/**
+ * Downloads the finished video and wraps it as a generated asset.
+ *
+ * @param params.url - Output URL reported by the status endpoint.
+ * @param params.timeoutMs - Request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
+ * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
+ * @returns The video bytes, the response content type (default `video/mp4`), and a
+ *   file name whose extension is `webm` when the content type mentions webm, else `mp4`.
+ * @throws Error when `assertOkOrThrowHttpError` rejects the response.
+ */
+async function downloadXaiVideo(params: {
+  url: string;
+  timeoutMs?: number;
+  fetchFn: typeof fetch;
+}): Promise<GeneratedVideoAsset> {
+  const effectiveTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  const response = await fetchWithTimeout(
+    params.url,
+    { method: "GET" },
+    effectiveTimeoutMs,
+    params.fetchFn,
+  );
+  await assertOkOrThrowHttpError(response, "xAI generated video download failed");
+
+  const contentType = response.headers.get("content-type")?.trim();
+  const mimeType = contentType || "video/mp4";
+  const extension = mimeType.includes("webm") ? "webm" : "mp4";
+  const bytes = Buffer.from(await response.arrayBuffer());
+  return {
+    buffer: bytes,
+    mimeType,
+    fileName: `video-1.${extension}`,
+  };
+}
+
+/**
+ * Builds the xAI video generation provider.
+ *
+ * The returned provider advertises a single model (`grok-imagine-video`) and its
+ * capability limits. `generateVideo` submits the create/edit/extend request, polls the
+ * task until it is done, downloads the resulting file, and returns it with metadata.
+ *
+ * @returns The provider object to register with the plugin API.
+ */
+export function buildXaiVideoGenerationProvider(): VideoGenerationProvider {
+  return {
+    id: "xai",
+    label: "xAI",
+    defaultModel: DEFAULT_XAI_VIDEO_MODEL,
+    models: [DEFAULT_XAI_VIDEO_MODEL],
+    isConfigured: ({ agentDir }) =>
+      isProviderApiKeyConfigured({
+        provider: "xai",
+        agentDir,
+      }),
+    capabilities: {
+      maxVideos: 1,
+      maxInputImages: 1,
+      maxInputVideos: 1,
+      maxDurationSeconds: 15,
+      supportsAspectRatio: true,
+      supportsResolution: true,
+    },
+    /**
+     * Runs one video generation end to end.
+     *
+     * @throws Error when no API key resolves, when the create response has no
+     *   `request_id`, when the task fails or does not finish in time, or when the
+     *   finished task has no output URL.
+     */
+    async generateVideo(req) {
+      const auth = await resolveApiKeyForProvider({
+        provider: "xai",
+        cfg: req.cfg,
+        agentDir: req.agentDir,
+        store: req.authStore,
+      });
+      if (!auth.apiKey) {
+        throw new Error("xAI API key missing");
+      }
+
+      // The global fetch is passed explicitly to every HTTP helper below.
+      const fetchFn = fetch;
+      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
+        resolveProviderHttpRequestConfig({
+          baseUrl: resolveXaiVideoBaseUrl(req),
+          defaultBaseUrl: DEFAULT_XAI_VIDEO_BASE_URL,
+          allowPrivateNetwork: false,
+          defaultHeaders: {
+            Authorization: `Bearer ${auth.apiKey}`,
+            "Content-Type": "application/json",
+          },
+          provider: "xai",
+          capability: "video",
+          transport: "http",
+        });
+      // Endpoint and body are both derived from the request's mode; buildCreateBody
+      // validates the inputs and may throw before any request is sent.
+      const { response, release } = await postJsonRequest({
+        url: `${baseUrl}${resolveCreateEndpoint(req)}`,
+        headers,
+        body: buildCreateBody(req),
+        timeoutMs: req.timeoutMs,
+        fetchFn,
+        allowPrivateNetwork,
+        dispatcherPolicy,
+      });
+      try {
+        await assertOkOrThrowHttpError(response, "xAI video generation failed");
+        const submitted = (await response.json()) as XaiVideoCreateResponse;
+        const requestId = submitted.request_id?.trim();
+        if (!requestId) {
+          // Prefer the API's own error message when it sent one.
+          throw new Error(
+            submitted.error?.message?.trim() || "xAI video generation response missing request_id",
+          );
+        }
+        // The same resolved headers (including the bearer token) are reused for polling.
+        const completed = await pollXaiVideo({
+          requestId,
+          headers,
+          timeoutMs: req.timeoutMs,
+          baseUrl,
+          fetchFn,
+        });
+        const videoUrl = completed.video?.url?.trim();
+        if (!videoUrl) {
+          throw new Error("xAI video generation completed without an output URL");
+        }
+        // The download is issued without the auth headers.
+        const video = await downloadXaiVideo({
+          url: videoUrl,
+          timeoutMs: req.timeoutMs,
+          fetchFn,
+        });
+        return {
+          videos: [video],
+          model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL,
+          metadata: {
+            requestId,
+            status: completed.status,
+            videoUrl,
+            mode: resolveXaiVideoMode(req),
+          },
+        };
+      } finally {
+        // Runs on success and on failure, only after polling and download have ended.
+        await release();
+      }
+    },
+  };
+}
--- a/src/agents/live-auth-keys.test.ts
+++ b/src/agents/live-auth-keys.test.ts
@ -0,0 +1,32 @@
+import { afterEach, describe, expect, it } from "vitest";
+import { collectProviderApiKeys } from "./live-auth-keys.js";
+
+// Snapshot the env vars the tests overwrite so afterEach can restore them (or delete
+// them again when they were unset at load time).
+const ORIGINAL_MODELSTUDIO_API_KEY = process.env.MODELSTUDIO_API_KEY;
+const ORIGINAL_XAI_API_KEY = process.env.XAI_API_KEY;
+
+// Covers the manifest-declared env var fallback in collectProviderApiKeys.
+describe("collectProviderApiKeys", () => {
+  afterEach(() => {
+    // Put each touched env var back to its load-time value; a var that was unset at
+    // load time is deleted rather than assigned.
+    const snapshot: Array<[string, string | undefined]> = [
+      ["MODELSTUDIO_API_KEY", ORIGINAL_MODELSTUDIO_API_KEY],
+      ["XAI_API_KEY", ORIGINAL_XAI_API_KEY],
+    ];
+    for (const [name, original] of snapshot) {
+      if (original === undefined) {
+        delete process.env[name];
+      } else {
+        process.env[name] = original;
+      }
+    }
+  });
+
+  it("honors manifest-declared provider auth env vars for nonstandard provider ids", () => {
+    process.env.MODELSTUDIO_API_KEY = "modelstudio-live-key";
+
+    const keys = collectProviderApiKeys("alibaba");
+
+    expect(keys).toContain("modelstudio-live-key");
+  });
+
+  it("dedupes manifest env vars against direct provider env naming", () => {
+    process.env.XAI_API_KEY = "xai-live-key";
+
+    const keys = collectProviderApiKeys("xai");
+
+    expect(keys).toEqual(["xai-live-key"]);
+  });
+});
--- a/src/agents/live-auth-keys.ts
+++ b/src/agents/live-auth-keys.ts
@ -1,3 +1,4 @@
+import { getProviderEnvVars } from "../secrets/provider-env-vars.js";
 import { normalizeProviderId } from "./model-selection.js";

 const KEY_SPLIT_RE = /[\s,;]+/g;
@ -98,7 +99,8 @@ function resolveProviderApiKeyConfig(provider: string): ProviderApiKeyConfig {
 }

 export function collectProviderApiKeys(provider: string): string[] {
-  const config = resolveProviderApiKeyConfig(provider);
+  const normalizedProvider = normalizeProviderId(provider);
+  const config = resolveProviderApiKeyConfig(normalizedProvider);

  const forcedSingle = config.liveSingle ? process.env[config.liveSingle]?.trim() : undefined;
  if (forcedSingle) {
@ -112,6 +114,9 @@ export function collectProviderApiKeys(provider: string): string[] {
  const fallback = config.fallbackVars
    .map((envVar) => process.env[envVar]?.trim())
    .filter(Boolean) as string[];
+  const manifestFallback = getProviderEnvVars(normalizedProvider)
+    .map((envVar) => process.env[envVar]?.trim())
+    .filter(Boolean) as string[];

  const seen = new Set<string>();

@ -135,6 +140,9 @@ export function collectProviderApiKeys(provider: string): string[] {
  for (const value of fallback) {
    add(value);
  }
+  for (const value of manifestFallback) {
+    add(value);
+  }

  return Array.from(seen);
 }
--- a/src/video-generation/live-test-helpers.ts
+++ b/src/video-generation/live-test-helpers.ts
@ -2,6 +2,7 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js";
 import type { OpenClawConfig } from "../config/config.js";

 export const DEFAULT_LIVE_VIDEO_MODELS: Record<string, string> = {
+  alibaba: "alibaba/wan2.6-t2v",
  byteplus: "byteplus/seedance-1-0-lite-t2v-250428",
  fal: "fal/fal-ai/minimax/video-01-live",
  google: "google/veo-3.1-fast-generate-preview",
@ -9,6 +10,7 @@ export const DEFAULT_LIVE_VIDEO_MODELS: Record<string, string> = {
  openai: "openai/sora-2",
  qwen: "qwen/wan2.6-t2v",
  together: "together/Wan-AI/Wan2.2-T2V-A14B",
+  xai: "xai/grok-imagine-video",
 };

 export function redactLiveApiKey(value: string | undefined): string {