diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13d3d08ee90..248ee48fdd0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -102,6 +102,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/facades: guard bundled plugin facade loads with a cache-first sentinel so circular re-entry stops crashing `xai`, `sglang`, and `vllm` during gateway plugin startup. (#57508) Thanks @openperf.
 - Agents/MCP: dispose bundled MCP runtimes after one-shot `openclaw agent --local` runs finish, while preserving bundled MCP state across in-run retries so local JSON runs exit cleanly without restarting stateful MCP tools mid-run.
 - Gateway/OpenAI HTTP: restore default operator scopes for bearer-authenticated requests that omit `x-openclaw-scopes`, so headless `/v1/chat/completions` and session-history callers work again after the recent method-scope hardening. (#57596) Thanks @openperf.
+- Gateway/attachments: offload large inbound images without leaking `media://` markers into text-only runs, preserve mixed attachment order for model input/transcripts, and fail closed when model image capability cannot be resolved. (#55513) Thanks @Syysean.
 
 ## 2026.3.28
 
diff --git a/src/agents/cli-runner/types.ts b/src/agents/cli-runner/types.ts
index 7b4f30f207d..b138206c66f 100644
--- a/src/agents/cli-runner/types.ts
+++ b/src/agents/cli-runner/types.ts
@@ -4,6 +4,7 @@ import type { OpenClawConfig } from "../../config/config.js";
 import type { CliSessionBinding } from "../../config/sessions.js";
 import type { SessionSystemPromptReport } from "../../config/sessions/types.js";
 import type { CliBackendConfig } from "../../config/types.js";
+import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js";
 import type { ResolvedCliBackend } from "../cli-backends.js";
 
 export type RunCliAgentParams = {
@@ -28,6 +29,7 @@ export type RunCliAgentParams = {
   bootstrapPromptWarningSignaturesSeen?: string[];
   bootstrapPromptWarningSignature?: string;
   images?: ImageContent[];
+  imageOrder?: PromptImageOrderEntry[];
 };
 
 export type CliPreparedBackend = {
diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts
index 81d47d09a10..d53ece66e1b 100644
--- a/src/agents/command/attempt-execution.ts
+++ b/src/agents/command/attempt-execution.ts
@@ -368,6 +368,7 @@ export function runAgentAttempt(params: {
         bootstrapPromptWarningSignaturesSeen,
         bootstrapPromptWarningSignature,
         images: params.isFallbackRetry ? undefined : params.opts.images,
+        imageOrder: params.isFallbackRetry ? undefined : params.opts.imageOrder,
         streamParams: params.opts.streamParams,
       });
     return runCliWithSession(cliSessionBinding?.sessionId).catch(async (err) => {
@@ -455,6 +456,7 @@ export function runAgentAttempt(params: {
     skillsSnapshot: params.skillsSnapshot,
     prompt: effectivePrompt,
     images: params.isFallbackRetry ? undefined : params.opts.images,
+    imageOrder: params.isFallbackRetry ? undefined : params.opts.imageOrder,
     clientTools: params.opts.clientTools,
     provider: params.providerOverride,
     model: params.modelOverride,
diff --git a/src/agents/command/types.ts b/src/agents/command/types.ts
index 4c677a9c9e3..8c3e1f209b1 100644
--- a/src/agents/command/types.ts
+++ b/src/agents/command/types.ts
@@ -2,6 +2,7 @@ import type { AgentInternalEvent } from "../../agents/internal-events.js";
 import type { ClientToolDefinition } from "../../agents/pi-embedded-runner/run/params.js";
 import type { SpawnedRunMetadata } from "../../agents/spawned-context.js";
 import type { ChannelOutboundTargetMode } from "../../channels/plugins/types.js";
+import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js";
 import type { InputProvenance } from "../../sessions/input-provenance.js";
 
 /** Image content block for Claude API multimodal messages. */
@@ -35,6 +36,8 @@ export type AgentCommandOpts = {
   message: string;
   /** Optional image attachments for multimodal messages. */
   images?: ImageContent[];
+  /** Original inline/offloaded attachment order for inbound images. */
+  imageOrder?: PromptImageOrderEntry[];
   /** Optional client-provided tools (OpenResponses hosted tools). */
   clientTools?: ClientToolDefinition[];
   /** Agent id override (must exist in config). */
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 3a90978a0f4..d44925b9e9d 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -506,6 +506,7 @@ export async function runEmbeddedPiAgent(
             skillsSnapshot: params.skillsSnapshot,
             prompt,
             images: params.images,
+            imageOrder: params.imageOrder,
             clientTools: params.clientTools,
             disableTools: params.disableTools,
             provider,
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index 35adb1e3ec4..4cc890ee8d3 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -1477,6 +1477,7 @@ export async function runEmbeddedAttempt(
             workspaceDir: effectiveWorkspace,
             model: params.model,
             existingImages: params.images,
+            imageOrder: params.imageOrder,
             maxBytes: MAX_IMAGE_BYTES,
             maxDimensionPx: resolveImageSanitizationLimits(params.config).maxDimensionPx,
             workspaceOnly: effectiveFsWorkspaceOnly,
diff --git a/src/agents/pi-embedded-runner/run/images.test.ts b/src/agents/pi-embedded-runner/run/images.test.ts
index b0dc1008ede..adf5d9c923b 100644
--- a/src/agents/pi-embedded-runner/run/images.test.ts
+++ b/src/agents/pi-embedded-runner/run/images.test.ts
@@ -8,7 +8,9 @@ import {
   detectAndLoadPromptImages,
   detectImageReferences,
   loadImageFromRef,
+  mergePromptAttachmentImages,
   modelSupportsImages,
+  splitPromptAndAttachmentRefs,
 } from "./images.js";
 
 function expectNoPromptImages(result: { detectedRefs: unknown[]; images: unknown[] }) {
@@ -289,6 +291,47 @@ describe("detectAndLoadPromptImages", () => {
     expectNoPromptImages(result);
   });
 
+  it("preserves attachment order when offloaded refs and inline images are mixed", async () => {
+    const merged = mergePromptAttachmentImages({
+      imageOrder: ["offloaded", "inline"],
+      existingImages: [{ type: "image", data: "small-b", mimeType: "image/png" }],
+      offloadedImages: [{ type: "image", data: "large-a", mimeType: "image/jpeg" }],
+    });
+
+    expect(merged).toEqual([
+      { type: "image", data: "large-a", mimeType: "image/jpeg" },
+      { type: "image", data: "small-b", mimeType: "image/png" },
+    ]);
+  });
+
+  it("classifies trailing offloaded refs separately from prompt refs", () => {
+    const prompt =
+      "compare [media attached: media://inbound/prompt-ref.png] and ./prompt-b.png\n[media attached: media://inbound/att-b.png]";
+    const refs = detectImageReferences(prompt);
+
+    const split = splitPromptAndAttachmentRefs({
+      prompt,
+      refs,
+      imageOrder: ["inline", "offloaded"],
+    });
+
+    expect(split.promptRefs).toEqual([
+      {
+        raw: "media://inbound/prompt-ref.png",
+        type: "media-uri",
+        resolved: "media://inbound/prompt-ref.png",
+      },
+      { raw: "./prompt-b.png", type: "path", resolved: "./prompt-b.png" },
+    ]);
+    expect(split.attachmentRefs).toEqual([
+      {
+        raw: "media://inbound/att-b.png",
+        type: "media-uri",
+        resolved: "media://inbound/att-b.png",
+      },
+    ]);
+  });
+
   it("blocks prompt image refs outside workspace when sandbox workspaceOnly is enabled", async () => {
     const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-native-image-sandbox-"));
     const sandboxRoot = path.join(stateDir, "sandbox");
diff --git a/src/agents/pi-embedded-runner/run/images.ts b/src/agents/pi-embedded-runner/run/images.ts
index 686a7791e5a..531f79eafbc 100644
--- a/src/agents/pi-embedded-runner/run/images.ts
+++ b/src/agents/pi-embedded-runner/run/images.ts
@@ -1,6 +1,8 @@
 import path from "node:path";
 import type { ImageContent } from "@mariozechner/pi-ai";
 import { assertNoWindowsNetworkPath, safeFileURLToPath } from "../../../infra/local-file-access.js";
+import type { PromptImageOrderEntry } from "../../../media/prompt-image-order.js";
+import { resolveMediaBufferPath, getMediaDir } from "../../../media/store.js";
 import { loadWebMedia } from "../../../media/web-media.js";
 import { resolveUserPath } from "../../../utils.js";
 import type { ImageSanitizationLimits } from "../../image-sanitization.js";
@@ -38,6 +40,32 @@ const FILE_URL_REGEX_SOURCE = "file://[^\\s<>\"'`\\]]+\\.(?:" + IMAGE_EXTENSION_
 const PATH_REGEX_SOURCE =
   "(?:^|\\s|[\"'`(])((\\.\\.?/|[~/])[^\\s\"'`()\\[\\]]*\\.(?:" + IMAGE_EXTENSION_PATTERN + "))";
 
+/**
+ * Matches the opaque media URI written by the Gateway's claim-check offload:
+ *   media://inbound/<uuid-or-id>
+ *
+ * Uses an exclusion-based character class rather than a whitelist so that
+ * Unicode filenames (e.g. Chinese characters) preserved by sanitizeFilename
+ * in store.ts are matched correctly.
+ *
+ * Explicitly excluded from the ID segment:
+ *   ]      — closes the surrounding [media attached: ...] bracket
+ *   \s     — any whitespace (space, newline, tab) — terminates the token
+ *   /      — forward slash path separator (traversal prevention)
+ *   \      — back slash path separator (traversal prevention)
+ *   \x00   — null byte (path injection prevention)
+ *
+ * resolveMediaBufferPath applies its own guards against these characters, but
+ * excluding them here provides defence-in-depth at the parsing layer.
+ *
+ * Example valid IDs:
+ *   "1c77ce17-20b9-4546-be64-6e36a9adcb2c.png"
+ *   "photo---1c77ce17-20b9-4546-be64-6e36a9adcb2c.png"
+ *   "图片---1c77ce17-20b9-4546-be64-6e36a9adcb2c.png"
+ */
+// eslint-disable-next-line no-control-regex
+const MEDIA_URI_REGEX = /\bmedia:\/\/inbound\/([^\]\s/\\\x00]+)/;
+
 /**
  * Result of detecting an image reference in text.
  */
@@ -45,8 +73,8 @@ export interface DetectedImageRef {
   /** The raw matched string from the prompt */
   raw: string;
   /** The type of reference */
-  type: "path";
-  /** The resolved/normalized path */
+  type: "path" | "media-uri";
+  /** The resolved/normalized path, or the raw media URI for media-uri type */
   resolved: string;
 }
 
@@ -62,6 +90,105 @@ function normalizeRefForDedupe(raw: string): string {
   return process.platform === "win32" ? raw.toLowerCase() : raw;
 }
 
+export function mergePromptAttachmentImages(params: {
+  imageOrder?: PromptImageOrderEntry[];
+  existingImages?: ImageContent[];
+  offloadedImages?: Array<ImageContent | null>;
+  promptRefImages?: ImageContent[];
+}): ImageContent[] {
+  const promptImages: ImageContent[] = [];
+  const existingImages = params.existingImages ?? [];
+  const offloadedImages = params.offloadedImages ?? [];
+
+  if (params.imageOrder && params.imageOrder.length > 0) {
+    let inlineIndex = 0;
+    let offloadedIndex = 0;
+    for (const entry of params.imageOrder) {
+      if (entry === "inline") {
+        const image = existingImages[inlineIndex++];
+        if (image) {
+          promptImages.push(image);
+        }
+        continue;
+      }
+      const image = offloadedImages[offloadedIndex++];
+      if (image) {
+        promptImages.push(image);
+      }
+    }
+    while (inlineIndex < existingImages.length) {
+      promptImages.push(existingImages[inlineIndex++]);
+    }
+    while (offloadedIndex < offloadedImages.length) {
+      const image = offloadedImages[offloadedIndex++];
+      if (image) {
+        promptImages.push(image);
+      }
+    }
+  } else {
+    promptImages.push(...existingImages);
+    for (const image of offloadedImages) {
+      if (image) {
+        promptImages.push(image);
+      }
+    }
+  }
+
+  promptImages.push(...(params.promptRefImages ?? []));
+  return promptImages;
+}
+
+function extractTrailingAttachmentMediaUris(prompt: string, count: number): string[] {
+  if (count <= 0) {
+    return [];
+  }
+
+  const lines = prompt.split(/\r?\n/);
+  const uris: string[] = [];
+  for (let index = lines.length - 1; index >= 0 && uris.length < count; index--) {
+    const line = lines[index]?.trim();
+    if (!line || line.includes("\0")) {
+      break;
+    }
+    const match = line.match(/^\[media attached:\s*(media:\/\/inbound\/[^\]\s/\\]+)\]$/);
+    if (!match?.[1]) {
+      break;
+    }
+    uris.unshift(match[1]);
+  }
+  return uris;
+}
+
+export function splitPromptAndAttachmentRefs(params: {
+  prompt: string;
+  refs: DetectedImageRef[];
+  imageOrder?: PromptImageOrderEntry[];
+}): {
+  promptRefs: DetectedImageRef[];
+  attachmentRefs: DetectedImageRef[];
+} {
+  const offloadedCount = params.imageOrder?.filter((entry) => entry === "offloaded").length ?? 0;
+  if (offloadedCount === 0) {
+    return { promptRefs: params.refs, attachmentRefs: [] };
+  }
+
+  const attachmentUris = new Set(extractTrailingAttachmentMediaUris(params.prompt, offloadedCount));
+  if (attachmentUris.size === 0) {
+    return { promptRefs: params.refs, attachmentRefs: [] };
+  }
+
+  const promptRefs: DetectedImageRef[] = [];
+  const attachmentRefs: DetectedImageRef[] = [];
+  for (const ref of params.refs) {
+    if (ref.type === "media-uri" && attachmentUris.has(ref.resolved)) {
+      attachmentRefs.push(ref);
+      continue;
+    }
+    promptRefs.push(ref);
+  }
+  return { promptRefs, attachmentRefs };
+}
+
 async function sanitizeImagesWithLog(
   images: ImageContent[],
   label: string,
@@ -87,6 +214,7 @@ async function sanitizeImagesWithLog(
  * - Home paths: ~/Pictures/screenshot.png
  * - file:// URLs: file:///path/to/image.png
  * - Message attachments: [Image: source: /path/to/image.jpg]
+ * - Gateway claim-check URIs: [media attached: media://inbound/<id>]
  *
  * @param prompt The user prompt text to scan
  * @returns Array of detected image references
@@ -135,6 +263,20 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
       continue;
     }
 
+    // Check for a Gateway claim-check URI first (media://inbound/<id>).
+    // This must be tested before the extension-based path regex because the
+    // URI has no file extension suffix in its base form.
+    const mediaUriMatch = content.match(MEDIA_URI_REGEX);
+    if (mediaUriMatch) {
+      const uri = `media://inbound/${mediaUriMatch[1]}`;
+      const dedupeKey = normalizeRefForDedupe(uri);
+      if (!seen.has(dedupeKey)) {
+        seen.add(dedupeKey);
+        refs.push({ raw: uri, type: "media-uri", resolved: uri });
+      }
+      continue;
+    }
+
     // Extract path before the (mime/type) or | delimiter
     // Format is: path (type) | url  OR  just: path (type)
     // Path may contain spaces (e.g., "ChatGPT Image Apr 21.png")
@@ -205,6 +347,44 @@ export async function loadImageFromRef(
     sandbox?: { root: string; bridge: SandboxFsBridge };
   },
 ): Promise<ImageContent | null> {
+  // Handle Gateway claim-check URIs (media://inbound/<id>).
+  // These are written by the Gateway's offload path and point to files that
+  // the Gateway has already validated and persisted. They are intentionally
+  // exempt from workspaceOnly checks because they live in the media store
+  // managed by the Gateway, not in the agent workspace.
+  if (ref.type === "media-uri") {
+    const uriMatch = ref.resolved.match(MEDIA_URI_REGEX);
+    if (!uriMatch) {
+      log.debug(`Native image: malformed media URI, skipping: ${ref.resolved}`);
+      return null;
+    }
+    const mediaId = uriMatch[1];
+    try {
+      // resolveMediaBufferPath accepts the media ID (with optional extension
+      // and original-filename prefix) and returns the absolute path of the
+      // persisted file. It applies its own guards against path traversal,
+      // symlinks, and null bytes.
+      const physicalPath = await resolveMediaBufferPath(mediaId, "inbound");
+      const media = await loadWebMedia(physicalPath, {
+        maxBytes: options?.maxBytes,
+        localRoots: [getMediaDir()],
+      });
+      if (media.kind !== "image") {
+        log.debug(`Native image: media store entry is not an image: ${mediaId}`);
+        return null;
+      }
+      const mimeType = media.contentType ?? "image/jpeg";
+      const data = media.buffer.toString("base64");
+      log.debug(`Native image: loaded media-uri ${ref.resolved} -> ${physicalPath}`);
+      return { type: "image", data, mimeType };
+    } catch (err) {
+      log.debug(
+        `Native image: failed to load media-uri ${ref.resolved}: ${err instanceof Error ? err.message : String(err)}`,
+      );
+      return null;
+    }
+  }
+
   try {
     let targetPath = ref.resolved;
 
@@ -292,6 +472,7 @@ export async function detectAndLoadPromptImages(params: {
   workspaceDir: string;
   model: { input?: string[] };
   existingImages?: ImageContent[];
+  imageOrder?: PromptImageOrderEntry[];
   maxBytes?: number;
   maxDimensionPx?: number;
   workspaceOnly?: boolean;
@@ -326,20 +507,25 @@ export async function detectAndLoadPromptImages(params: {
   }
 
   log.debug(`Native image: detected ${allRefs.length} image refs in prompt`);
-
-  const promptImages: ImageContent[] = [...(params.existingImages ?? [])];
+  const { promptRefs, attachmentRefs } = splitPromptAndAttachmentRefs({
+    prompt: params.prompt,
+    refs: allRefs,
+    imageOrder: params.imageOrder,
+  });
+  const promptRefImages: ImageContent[] = [];
+  const offloadedImages: Array<ImageContent | null> = [];
 
   let loadedCount = 0;
   let skippedCount = 0;
 
-  for (const ref of allRefs) {
+  for (const ref of promptRefs) {
     const image = await loadImageFromRef(ref, params.workspaceDir, {
       maxBytes: params.maxBytes,
       workspaceOnly: params.workspaceOnly,
       sandbox: params.sandbox,
     });
     if (image) {
-      promptImages.push(image);
+      promptRefImages.push(image);
       loadedCount++;
       log.debug(`Native image: loaded ${ref.type} ${ref.resolved}`);
     } else {
@@ -347,6 +533,28 @@ export async function detectAndLoadPromptImages(params: {
     }
   }
 
+  for (const ref of attachmentRefs) {
+    const image = await loadImageFromRef(ref, params.workspaceDir, {
+      maxBytes: params.maxBytes,
+      workspaceOnly: params.workspaceOnly,
+      sandbox: params.sandbox,
+    });
+    offloadedImages.push(image);
+    if (image) {
+      loadedCount++;
+      log.debug(`Native image: loaded ${ref.type} ${ref.resolved}`);
+    } else {
+      skippedCount++;
+    }
+  }
+
+  const promptImages = mergePromptAttachmentImages({
+    imageOrder: params.imageOrder,
+    existingImages: params.existingImages,
+    offloadedImages,
+    promptRefImages,
+  });
+
   const imageSanitization: ImageSanitizationLimits = {
     maxDimensionPx: params.maxDimensionPx,
   };
diff --git a/src/agents/pi-embedded-runner/run/params.ts b/src/agents/pi-embedded-runner/run/params.ts
index 9ed4411cd34..88d4f0d114e 100644
--- a/src/agents/pi-embedded-runner/run/params.ts
+++ b/src/agents/pi-embedded-runner/run/params.ts
@@ -2,6 +2,7 @@ import type { ImageContent } from "@mariozechner/pi-ai";
 import type { ReasoningLevel, ThinkLevel, VerboseLevel } from "../../../auto-reply/thinking.js";
 import type { ReplyPayload } from "../../../auto-reply/types.js";
 import type { OpenClawConfig } from "../../../config/config.js";
+import type { PromptImageOrderEntry } from "../../../media/prompt-image-order.js";
 import type { enqueueCommand } from "../../../process/command-queue.js";
 import type { InputProvenance } from "../../../sessions/input-provenance.js";
 import type { ExecElevatedDefaults, ExecToolDefaults } from "../../bash-tools.js";
@@ -76,6 +77,7 @@ export type RunEmbeddedPiAgentParams = {
   skillsSnapshot?: SkillSnapshot;
   prompt: string;
   images?: ImageContent[];
+  imageOrder?: PromptImageOrderEntry[];
   /** Optional client-provided tools (OpenResponses hosted tools). */
   clientTools?: ClientToolDefinition[];
   /** Disable built-in tools for this run (LLM-only mode). */
diff --git a/src/agents/skills-install.ts b/src/agents/skills-install.ts
index f1f7d8a4480..b5db83c89d1 100644
--- a/src/agents/skills-install.ts
+++ b/src/agents/skills-install.ts
@@ -552,7 +552,6 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
       // Hook errors are non-fatal — built-in scanner results still apply.
     }
   }
-
   // Warn when install is triggered from a non-bundled source.
   // Workspace/project/personal agent skills can contain attacker-controlled metadata.
   const trustedInstallSources = new Set(["openclaw-bundled", "openclaw-managed", "openclaw-extra"]);
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index dbff95be444..10525ce189e 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -316,6 +316,7 @@ export async function runAgentTurnWithFallback(params: {
                       bootstrapPromptWarningSignaturesSeen.length - 1
                     ],
                   images: params.opts?.images,
+                  imageOrder: params.opts?.imageOrder,
                 });
                 bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
                   result.meta?.systemPromptReport,
@@ -416,6 +417,7 @@ export async function runAgentTurnWithFallback(params: {
                 bootstrapContextMode: params.opts?.bootstrapContextMode,
                 bootstrapContextRunKind: params.opts?.isHeartbeat ? "heartbeat" : "default",
                 images: params.opts?.images,
+                imageOrder: params.opts?.imageOrder,
                 abortSignal: params.opts?.abortSignal,
                 blockReplyBreak: params.resolvedBlockStreamingBreak,
                 blockReplyChunking: params.blockReplyChunking,
diff --git a/src/auto-reply/types.ts b/src/auto-reply/types.ts
index 638dda42d8f..369ace07ee5 100644
--- a/src/auto-reply/types.ts
+++ b/src/auto-reply/types.ts
@@ -1,5 +1,6 @@
 import type { ImageContent } from "@mariozechner/pi-ai";
 import type { InteractiveReply } from "../interactive/payload.js";
+import type { PromptImageOrderEntry } from "../media/prompt-image-order.js";
 import type { TypingController } from "./reply/typing.js";
 
 export type BlockReplyContext = {
@@ -28,6 +29,8 @@ export type GetReplyOptions = {
   abortSignal?: AbortSignal;
   /** Optional inbound images (used for webchat attachments). */
   images?: ImageContent[];
+  /** Original inline/offloaded attachment order for inbound images. */
+  imageOrder?: PromptImageOrderEntry[];
   /** Notifies when an agent run actually starts (useful for webchat command handling). */
   onAgentRunStart?: (runId: string) => void;
   onReplyStart?: () => Promise<void> | void;
diff --git a/src/gateway/chat-attachments.ts b/src/gateway/chat-attachments.ts
index 53dcf6a288e..6315f6057a4 100644
--- a/src/gateway/chat-attachments.ts
+++ b/src/gateway/chat-attachments.ts
@@ -1,5 +1,7 @@
 import { estimateBase64DecodedBytes } from "../media/base64.js";
+import type { PromptImageOrderEntry } from "../media/prompt-image-order.js";
 import { sniffMimeFromBase64 } from "../media/sniff-mime-from-base64.js";
+import { deleteMediaBuffer, saveMediaBuffer } from "../media/store.js";
 
 export type ChatAttachment = {
   type?: string;
@@ -14,12 +16,52 @@ export type ChatImageContent = {
   mimeType: string;
 };
 
+/**
+ * Metadata for an attachment that was offloaded to the media store.
+ *
+ * Included in ParsedMessageWithImages.offloadedRefs so that callers can
+ * persist structured media metadata for transcripts. Without this, consumers
+ * that derive MediaPath/MediaPaths from the `images` array (e.g.
+ * persistChatSendImages and buildChatSendTranscriptMessage in chat.ts) would
+ * silently omit all large attachments that were offloaded to disk.
+ */
+export type OffloadedRef = {
+  /** Opaque media URI injected into the message, e.g. "media://inbound/<id>" */
+  mediaRef: string;
+  /** The raw media ID from SavedMedia.id, usable with resolveMediaBufferPath */
+  id: string;
+  /** Absolute filesystem path returned by saveMediaBuffer — used for transcript MediaPath */
+  path: string;
+  /** MIME type of the offloaded attachment */
+  mimeType: string;
+  /** The label / filename of the original attachment */
+  label: string;
+};
+
 export type ParsedMessageWithImages = {
   message: string;
+  /** Small attachments (≤ OFFLOAD_THRESHOLD_BYTES) passed inline to the model */
   images: ChatImageContent[];
+  /** Original accepted attachment order after inline/offloaded split. */
+  imageOrder: PromptImageOrderEntry[];
+  /**
+   * Large attachments (> OFFLOAD_THRESHOLD_BYTES) that were offloaded to the
+   * media store. Each entry corresponds to a `[media attached: media://inbound/<id>]`
+   * marker appended to `message`.
+   *
+   * Callers MUST persist this list separately for transcript media metadata.
+   * It is intentionally separate from `images` because downstream model calls
+   * do not receive these as inline image blocks.
+   *
+   * ⚠️  Call sites (chat.ts, agent.ts, server-node-events.ts) MUST also pass
+   * `supportsImages: modelSupportsImages(model)` so that text-only model runs
+   * do not inject unresolvable media:// markers into prompt text.
+   */
+  offloadedRefs: OffloadedRef[];
 };
 
 type AttachmentLog = {
+  info?: (message: string) => void;
   warn: (message: string) => void;
 };
 
@@ -29,6 +71,59 @@ type NormalizedAttachment = {
   base64: string;
 };
 
+type SavedMedia = {
+  id: string;
+  path?: string;
+};
+
+const OFFLOAD_THRESHOLD_BYTES = 2_000_000;
+
+const MIME_TO_EXT: Record<string, string> = {
+  "image/jpeg": ".jpg",
+  "image/jpg": ".jpg",
+  "image/png": ".png",
+  "image/webp": ".webp",
+  "image/gif": ".gif",
+  "image/heic": ".heic",
+  "image/heif": ".heif",
+  // bmp/tiff excluded from SUPPORTED_OFFLOAD_MIMES to avoid extension-loss
+  // bug in store.ts; entries kept here for future extension support
+  "image/bmp": ".bmp",
+  "image/tiff": ".tiff",
+};
+
+// Module-level Set for O(1) lookup — not rebuilt on every attachment iteration.
+//
+// heic/heif are included only if store.ts's extensionForMime maps them to an
+// extension. If it does not (same extension-loss risk as bmp/tiff), remove
+// them from this set.
+const SUPPORTED_OFFLOAD_MIMES = new Set([
+  "image/jpeg",
+  "image/jpg",
+  "image/png",
+  "image/webp",
+  "image/gif",
+  "image/heic",
+  "image/heif",
+]);
+
+/**
+ * Raised when the Gateway cannot persist an attachment to the media store.
+ *
+ * Distinct from ordinary input-validation errors so that Gateway handlers can
+ * map it to a server-side 5xx status rather than a client 4xx.
+ *
+ * Example causes: ENOSPC, EPERM, unexpected saveMediaBuffer return shape.
+ */
+export class MediaOffloadError extends Error {
+  readonly cause: unknown;
+  constructor(message: string, options?: ErrorOptions) {
+    super(message, options);
+    this.name = "MediaOffloadError";
+    this.cause = options?.cause;
+  }
+}
+
 function normalizeMime(mime?: string): string | undefined {
   if (!mime) {
     return undefined;
@@ -42,8 +137,75 @@ function isImageMime(mime?: string): boolean {
 }
 
 function isValidBase64(value: string): boolean {
-  // Minimal validation; avoid full decode allocations for large payloads.
-  return value.length > 0 && value.length % 4 === 0 && /^[A-Za-z0-9+/]+={0,2}$/.test(value);
+  if (value.length === 0 || value.length % 4 !== 0) {
+    return false;
+  }
+  // A full O(n) regex scan is safe: no overlapping quantifiers, fails linearly.
+  // Prevents adversarial payloads padded with megabytes of whitespace from
+  // bypassing length thresholds.
+  return /^[A-Za-z0-9+/]+={0,2}$/.test(value);
+}
+
+/**
+ * Confirms that the decoded buffer produced by Buffer.from(b64, 'base64')
+ * matches the pre-decode size estimate.
+ *
+ * Node's Buffer.from silently drops invalid base64 characters rather than
+ * throwing. A material size discrepancy means the source string contained
+ * embedded garbage that was silently stripped, which would produce a corrupted
+ * file on disk. ±3 bytes of slack accounts for base64 padding rounding.
+ *
+ * IMPORTANT: this is an input-validation check (4xx client error).
+ * It MUST be called OUTSIDE the MediaOffloadError try/catch so that
+ * corrupt-input errors are not misclassified as 5xx server errors.
+ */
+function verifyDecodedSize(buffer: Buffer, estimatedBytes: number, label: string): void {
+  if (Math.abs(buffer.byteLength - estimatedBytes) > 3) {
+    throw new Error(
+      `attachment ${label}: base64 contains invalid characters ` +
+        `(expected ~${estimatedBytes} bytes decoded, got ${buffer.byteLength})`,
+    );
+  }
+}
+
+function ensureExtension(label: string, mime: string): string {
+  if (/\.[a-zA-Z0-9]+$/.test(label)) {
+    return label;
+  }
+  const ext = MIME_TO_EXT[mime.toLowerCase()] ?? "";
+  return ext ? `${label}${ext}` : label;
+}
+
+/**
+ * Type guard for the return value of saveMediaBuffer.
+ *
+ * Also validates that the returned ID:
+ * - is a non-empty string
+ * - contains no path separators (/ or \) or null bytes
+ *
+ * Catching a bad shape here produces a cleaner error than a cryptic failure
+ * deeper in the stack, and is treated as a 5xx infrastructure error.
+ */
+function assertSavedMedia(value: unknown, label: string): SavedMedia {
+  if (
+    value !== null &&
+    typeof value === "object" &&
+    "id" in value &&
+    typeof (value as Record<string, unknown>).id === "string"
+  ) {
+    const id = (value as Record<string, unknown>).id as string;
+    if (id.length === 0) {
+      throw new Error(`attachment ${label}: saveMediaBuffer returned an empty media ID`);
+    }
+    if (id.includes("/") || id.includes("\\") || id.includes("\0")) {
+      throw new Error(
+        `attachment ${label}: saveMediaBuffer returned an unsafe media ID ` +
+          `(contains path separator or null byte)`,
+      );
+    }
+    return value as SavedMedia;
+  }
+  throw new Error(`attachment ${label}: saveMediaBuffer returned an unexpected shape`);
 }
 
 function normalizeAttachment(
@@ -64,7 +226,6 @@ function normalizeAttachment(
 
   let base64 = content.trim();
   if (opts.stripDataUrlPrefix) {
-    // Strip data URL prefix if present (e.g., "data:image/jpeg;base64,...").
     const dataUrlMatch = /^data:[^;]+;base64,(.*)$/.exec(base64);
     if (dataUrlMatch) {
       base64 = dataUrlMatch[1];
@@ -91,57 +252,211 @@ function validateAttachmentBase64OrThrow(
 
 /**
  * Parse attachments and extract images as structured content blocks.
- * Returns the message text and an array of image content blocks
- * compatible with Claude API's image format.
+ * Returns the message text, inline image blocks, and offloaded media refs.
+ *
+ * ## Offload behaviour
+ * Attachments whose decoded size exceeds OFFLOAD_THRESHOLD_BYTES are saved to
+ * disk via saveMediaBuffer and replaced with an opaque `media://inbound/<id>`
+ * URI appended to the message. The agent resolves these URIs via
+ * resolveMediaBufferPath before passing them to the model.
+ *
+ * ## Transcript metadata
+ * Callers MUST use `result.offloadedRefs` to persist structured media metadata
+ * for transcripts. These refs are intentionally excluded from `result.images`
+ * because they are not passed inline to the model.
+ *
+ * ## Text-only model runs
+ * Pass `supportsImages: false` for text-only model runs so that no media://
+ * markers are injected into prompt text.
+ *
+ * ⚠️  Call sites in chat.ts, agent.ts, and server-node-events.ts MUST be
+ * updated to pass `supportsImages: modelSupportsImages(model)`. Until they do,
+ * text-only model runs receive unresolvable media:// markers in their prompt.
+ *
+ * ## Cleanup on failure
+ * On any parse failure after files have already been offloaded, best-effort
+ * cleanup is performed before rethrowing so that malformed requests do not
+ * accumulate orphaned files on disk ahead of the periodic TTL sweep.
+ *
+ * ## Known ordering limitation
+ * In mixed large/small batches, the model receives images in a different order
+ * than the original attachment list because detectAndLoadPromptImages
+ * initialises from existingImages first, then appends prompt-detected refs.
+ * A future refactor should unify all image references into a single ordered list.
+ *
+ * @throws {MediaOffloadError} Infrastructure failure saving to media store → 5xx.
+ * @throws {Error} Input validation failure → 4xx.
  */
 export async function parseMessageWithAttachments(
   message: string,
   attachments: ChatAttachment[] | undefined,
-  opts?: { maxBytes?: number; log?: AttachmentLog },
+  opts?: { maxBytes?: number; log?: AttachmentLog; supportsImages?: boolean },
 ): Promise<ParsedMessageWithImages> {
-  const maxBytes = opts?.maxBytes ?? 5_000_000; // decoded bytes (5,000,000)
+  const maxBytes = opts?.maxBytes ?? 5_000_000;
   const log = opts?.log;
+
   if (!attachments || attachments.length === 0) {
-    return { message, images: [] };
+    return { message, images: [], imageOrder: [], offloadedRefs: [] };
+  }
+
+  // For text-only models drop all attachments cleanly. Do not save files or
+  // inject media:// markers that would never be resolved and would leak
+  // internal path references into the model's prompt.
+  if (opts?.supportsImages === false) {
+    if (attachments.length > 0) {
+      log?.warn(
+        `parseMessageWithAttachments: ${attachments.length} attachment(s) dropped — model does not support images`,
+      );
+    }
+    return { message, images: [], imageOrder: [], offloadedRefs: [] };
   }
 
   const images: ChatImageContent[] = [];
+  const imageOrder: PromptImageOrderEntry[] = [];
+  const offloadedRefs: OffloadedRef[] = [];
+  let updatedMessage = message;
 
-  for (const [idx, att] of attachments.entries()) {
-    if (!att) {
-      continue;
-    }
-    const normalized = normalizeAttachment(att, idx, {
-      stripDataUrlPrefix: true,
-      requireImageMime: false,
-    });
-    validateAttachmentBase64OrThrow(normalized, { maxBytes });
-    const { base64: b64, label, mime } = normalized;
+  // Track IDs of files saved during this request for cleanup if a later
+  // attachment fails validation and the entire parse is aborted.
+  const savedMediaIds: string[] = [];
 
-    const providedMime = normalizeMime(mime);
-    const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
-    if (sniffedMime && !isImageMime(sniffedMime)) {
-      log?.warn(`attachment ${label}: detected non-image (${sniffedMime}), dropping`);
-      continue;
-    }
-    if (!sniffedMime && !isImageMime(providedMime)) {
-      log?.warn(`attachment ${label}: unable to detect image mime type, dropping`);
-      continue;
-    }
-    if (sniffedMime && providedMime && sniffedMime !== providedMime) {
-      log?.warn(
-        `attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
-      );
-    }
+  try {
+    for (const [idx, att] of attachments.entries()) {
+      if (!att) {
+        continue;
+      }
 
-    images.push({
-      type: "image",
-      data: b64,
-      mimeType: sniffedMime ?? providedMime ?? mime,
-    });
+      const normalized = normalizeAttachment(att, idx, {
+        stripDataUrlPrefix: true,
+        requireImageMime: false,
+      });
+
+      const { base64: b64, label, mime } = normalized;
+
+      if (!isValidBase64(b64)) {
+        throw new Error(`attachment ${label}: invalid base64 content`);
+      }
+
+      const sizeBytes = estimateBase64DecodedBytes(b64);
+      if (sizeBytes <= 0) {
+        log?.warn(`attachment ${label}: estimated size is zero, dropping`);
+        continue;
+      }
+
+      if (sizeBytes > maxBytes) {
+        throw new Error(
+          `attachment ${label}: exceeds size limit (${sizeBytes} > ${maxBytes} bytes)`,
+        );
+      }
+
+      const providedMime = normalizeMime(mime);
+      const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
+
+      if (sniffedMime && !isImageMime(sniffedMime)) {
+        log?.warn(`attachment ${label}: detected non-image (${sniffedMime}), dropping`);
+        continue;
+      }
+      if (!sniffedMime && !isImageMime(providedMime)) {
+        log?.warn(`attachment ${label}: unable to detect image mime type, dropping`);
+        continue;
+      }
+      if (sniffedMime && providedMime && sniffedMime !== providedMime) {
+        log?.warn(
+          `attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
+        );
+      }
+
+      // Third fallback normalises `mime` so a raw un-normalised string (e.g.
+      // "IMAGE/JPEG") does not silently bypass the SUPPORTED_OFFLOAD_MIMES check.
+      const finalMime = sniffedMime ?? providedMime ?? normalizeMime(mime) ?? mime;
+
+      let isOffloaded = false;
+
+      if (sizeBytes > OFFLOAD_THRESHOLD_BYTES) {
+        const isSupportedForOffload = SUPPORTED_OFFLOAD_MIMES.has(finalMime);
+
+        if (!isSupportedForOffload) {
+          // Passing this inline would reintroduce the OOM risk this PR prevents.
+          throw new Error(
+            `attachment ${label}: format ${finalMime} is too large to pass inline ` +
+              `(${sizeBytes} > ${OFFLOAD_THRESHOLD_BYTES} bytes) and cannot be offloaded. ` +
+              `Please convert to JPEG, PNG, WEBP, GIF, HEIC, or HEIF.`,
+          );
+        }
+
+        // Decode and run input-validation BEFORE the MediaOffloadError try/catch.
+        // verifyDecodedSize is a 4xx client error and must not be wrapped as a
+        // 5xx MediaOffloadError.
+        const buffer = Buffer.from(b64, "base64");
+        verifyDecodedSize(buffer, sizeBytes, label);
+
+        // Only the storage operation is wrapped so callers can distinguish
+        // infrastructure failures (5xx) from input errors (4xx).
+        try {
+          const labelWithExt = ensureExtension(label, finalMime);
+
+          const rawResult = await saveMediaBuffer(
+            buffer,
+            finalMime,
+            "inbound",
+            maxBytes,
+            labelWithExt,
+          );
+
+          const savedMedia = assertSavedMedia(rawResult, label);
+
+          // Track for cleanup if a subsequent attachment fails.
+          savedMediaIds.push(savedMedia.id);
+
+          // Opaque URI — compatible with workspaceOnly sandboxes and decouples
+          // the Gateway from the agent's filesystem layout.
+          const mediaRef = `media://inbound/${savedMedia.id}`;
+
+          updatedMessage += `\n[media attached: ${mediaRef}]`;
+          log?.info?.(`[Gateway] Intercepted large image payload. Saved: ${mediaRef}`);
+
+          // Record for transcript metadata — separate from `images` because
+          // these are not passed inline to the model.
+          offloadedRefs.push({
+            mediaRef,
+            id: savedMedia.id,
+            path: savedMedia.path ?? "",
+            mimeType: finalMime,
+            label,
+          });
+          imageOrder.push("offloaded");
+
+          isOffloaded = true;
+        } catch (err) {
+          const errorMessage = err instanceof Error ? err.message : String(err);
+          throw new MediaOffloadError(
+            `[Gateway Error] Failed to save intercepted media to disk: ${errorMessage}`,
+            { cause: err },
+          );
+        }
+      }
+
+      if (isOffloaded) {
+        continue;
+      }
+
+      images.push({ type: "image", data: b64, mimeType: finalMime });
+      imageOrder.push("inline");
+    }
+  } catch (err) {
+    // Best-effort cleanup before rethrowing.
+    if (savedMediaIds.length > 0) {
+      await Promise.allSettled(savedMediaIds.map((id) => deleteMediaBuffer(id, "inbound")));
+    }
+    throw err;
   }
 
-  return { message, images };
+  return {
+    message: updatedMessage !== message ? updatedMessage.trimEnd() : message,
+    images,
+    imageOrder,
+    offloadedRefs,
+  };
 }
 
 /**
@@ -153,7 +468,8 @@ export function buildMessageWithAttachments(
   attachments: ChatAttachment[] | undefined,
   opts?: { maxBytes?: number },
 ): string {
-  const maxBytes = opts?.maxBytes ?? 2_000_000; // 2 MB
+  const maxBytes = opts?.maxBytes ?? 2_000_000;
+
   if (!attachments || attachments.length === 0) {
     return message;
   }
@@ -164,21 +480,22 @@ export function buildMessageWithAttachments(
     if (!att) {
       continue;
     }
+
     const normalized = normalizeAttachment(att, idx, {
       stripDataUrlPrefix: false,
       requireImageMime: true,
     });
     validateAttachmentBase64OrThrow(normalized, { maxBytes });
-    const { base64, label, mime } = normalized;
 
+    const { base64, label, mime } = normalized;
     const safeLabel = label.replace(/\s+/g, "_");
-    const dataUrl = `![${safeLabel}](data:${mime};base64,${base64})`;
-    blocks.push(dataUrl);
+    blocks.push(`![${safeLabel}](data:${mime};base64,${base64})`);
   }
 
   if (blocks.length === 0) {
     return message;
   }
+
   const separator = message.trim().length > 0 ? "\n\n" : "";
   return `${message}${separator}${blocks.join("\n\n")}`;
 }
diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts
index 43d1617f159..7ce897b7018 100644
--- a/src/gateway/server-methods/agent.ts
+++ b/src/gateway/server-methods/agent.ts
@@ -23,6 +23,7 @@ import {
 } from "../../infra/outbound/agent-delivery.js";
 import { shouldDowngradeDeliveryToSessionOnly } from "../../infra/outbound/best-effort-delivery.js";
 import { resolveMessageChannelSelection } from "../../infra/outbound/channel-selection.js";
+import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js";
 import { classifySessionKeyShape, normalizeAgentId } from "../../routing/session-key.js";
 import { defaultRuntime } from "../../runtime.js";
 import { normalizeInputProvenance, type InputProvenance } from "../../sessions/input-provenance.js";
@@ -39,7 +40,7 @@ import {
   normalizeMessageChannel,
 } from "../../utils/message-channel.js";
 import { resolveAssistantIdentity } from "../assistant-identity.js";
-import { parseMessageWithAttachments } from "../chat-attachments.js";
+import { MediaOffloadError, parseMessageWithAttachments } from "../chat-attachments.js";
 import { resolveAssistantAvatarUrl } from "../control-ui-shared.js";
 import { ADMIN_SCOPE } from "../method-scopes.js";
 import { GATEWAY_CLIENT_CAPS, hasGatewayClientCap } from "../protocol/client-info.js";
@@ -58,6 +59,8 @@ import {
   loadGatewaySessionRow,
   loadSessionEntry,
   migrateAndPruneGatewaySessionStoreKey,
+  resolveGatewayModelSupportsImages,
+  resolveSessionModelRef,
 } from "../session-utils.js";
 import { formatForLog } from "../ws-log.js";
 import { waitForAgentJob } from "./agent-job.js";
@@ -346,16 +349,53 @@ export const agentHandlers: GatewayRequestHandlers = {
 
     let message = (request.message ?? "").trim();
     let images: Array<{ type: "image"; data: string; mimeType: string }> = [];
+    let imageOrder: PromptImageOrderEntry[] = [];
     if (normalizedAttachments.length > 0) {
+      const requestedSessionKeyRaw =
+        typeof request.sessionKey === "string" && request.sessionKey.trim()
+          ? request.sessionKey.trim()
+          : undefined;
+
+      let baseProvider: string | undefined;
+      let baseModel: string | undefined;
+      if (requestedSessionKeyRaw) {
+        const { cfg: sessCfg, entry: sessEntry } = loadSessionEntry(requestedSessionKeyRaw);
+        const modelRef = resolveSessionModelRef(sessCfg, sessEntry, undefined);
+        baseProvider = modelRef.provider;
+        baseModel = modelRef.model;
+      }
+      const effectiveProvider = providerOverride || baseProvider;
+      const effectiveModel = modelOverride || baseModel;
+      const supportsImages = await resolveGatewayModelSupportsImages({
+        loadGatewayModelCatalog: context.loadGatewayModelCatalog,
+        provider: effectiveProvider,
+        model: effectiveModel,
+      });
+
       try {
         const parsed = await parseMessageWithAttachments(message, normalizedAttachments, {
           maxBytes: 5_000_000,
           log: context.logGateway,
+          supportsImages,
         });
         message = parsed.message.trim();
         images = parsed.images;
+        imageOrder = parsed.imageOrder;
+        // offloadedRefs are appended as text markers to `message`; the agent
+        // runner will resolve them via detectAndLoadPromptImages.
       } catch (err) {
-        respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, String(err)));
+        // MediaOffloadError indicates a server-side storage fault (ENOSPC, EPERM,
+        // etc.). Map it to UNAVAILABLE so clients can retry without treating it as
+        // a bad request. All other errors are input-validation failures → 4xx.
+        const isServerFault = err instanceof MediaOffloadError;
+        respond(
+          false,
+          undefined,
+          errorShape(
+            isServerFault ? ErrorCodes.UNAVAILABLE : ErrorCodes.INVALID_REQUEST,
+            String(err),
+          ),
+        );
         return;
       }
     }
@@ -765,6 +805,7 @@ export const agentHandlers: GatewayRequestHandlers = {
       ingressOpts: {
         message,
         images,
+        imageOrder,
         provider: providerOverride,
         model: modelOverride,
         to: resolvedTo,
diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts
index 5b38db73903..5ebe2732838 100644
--- a/src/gateway/server-methods/chat.ts
+++ b/src/gateway/server-methods/chat.ts
@@ -12,6 +12,7 @@ import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.j
 import type { ReplyPayload } from "../../auto-reply/types.js";
 import { resolveSessionFilePath } from "../../config/sessions.js";
 import { jsonUtf8Bytes } from "../../infra/json-utf8-bytes.js";
+import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js";
 import { type SavedMedia, saveMediaBuffer } from "../../media/store.js";
 import { createChannelReplyPipeline } from "../../plugin-sdk/channel-reply-pipeline.js";
 import { normalizeInputProvenance, type InputProvenance } from "../../sessions/input-provenance.js";
@@ -35,7 +36,12 @@ import {
   isChatStopCommandText,
   resolveChatRunExpiresAtMs,
 } from "../chat-abort.js";
-import { type ChatImageContent, parseMessageWithAttachments } from "../chat-attachments.js";
+import {
+  type ChatImageContent,
+  MediaOffloadError,
+  type OffloadedRef,
+  parseMessageWithAttachments,
+} from "../chat-attachments.js";
 import { stripEnvelopeFromMessage, stripEnvelopeFromMessages } from "../chat-sanitize.js";
 import { augmentChatHistoryWithCliSessionImports } from "../cli-session-history.js";
 import { ADMIN_SCOPE } from "../method-scopes.js";
@@ -59,6 +65,7 @@ import { getMaxChatHistoryMessagesBytes } from "../server-constants.js";
 import {
   capArrayByJsonBytes,
   loadSessionEntry,
+  resolveGatewayModelSupportsImages,
   readSessionMessages,
   resolveSessionModelRef,
 } from "../session-utils.js";
@@ -326,16 +333,50 @@ function canInjectSystemProvenance(client: GatewayRequestHandlerOptions["client"
   return scopes.includes(ADMIN_SCOPE);
 }
 
+/**
+ * Persist inline images and offloaded-ref media to the transcript media store.
+ *
+ * Inline images are re-saved from their base64 payload so that a stable
+ * filesystem path can be stored in the transcript.  Offloaded refs are already
+ * on disk (saved by parseMessageWithAttachments); their SavedMedia metadata is
+ * synthesised directly from the OffloadedRef, avoiding a redundant write.
+ *
+ * Both sets are combined so that transcript media fields remain complete
+ * regardless of whether attachments were inlined or offloaded.
+ */
 async function persistChatSendImages(params: {
   images: ChatImageContent[];
+  imageOrder: PromptImageOrderEntry[];
+  offloadedRefs: OffloadedRef[];
   client: GatewayRequestHandlerOptions["client"];
   logGateway: GatewayRequestContext["logGateway"];
 }): Promise<SavedMedia[]> {
-  if (params.images.length === 0 || isAcpBridgeClient(params.client)) {
+  if (isAcpBridgeClient(params.client)) {
     return [];
   }
+
   const saved: SavedMedia[] = [];
-  for (const img of params.images) {
+  let inlineIndex = 0;
+  let offloadedIndex = 0;
+  for (const entry of params.imageOrder) {
+    if (entry === "offloaded") {
+      const ref = params.offloadedRefs[offloadedIndex++];
+      if (!ref) {
+        continue;
+      }
+      saved.push({
+        id: ref.id,
+        path: ref.path,
+        size: 0,
+        contentType: ref.mimeType,
+      });
+      continue;
+    }
+
+    const img = params.images[inlineIndex++];
+    if (!img) {
+      continue;
+    }
     try {
       saved.push(await saveMediaBuffer(Buffer.from(img.data, "base64"), img.mimeType, "inbound"));
     } catch (err) {
@@ -344,6 +385,7 @@ async function persistChatSendImages(params: {
       );
     }
   }
+
   return saved;
 }
 
@@ -1387,23 +1429,18 @@ export const chatHandlers: GatewayRequestHandlers = {
       );
       return;
     }
-    let parsedMessage = inboundMessage;
-    let parsedImages: ChatImageContent[] = [];
-    if (normalizedAttachments.length > 0) {
-      try {
-        const parsed = await parseMessageWithAttachments(inboundMessage, normalizedAttachments, {
-          maxBytes: 5_000_000,
-          log: context.logGateway,
-        });
-        parsedMessage = parsed.message;
-        parsedImages = parsed.images;
-      } catch (err) {
-        respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, String(err)));
-        return;
-      }
-    }
+
+    // Load session entry before attachment parsing so we can gate media-URI
+    // marker injection on the model's image capability. This prevents opaque
+    // media:// markers from leaking into prompts for text-only model runs.
     const rawSessionKey = p.sessionKey;
     const { cfg, entry, canonicalKey: sessionKey } = loadSessionEntry(rawSessionKey);
+
+    let parsedMessage = inboundMessage;
+    let parsedImages: ChatImageContent[] = [];
+    let parsedImageOrder: PromptImageOrderEntry[] = [];
+    let parsedOffloadedRefs: OffloadedRef[] = [];
+
     const timeoutMs = resolveAgentTimeoutMs({
       cfg,
       overrideMs: p.timeoutMs,
@@ -1461,6 +1498,43 @@ export const chatHandlers: GatewayRequestHandlers = {
       return;
     }
 
+    if (normalizedAttachments.length > 0) {
+      const sessionAgentId = resolveSessionAgentId({ sessionKey, config: cfg });
+      const modelRef = resolveSessionModelRef(cfg, entry, sessionAgentId);
+      const supportsImages = await resolveGatewayModelSupportsImages({
+        loadGatewayModelCatalog: context.loadGatewayModelCatalog,
+        provider: modelRef.provider,
+        model: modelRef.model,
+      });
+
+      try {
+        const parsed = await parseMessageWithAttachments(inboundMessage, normalizedAttachments, {
+          maxBytes: 5_000_000,
+          log: context.logGateway,
+          supportsImages,
+        });
+        parsedMessage = parsed.message;
+        parsedImages = parsed.images;
+        parsedImageOrder = parsed.imageOrder;
+        parsedOffloadedRefs = parsed.offloadedRefs;
+      } catch (err) {
+        // MediaOffloadError indicates a server-side storage fault (ENOSPC, EPERM,
+        // etc.). All other errors are client-side input validation failures.
+        // Map them to different HTTP status codes so callers can retry server
+        // faults without treating them as bad requests.
+        const isServerFault = err instanceof MediaOffloadError;
+        respond(
+          false,
+          undefined,
+          errorShape(
+            isServerFault ? ErrorCodes.UNAVAILABLE : ErrorCodes.INVALID_REQUEST,
+            String(err),
+          ),
+        );
+        return;
+      }
+    }
+
     try {
       const abortController = new AbortController();
       context.chatAbortControllers.set(clientRunId, {
@@ -1477,8 +1551,15 @@ export const chatHandlers: GatewayRequestHandlers = {
         status: "started" as const,
       };
       respond(true, ackPayload, undefined, { runId: clientRunId });
+
+      // Persist both inline images and already-offloaded refs to the media
+      // store so that transcript media fields remain complete for all attachment
+      // sizes. Offloaded refs are already on disk; persistChatSendImages converts
+      // their metadata without re-writing the files.
       const persistedImagesPromise = persistChatSendImages({
         images: parsedImages,
+        imageOrder: parsedImageOrder,
+        offloadedRefs: parsedOffloadedRefs,
         client,
         logGateway: context.logGateway,
       });
@@ -1638,6 +1719,7 @@ export const chatHandlers: GatewayRequestHandlers = {
           runId: clientRunId,
           abortSignal: abortController.signal,
           images: parsedImages.length > 0 ? parsedImages : undefined,
+          imageOrder: parsedImageOrder.length > 0 ? parsedImageOrder : undefined,
           onAgentRunStart: (runId) => {
             agentRunStarted = true;
             void emitUserTranscriptUpdate();
diff --git a/src/gateway/server-node-events.test.ts b/src/gateway/server-node-events.test.ts
index f2220508138..5536806a576 100644
--- a/src/gateway/server-node-events.test.ts
+++ b/src/gateway/server-node-events.test.ts
@@ -7,6 +7,8 @@ const buildSessionLookup = (
   sessionKey: string,
   entry: {
     sessionId?: string;
+    model?: string;
+    modelProvider?: string;
     lastChannel?: string;
     lastTo?: string;
     lastAccountId?: string;
@@ -23,6 +25,8 @@ const buildSessionLookup = (
   entry: {
     sessionId: entry.sessionId ?? `sid-${sessionKey}`,
     updatedAt: entry.updatedAt ?? Date.now(),
+    model: entry.model,
+    modelProvider: entry.modelProvider,
     lastChannel: entry.lastChannel,
     lastTo: entry.lastTo,
     lastAccountId: entry.lastAccountId,
@@ -44,6 +48,7 @@ const loadOrCreateDeviceIdentityMock = vi.hoisted(() =>
     privateKeyPem: "private",
   })),
 );
+const parseMessageWithAttachmentsMock = vi.hoisted(() => vi.fn());
 const normalizeChannelIdMock = vi.hoisted(() =>
   vi.fn((channel?: string | null) => channel ?? null),
 );
@@ -79,6 +84,13 @@ vi.mock("../infra/push-apns.js", () => ({
 vi.mock("../infra/device-identity.js", () => ({
   loadOrCreateDeviceIdentity: loadOrCreateDeviceIdentityMock,
 }));
+vi.mock("./chat-attachments.js", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("./chat-attachments.js")>();
+  return {
+    ...actual,
+    parseMessageWithAttachments: parseMessageWithAttachmentsMock,
+  };
+});
 vi.mock("./session-utils.js", () => ({
   loadSessionEntry: vi.fn((sessionKey: string) => buildSessionLookup(sessionKey)),
   migrateAndPruneGatewaySessionStoreKey: vi.fn(
@@ -89,10 +101,38 @@ vi.mock("./session-utils.js", () => ({
     }),
   ),
   pruneLegacyStoreKeys: vi.fn(),
+  resolveGatewayModelSupportsImages: vi.fn(
+    async ({
+      loadGatewayModelCatalog,
+      provider,
+      model,
+    }: {
+      loadGatewayModelCatalog: () => Promise<
+        Array<{ id: string; provider: string; input?: string[] }>
+      >;
+      provider?: string;
+      model?: string;
+    }) => {
+      if (!model) {
+        return true;
+      }
+      const catalog = await loadGatewayModelCatalog();
+      const modelEntry = catalog.find(
+        (entry) => entry.id === model && (!provider || entry.provider === provider),
+      );
+      return modelEntry ? (modelEntry.input?.includes("image") ?? false) : true;
+    },
+  ),
   resolveGatewaySessionStoreTarget: vi.fn(({ key }: { key: string }) => ({
     canonicalKey: key,
     storeKeys: [key],
   })),
+  resolveSessionModelRef: vi.fn(
+    (_cfg: OpenClawConfig, entry?: { model?: string; modelProvider?: string }) => ({
+      provider: entry?.modelProvider ?? "test-provider",
+      model: entry?.model ?? "default-model",
+    }),
+  ),
 }));
 
 import { normalizeChannelId } from "../channels/plugins/index.js";
@@ -751,10 +791,17 @@ describe("notifications changed events", () => {
 describe("agent request events", () => {
   beforeEach(() => {
     agentCommandMock.mockClear();
+    parseMessageWithAttachmentsMock.mockReset();
     updateSessionStoreMock.mockClear();
     loadSessionEntryMock.mockClear();
     normalizeChannelIdVi.mockClear();
     normalizeChannelIdVi.mockImplementation((channel?: string | null) => channel ?? null);
+    parseMessageWithAttachmentsMock.mockResolvedValue({
+      message: "parsed message",
+      images: [],
+      imageOrder: [],
+      offloadedRefs: [],
+    });
     agentCommandMock.mockResolvedValue({ status: "ok" } as never);
     updateSessionStoreMock.mockImplementation(async (_storePath, update) => {
       update({});
@@ -821,4 +868,45 @@ describe("agent request events", () => {
     });
     expect(opts.runId).toBe(opts.sessionId);
   });
+
+  it("passes supportsImages false for text-only node-session models", async () => {
+    const ctx = buildCtx();
+    ctx.loadGatewayModelCatalog = async () => [
+      {
+        id: "text-only",
+        name: "Text only",
+        provider: "test-provider",
+        input: ["text"],
+      },
+    ];
+    loadSessionEntryMock.mockReturnValueOnce({
+      ...buildSessionLookup("agent:main:main", {
+        model: "text-only",
+        modelProvider: "test-provider",
+      }),
+      canonicalKey: "agent:main:main",
+    });
+
+    await handleNodeEvent(ctx, "node-text-only", {
+      event: "agent.request",
+      payloadJSON: JSON.stringify({
+        message: "describe",
+        sessionKey: "agent:main:main",
+        attachments: [
+          {
+            type: "image",
+            mimeType: "image/png",
+            fileName: "dot.png",
+            content: "AAAA",
+          },
+        ],
+      }),
+    });
+
+    expect(parseMessageWithAttachmentsMock).toHaveBeenCalledWith(
+      "describe",
+      expect.any(Array),
+      expect.objectContaining({ supportsImages: false }),
+    );
+  });
 });
diff --git a/src/gateway/server-node-events.ts b/src/gateway/server-node-events.ts
index 4b05a82faf9..9a8294339ed 100644
--- a/src/gateway/server-node-events.ts
+++ b/src/gateway/server-node-events.ts
@@ -1,4 +1,5 @@
 import { randomUUID } from "node:crypto";
+import { resolveSessionAgentId } from "../agents/agent-scope.js";
 import { sanitizeInboundSystemTags } from "../auto-reply/reply/inbound-text.js";
 import { normalizeChannelId } from "../channels/plugins/index.js";
 import { createOutboundSendDeps } from "../cli/outbound-send-deps.js";
@@ -12,12 +13,19 @@ import { buildOutboundSessionContext } from "../infra/outbound/session-context.j
 import { resolveOutboundTarget } from "../infra/outbound/targets.js";
 import { registerApnsRegistration } from "../infra/push-apns.js";
 import { enqueueSystemEvent } from "../infra/system-events.js";
+import type { PromptImageOrderEntry } from "../media/prompt-image-order.js";
+import { deleteMediaBuffer } from "../media/store.js";
 import { normalizeMainKey, scopedHeartbeatWakeOptions } from "../routing/session-key.js";
 import { defaultRuntime } from "../runtime.js";
 import { parseMessageWithAttachments } from "./chat-attachments.js";
 import { normalizeRpcAttachmentsToChatAttachments } from "./server-methods/attachment-normalize.js";
 import type { NodeEvent, NodeEventContext } from "./server-node-events-types.js";
-import { loadSessionEntry, migrateAndPruneGatewaySessionStoreKey } from "./session-utils.js";
+import {
+  loadSessionEntry,
+  migrateAndPruneGatewaySessionStoreKey,
+  resolveGatewayModelSupportsImages,
+  resolveSessionModelRef,
+} from "./session-utils.js";
 import { formatForLog } from "./ws-log.js";
 
 const MAX_EXEC_EVENT_OUTPUT_CHARS = 180;
@@ -347,33 +355,74 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt
         timeoutSeconds?: number | null;
         key?: string | null;
       };
+
       let link: AgentDeepLink | null = null;
       try {
         link = JSON.parse(evt.payloadJSON) as AgentDeepLink;
       } catch {
         return;
       }
+
+      const sessionKeyRaw = (link?.sessionKey ?? "").trim();
+      const sessionKey = sessionKeyRaw.length > 0 ? sessionKeyRaw : `node-${nodeId}`;
+      const cfg = loadConfig();
+      const { storePath, entry, canonicalKey } = loadSessionEntry(sessionKey);
+
       let message = (link?.message ?? "").trim();
       const normalizedAttachments = normalizeRpcAttachmentsToChatAttachments(
         link?.attachments ?? undefined,
       );
       let images: Array<{ type: "image"; data: string; mimeType: string }> = [];
+      let imageOrder: PromptImageOrderEntry[] = [];
+      if (!message && normalizedAttachments.length === 0) {
+        return;
+      }
+      if (message.length > 20_000) {
+        return;
+      }
       if (normalizedAttachments.length > 0) {
+        const sessionAgentId = resolveSessionAgentId({ sessionKey, config: cfg });
+        const modelRef = resolveSessionModelRef(cfg, entry, sessionAgentId);
+        const supportsImages = await resolveGatewayModelSupportsImages({
+          loadGatewayModelCatalog: ctx.loadGatewayModelCatalog,
+          provider: modelRef.provider,
+          model: modelRef.model,
+        });
         try {
           const parsed = await parseMessageWithAttachments(message, normalizedAttachments, {
             maxBytes: 5_000_000,
             log: ctx.logGateway,
+            supportsImages,
           });
           message = parsed.message.trim();
           images = parsed.images;
-        } catch {
+          imageOrder = parsed.imageOrder;
+          if (message.length > 20_000) {
+            ctx.logGateway.warn(
+              `agent.request message exceeds limit after attachment parsing (length=${message.length})`,
+            );
+            if (parsed.offloadedRefs && parsed.offloadedRefs.length > 0) {
+              for (const ref of parsed.offloadedRefs) {
+                try {
+                  await deleteMediaBuffer(ref.id);
+                } catch (cleanupErr) {
+                  ctx.logGateway.warn(
+                    `Failed to cleanup orphaned media ${ref.id}: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`,
+                  );
+                }
+              }
+            }
+            return;
+          }
+        } catch (err) {
+          ctx.logGateway.warn(
+            `agent.request attachment parse failed: ${err instanceof Error ? err.message : String(err)}`,
+          );
           return;
         }
       }
-      if (!message) {
-        return;
-      }
-      if (message.length > 20_000) {
+
+      if (!message && images.length === 0) {
         return;
       }
 
@@ -386,10 +435,6 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt
       const receiptText =
         receiptTextRaw || "Just received your iOS share + request, working on it.";
 
-      const sessionKeyRaw = (link?.sessionKey ?? "").trim();
-      const sessionKey = sessionKeyRaw.length > 0 ? sessionKeyRaw : `node-${nodeId}`;
-      const cfg = loadConfig();
-      const { storePath, entry, canonicalKey } = loadSessionEntry(sessionKey);
       const now = Date.now();
       const sessionId = entry?.sessionId ?? randomUUID();
       await touchSessionStore({ cfg, sessionKey, storePath, canonicalKey, entry, sessionId, now });
@@ -438,6 +483,7 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt
           runId: sessionId,
           message,
           images,
+          imageOrder,
           sessionId,
           sessionKey: canonicalKey,
           thinking: link?.thinking ?? undefined,
diff --git a/src/gateway/session-utils.test.ts b/src/gateway/session-utils.test.ts
index cff89904182..6242900a3e7 100644
--- a/src/gateway/session-utils.test.ts
+++ b/src/gateway/session-utils.test.ts
@@ -22,6 +22,7 @@ import {
   migrateAndPruneGatewaySessionStoreKey,
   parseGroupKey,
   pruneLegacyStoreKeys,
+  resolveGatewayModelSupportsImages,
   resolveGatewaySessionStoreTarget,
   resolveSessionModelIdentityRef,
   resolveSessionModelRef,
@@ -2356,6 +2357,30 @@ describe("listSessionsFromStore subagent metadata", () => {
     expect(timeout?.status).toBe("timeout");
     expect(timeout?.runtimeMs).toBe(0);
   });
+
+  test("fails closed when model lookup misses", async () => {
+    await expect(
+      resolveGatewayModelSupportsImages({
+        model: "gpt-5.4",
+        provider: "openai",
+        loadGatewayModelCatalog: async () => [
+          { id: "gpt-5.4", name: "GPT-5.4", provider: "other", input: ["text", "image"] },
+        ],
+      }),
+    ).resolves.toBe(false);
+  });
+
+  test("fails closed when model catalog load throws", async () => {
+    await expect(
+      resolveGatewayModelSupportsImages({
+        model: "gpt-5.4",
+        provider: "openai",
+        loadGatewayModelCatalog: async () => {
+          throw new Error("catalog unavailable");
+        },
+      }),
+    ).resolves.toBe(false);
+  });
 });
 
 describe("loadCombinedSessionStoreForGateway includes disk-only agents (#32804)", () => {
diff --git a/src/gateway/session-utils.ts b/src/gateway/session-utils.ts
index b107026c858..f401724debe 100644
--- a/src/gateway/session-utils.ts
+++ b/src/gateway/session-utils.ts
@@ -8,6 +8,7 @@ import {
 } from "../agents/agent-scope.js";
 import { lookupContextTokens, resolveContextTokensForModel } from "../agents/context.js";
 import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
+import type { ModelCatalogEntry } from "../agents/model-catalog.js";
 import {
   inferUniqueProviderFromConfiguredModels,
   parseModelRef,
@@ -1074,6 +1075,27 @@ export function resolveSessionModelRef(
   return { provider, model };
 }
 
+export async function resolveGatewayModelSupportsImages(params: {
+  loadGatewayModelCatalog: () => Promise<ModelCatalogEntry[]>;
+  provider?: string;
+  model?: string;
+}): Promise<boolean> {
+  if (!params.model) {
+    return true;
+  }
+
+  try {
+    const catalog = await params.loadGatewayModelCatalog();
+    const modelEntry = catalog.find(
+      (entry) =>
+        entry.id === params.model && (!params.provider || entry.provider === params.provider),
+    );
+    return modelEntry ? (modelEntry.input?.includes("image") ?? false) : false;
+  } catch {
+    return false;
+  }
+}
+
 export function resolveSessionModelIdentityRef(
   cfg: OpenClawConfig,
   entry?:
diff --git a/src/media/prompt-image-order.ts b/src/media/prompt-image-order.ts
new file mode 100644
index 00000000000..97b8763e8ff
--- /dev/null
+++ b/src/media/prompt-image-order.ts
@@ -0,0 +1 @@
+export type PromptImageOrderEntry = "inline" | "offloaded";
diff --git a/src/media/store.ts b/src/media/store.ts
index 2bec9b9cd62..85ff587943d 100644
--- a/src/media/store.ts
+++ b/src/media/store.ts
@@ -407,3 +407,99 @@ export async function saveMediaBuffer(
   await writeSavedMediaBuffer({ dir, id, buffer });
   return buildSavedMediaResult({ dir, id, size: buffer.byteLength, contentType: mime });
 }
+
+/**
+ * Resolves a media ID saved by saveMediaBuffer to its absolute physical path.
+ *
+ * This is the read-side counterpart to saveMediaBuffer and is used by the
+ * agent runner to hydrate opaque `media://inbound/<id>` URIs written by the
+ * Gateway's claim-check offload path.
+ *
+ * Security:
+ * - Rejects IDs containing path separators, "..", or null bytes to prevent
+ *   directory traversal and path injection outside the resolved subdir.
+ * - Verifies the resolved path is a regular file (not a symlink or directory)
+ *   before returning it, matching the write-side MEDIA_FILE_MODE policy.
+ *
+ * @param id      The media ID as returned by SavedMedia.id (may include
+ *                extension and original-filename prefix,
+ *                e.g. "photo---<uuid>.png" or "图片---<uuid>.png").
+ * @param subdir  The subdirectory the file was saved into (default "inbound").
+ * @returns       Absolute path to the file on disk.
+ * @throws        If the ID is unsafe, the file does not exist, or is not a
+ *                regular file.
+ */
+export async function resolveMediaBufferPath(
+  id: string,
+  subdir: "inbound" = "inbound",
+): Promise<string> {
+  // Guard against path traversal and null-byte injection.
+  //
+  // - Separator checks: reject any ID containing "/" or "\" (covers all
+  //   relative traversal sequences such as "../foo" or "..\\foo").
+  // - Exact ".." check: reject the bare traversal operator in case a caller
+  //   strips separators but keeps the dots.
+  // - Null-byte check: reject "\0" which can truncate paths on some platforms
+  //   and cause the OS to open a different file than intended.
+  //
+  // We allow consecutive dots in legitimate filenames (e.g. "report..draft.png"),
+  // so we only reject the exact two-character string "..".
+  //
+  // JSON.stringify is used in the error message so that control characters
+  // (including \0) are rendered visibly in logs rather than silently dropped.
+  if (!id || id.includes("/") || id.includes("\\") || id.includes("\0") || id === "..") {
+    throw new Error(`resolveMediaBufferPath: unsafe media ID: ${JSON.stringify(id)}`);
+  }
+
+  const dir = path.join(resolveMediaDir(), subdir);
+  const resolved = path.join(dir, id);
+
+  // Double-check that path.join didn't escape the intended directory.
+  // This should be unreachable after the separator check above, but be
+  // explicit about the invariant.
+  if (!resolved.startsWith(dir + path.sep) && resolved !== dir) {
+    throw new Error(`resolveMediaBufferPath: path escapes media directory: ${JSON.stringify(id)}`);
+  }
+
+  // lstat (not stat) so we see symlinks rather than following them.
+  const stat = await fs.lstat(resolved);
+
+  if (stat.isSymbolicLink()) {
+    throw new Error(
+      `resolveMediaBufferPath: refusing to follow symlink for media ID: ${JSON.stringify(id)}`,
+    );
+  }
+  if (!stat.isFile()) {
+    throw new Error(
+      `resolveMediaBufferPath: media ID does not resolve to a file: ${JSON.stringify(id)}`,
+    );
+  }
+
+  return resolved;
+}
+
+/**
+ * Deletes a file previously saved by saveMediaBuffer.
+ *
+ * This is used by parseMessageWithAttachments to clean up files that were
+ * successfully offloaded earlier in the same request when a later attachment
+ * fails validation and the entire parse is aborted, preventing orphaned files
+ * from accumulating on disk ahead of the periodic TTL sweep.
+ *
+ * Uses resolveMediaBufferPath to apply the same path-safety guards as the
+ * read path (separator checks, symlink rejection, etc.) before unlinking.
+ *
+ * Errors are intentionally not suppressed — callers that want best-effort
+ * cleanup should catch and discard exceptions themselves (e.g. via
+ * Promise.allSettled).
+ *
+ * @param id     The media ID as returned by SavedMedia.id.
+ * @param subdir The subdirectory the file was saved into (default "inbound").
+ */
+export async function deleteMediaBuffer(
+  id: string,
+  subdir: "inbound" = "inbound",
+): Promise<void> {
+  const physicalPath = await resolveMediaBufferPath(id, subdir);
+  await fs.unlink(physicalPath);
+}
diff --git a/test/test-env.ts b/test/test-env.ts
index e247f396772..f7dd51520ad 100644
--- a/test/test-env.ts
+++ b/test/test-env.ts
@@ -233,13 +233,16 @@ function sanitizeLiveConfig(raw: string): string {
         list?: Array<Record<string, unknown>>;
       };
     } = JSON5.parse(raw);
+
     if (!parsed || typeof parsed !== "object") {
       return raw;
     }
+
     if (parsed.agents?.defaults && typeof parsed.agents.defaults === "object") {
       delete parsed.agents.defaults.workspace;
       delete parsed.agents.defaults.agentDir;
     }
+
     if (Array.isArray(parsed.agents?.list)) {
       parsed.agents.list = parsed.agents.list.map((entry) => {
         if (!entry || typeof entry !== "object") {
@@ -251,6 +254,7 @@ function sanitizeLiveConfig(raw: string): string {
         return nextEntry;
       });
     }
+
     return `${JSON.stringify(parsed, null, 2)}\n`;
   } catch {
     return raw;