diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d6359b2fc1..bc6adddacd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Outbound media/local files: piggyback host-local `MEDIA:` reads on the configured fs policy instead of a separate media-root check, so generated files outside the workspace can send when `tools.fs.workspaceOnly=false` while plaintext-like host files stay blocked by the outbound media allowlist. - Gateway/auth: reject mismatched browser `Origin` headers on trusted-proxy HTTP operator requests while keeping origin-less headless proxy clients working. Thanks @AntAISecurityLab and @vincentkoc. - Plugins/startup: block workspace `.env` from overriding `OPENCLAW_BUNDLED_PLUGINS_DIR`, so bundled plugin trust roots only come from inherited runtime env or package resolution instead of repo-local dotenv files. Thanks @nexrin and @vincentkoc. - Image generation/build: write stable runtime alias files into `dist/` and route provider-auth runtime lookups through those aliases so image-generation providers keep resolving auth/runtime modules after rebuilds instead of crashing on missing hashed chunk files. diff --git a/docs/help/faq.md b/docs/help/faq.md index d4f04d87939..c78ce0b5f56 100644 --- a/docs/help/faq.md +++ b/docs/help/faq.md @@ -2784,6 +2784,8 @@ Related: [/concepts/oauth](/concepts/oauth) (OAuth flows, token storage, multi-a - The target channel supports outbound media and isn't blocked by allowlists. - The file is within the provider's size limits (images are resized to max 2048px). + - `tools.fs.workspaceOnly=true` keeps local-path sends limited to workspace, temp/media-store, and sandbox-validated files. + - `tools.fs.workspaceOnly=false` lets `MEDIA:` send host-local files the agent can already read, but only for media plus safe document types (images, audio, video, PDF, and Office docs). Plain text and secret-like files are still blocked. See [Images](/nodes/images). diff --git a/docs/start/openclaw.md b/docs/start/openclaw.md index 0c8e09c41f2..1cd7a888a80 100644 --- a/docs/start/openclaw.md +++ b/docs/start/openclaw.md @@ -192,10 +192,13 @@ MEDIA:https://example.com/screenshot.png OpenClaw extracts these and sends them as media alongside the text. -For local paths, the default allowlist is intentionally narrow: the OpenClaw temp -root, the media cache, agent workspace paths, and sandbox-generated files. If you -need broader local-file attachment roots, configure an explicit channel/plugin -allowlist instead of relying on arbitrary host paths. +Local-path behavior follows the same file-read trust model as the agent: + +- If `tools.fs.workspaceOnly` is `true`, outbound `MEDIA:` local paths stay restricted to the OpenClaw temp root, the media cache, agent workspace paths, and sandbox-generated files. +- If `tools.fs.workspaceOnly` is `false`, outbound `MEDIA:` can use host-local files the agent is already allowed to read. +- Host-local sends still only allow media and safe document types (images, audio, video, PDF, and Office documents). Plain text and secret-like files are not treated as sendable media. + +That means generated images/files outside the workspace can now send when your fs policy already allows those reads, without reopening arbitrary host-text attachment exfiltration. ## Operations checklist diff --git a/extensions/discord/src/actions/handle-action.ts b/extensions/discord/src/actions/handle-action.ts index 23b41a72e19..68acd303419 100644 --- a/extensions/discord/src/actions/handle-action.ts +++ b/extensions/discord/src/actions/handle-action.ts @@ -26,12 +26,14 @@ export async function handleDiscordMessageAction( | "requesterSenderId" | "toolContext" | "mediaLocalRoots" + | "mediaReadFile" >, ): Promise> { const { action, params, cfg } = ctx; const accountId = ctx.accountId ?? readStringParam(params, "accountId"); const actionOptions = { mediaLocalRoots: ctx.mediaLocalRoots, + mediaReadFile: ctx.mediaReadFile, } as const; const resolveChannelId = () => diff --git a/extensions/discord/src/actions/runtime.messaging.ts b/extensions/discord/src/actions/runtime.messaging.ts index 757bf90f056..ff7213db86d 100644 --- a/extensions/discord/src/actions/runtime.messaging.ts +++ b/extensions/discord/src/actions/runtime.messaging.ts @@ -96,6 +96,7 @@ export async function handleDiscordMessagingAction( isActionEnabled: ActionGate, options?: { mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }, cfg?: OpenClawConfig, ): Promise> { @@ -388,6 +389,7 @@ export async function handleDiscordMessagingAction( mediaUrl, filename: filename ?? undefined, mediaLocalRoots: options?.mediaLocalRoots, + mediaReadFile: options?.mediaReadFile, replyTo, components, embeds, @@ -516,6 +518,7 @@ export async function handleDiscordMessagingAction( ...(accountId ? { accountId } : {}), mediaUrl, mediaLocalRoots: options?.mediaLocalRoots, + mediaReadFile: options?.mediaReadFile, replyTo, }, ); diff --git a/extensions/discord/src/outbound-adapter.ts b/extensions/discord/src/outbound-adapter.ts index f79f6403635..9e51cb5967a 100644 --- a/extensions/discord/src/outbound-adapter.ts +++ b/extensions/discord/src/outbound-adapter.ts @@ -173,6 +173,7 @@ export const discordOutbound: ChannelOutboundAdapter = { return await sendDiscordComponentMessage(target, componentSpec, { mediaUrl, mediaLocalRoots: ctx.mediaLocalRoots, + mediaReadFile: ctx.mediaReadFile, replyTo: ctx.replyToId ?? undefined, accountId: ctx.accountId ?? undefined, silent: ctx.silent ?? undefined, @@ -183,6 +184,7 @@ export const discordOutbound: ChannelOutboundAdapter = { verbose: false, mediaUrl, mediaLocalRoots: ctx.mediaLocalRoots, + mediaReadFile: ctx.mediaReadFile, replyTo: ctx.replyToId ?? undefined, accountId: ctx.accountId ?? undefined, silent: ctx.silent ?? undefined, @@ -224,6 +226,7 @@ export const discordOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, @@ -236,6 +239,7 @@ export const discordOutbound: ChannelOutboundAdapter = { verbose: false, mediaUrl, mediaLocalRoots, + mediaReadFile, replyTo: replyToId ?? undefined, accountId: accountId ?? undefined, silent: silent ?? undefined, diff --git a/extensions/discord/src/send.components.ts b/extensions/discord/src/send.components.ts index 446a52296b1..7a2e821da23 100644 --- a/extensions/discord/src/send.components.ts +++ b/extensions/discord/src/send.components.ts @@ -52,6 +52,7 @@ type DiscordComponentSendOpts = { agentId?: string; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; filename?: string; }; @@ -100,6 +101,8 @@ async function buildDiscordComponentPayload(params: { if (params.opts.mediaUrl) { const media = await loadWebMedia(params.opts.mediaUrl, { localRoots: params.opts.mediaLocalRoots, + readFile: params.opts.mediaReadFile, + hostReadCapability: Boolean(params.opts.mediaReadFile), }); const filenameOverride = params.opts.filename?.trim(); const fileName = filenameOverride || media.fileName || "upload"; diff --git a/extensions/discord/src/send.outbound.ts b/extensions/discord/src/send.outbound.ts index 63c5386a43c..27d466a5c61 100644 --- a/extensions/discord/src/send.outbound.ts +++ b/extensions/discord/src/send.outbound.ts @@ -50,6 +50,7 @@ type DiscordSendOpts = { mediaUrl?: string; filename?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; verbose?: boolean; rest?: RequestClient; replyTo?: string; @@ -217,6 +218,7 @@ export async function sendMessageDiscord( opts.mediaUrl, opts.filename, opts.mediaLocalRoots, + opts.mediaReadFile, mediaMaxBytes, undefined, request, @@ -279,6 +281,7 @@ export async function sendMessageDiscord( opts.mediaUrl, opts.filename, opts.mediaLocalRoots, + opts.mediaReadFile, mediaMaxBytes, opts.replyTo, request, diff --git a/extensions/discord/src/send.shared.ts b/extensions/discord/src/send.shared.ts index eb840ddc45f..395d9b13a72 100644 --- a/extensions/discord/src/send.shared.ts +++ b/extensions/discord/src/send.shared.ts @@ -419,6 +419,7 @@ async function sendDiscordMedia( mediaUrl: string, filename: string | undefined, mediaLocalRoots: readonly string[] | undefined, + mediaReadFile: ((filePath: string) => Promise) | undefined, maxBytes: number | undefined, replyTo: string | undefined, request: DiscordRequest, @@ -430,7 +431,7 @@ async function sendDiscordMedia( ) { const media = await loadWebMedia( mediaUrl, - buildOutboundMediaLoadOptions({ maxBytes, mediaLocalRoots }), + buildOutboundMediaLoadOptions({ maxBytes, mediaLocalRoots, mediaReadFile }), ); const requestedFileName = filename?.trim(); const resolvedFileName = diff --git a/extensions/googlechat/src/actions.ts b/extensions/googlechat/src/actions.ts index e6c893475f7..62cce73e510 100644 --- a/extensions/googlechat/src/actions.ts +++ b/extensions/googlechat/src/actions.ts @@ -54,6 +54,7 @@ async function loadGoogleChatActionMedia(params: { mediaUrl: string; maxBytes: number; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }) { const runtime = getGoogleChatRuntime(); return /^https?:\/\//i.test(params.mediaUrl) @@ -64,6 +65,8 @@ async function loadGoogleChatActionMedia(params: { : await runtime.media.loadWebMedia(params.mediaUrl, { maxBytes: params.maxBytes, localRoots: params.mediaLocalRoots?.length ? params.mediaLocalRoots : undefined, + readFile: params.mediaReadFile, + hostReadCapability: Boolean(params.mediaReadFile), }); } @@ -85,7 +88,7 @@ export const googlechatMessageActions: ChannelMessageActionAdapter = { extractToolSend: ({ args }) => { return extractToolSend(args, "sendMessage"); }, - handleAction: async ({ action, params, cfg, accountId, mediaLocalRoots }) => { + handleAction: async ({ action, params, cfg, accountId, mediaLocalRoots, mediaReadFile }) => { const account = resolveGoogleChatAccount({ cfg: cfg, accountId, @@ -118,6 +121,7 @@ export const googlechatMessageActions: ChannelMessageActionAdapter = { mediaUrl, maxBytes, mediaLocalRoots, + mediaReadFile, }); const uploadFileName = readStringParam(params, "filename") ?? diff --git a/extensions/googlechat/src/channel.ts b/extensions/googlechat/src/channel.ts index 6e5b6220572..63bd8238203 100644 --- a/extensions/googlechat/src/channel.ts +++ b/extensions/googlechat/src/channel.ts @@ -402,6 +402,7 @@ export const googlechatPlugin = createChatChannelPlugin({ text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, replyToId, threadId, @@ -435,6 +436,8 @@ export const googlechatPlugin = createChatChannelPlugin({ : await loadWebMedia(mediaUrl, { maxBytes: effectiveMaxBytes, localRoots: mediaLocalRoots?.length ? mediaLocalRoots : undefined, + readFile: mediaReadFile, + hostReadCapability: Boolean(mediaReadFile), }); const { sendGoogleChatMessage, uploadGoogleChatAttachment } = await loadGoogleChatChannelRuntime(); diff --git a/extensions/imessage/src/send.ts b/extensions/imessage/src/send.ts index eb3820b0764..c614c9f6a50 100644 --- a/extensions/imessage/src/send.ts +++ b/extensions/imessage/src/send.ts @@ -17,6 +17,7 @@ export type IMessageSendOpts = { replyToId?: string; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; maxBytes?: number; timeoutMs?: number; chatId?: number; @@ -26,7 +27,10 @@ export type IMessageSendOpts = { resolveAttachmentImpl?: ( mediaUrl: string, maxBytes: number, - options?: { localRoots?: readonly string[] }, + options?: { + localRoots?: readonly string[]; + readFile?: (filePath: string) => Promise; + }, ) => Promise<{ path: string; contentType?: string }>; createClient?: (params: { cliPath: string; dbPath?: string }) => Promise; }; @@ -123,6 +127,7 @@ export async function sendMessageIMessage( const resolveAttachmentFn = opts.resolveAttachmentImpl ?? resolveOutboundAttachmentFromUrl; const resolved = await resolveAttachmentFn(opts.mediaUrl.trim(), maxBytes, { localRoots: opts.mediaLocalRoots, + readFile: opts.mediaReadFile, }); filePath = resolved.path; message = resolveDeliveredIMessageText(message, resolved.contentType ?? undefined); diff --git a/extensions/matrix/src/matrix/send.ts b/extensions/matrix/src/matrix/send.ts index 20be2cf7cba..e742f154558 100644 --- a/extensions/matrix/src/matrix/send.ts +++ b/extensions/matrix/src/matrix/send.ts @@ -166,6 +166,8 @@ export async function sendMessageMatrix( const media = await getCore().media.loadWebMedia(opts.mediaUrl, { maxBytes, localRoots: opts.mediaLocalRoots, + readFile: opts.mediaReadFile, + hostReadCapability: Boolean(opts.mediaReadFile), }); const uploaded = await uploadMediaMaybeEncrypted(client, roomId, media.buffer, { contentType: media.contentType, diff --git a/extensions/matrix/src/matrix/send/types.ts b/extensions/matrix/src/matrix/send/types.ts index f3d40d92543..46c79da6282 100644 --- a/extensions/matrix/src/matrix/send/types.ts +++ b/extensions/matrix/src/matrix/send/types.ts @@ -89,6 +89,7 @@ export type MatrixSendOpts = { cfg?: CoreConfig; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; accountId?: string; replyToId?: string; threadId?: string | number | null; diff --git a/extensions/matrix/src/outbound.ts b/extensions/matrix/src/outbound.ts index bededd795be..647ddd2ca1c 100644 --- a/extensions/matrix/src/outbound.ts +++ b/extensions/matrix/src/outbound.ts @@ -35,6 +35,7 @@ export const matrixOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, deps, replyToId, threadId, @@ -49,6 +50,7 @@ export const matrixOutbound: ChannelOutboundAdapter = { cfg, mediaUrl, mediaLocalRoots, + mediaReadFile, replyToId: replyToId ?? undefined, threadId: resolvedThreadId, accountId: accountId ?? undefined, diff --git a/extensions/mattermost/src/mattermost/send.ts b/extensions/mattermost/src/mattermost/send.ts index 48d5d678f1d..7389a464771 100644 --- a/extensions/mattermost/src/mattermost/send.ts +++ b/extensions/mattermost/src/mattermost/send.ts @@ -31,6 +31,7 @@ export type MattermostSendOpts = { accountId?: string; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; replyToId?: string; props?: Record; buttons?: Array; @@ -417,6 +418,7 @@ export async function sendMessageMattermost( try { const media = await loadOutboundMediaFromUrl(mediaUrl, { mediaLocalRoots: opts.mediaLocalRoots, + mediaReadFile: opts.mediaReadFile, }); const fileInfo = await uploadMattermostFile(client, { channelId, diff --git a/extensions/msteams/src/channel.ts b/extensions/msteams/src/channel.ts index c4c4794d424..9749e695d73 100644 --- a/extensions/msteams/src/channel.ts +++ b/extensions/msteams/src/channel.ts @@ -625,6 +625,7 @@ export const msteamsPlugin: ChannelPlugin sendMessageMSTeams({ cfg, to, text })); return await send(to, text); }, - sendMedia: async ({ cfg, to, text, mediaUrl, mediaLocalRoots, deps }) => { + sendMedia: async ({ cfg, to, text, mediaUrl, mediaLocalRoots, mediaReadFile, deps }) => { type SendFn = ( to: string, text: string, - opts?: { mediaUrl?: string; mediaLocalRoots?: readonly string[] }, + opts?: { + mediaUrl?: string; + mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; + }, ) => Promise<{ messageId: string; conversationId: string }>; const send = resolveOutboundSendDep(deps, "msteams") ?? @@ -38,8 +42,9 @@ export const msteamsOutbound: ChannelOutboundAdapter = { text, mediaUrl: opts?.mediaUrl, mediaLocalRoots: opts?.mediaLocalRoots, + mediaReadFile: opts?.mediaReadFile, })); - return await send(to, text, { mediaUrl, mediaLocalRoots }); + return await send(to, text, { mediaUrl, mediaLocalRoots, mediaReadFile }); }, sendPoll: async ({ cfg, to, poll }) => { const maxSelections = poll.maxSelections ?? 1; diff --git a/extensions/msteams/src/send.ts b/extensions/msteams/src/send.ts index 2fae95a4fa8..7b0ae07f408 100644 --- a/extensions/msteams/src/send.ts +++ b/extensions/msteams/src/send.ts @@ -33,6 +33,7 @@ export type SendMSTeamsMessageParams = { /** Optional filename override for uploaded media/files */ filename?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }; export type SendMSTeamsMessageResult = { @@ -98,7 +99,7 @@ export type SendMSTeamsCardResult = { export async function sendMessageMSTeams( params: SendMSTeamsMessageParams, ): Promise { - const { cfg, to, text, mediaUrl, filename, mediaLocalRoots } = params; + const { cfg, to, text, mediaUrl, filename, mediaLocalRoots, mediaReadFile } = params; const tableMode = resolveMarkdownTableMode({ cfg, channel: "msteams", @@ -129,6 +130,7 @@ export async function sendMessageMSTeams( const media = await loadOutboundMediaFromUrl(mediaUrl, { maxBytes: mediaMaxBytes, mediaLocalRoots, + mediaReadFile, }); const isLargeFile = media.buffer.length >= FILE_CONSENT_THRESHOLD_BYTES; const isImage = media.contentType?.startsWith("image/") ?? false; diff --git a/extensions/signal/src/channel.ts b/extensions/signal/src/channel.ts index 5b4c18d6e0e..09d3ac10a99 100644 --- a/extensions/signal/src/channel.ts +++ b/extensions/signal/src/channel.ts @@ -62,6 +62,7 @@ async function sendSignalOutbound(params: { text: string; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; accountId?: string; deps?: { [channelId: string]: unknown }; }) { @@ -70,6 +71,7 @@ async function sendSignalOutbound(params: { cfg: params.cfg, ...(params.mediaUrl ? { mediaUrl: params.mediaUrl } : {}), ...(params.mediaLocalRoots?.length ? { mediaLocalRoots: params.mediaLocalRoots } : {}), + ...(params.mediaReadFile ? { mediaReadFile: params.mediaReadFile } : {}), maxBytes, accountId: params.accountId ?? undefined, }); @@ -188,6 +190,7 @@ async function sendFormattedSignalMedia(ctx: { text: string; mediaUrl: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; accountId?: string | null; deps?: { [channelId: string]: unknown }; abortSignal?: AbortSignal; @@ -213,6 +216,7 @@ async function sendFormattedSignalMedia(ctx: { cfg: ctx.cfg, mediaUrl: ctx.mediaUrl, mediaLocalRoots: ctx.mediaLocalRoots, + ...(ctx.mediaReadFile ? { mediaReadFile: ctx.mediaReadFile } : {}), maxBytes, accountId: ctx.accountId ?? undefined, textMode: "plain", @@ -337,6 +341,7 @@ export const signalPlugin: ChannelPlugin = text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, abortSignal, @@ -347,6 +352,7 @@ export const signalPlugin: ChannelPlugin = text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, abortSignal, @@ -362,13 +368,23 @@ export const signalPlugin: ChannelPlugin = accountId: accountId ?? undefined, deps, }), - sendMedia: async ({ cfg, to, text, mediaUrl, mediaLocalRoots, accountId, deps }) => + sendMedia: async ({ + cfg, + to, + text, + mediaUrl, + mediaLocalRoots, + mediaReadFile, + accountId, + deps, + }) => await sendSignalOutbound({ cfg, to, text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId: accountId ?? undefined, deps, }), diff --git a/extensions/signal/src/outbound-adapter.ts b/extensions/signal/src/outbound-adapter.ts index 0cd2fba6107..5e4d24c38b7 100644 --- a/extensions/signal/src/outbound-adapter.ts +++ b/extensions/signal/src/outbound-adapter.ts @@ -71,6 +71,7 @@ export const signalOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, abortSignal, @@ -96,6 +97,7 @@ export const signalOutbound: ChannelOutboundAdapter = { textMode: "plain", textStyles: formatted.styles, mediaLocalRoots, + mediaReadFile, }); return attachChannelToResult("signal", result); }, @@ -113,7 +115,16 @@ export const signalOutbound: ChannelOutboundAdapter = { accountId: accountId ?? undefined, }); }, - sendMedia: async ({ cfg, to, text, mediaUrl, mediaLocalRoots, accountId, deps }) => { + sendMedia: async ({ + cfg, + to, + text, + mediaUrl, + mediaLocalRoots, + mediaReadFile, + accountId, + deps, + }) => { const send = resolveSignalSender(deps); const maxBytes = resolveSignalMaxBytes({ cfg, @@ -125,6 +136,7 @@ export const signalOutbound: ChannelOutboundAdapter = { maxBytes, accountId: accountId ?? undefined, mediaLocalRoots, + mediaReadFile, }); }, }), diff --git a/extensions/signal/src/send.ts b/extensions/signal/src/send.ts index c102624836e..0fed9cee8c7 100644 --- a/extensions/signal/src/send.ts +++ b/extensions/signal/src/send.ts @@ -14,6 +14,7 @@ export type SignalSendOpts = { accountId?: string; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; maxBytes?: number; timeoutMs?: number; textMode?: "markdown" | "plain"; @@ -129,6 +130,7 @@ export async function sendMessageSignal( if (opts.mediaUrl?.trim()) { const resolved = await resolveOutboundAttachmentFromUrl(opts.mediaUrl.trim(), maxBytes, { localRoots: opts.mediaLocalRoots, + readFile: opts.mediaReadFile, }); attachments = [resolved.path]; const kind = kindFromMime(resolved.contentType ?? undefined); diff --git a/extensions/slack/src/action-runtime.ts b/extensions/slack/src/action-runtime.ts index 8d1678149aa..6032bc6ece0 100644 --- a/extensions/slack/src/action-runtime.ts +++ b/extensions/slack/src/action-runtime.ts @@ -72,6 +72,7 @@ export type SlackActionContext = { hasRepliedRef?: { value: boolean }; /** Allowed local media directories for file uploads. */ mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }; /** @@ -232,6 +233,7 @@ export async function handleSlackAction( ...writeOpts, mediaUrl: mediaUrl ?? undefined, mediaLocalRoots: context?.mediaLocalRoots, + mediaReadFile: context?.mediaReadFile, threadTs: threadTs ?? undefined, blocks, }); @@ -276,6 +278,7 @@ export async function handleSlackAction( ...writeOpts, mediaUrl: filePath, mediaLocalRoots: context?.mediaLocalRoots, + mediaReadFile: context?.mediaReadFile, threadTs: threadTs ?? undefined, ...(filename ? { uploadFileName: filename } : {}), ...(title ? { uploadTitle: title } : {}), diff --git a/extensions/slack/src/actions.ts b/extensions/slack/src/actions.ts index d527dc58976..4620319f696 100644 --- a/extensions/slack/src/actions.ts +++ b/extensions/slack/src/actions.ts @@ -160,6 +160,7 @@ export async function sendSlackMessage( opts: SlackActionClientOpts & { mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; threadTs?: string; uploadFileName?: string; uploadTitle?: string; @@ -171,6 +172,7 @@ export async function sendSlackMessage( token: opts.token, mediaUrl: opts.mediaUrl, mediaLocalRoots: opts.mediaLocalRoots, + mediaReadFile: opts.mediaReadFile, client: opts.client, threadTs: opts.threadTs, ...(opts.uploadFileName ? { uploadFileName: opts.uploadFileName } : {}), diff --git a/extensions/slack/src/channel-actions.ts b/extensions/slack/src/channel-actions.ts index 4b126661621..6a68d9f7036 100644 --- a/extensions/slack/src/channel-actions.ts +++ b/extensions/slack/src/channel-actions.ts @@ -63,6 +63,7 @@ export function createSlackActions( : handleSlackAction(action, cfg, { ...(toolContext as SlackActionContext | undefined), mediaLocalRoots: ctx.mediaLocalRoots, + mediaReadFile: ctx.mediaReadFile, })), }); }, diff --git a/extensions/slack/src/outbound-adapter.ts b/extensions/slack/src/outbound-adapter.ts index 1f6b99d96f6..64f09b545a4 100644 --- a/extensions/slack/src/outbound-adapter.ts +++ b/extensions/slack/src/outbound-adapter.ts @@ -83,6 +83,7 @@ async function sendSlackOutboundMessage(params: { text: string; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; blocks?: NonNullable[2]>["blocks"]; accountId?: string | null; deps?: { [channelId: string]: unknown } | null; @@ -115,7 +116,11 @@ async function sendSlackOutboundMessage(params: { threadTs, accountId: params.accountId ?? undefined, ...(params.mediaUrl - ? { mediaUrl: params.mediaUrl, mediaLocalRoots: params.mediaLocalRoots } + ? { + mediaUrl: params.mediaUrl, + mediaLocalRoots: params.mediaLocalRoots, + mediaReadFile: params.mediaReadFile, + } : {}), ...(params.blocks ? { blocks: params.blocks } : {}), ...(slackIdentity ? { identity: slackIdentity } : {}), @@ -184,6 +189,7 @@ export const slackOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots: ctx.mediaLocalRoots, + mediaReadFile: ctx.mediaReadFile, accountId: ctx.accountId, deps: ctx.deps, replyToId: ctx.replyToId, @@ -196,6 +202,7 @@ export const slackOutbound: ChannelOutboundAdapter = { to: ctx.to, text: payload.text ?? "", mediaLocalRoots: ctx.mediaLocalRoots, + mediaReadFile: ctx.mediaReadFile, blocks, accountId: ctx.accountId, deps: ctx.deps, @@ -225,6 +232,7 @@ export const slackOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, @@ -237,6 +245,7 @@ export const slackOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, diff --git a/extensions/slack/src/send.ts b/extensions/slack/src/send.ts index b6a9c4c11a0..48674cde158 100644 --- a/extensions/slack/src/send.ts +++ b/extensions/slack/src/send.ts @@ -52,6 +52,7 @@ type SlackSendOpts = { uploadFileName?: string; uploadTitle?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; client?: WebClient; threadTs?: string; identity?: SlackSendIdentity; @@ -233,6 +234,7 @@ async function uploadSlackFile(params: { uploadFileName?: string; uploadTitle?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; caption?: string; threadTs?: string; maxBytes?: number; @@ -240,6 +242,8 @@ async function uploadSlackFile(params: { const { buffer, contentType, fileName } = await loadWebMedia(params.mediaUrl, { maxBytes: params.maxBytes, localRoots: params.mediaLocalRoots, + readFile: params.mediaReadFile, + hostReadCapability: Boolean(params.mediaReadFile), }); const uploadFileName = params.uploadFileName ?? fileName ?? "upload"; const uploadTitle = params.uploadTitle ?? uploadFileName; @@ -372,6 +376,7 @@ export async function sendMessageSlack( uploadFileName: opts.uploadFileName, uploadTitle: opts.uploadTitle, mediaLocalRoots: opts.mediaLocalRoots, + mediaReadFile: opts.mediaReadFile, caption: firstChunk, threadTs: opts.threadTs, maxBytes: mediaMaxBytes, diff --git a/extensions/telegram/src/action-runtime.ts b/extensions/telegram/src/action-runtime.ts index d6141c3bf37..e3065c67293 100644 --- a/extensions/telegram/src/action-runtime.ts +++ b/extensions/telegram/src/action-runtime.ts @@ -214,6 +214,7 @@ export async function handleTelegramAction( cfg: OpenClawConfig, options?: { mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }, ): Promise> { const { action, accountId } = { @@ -365,6 +366,7 @@ export async function handleTelegramAction( accountId: accountId ?? undefined, mediaUrl: mediaUrl || undefined, mediaLocalRoots: options?.mediaLocalRoots, + mediaReadFile: options?.mediaReadFile, buttons, replyToMessageId: replyToMessageId ?? undefined, messageThreadId: messageThreadId ?? undefined, diff --git a/extensions/telegram/src/outbound-adapter.ts b/extensions/telegram/src/outbound-adapter.ts index 5139cdc20b8..f1cb5ef8772 100644 --- a/extensions/telegram/src/outbound-adapter.ts +++ b/extensions/telegram/src/outbound-adapter.ts @@ -142,6 +142,7 @@ export const telegramOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, @@ -161,6 +162,7 @@ export const telegramOutbound: ChannelOutboundAdapter = { ...baseOpts, mediaUrl, mediaLocalRoots, + mediaReadFile, forceDocument: forceDocument ?? false, }); }, @@ -170,6 +172,7 @@ export const telegramOutbound: ChannelOutboundAdapter = { to, payload, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, @@ -192,6 +195,7 @@ export const telegramOutbound: ChannelOutboundAdapter = { baseOpts: { ...baseOpts, mediaLocalRoots, + mediaReadFile, forceDocument: forceDocument ?? false, }, }); diff --git a/extensions/telegram/src/send.ts b/extensions/telegram/src/send.ts index 08d9f58c49d..d6ae16efcbc 100644 --- a/extensions/telegram/src/send.ts +++ b/extensions/telegram/src/send.ts @@ -75,6 +75,7 @@ type TelegramSendOpts = { verbose?: boolean; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; gatewayClientScopes?: readonly string[]; maxBytes?: number; api?: TelegramApiOverride; @@ -838,6 +839,7 @@ export async function sendMessageTelegram( buildOutboundMediaLoadOptions({ maxBytes: mediaMaxBytes, mediaLocalRoots: opts.mediaLocalRoots, + mediaReadFile: opts.mediaReadFile, optimizeImages: opts.forceDocument ? false : undefined, }), ); diff --git a/extensions/whatsapp/src/outbound-adapter.ts b/extensions/whatsapp/src/outbound-adapter.ts index f9a845a0ebd..a82dc95b2a2 100644 --- a/extensions/whatsapp/src/outbound-adapter.ts +++ b/extensions/whatsapp/src/outbound-adapter.ts @@ -66,6 +66,7 @@ export const whatsappOutbound: ChannelOutboundAdapter = { text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, gifPlayback, @@ -79,6 +80,7 @@ export const whatsappOutbound: ChannelOutboundAdapter = { cfg, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId: accountId ?? undefined, gifPlayback, }); diff --git a/extensions/whatsapp/src/send.ts b/extensions/whatsapp/src/send.ts index 6617c358ae7..729121cbe73 100644 --- a/extensions/whatsapp/src/send.ts +++ b/extensions/whatsapp/src/send.ts @@ -22,6 +22,7 @@ export async function sendMessageWhatsApp( cfg?: OpenClawConfig; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; gifPlayback?: boolean; accountId?: string; }, @@ -63,6 +64,8 @@ export async function sendMessageWhatsApp( const media = await loadWebMedia(options.mediaUrl, { maxBytes: resolveWhatsAppMediaMaxBytes(account), localRoots: options.mediaLocalRoots, + readFile: options.mediaReadFile, + hostReadCapability: Boolean(options.mediaReadFile), }); const caption = text || undefined; mediaBuffer = media.buffer; diff --git a/extensions/zalouser/src/channel.ts b/extensions/zalouser/src/channel.ts index efe5367dc80..5ea7758c17e 100644 --- a/extensions/zalouser/src/channel.ts +++ b/extensions/zalouser/src/channel.ts @@ -77,7 +77,7 @@ const zalouserRawSendResultAdapter = createRawChannelSendResultAdapter({ textChunkLimit: resolveZalouserOutboundTextChunkLimit(cfg, account.accountId), }); }, - sendMedia: async ({ to, text, mediaUrl, accountId, cfg, mediaLocalRoots }) => { + sendMedia: async ({ to, text, mediaUrl, accountId, cfg, mediaLocalRoots, mediaReadFile }) => { const account = resolveZalouserAccountSync({ cfg: cfg, accountId }); const target = parseZalouserOutboundTarget(to); return await sendMessageZalouser(target.threadId, text, { @@ -85,6 +85,7 @@ const zalouserRawSendResultAdapter = createRawChannelSendResultAdapter({ isGroup: target.isGroup, mediaUrl, mediaLocalRoots, + mediaReadFile, textMode: "markdown", textChunkMode: resolveZalouserOutboundChunkMode(cfg, account.accountId), textChunkLimit: resolveZalouserOutboundTextChunkLimit(cfg, account.accountId), diff --git a/extensions/zalouser/src/types.ts b/extensions/zalouser/src/types.ts index aaf9b9b44b7..f29f503f7d9 100644 --- a/extensions/zalouser/src/types.ts +++ b/extensions/zalouser/src/types.ts @@ -61,6 +61,7 @@ export type ZaloSendOptions = { caption?: string; isGroup?: boolean; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; textMode?: "markdown" | "plain"; textChunkMode?: "length" | "newline"; textChunkLimit?: number; diff --git a/extensions/zalouser/src/zalo-js.ts b/extensions/zalouser/src/zalo-js.ts index 965413e3bde..25f33e7e001 100644 --- a/extensions/zalouser/src/zalo-js.ts +++ b/extensions/zalouser/src/zalo-js.ts @@ -1043,6 +1043,7 @@ export async function sendZaloTextMessage( if (options.mediaUrl?.trim()) { const media = await loadOutboundMediaFromUrl(options.mediaUrl.trim(), { mediaLocalRoots: options.mediaLocalRoots, + mediaReadFile: options.mediaReadFile, }); const fileName = resolveMediaFileName({ mediaUrl: options.mediaUrl, diff --git a/src/channels/plugins/outbound/direct-text-media.ts b/src/channels/plugins/outbound/direct-text-media.ts index 80a7178a10e..8824cfa1b50 100644 --- a/src/channels/plugins/outbound/direct-text-media.ts +++ b/src/channels/plugins/outbound/direct-text-media.ts @@ -17,6 +17,7 @@ type DirectSendOptions = { replyToId?: string | null; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; maxBytes?: number; }; @@ -80,6 +81,7 @@ export function createDirectTextMediaOutbound< replyToId?: string | null; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; buildOptions: (params: DirectSendOptions) => TOpts; }) => { const send = params.resolveSender(sendParams.deps); @@ -94,6 +96,7 @@ export function createDirectTextMediaOutbound< cfg: sendParams.cfg, mediaUrl: sendParams.mediaUrl, mediaLocalRoots: sendParams.mediaLocalRoots, + mediaReadFile: sendParams.mediaReadFile, accountId: sendParams.accountId, replyToId: sendParams.replyToId, maxBytes, @@ -120,13 +123,24 @@ export function createDirectTextMediaOutbound< buildOptions: params.buildTextOptions, }); }, - sendMedia: async ({ cfg, to, text, mediaUrl, mediaLocalRoots, accountId, deps, replyToId }) => { + sendMedia: async ({ + cfg, + to, + text, + mediaUrl, + mediaLocalRoots, + mediaReadFile, + accountId, + deps, + replyToId, + }) => { return await sendDirect({ cfg, to, text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, replyToId, diff --git a/src/channels/plugins/types.adapters.ts b/src/channels/plugins/types.adapters.ts index e6de1db7884..41ea098e988 100644 --- a/src/channels/plugins/types.adapters.ts +++ b/src/channels/plugins/types.adapters.ts @@ -138,6 +138,7 @@ export type ChannelOutboundContext = { mediaUrl?: string; audioAsVoice?: boolean; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; gifPlayback?: boolean; /** Send image as document to avoid Telegram compression. */ forceDocument?: boolean; diff --git a/src/channels/plugins/types.core.ts b/src/channels/plugins/types.core.ts index 114be7009a5..23d8609ca23 100644 --- a/src/channels/plugins/types.core.ts +++ b/src/channels/plugins/types.core.ts @@ -495,6 +495,7 @@ export type ChannelMessageActionContext = { cfg: OpenClawConfig; params: Record; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; accountId?: string | null; /** * Trusted sender id from inbound context. This is server-injected and must diff --git a/src/channels/plugins/whatsapp-shared.ts b/src/channels/plugins/whatsapp-shared.ts index 6acb2309bcb..7e896996693 100644 --- a/src/channels/plugins/whatsapp-shared.ts +++ b/src/channels/plugins/whatsapp-shared.ts @@ -30,6 +30,7 @@ type WhatsAppSendMessage = ( cfg?: OpenClawConfig; mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; gifPlayback?: boolean; accountId?: string; }, @@ -99,6 +100,7 @@ export function createWhatsAppOutboundBase({ text, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId, deps, gifPlayback, @@ -110,6 +112,7 @@ export function createWhatsAppOutboundBase({ cfg, mediaUrl, mediaLocalRoots, + mediaReadFile, accountId: accountId ?? undefined, gifPlayback, }); diff --git a/src/infra/outbound/deliver.ts b/src/infra/outbound/deliver.ts index 34f4223020e..31762d48640 100644 --- a/src/infra/outbound/deliver.ts +++ b/src/infra/outbound/deliver.ts @@ -30,6 +30,7 @@ import { import { hasReplyPayloadContent } from "../../interactive/payload.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { getAgentScopedMediaLocalRootsForSources } from "../../media/local-roots.js"; +import { createAgentScopedHostMediaReadFile } from "../../media/read-capability.js"; import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; import { throwIfAborted } from "./abort.js"; import { resolveOutboundChannelPlugin } from "./channel-resolution.js"; @@ -130,6 +131,7 @@ type ChannelHandlerParams = { forceDocument?: boolean; silent?: boolean; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; gatewayClientScopes?: readonly string[]; }; @@ -251,6 +253,7 @@ function createChannelOutboundContextBase( deps: params.deps, silent: params.silent, mediaLocalRoots: params.mediaLocalRoots, + mediaReadFile: params.mediaReadFile, gatewayClientScopes: params.gatewayClientScopes, }; } @@ -566,6 +569,10 @@ async function deliverOutboundPayloadsCore( agentId: params.session?.agentId ?? params.mirror?.agentId, mediaSources: collectPayloadMediaSources(payloads), }); + const mediaReadFile = createAgentScopedHostMediaReadFile({ + cfg, + agentId: params.session?.agentId ?? params.mirror?.agentId, + }); const results: OutboundDeliveryResult[] = []; const handler = await createChannelHandler({ cfg, @@ -580,6 +587,7 @@ async function deliverOutboundPayloadsCore( forceDocument: params.forceDocument, silent: params.silent, mediaLocalRoots, + mediaReadFile, gatewayClientScopes: params.gatewayClientScopes, }); const configuredTextLimit = handler.chunker diff --git a/src/infra/outbound/message-action-params.ts b/src/infra/outbound/message-action-params.ts index ac2c060d997..e40a9fe89a7 100644 --- a/src/infra/outbound/message-action-params.ts +++ b/src/infra/outbound/message-action-params.ts @@ -102,11 +102,13 @@ export type AttachmentMediaPolicy = | { mode: "host"; localRoots?: readonly string[]; + readFile?: (filePath: string) => Promise; }; export function resolveAttachmentMediaPolicy(params: { sandboxRoot?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }): AttachmentMediaPolicy { const sandboxRoot = params.sandboxRoot?.trim(); if (sandboxRoot) { @@ -118,6 +120,7 @@ export function resolveAttachmentMediaPolicy(params: { return { mode: "host", localRoots: params.mediaLocalRoots, + readFile: params.mediaReadFile, }; } @@ -132,7 +135,9 @@ function buildAttachmentMediaLoadOptions(params: { } | { maxBytes?: number; - localRoots?: readonly string[]; + localRoots?: readonly string[] | "any"; + readFile?: (filePath: string) => Promise; + hostReadCapability?: boolean; } { if (params.policy.mode === "sandbox") { const readSandboxFile = createRootScopedReadFile({ @@ -146,7 +151,13 @@ function buildAttachmentMediaLoadOptions(params: { } return { maxBytes: params.maxBytes, - localRoots: params.policy.localRoots, + ...(params.policy.readFile + ? { + localRoots: "any" as const, + readFile: params.policy.readFile, + hostReadCapability: true, + } + : { localRoots: params.policy.localRoots }), }; } diff --git a/src/infra/outbound/message-action-runner.media.test.ts b/src/infra/outbound/message-action-runner.media.test.ts index 3ff2212db2d..56114c9d9b1 100644 --- a/src/infra/outbound/message-action-runner.media.test.ts +++ b/src/infra/outbound/message-action-runner.media.test.ts @@ -12,6 +12,11 @@ import { } from "../../test-utils/channel-plugins.js"; import { resolvePreferredOpenClawTmpDir } from "../tmp-openclaw-dir.js"; +const onePixelPng = Buffer.from( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO5m8gAAAABJRU5ErkJggg==", + "base64", +); + vi.mock("../../media/web-media.js", async () => { const actual = await vi.importActual( "../../media/web-media.js", @@ -260,7 +265,9 @@ describe("runMessageAction media behavior", () => { const call = vi.mocked(loadWebMedia).mock.calls[0]; expect(call?.[1]).toEqual( expect.objectContaining({ - localRoots: expect.any(Array), + localRoots: "any", + readFile: expect.any(Function), + hostReadCapability: true, }), ); expect((call?.[1] as { sandboxValidated?: boolean } | undefined)?.sandboxValidated).not.toBe( @@ -268,6 +275,67 @@ describe("runMessageAction media behavior", () => { ); }); + it("allows host-local image attachment paths when fs root expansion is enabled", async () => { + await restoreRealMediaLoader(); + + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "msg-attachment-image-")); + try { + const outsidePath = path.join(tempDir, "photo.png"); + await fs.writeFile(outsidePath, onePixelPng); + + const result = await runMessageAction({ + cfg: { + ...cfg, + tools: { fs: { workspaceOnly: false } }, + }, + action: "sendAttachment", + params: { + channel: "bluebubbles", + target: "+15551234567", + media: outsidePath, + message: "caption", + }, + }); + + expect(result.kind).toBe("action"); + expect(result.payload).toMatchObject({ + ok: true, + filename: "photo.png", + contentType: "image/png", + }); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + + it("rejects host-local text attachments even when fs root expansion is enabled", async () => { + await restoreRealMediaLoader(); + + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "msg-attachment-text-")); + try { + const outsidePath = path.join(tempDir, "secret.txt"); + await fs.writeFile(outsidePath, "secret", "utf8"); + + await expect( + runMessageAction({ + cfg: { + ...cfg, + tools: { fs: { workspaceOnly: false } }, + }, + action: "sendAttachment", + params: { + channel: "bluebubbles", + target: "+15551234567", + media: outsidePath, + message: "caption", + }, + }), + ).rejects.toThrow(/Host-local media sends only allow/i); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + it("hydrates buffer and filename from media for bluebubbles upload-file", async () => { const result = await runMessageAction({ cfg, diff --git a/src/infra/outbound/message-action-runner.ts b/src/infra/outbound/message-action-runner.ts index f385081d09a..a3481a8173d 100644 --- a/src/infra/outbound/message-action-runner.ts +++ b/src/infra/outbound/message-action-runner.ts @@ -19,6 +19,7 @@ import { getAgentScopedMediaLocalRoots, getAgentScopedMediaLocalRootsForSources, } from "../../media/local-roots.js"; +import { createAgentScopedHostMediaReadFile } from "../../media/read-capability.js"; import { hasPollCreationParams } from "../../poll-params.js"; import { resolvePollMaxSelections } from "../../polls.js"; import { buildChannelAccountBindings } from "../../routing/bindings.js"; @@ -273,6 +274,7 @@ type ResolvedActionContext = { params: Record; channel: ChannelId; mediaLocalRoots: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; accountId?: string | null; dryRun: boolean; gateway?: MessageActionRunnerGateway; @@ -518,6 +520,7 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise ({ sendMessage: vi.fn(), sendPoll: vi.fn(), getAgentScopedMediaLocalRootsForSources: vi.fn(() => ["/tmp/agent-roots"]), + createAgentScopedHostMediaReadFile: vi.fn(() => async () => Buffer.from("capability")), appendAssistantMessageToSessionTranscript: vi.fn(async () => ({ ok: true, sessionFile: "x" })), })); @@ -19,6 +20,10 @@ vi.mock("./message.js", () => ({ sendPoll: mocks.sendPoll, })); +vi.mock("../../media/read-capability.js", () => ({ + createAgentScopedHostMediaReadFile: mocks.createAgentScopedHostMediaReadFile, +})); + vi.mock("../../media/local-roots.js", async (importOriginal) => { const actual = await importOriginal(); return { @@ -99,6 +104,7 @@ describe("executeSendAction", () => { mocks.sendPoll.mockClear(); mocks.getDefaultMediaLocalRoots.mockClear(); mocks.getAgentScopedMediaLocalRootsForSources.mockClear(); + mocks.createAgentScopedHostMediaReadFile.mockClear(); mocks.appendAssistantMessageToSessionTranscript.mockClear(); }); @@ -204,6 +210,7 @@ describe("executeSendAction", () => { expect(mocks.dispatchChannelMessageAction).toHaveBeenCalledWith( expect.objectContaining({ mediaLocalRoots: ["/tmp/agent-roots"], + mediaReadFile: mocks.createAgentScopedHostMediaReadFile.mock.results[0]?.value, }), ); }); diff --git a/src/infra/outbound/outbound-send-service.ts b/src/infra/outbound/outbound-send-service.ts index 731d22a5fc3..d6c953d5b67 100644 --- a/src/infra/outbound/outbound-send-service.ts +++ b/src/infra/outbound/outbound-send-service.ts @@ -4,6 +4,7 @@ import type { ChannelId, ChannelThreadingToolContext } from "../../channels/plug import type { OpenClawConfig } from "../../config/config.js"; import { appendAssistantMessageToSessionTranscript } from "../../config/sessions.js"; import { getAgentScopedMediaLocalRootsForSources } from "../../media/local-roots.js"; +import { createAgentScopedHostMediaReadFile } from "../../media/read-capability.js"; import type { GatewayClientMode, GatewayClientName } from "../../utils/message-channel.js"; import { throwIfAborted } from "./abort.js"; import type { OutboundSendDeps } from "./deliver.js"; @@ -27,6 +28,7 @@ export type OutboundSendContext = { params: Record; /** Active agent id for per-agent outbound media root scoping. */ agentId?: string; + mediaReadFile?: (filePath: string) => Promise; accountId?: string | null; gateway?: OutboundGatewayContext; toolContext?: ChannelThreadingToolContext; @@ -67,12 +69,19 @@ async function tryHandleWithPluginAction(params: { agentId: params.ctx.agentId ?? params.ctx.mirror?.agentId, mediaSources: collectActionMediaSources(params.ctx.params), }); + const mediaReadFile = + params.ctx.mediaReadFile ?? + createAgentScopedHostMediaReadFile({ + cfg: params.ctx.cfg, + agentId: params.ctx.agentId ?? params.ctx.mirror?.agentId, + }); const handled = await dispatchChannelMessageAction({ channel: params.ctx.channel, action: params.action, cfg: params.ctx.cfg, params: params.ctx.params, mediaLocalRoots, + mediaReadFile, accountId: params.ctx.accountId ?? undefined, gateway: params.ctx.gateway, toolContext: params.ctx.toolContext, diff --git a/src/media/load-options.ts b/src/media/load-options.ts index da4545ae10e..08cead669fc 100644 --- a/src/media/load-options.ts +++ b/src/media/load-options.ts @@ -1,12 +1,15 @@ export type OutboundMediaLoadParams = { maxBytes?: number; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; optimizeImages?: boolean; }; export type OutboundMediaLoadOptions = { maxBytes?: number; - localRoots?: readonly string[]; + localRoots?: readonly string[] | "any"; + readFile?: (filePath: string) => Promise; + hostReadCapability?: boolean; optimizeImages?: boolean; }; @@ -19,6 +22,15 @@ export function resolveOutboundMediaLocalRoots( export function buildOutboundMediaLoadOptions( params: OutboundMediaLoadParams = {}, ): OutboundMediaLoadOptions { + if (params.mediaReadFile) { + return { + ...(params.maxBytes !== undefined ? { maxBytes: params.maxBytes } : {}), + localRoots: "any", + readFile: params.mediaReadFile, + hostReadCapability: true, + ...(params.optimizeImages !== undefined ? { optimizeImages: params.optimizeImages } : {}), + }; + } const localRoots = resolveOutboundMediaLocalRoots(params.mediaLocalRoots); return { ...(params.maxBytes !== undefined ? { maxBytes: params.maxBytes } : {}), diff --git a/src/media/outbound-attachment.ts b/src/media/outbound-attachment.ts index b9617c1f7b2..1df02b29a9f 100644 --- a/src/media/outbound-attachment.ts +++ b/src/media/outbound-attachment.ts @@ -5,13 +5,17 @@ import { loadWebMedia } from "./web-media.js"; export async function resolveOutboundAttachmentFromUrl( mediaUrl: string, maxBytes: number, - options?: { localRoots?: readonly string[] }, + options?: { + localRoots?: readonly string[]; + readFile?: (filePath: string) => Promise; + }, ): Promise<{ path: string; contentType?: string }> { const media = await loadWebMedia( mediaUrl, buildOutboundMediaLoadOptions({ maxBytes, mediaLocalRoots: options?.localRoots, + mediaReadFile: options?.readFile, }), ); const saved = await saveMediaBuffer( diff --git a/src/media/web-media.ts b/src/media/web-media.ts index b6d2a10b01d..ff823088c13 100644 --- a/src/media/web-media.ts +++ b/src/media/web-media.ts @@ -40,6 +40,8 @@ type WebMediaOptions = { /** Caller already validated the local path (sandbox/other guards); requires readFile override. */ sandboxValidated?: boolean; readFile?: (filePath: string) => Promise; + /** Host-local fs-policy read piggyback; rejects plaintext-like document sends. */ + hostReadCapability?: boolean; }; function resolveWebMediaOptions(params: { @@ -65,6 +67,24 @@ function resolveWebMediaOptions(params: { const HEIC_MIME_RE = /^image\/hei[cf]$/i; const HEIC_EXT_RE = /\.(heic|heif)$/i; +const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([ + "application/msword", + "application/pdf", + "application/vnd.ms-excel", + "application/vnd.ms-powerpoint", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", +]); +const HOST_READ_ALLOWED_DOCUMENT_EXTS = new Set([ + ".doc", + ".docx", + ".pdf", + ".ppt", + ".pptx", + ".xls", + ".xlsx", +]); const MB = 1024 * 1024; function formatMb(bytes: number, digits = 2): string { @@ -96,6 +116,32 @@ function isHeicSource(opts: { contentType?: string; fileName?: string }): boolea return false; } +function assertHostReadMediaAllowed(params: { + contentType?: string; + kind: MediaKind | undefined; + mediaUrl: string; + fileName?: string; +}): void { + if (params.kind !== "document") { + return; + } + const normalizedMime = params.contentType?.trim().toLowerCase(); + if (normalizedMime && HOST_READ_ALLOWED_DOCUMENT_MIMES.has(normalizedMime)) { + return; + } + const ext = path + .extname(params.fileName ?? params.mediaUrl) + .trim() + .toLowerCase(); + if (ext && HOST_READ_ALLOWED_DOCUMENT_EXTS.has(ext)) { + return; + } + throw new LocalMediaAccessError( + "path-not-allowed", + `Host-local media sends only allow images, audio, video, PDF, and Office documents (got ${normalizedMime ?? "unknown"}).`, + ); +} + function toJpegFileName(fileName?: string): string | undefined { if (!fileName) { return undefined; @@ -174,6 +220,7 @@ async function loadWebMediaInternal( localRoots, sandboxValidated = false, readFile: readFileOverride, + hostReadCapability = false, } = options; // Strip MEDIA: prefix used by agent tools (e.g. TTS) to tag media paths. // Be lenient: LLM output may add extra whitespace (e.g. " MEDIA : /tmp/x.png"). @@ -333,6 +380,14 @@ async function loadWebMediaInternal( fileName = `${fileName}${ext}`; } } + if (hostReadCapability) { + assertHostReadMediaAllowed({ + contentType: mime, + kind, + mediaUrl, + fileName, + }); + } return await clampAndFinalize({ buffer: data, contentType: mime, diff --git a/src/plugin-sdk/discord-send.ts b/src/plugin-sdk/discord-send.ts index 4c4464fe9c1..a01eeb6f8ad 100644 --- a/src/plugin-sdk/discord-send.ts +++ b/src/plugin-sdk/discord-send.ts @@ -10,6 +10,7 @@ type DiscordSendOptionInput = { type DiscordSendMediaOptionInput = DiscordSendOptionInput & { mediaUrl?: string; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }; /** Build the common Discord send options from SDK-level reply payload fields. */ @@ -28,6 +29,7 @@ export function buildDiscordSendMediaOptions(input: DiscordSendMediaOptionInput) ...buildDiscordSendOptions(input), mediaUrl: input.mediaUrl, mediaLocalRoots: input.mediaLocalRoots, + mediaReadFile: input.mediaReadFile, }; } diff --git a/src/plugin-sdk/outbound-media.test.ts b/src/plugin-sdk/outbound-media.test.ts index 1a102e90a0f..15b9562de8a 100644 --- a/src/plugin-sdk/outbound-media.test.ts +++ b/src/plugin-sdk/outbound-media.test.ts @@ -51,4 +51,26 @@ describe("loadOutboundMediaFromUrl", () => { localRoots: undefined, }); }); + + it("prefers host read capability over local roots when provided", async () => { + const mediaReadFile = vi.fn(async () => Buffer.from("x")); + loadWebMediaMock.mockResolvedValueOnce({ + buffer: Buffer.from("x"), + kind: "image", + contentType: "image/png", + }); + + await loadOutboundMediaFromUrl("/Users/peter/Pictures/image.png", { + maxBytes: 2048, + mediaLocalRoots: ["/tmp/workspace-agent"], + mediaReadFile, + }); + + expect(loadWebMediaMock).toHaveBeenCalledWith("/Users/peter/Pictures/image.png", { + maxBytes: 2048, + localRoots: "any", + readFile: mediaReadFile, + hostReadCapability: true, + }); + }); }); diff --git a/src/plugin-sdk/outbound-media.ts b/src/plugin-sdk/outbound-media.ts index 979f8ac77a3..3e8a5a18405 100644 --- a/src/plugin-sdk/outbound-media.ts +++ b/src/plugin-sdk/outbound-media.ts @@ -1,8 +1,10 @@ +import { buildOutboundMediaLoadOptions } from "../media/load-options.js"; import { loadWebMedia } from "./web-media.js"; export type OutboundMediaLoadOptions = { maxBytes?: number; mediaLocalRoots?: readonly string[]; + mediaReadFile?: (filePath: string) => Promise; }; /** Load outbound media from a remote URL or approved local path using the shared web-media policy. */ @@ -10,8 +12,12 @@ export async function loadOutboundMediaFromUrl( mediaUrl: string, options: OutboundMediaLoadOptions = {}, ) { - return await loadWebMedia(mediaUrl, { - maxBytes: options.maxBytes, - localRoots: options.mediaLocalRoots, - }); + return await loadWebMedia( + mediaUrl, + buildOutboundMediaLoadOptions({ + maxBytes: options.maxBytes, + mediaLocalRoots: options.mediaLocalRoots, + mediaReadFile: options.mediaReadFile, + }), + ); }