openclaw/extensions/msteams/src/attachments/shared.ts

460 lines
13 KiB
TypeScript

import { lookup } from "node:dns/promises";
import {
buildHostnameAllowlistPolicyFromSuffixAllowlist,
isHttpsUrlAllowedByHostnameSuffixAllowlist,
isPrivateIpAddress,
normalizeHostnameSuffixAllowlist,
} from "openclaw/plugin-sdk/msteams";
import type { SsrFPolicy } from "openclaw/plugin-sdk/msteams";
import type { MSTeamsAttachmentLike } from "./types.js";
type InlineImageCandidate =
| {
kind: "data";
data: Buffer;
contentType?: string;
placeholder: string;
}
| {
kind: "url";
url: string;
contentType?: string;
fileHint?: string;
placeholder: string;
};
export const IMAGE_EXT_RE = /\.(avif|bmp|gif|heic|heif|jpe?g|png|tiff?|webp)$/i;
export const IMG_SRC_RE = /<img[^>]+src=["']([^"']+)["'][^>]*>/gi;
export const ATTACHMENT_TAG_RE = /<attachment[^>]+id=["']([^"']+)["'][^>]*>/gi;
export const DEFAULT_MEDIA_HOST_ALLOWLIST = [
"graph.microsoft.com",
"graph.microsoft.us",
"graph.microsoft.de",
"graph.microsoft.cn",
"sharepoint.com",
"sharepoint.us",
"sharepoint.de",
"sharepoint.cn",
"sharepoint-df.com",
"1drv.ms",
"onedrive.com",
"teams.microsoft.com",
"teams.cdn.office.net",
"statics.teams.cdn.office.net",
"office.com",
"office.net",
// Azure Media Services / Skype CDN for clipboard-pasted images
"asm.skype.com",
"ams.skype.com",
"media.ams.skype.com",
// Bot Framework attachment URLs
"trafficmanager.net",
"blob.core.windows.net",
"azureedge.net",
"microsoft.com",
] as const;
export const DEFAULT_MEDIA_AUTH_HOST_ALLOWLIST = [
"api.botframework.com",
"botframework.com",
"graph.microsoft.com",
"graph.microsoft.us",
"graph.microsoft.de",
"graph.microsoft.cn",
] as const;
export const GRAPH_ROOT = "https://graph.microsoft.com/v1.0";
export function isRecord(value: unknown): value is Record<string, unknown> {
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
export function resolveRequestUrl(input: RequestInfo | URL): string {
if (typeof input === "string") {
return input;
}
if (input instanceof URL) {
return input.toString();
}
if (typeof input === "object" && input && "url" in input && typeof input.url === "string") {
return input.url;
}
return String(input);
}
export function normalizeContentType(value: unknown): string | undefined {
if (typeof value !== "string") {
return undefined;
}
const trimmed = value.trim();
return trimmed ? trimmed : undefined;
}
export function inferPlaceholder(params: {
contentType?: string;
fileName?: string;
fileType?: string;
}): string {
const mime = params.contentType?.toLowerCase() ?? "";
const name = params.fileName?.toLowerCase() ?? "";
const fileType = params.fileType?.toLowerCase() ?? "";
const looksLikeImage =
mime.startsWith("image/") || IMAGE_EXT_RE.test(name) || IMAGE_EXT_RE.test(`x.${fileType}`);
return looksLikeImage ? "<media:image>" : "<media:document>";
}
export function isLikelyImageAttachment(att: MSTeamsAttachmentLike): boolean {
const contentType = normalizeContentType(att.contentType) ?? "";
const name = typeof att.name === "string" ? att.name : "";
if (contentType.startsWith("image/")) {
return true;
}
if (IMAGE_EXT_RE.test(name)) {
return true;
}
if (
contentType === "application/vnd.microsoft.teams.file.download.info" &&
isRecord(att.content)
) {
const fileType = typeof att.content.fileType === "string" ? att.content.fileType : "";
if (fileType && IMAGE_EXT_RE.test(`x.${fileType}`)) {
return true;
}
const fileName = typeof att.content.fileName === "string" ? att.content.fileName : "";
if (fileName && IMAGE_EXT_RE.test(fileName)) {
return true;
}
}
return false;
}
/**
* Returns true if the attachment can be downloaded (any file type).
* Used when downloading all files, not just images.
*/
export function isDownloadableAttachment(att: MSTeamsAttachmentLike): boolean {
const contentType = normalizeContentType(att.contentType) ?? "";
// Teams file download info always has a downloadUrl
if (
contentType === "application/vnd.microsoft.teams.file.download.info" &&
isRecord(att.content) &&
typeof att.content.downloadUrl === "string"
) {
return true;
}
// Any attachment with a contentUrl can be downloaded
if (typeof att.contentUrl === "string" && att.contentUrl.trim()) {
return true;
}
return false;
}
function isHtmlAttachment(att: MSTeamsAttachmentLike): boolean {
const contentType = normalizeContentType(att.contentType) ?? "";
return contentType.startsWith("text/html");
}
export function extractHtmlFromAttachment(att: MSTeamsAttachmentLike): string | undefined {
if (!isHtmlAttachment(att)) {
return undefined;
}
if (typeof att.content === "string") {
return att.content;
}
if (!isRecord(att.content)) {
return undefined;
}
const text =
typeof att.content.text === "string"
? att.content.text
: typeof att.content.body === "string"
? att.content.body
: typeof att.content.content === "string"
? att.content.content
: undefined;
return text;
}
function decodeDataImage(src: string): InlineImageCandidate | null {
const match = /^data:(image\/[a-z0-9.+-]+)?(;base64)?,(.*)$/i.exec(src);
if (!match) {
return null;
}
const contentType = match[1]?.toLowerCase();
const isBase64 = Boolean(match[2]);
if (!isBase64) {
return null;
}
const payload = match[3] ?? "";
if (!payload) {
return null;
}
try {
const data = Buffer.from(payload, "base64");
return { kind: "data", data, contentType, placeholder: "<media:image>" };
} catch {
return null;
}
}
function fileHintFromUrl(src: string): string | undefined {
try {
const url = new URL(src);
const name = url.pathname.split("/").pop();
return name || undefined;
} catch {
return undefined;
}
}
export function extractInlineImageCandidates(
attachments: MSTeamsAttachmentLike[],
): InlineImageCandidate[] {
const out: InlineImageCandidate[] = [];
for (const att of attachments) {
const html = extractHtmlFromAttachment(att);
if (!html) {
continue;
}
IMG_SRC_RE.lastIndex = 0;
let match: RegExpExecArray | null = IMG_SRC_RE.exec(html);
while (match) {
const src = match[1]?.trim();
if (src && !src.startsWith("cid:")) {
if (src.startsWith("data:")) {
const decoded = decodeDataImage(src);
if (decoded) {
out.push(decoded);
}
} else {
out.push({
kind: "url",
url: src,
fileHint: fileHintFromUrl(src),
placeholder: "<media:image>",
});
}
}
match = IMG_SRC_RE.exec(html);
}
}
return out;
}
export function safeHostForUrl(url: string): string {
try {
return new URL(url).hostname.toLowerCase();
} catch {
return "invalid-url";
}
}
export function resolveAllowedHosts(input?: string[]): string[] {
return normalizeHostnameSuffixAllowlist(input, DEFAULT_MEDIA_HOST_ALLOWLIST);
}
export function resolveAuthAllowedHosts(input?: string[]): string[] {
return normalizeHostnameSuffixAllowlist(input, DEFAULT_MEDIA_AUTH_HOST_ALLOWLIST);
}
export type MSTeamsAttachmentFetchPolicy = {
allowHosts: string[];
authAllowHosts: string[];
};
export function resolveAttachmentFetchPolicy(params?: {
allowHosts?: string[];
authAllowHosts?: string[];
}): MSTeamsAttachmentFetchPolicy {
return {
allowHosts: resolveAllowedHosts(params?.allowHosts),
authAllowHosts: resolveAuthAllowedHosts(params?.authAllowHosts),
};
}
export function isUrlAllowed(url: string, allowlist: string[]): boolean {
return isHttpsUrlAllowedByHostnameSuffixAllowlist(url, allowlist);
}
export function applyAuthorizationHeaderForUrl(params: {
headers: Headers;
url: string;
authAllowHosts: string[];
bearerToken?: string;
}): void {
if (!params.bearerToken) {
params.headers.delete("Authorization");
return;
}
if (isUrlAllowed(params.url, params.authAllowHosts)) {
params.headers.set("Authorization", `Bearer ${params.bearerToken}`);
return;
}
params.headers.delete("Authorization");
}
export function resolveMediaSsrfPolicy(allowHosts: string[]): SsrFPolicy | undefined {
return buildHostnameAllowlistPolicyFromSuffixAllowlist(allowHosts);
}
/**
* Returns true if the given IPv4 or IPv6 address is in a private, loopback,
* or link-local range that must never be reached from media downloads.
*
* Delegates to the SDK's `isPrivateIpAddress` which handles IPv4-mapped IPv6,
* expanded notation, NAT64, 6to4, Teredo, octal IPv4, and fails closed on
* parse errors.
*/
export const isPrivateOrReservedIP: (ip: string) => boolean = isPrivateIpAddress;
/**
* Resolve a hostname via DNS and reject private/reserved IPs.
* Throws if the resolved IP is private or resolution fails.
*/
export async function resolveAndValidateIP(
hostname: string,
resolveFn?: (hostname: string) => Promise<{ address: string }>,
): Promise<string> {
const resolve = resolveFn ?? lookup;
let resolved: { address: string };
try {
resolved = await resolve(hostname);
} catch {
throw new Error(`DNS resolution failed for "${hostname}"`);
}
if (isPrivateOrReservedIP(resolved.address)) {
throw new Error(`Hostname "${hostname}" resolves to private/reserved IP (${resolved.address})`);
}
return resolved.address;
}
/** Maximum number of redirects to follow in safeFetch. */
const MAX_SAFE_REDIRECTS = 5;
/**
* Fetch a URL with redirect: "manual", validating each redirect target
* against the hostname allowlist and optional DNS-resolved IP (anti-SSRF).
*
* This prevents:
* - Auto-following redirects to non-allowlisted hosts
* - DNS rebinding attacks when a lookup function is provided
*/
export async function safeFetch(params: {
url: string;
allowHosts: string[];
/**
* Optional allowlist for forwarding Authorization across redirects.
* When set, Authorization is stripped before following redirects to hosts
* outside this list.
*/
authorizationAllowHosts?: string[];
fetchFn?: typeof fetch;
requestInit?: RequestInit;
resolveFn?: (hostname: string) => Promise<{ address: string }>;
}): Promise<Response> {
const fetchFn = params.fetchFn ?? fetch;
const resolveFn = params.resolveFn;
const hasDispatcher = Boolean(
params.requestInit &&
typeof params.requestInit === "object" &&
"dispatcher" in (params.requestInit as Record<string, unknown>),
);
const currentHeaders = new Headers(params.requestInit?.headers);
let currentUrl = params.url;
if (!isUrlAllowed(currentUrl, params.allowHosts)) {
throw new Error(`Initial download URL blocked: ${currentUrl}`);
}
if (resolveFn) {
try {
const initialHost = new URL(currentUrl).hostname;
await resolveAndValidateIP(initialHost, resolveFn);
} catch {
throw new Error(`Initial download URL blocked: ${currentUrl}`);
}
}
for (let i = 0; i <= MAX_SAFE_REDIRECTS; i++) {
const res = await fetchFn(currentUrl, {
...params.requestInit,
headers: currentHeaders,
redirect: "manual",
});
if (![301, 302, 303, 307, 308].includes(res.status)) {
return res;
}
const location = res.headers.get("location");
if (!location) {
return res;
}
let redirectUrl: string;
try {
redirectUrl = new URL(location, currentUrl).toString();
} catch {
throw new Error(`Invalid redirect URL: ${location}`);
}
// Validate redirect target against hostname allowlist
if (!isUrlAllowed(redirectUrl, params.allowHosts)) {
throw new Error(`Media redirect target blocked by allowlist: ${redirectUrl}`);
}
// Prevent credential bleed: only keep Authorization on redirect hops that
// are explicitly auth-allowlisted.
if (
currentHeaders.has("authorization") &&
params.authorizationAllowHosts &&
!isUrlAllowed(redirectUrl, params.authorizationAllowHosts)
) {
currentHeaders.delete("authorization");
}
// When a pinned dispatcher is already injected by an upstream guard
// (for example fetchWithSsrFGuard), let that guard own redirect handling
// after this allowlist validation step.
if (hasDispatcher) {
return res;
}
// Validate redirect target's resolved IP
if (resolveFn) {
const redirectHost = new URL(redirectUrl).hostname;
await resolveAndValidateIP(redirectHost, resolveFn);
}
currentUrl = redirectUrl;
}
throw new Error(`Too many redirects (>${MAX_SAFE_REDIRECTS})`);
}
export async function safeFetchWithPolicy(params: {
url: string;
policy: MSTeamsAttachmentFetchPolicy;
fetchFn?: typeof fetch;
requestInit?: RequestInit;
resolveFn?: (hostname: string) => Promise<{ address: string }>;
}): Promise<Response> {
return await safeFetch({
url: params.url,
allowHosts: params.policy.allowHosts,
authorizationAllowHosts: params.policy.authAllowHosts,
fetchFn: params.fetchFn,
requestInit: params.requestInit,
resolveFn: params.resolveFn,
});
}