fix: prevent nodes media base64 context bloat (#34332)

This commit is contained in:
Ayaan Zaidi 2026-03-04 16:53:16 +05:30 committed by Ayaan Zaidi
parent ed8e0a8146
commit ef4fa43df8
4 changed files with 338 additions and 15 deletions

View File

@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/Nodes media outputs: add dedicated `photos_latest` action handling, block media-returning `nodes invoke` commands, keep metadata-only `camera.list` invoke allowed, and normalize empty `photos_latest` results to a consistent response shape to prevent base64 context bloat. (#34332) Thanks @obviyus.
- TUI/session-key canonicalization: normalize `openclaw tui --session` values to lowercase so uppercase session names no longer drop real-time streaming updates due to gateway/TUI key mismatches. (#33866, #34013) Thanks @lynnzc.
- Outbound/send config threading: pass resolved SecretRef config through outbound adapters and helper send paths so send flows do not reload unresolved runtime config. (#33987) Thanks @joshavant.
- Sessions/subagent attachments: remove `attachments[].content.maxLength` from `sessions_spawn` schema to avoid llama.cpp GBNF repetition overflow, and preflight UTF-8 byte size before buffer allocation while keeping runtime file-size enforcement unchanged. (#33648) Thanks @anisoptera.

View File

@ -32,16 +32,21 @@ function unexpectedGatewayMethod(method: unknown): never {
throw new Error(`unexpected method: ${String(method)}`);
}
function getNodesTool() {
const tool = createOpenClawTools().find((candidate) => candidate.name === "nodes");
function getNodesTool(options?: { modelHasVision?: boolean }) {
const tool = createOpenClawTools(
options?.modelHasVision !== undefined ? { modelHasVision: options.modelHasVision } : {},
).find((candidate) => candidate.name === "nodes");
if (!tool) {
throw new Error("missing nodes tool");
}
return tool;
}
async function executeNodes(input: Record<string, unknown>) {
return getNodesTool().execute("call1", input as never);
async function executeNodes(
input: Record<string, unknown>,
options?: { modelHasVision?: boolean },
) {
return getNodesTool(options).execute("call1", input as never);
}
type NodesToolResult = Awaited<ReturnType<typeof executeNodes>>;
@ -67,6 +72,11 @@ function expectSingleImage(result: NodesToolResult, params?: { mimeType?: string
}
}
/** Asserts that a nodes tool result carries no inline `image` content blocks. */
function expectNoImages(result: NodesToolResult) {
  const contentBlocks = result.content ?? [];
  const inlineImages = contentBlocks.filter(({ type }) => type === "image");
  expect(inlineImages).toHaveLength(0);
}
function expectFirstTextContains(result: NodesToolResult, expectedText: string) {
expect(result.content?.[0]).toMatchObject({
type: "text",
@ -156,10 +166,13 @@ describe("nodes camera_snap", () => {
},
});
const result = await executeNodes({
action: "camera_snap",
node: NODE_ID,
});
const result = await executeNodes(
{
action: "camera_snap",
node: NODE_ID,
},
{ modelHasVision: true },
);
expectSingleImage(result);
});
@ -169,15 +182,39 @@ describe("nodes camera_snap", () => {
invokePayload: JPG_PAYLOAD,
});
const result = await executeNodes({
action: "camera_snap",
node: NODE_ID,
facing: "front",
});
const result = await executeNodes(
{
action: "camera_snap",
node: NODE_ID,
facing: "front",
},
{ modelHasVision: true },
);
expectSingleImage(result, { mimeType: "image/jpeg" });
});
it("omits inline base64 image blocks when model has no vision", async () => {
  setupNodeInvokeMock({ invokePayload: JPG_PAYLOAD });

  // Front-facing snap executed with vision explicitly disabled.
  const snapInput = {
    action: "camera_snap",
    node: NODE_ID,
    facing: "front",
  };
  const result = await executeNodes(snapInput, { modelHasVision: false });

  // No image blocks may be present; the first block must be the MEDIA path text.
  expectNoImages(result);
  const [leadingBlock] = result.content ?? [];
  expect(leadingBlock).toMatchObject({
    type: "text",
    text: expect.stringMatching(/^MEDIA:/),
  });
});
it("passes deviceId when provided", async () => {
setupNodeInvokeMock({
onInvoke: (invokeParams) => {
@ -299,6 +336,130 @@ describe("nodes camera_clip", () => {
});
});
describe("nodes photos_latest", () => {
  // Gateway invocation every case expects when the caller passes no limit/maxWidth/quality.
  const expectedInvoke = {
    command: "photos.latest",
    params: {
      limit: 1,
      maxWidth: 1600,
      quality: 0.85,
    },
  };

  /** Builds a fresh single-photo entry so no test shares a payload object with another. */
  function buildPhoto() {
    return {
      format: "jpeg",
      base64: "aGVsbG8=",
      width: 1,
      height: 1,
      createdAt: "2026-03-04T00:00:00Z",
    };
  }

  /** Installs a node.invoke mock that checks the invocation and replies with `photos`. */
  function mockPhotosLatest(photos: unknown[], extra: { remoteIp?: string } = {}) {
    setupNodeInvokeMock({
      ...extra,
      onInvoke: (invokeParams) => {
        expect(invokeParams).toMatchObject(expectedInvoke);
        return {
          payload: {
            photos,
          },
        };
      },
    });
  }

  /** Runs the photos_latest action against the test node with the given vision setting. */
  function runPhotosLatest(modelHasVision: boolean) {
    return executeNodes(
      {
        action: "photos_latest",
        node: NODE_ID,
      },
      { modelHasVision },
    );
  }

  it("returns empty content/details when no photos are available", async () => {
    mockPhotosLatest([]);

    const result = await runPhotosLatest(false);

    expect(result.content ?? []).toEqual([]);
    expect(result.details).toEqual([]);
  });

  it("returns MEDIA paths and no inline images when model has no vision", async () => {
    mockPhotosLatest([buildPhoto()], { remoteIp: "198.51.100.42" });

    const result = await runPhotosLatest(false);

    expectNoImages(result);
    const [leadingBlock] = result.content ?? [];
    expect(leadingBlock).toMatchObject({
      type: "text",
      text: expect.stringMatching(/^MEDIA:/),
    });

    // Photo metadata must survive in details even though the image itself is not inlined.
    const details = Array.isArray(result.details) ? result.details : [];
    expect(details[0]).toMatchObject({
      width: 1,
      height: 1,
      createdAt: "2026-03-04T00:00:00Z",
    });
  });

  it("includes inline image blocks when model has vision", async () => {
    mockPhotosLatest([buildPhoto()]);

    const result = await runPhotosLatest(true);

    const [leadingBlock] = result.content ?? [];
    expect(leadingBlock).toMatchObject({
      type: "text",
      text: expect.stringMatching(/^MEDIA:/),
    });
    expectSingleImage(result, { mimeType: "image/jpeg" });
  });
});
describe("nodes notifications_list", () => {
it("invokes notifications.list and returns payload", async () => {
setupNodeInvokeMock({
@ -576,3 +737,44 @@ describe("nodes run", () => {
);
});
});
describe("nodes invoke", () => {
  it("allows metadata-only camera.list via generic invoke", async () => {
    // camera.list returns device metadata only, so it stays reachable through generic invoke.
    const cameraListPayload = {
      devices: [{ id: "cam-back", name: "Back Camera" }],
    };
    setupNodeInvokeMock({
      onInvoke: (invokeParams) => {
        expect(invokeParams).toMatchObject({
          command: "camera.list",
          params: {},
        });
        return { payload: cameraListPayload };
      },
    });

    const invokeInput = {
      action: "invoke",
      node: NODE_ID,
      invokeCommand: "camera.list",
    };
    const result = await executeNodes(invokeInput);

    expect(result.details).toMatchObject({
      payload: {
        devices: [{ id: "cam-back", name: "Back Camera" }],
      },
    });
  });

  it("blocks media invoke commands to avoid base64 context bloat", async () => {
    // The rejection message must point the caller at the dedicated action.
    const blockedCall = executeNodes({
      action: "invoke",
      node: NODE_ID,
      invokeCommand: "photos.latest",
      invokeParamsJson: '{"limit":1}',
    });

    await expect(blockedCall).rejects.toThrow(/use action="photos_latest"/i);
  });
});

View File

@ -136,6 +136,7 @@ export function createOpenClawTools(options?: {
currentChannelId: options?.currentChannelId,
currentThreadTs: options?.currentThreadTs,
config: options?.config,
modelHasVision: options?.modelHasVision,
}),
createCronTool({
agentSessionKey: options?.agentSessionKey,

View File

@ -39,6 +39,7 @@ const NODES_TOOL_ACTIONS = [
"camera_snap",
"camera_list",
"camera_clip",
"photos_latest",
"screen_record",
"location_get",
"notifications_list",
@ -56,6 +57,12 @@ const NOTIFY_DELIVERIES = ["system", "overlay", "auto"] as const;
const NOTIFICATIONS_ACTIONS = ["open", "dismiss", "reply"] as const;
const CAMERA_FACING = ["front", "back", "both"] as const;
const LOCATION_ACCURACY = ["coarse", "balanced", "precise"] as const;
/**
 * Maps node commands that return media payloads to the dedicated nodes-tool
 * action that must be used instead of the generic `invoke` action.
 *
 * The map is indexed with a caller-supplied command string, so it is built on a
 * null-prototype object: lookups for names such as "constructor" or "__proto__"
 * yield `undefined` instead of inherited `Object.prototype` members, which would
 * otherwise be truthy and cause unrelated commands to be rejected as media.
 */
const MEDIA_INVOKE_ACTIONS: {
  readonly "camera.snap": "camera_snap";
  readonly "camera.clip": "camera_clip";
  readonly "photos.latest": "photos_latest";
  readonly "screen.record": "screen_record";
} = Object.assign(Object.create(null), {
  "camera.snap": "camera_snap",
  "camera.clip": "camera_clip",
  "photos.latest": "photos_latest",
  "screen.record": "screen_record",
});
const NODE_READ_ACTION_COMMANDS = {
camera_list: "camera.list",
notifications_list: "notifications.list",
@ -118,6 +125,7 @@ const NodesToolSchema = Type.Object({
quality: Type.Optional(Type.Number()),
delayMs: Type.Optional(Type.Number()),
deviceId: Type.Optional(Type.String()),
limit: Type.Optional(Type.Number()),
duration: Type.Optional(Type.String()),
durationMs: Type.Optional(Type.Number({ maximum: 300_000 })),
includeAudio: Type.Optional(Type.Boolean()),
@ -152,6 +160,7 @@ export function createNodesTool(options?: {
currentChannelId?: string;
currentThreadTs?: string | number;
config?: OpenClawConfig;
modelHasVision?: boolean;
}): AnyAgentTool {
const sessionKey = options?.agentSessionKey?.trim() || undefined;
const turnSourceChannel = options?.agentChannel?.trim() || undefined;
@ -167,7 +176,7 @@ export function createNodesTool(options?: {
label: "Nodes",
name: "nodes",
description:
"Discover and control paired nodes (status/describe/pairing/notify/camera/screen/location/notifications/run/invoke).",
"Discover and control paired nodes (status/describe/pairing/notify/camera/photos/screen/location/notifications/run/invoke).",
parameters: NodesToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
@ -301,7 +310,7 @@ export function createNodesTool(options?: {
invalidPayloadMessage: "invalid camera.snap payload",
});
content.push({ type: "text", text: `MEDIA:${filePath}` });
if (payload.base64) {
if (options?.modelHasVision && payload.base64) {
content.push({
type: "image",
data: payload.base64,
@ -320,6 +329,103 @@ export function createNodesTool(options?: {
const result: AgentToolResult<unknown> = { content, details };
return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization);
}
// Dedicated action for the node's `photos.latest` command: each returned photo is
// written to a temp file and surfaced as a `MEDIA:<path>` text block; the base64
// image itself is only inlined when `options.modelHasVision` is set.
case "photos_latest": {
const node = readStringParam(params, "node", { required: true });
const resolvedNode = await resolveNode(gatewayOpts, node);
const nodeId = resolvedNode.nodeId;
// A missing or non-finite limit falls back to DEFAULT_PHOTOS_LIMIT; fractional
// values are floored and the result is clamped to [1, MAX_PHOTOS_LIMIT].
const limitRaw =
typeof params.limit === "number" && Number.isFinite(params.limit)
? Math.floor(params.limit)
: DEFAULT_PHOTOS_LIMIT;
const limit = Math.max(1, Math.min(limitRaw, MAX_PHOTOS_LIMIT));
// maxWidth and quality are forwarded as given when finite; no range clamp is applied here.
const maxWidth =
typeof params.maxWidth === "number" && Number.isFinite(params.maxWidth)
? params.maxWidth
: DEFAULT_PHOTOS_MAX_WIDTH;
const quality =
typeof params.quality === "number" && Number.isFinite(params.quality)
? params.quality
: DEFAULT_PHOTOS_QUALITY;
const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "photos.latest",
params: {
limit,
maxWidth,
quality,
},
idempotencyKey: crypto.randomUUID(),
});
// Anything other than a plain (non-array) object payload is treated as empty.
const payload =
raw?.payload && typeof raw.payload === "object" && !Array.isArray(raw.payload)
? (raw.payload as Record<string, unknown>)
: {};
const photos = Array.isArray(payload.photos) ? payload.photos : [];
// No photos: respond with empty content and an empty details array so the
// result shape (details is an array) matches the non-empty case.
if (photos.length === 0) {
const result: AgentToolResult<unknown> = {
content: [],
details: [],
};
return await sanitizeToolResultImages(
result,
"nodes:photos_latest",
imageSanitization,
);
}
const content: AgentToolResult<unknown>["content"] = [];
const details: Array<Record<string, unknown>> = [];
for (const [index, photoRaw] of photos.entries()) {
// NOTE(review): reuses the camera.snap payload parser — presumably photo entries
// share that payload shape (format/base64/width/height are read below); confirm.
const photo = parseCameraSnapPayload(photoRaw);
const normalizedFormat = photo.format.toLowerCase();
// Only jpg/jpeg/png are accepted; any other format aborts the whole action.
if (
normalizedFormat !== "jpg" &&
normalizedFormat !== "jpeg" &&
normalizedFormat !== "png"
) {
throw new Error(`unsupported photos.latest format: ${photo.format}`);
}
const isJpeg = normalizedFormat === "jpg" || normalizedFormat === "jpeg";
// File extension is normalized to "jpg" or "png"; each photo gets its own random id.
const filePath = cameraTempPath({
kind: "snap",
ext: isJpeg ? "jpg" : "png",
id: crypto.randomUUID(),
});
// NOTE(review): expectedHost is the resolved node's remoteIp — presumably used by
// writeCameraPayloadToFile to validate where the payload comes from; confirm there.
await writeCameraPayloadToFile({
filePath,
payload: photo,
expectedHost: resolvedNode.remoteIp,
invalidPayloadMessage: "invalid photos.latest payload",
});
// The file path is always reported; the base64 image block is added only for
// vision-capable models so it does not bloat the context otherwise.
content.push({ type: "text", text: `MEDIA:${filePath}` });
if (options?.modelHasVision && photo.base64) {
content.push({
type: "image",
data: photo.base64,
mimeType:
imageMimeFromFormat(photo.format) ?? (isJpeg ? "image/jpeg" : "image/png"),
});
}
// createdAt is read from the raw entry and included in details only when it is a string.
const createdAt =
photoRaw && typeof photoRaw === "object" && !Array.isArray(photoRaw)
? (photoRaw as Record<string, unknown>).createdAt
: undefined;
details.push({
index,
path: filePath,
width: photo.width,
height: photo.height,
...(typeof createdAt === "string" ? { createdAt } : {}),
});
}
const result: AgentToolResult<unknown> = { content, details };
return await sanitizeToolResultImages(result, "nodes:photos_latest", imageSanitization);
}
case "camera_list":
case "notifications_list":
case "device_status":
@ -645,6 +751,14 @@ export function createNodesTool(options?: {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const invokeCommand = readStringParam(params, "invokeCommand", { required: true });
const invokeCommandNormalized = invokeCommand.trim().toLowerCase();
const dedicatedAction =
MEDIA_INVOKE_ACTIONS[invokeCommandNormalized as keyof typeof MEDIA_INVOKE_ACTIONS];
if (dedicatedAction) {
throw new Error(
`invokeCommand "${invokeCommand}" returns media payloads and is blocked to prevent base64 context bloat; use action="${dedicatedAction}"`,
);
}
const invokeParamsJson =
typeof params.invokeParamsJson === "string" ? params.invokeParamsJson.trim() : "";
let invokeParams: unknown = {};
@ -695,3 +809,8 @@ export function createNodesTool(options?: {
},
};
}
// Defaults and bounds for the `photos_latest` action.
// Number of photos requested when `limit` is missing or not a finite number.
const DEFAULT_PHOTOS_LIMIT = 1;
// Upper bound the requested `limit` is clamped to (the lower bound is 1).
const MAX_PHOTOS_LIMIT = 20;
// `maxWidth` sent to the node when the caller does not supply a finite number.
const DEFAULT_PHOTOS_MAX_WIDTH = 1600;
// `quality` sent to the node when the caller does not supply a finite number.
const DEFAULT_PHOTOS_QUALITY = 0.85;