mirror of https://github.com/openclaw/openclaw.git
fix: prevent nodes media base64 context bloat (#34332)
This commit is contained in:
parent
ed8e0a8146
commit
ef4fa43df8
|
|
@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
|
|||
|
||||
### Fixes
|
||||
|
||||
- Agents/Nodes media outputs: add dedicated `photos_latest` action handling, block media-returning `nodes invoke` commands, keep metadata-only `camera.list` invoke allowed, and normalize empty `photos_latest` results to a consistent response shape to prevent base64 context bloat. (#34332) Thanks @obviyus.
|
||||
- TUI/session-key canonicalization: normalize `openclaw tui --session` values to lowercase so uppercase session names no longer drop real-time streaming updates due to gateway/TUI key mismatches. (#33866, #34013) Thanks @lynnzc.
|
||||
- Outbound/send config threading: pass resolved SecretRef config through outbound adapters and helper send paths so send flows do not reload unresolved runtime config. (#33987) Thanks @joshavant.
|
||||
- Sessions/subagent attachments: remove `attachments[].content.maxLength` from `sessions_spawn` schema to avoid llama.cpp GBNF repetition overflow, and preflight UTF-8 byte size before buffer allocation while keeping runtime file-size enforcement unchanged. (#33648) Thanks @anisoptera.
|
||||
|
|
|
|||
|
|
@ -32,16 +32,21 @@ function unexpectedGatewayMethod(method: unknown): never {
|
|||
throw new Error(`unexpected method: ${String(method)}`);
|
||||
}
|
||||
|
||||
/**
 * Builds the OpenClaw tool set and returns the tool named "nodes".
 *
 * @param options - Optional overrides. `modelHasVision` is forwarded to
 *   `createOpenClawTools` only when it was explicitly provided; otherwise an
 *   empty options object is passed.
 * @returns The tool whose `name` is "nodes".
 * @throws Error("missing nodes tool") when no such tool is registered.
 */
function getNodesTool(options?: { modelHasVision?: boolean }) {
  const tool = createOpenClawTools(
    options?.modelHasVision !== undefined ? { modelHasVision: options.modelHasVision } : {},
  ).find((candidate) => candidate.name === "nodes");
  if (!tool) {
    throw new Error("missing nodes tool");
  }
  return tool;
}
|
||||
|
||||
/**
 * Executes the "nodes" tool once with the given raw input.
 *
 * @param input - Tool arguments (for example `{ action, node, ... }`), passed
 *   through to the tool's `execute` unchanged.
 * @param options - Optional tool-construction overrides, forwarded to
 *   `getNodesTool` (currently only `modelHasVision`).
 * @returns Whatever the tool's `execute` resolves to.
 */
async function executeNodes(
  input: Record<string, unknown>,
  options?: { modelHasVision?: boolean },
) {
  // The tool call id is fixed ("call1").
  return getNodesTool(options).execute("call1", input as never);
}
|
||||
|
||||
type NodesToolResult = Awaited<ReturnType<typeof executeNodes>>;
|
||||
|
|
@ -67,6 +72,11 @@ function expectSingleImage(result: NodesToolResult, params?: { mimeType?: string
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Asserts that a tool result carries no content block of type "image".
 * A missing `content` array is treated as empty.
 */
function expectNoImages(result: NodesToolResult) {
  const blocks = result.content ?? [];
  const imageBlocks = blocks.filter(({ type }) => type === "image");
  expect(imageBlocks).toHaveLength(0);
}
|
||||
|
||||
function expectFirstTextContains(result: NodesToolResult, expectedText: string) {
|
||||
expect(result.content?.[0]).toMatchObject({
|
||||
type: "text",
|
||||
|
|
@ -156,10 +166,13 @@ describe("nodes camera_snap", () => {
|
|||
},
|
||||
});
|
||||
|
||||
const result = await executeNodes({
|
||||
action: "camera_snap",
|
||||
node: NODE_ID,
|
||||
});
|
||||
const result = await executeNodes(
|
||||
{
|
||||
action: "camera_snap",
|
||||
node: NODE_ID,
|
||||
},
|
||||
{ modelHasVision: true },
|
||||
);
|
||||
|
||||
expectSingleImage(result);
|
||||
});
|
||||
|
|
@ -169,15 +182,39 @@ describe("nodes camera_snap", () => {
|
|||
invokePayload: JPG_PAYLOAD,
|
||||
});
|
||||
|
||||
const result = await executeNodes({
|
||||
action: "camera_snap",
|
||||
node: NODE_ID,
|
||||
facing: "front",
|
||||
});
|
||||
const result = await executeNodes(
|
||||
{
|
||||
action: "camera_snap",
|
||||
node: NODE_ID,
|
||||
facing: "front",
|
||||
},
|
||||
{ modelHasVision: true },
|
||||
);
|
||||
|
||||
expectSingleImage(result, { mimeType: "image/jpeg" });
|
||||
});
|
||||
|
||||
it("omits inline base64 image blocks when model has no vision", async () => {
  // The mocked node answers the invoke with JPG_PAYLOAD.
  setupNodeInvokeMock({ invokePayload: JPG_PAYLOAD });

  const snapInput = { action: "camera_snap", node: NODE_ID, facing: "front" };
  const result = await executeNodes(snapInput, { modelHasVision: false });

  // Without vision the result must not contain image blocks, and its first
  // block must be a text block starting with "MEDIA:".
  expectNoImages(result);
  const firstBlock = result.content?.[0];
  expect(firstBlock).toMatchObject({
    type: "text",
    text: expect.stringMatching(/^MEDIA:/),
  });
});
|
||||
|
||||
it("passes deviceId when provided", async () => {
|
||||
setupNodeInvokeMock({
|
||||
onInvoke: (invokeParams) => {
|
||||
|
|
@ -299,6 +336,130 @@ describe("nodes camera_clip", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe("nodes photos_latest", () => {
  // Invocation each test expects when the caller passes only `action` and `node`.
  const expectedInvocation = {
    command: "photos.latest",
    params: { limit: 1, maxWidth: 1600, quality: 0.85 },
  };

  // Fresh tool input per call so tests never share one mutable object.
  const latestPhotosInput = () => ({ action: "photos_latest", node: NODE_ID });

  // Single photo entry used as the mocked node payload.
  const singlePhoto = () => ({
    format: "jpeg",
    base64: "aGVsbG8=",
    width: 1,
    height: 1,
    createdAt: "2026-03-04T00:00:00Z",
  });

  // Builds an onInvoke handler that checks the forwarded invocation and then
  // answers with the given photo list.
  const replyWithPhotos =
    <T>(photos: T[]) =>
    (invokeParams: unknown) => {
      expect(invokeParams).toMatchObject(expectedInvocation);
      return { payload: { photos } };
    };

  // First content block must be a text block starting with "MEDIA:".
  const expectLeadingMediaText = (result: NodesToolResult) => {
    expect(result.content?.[0]).toMatchObject({
      type: "text",
      text: expect.stringMatching(/^MEDIA:/),
    });
  };

  it("returns empty content/details when no photos are available", async () => {
    setupNodeInvokeMock({ onInvoke: replyWithPhotos([]) });

    const result = await executeNodes(latestPhotosInput(), { modelHasVision: false });

    expect(result.content ?? []).toEqual([]);
    expect(result.details).toEqual([]);
  });

  it("returns MEDIA paths and no inline images when model has no vision", async () => {
    setupNodeInvokeMock({
      remoteIp: "198.51.100.42",
      onInvoke: replyWithPhotos([singlePhoto()]),
    });

    const result = await executeNodes(latestPhotosInput(), { modelHasVision: false });

    expectNoImages(result);
    expectLeadingMediaText(result);

    // Photo metadata is reported through `details`, one entry per photo.
    const details = Array.isArray(result.details) ? result.details : [];
    expect(details[0]).toMatchObject({
      width: 1,
      height: 1,
      createdAt: "2026-03-04T00:00:00Z",
    });
  });

  it("includes inline image blocks when model has vision", async () => {
    setupNodeInvokeMock({ onInvoke: replyWithPhotos([singlePhoto()]) });

    const result = await executeNodes(latestPhotosInput(), { modelHasVision: true });

    expectLeadingMediaText(result);
    expectSingleImage(result, { mimeType: "image/jpeg" });
  });
});
|
||||
|
||||
describe("nodes notifications_list", () => {
|
||||
it("invokes notifications.list and returns payload", async () => {
|
||||
setupNodeInvokeMock({
|
||||
|
|
@ -576,3 +737,44 @@ describe("nodes run", () => {
|
|||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("nodes invoke", () => {
  it("allows metadata-only camera.list via generic invoke", async () => {
    // Built fresh each time so the mocked reply and the expectation never
    // share one object instance.
    const cameraListPayload = () => ({
      devices: [{ id: "cam-back", name: "Back Camera" }],
    });

    setupNodeInvokeMock({
      onInvoke: (invokeParams) => {
        expect(invokeParams).toMatchObject({ command: "camera.list", params: {} });
        return { payload: cameraListPayload() };
      },
    });

    const invokeInput = {
      action: "invoke",
      node: NODE_ID,
      invokeCommand: "camera.list",
    };
    const result = await executeNodes(invokeInput);

    expect(result.details).toMatchObject({ payload: cameraListPayload() });
  });

  it("blocks media invoke commands to avoid base64 context bloat", async () => {
    // The generic invoke must reject photos.latest and point the caller at the
    // dedicated action instead.
    const blockedCall = executeNodes({
      action: "invoke",
      node: NODE_ID,
      invokeCommand: "photos.latest",
      invokeParamsJson: '{"limit":1}',
    });

    await expect(blockedCall).rejects.toThrow(/use action="photos_latest"/i);
  });
});
|
||||
|
|
|
|||
|
|
@ -136,6 +136,7 @@ export function createOpenClawTools(options?: {
|
|||
currentChannelId: options?.currentChannelId,
|
||||
currentThreadTs: options?.currentThreadTs,
|
||||
config: options?.config,
|
||||
modelHasVision: options?.modelHasVision,
|
||||
}),
|
||||
createCronTool({
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ const NODES_TOOL_ACTIONS = [
|
|||
"camera_snap",
|
||||
"camera_list",
|
||||
"camera_clip",
|
||||
"photos_latest",
|
||||
"screen_record",
|
||||
"location_get",
|
||||
"notifications_list",
|
||||
|
|
@ -56,6 +57,12 @@ const NOTIFY_DELIVERIES = ["system", "overlay", "auto"] as const;
|
|||
// Literal value sets kept as readonly tuples via `as const`.
// NOTE(review): presumably these back the matching enum parameters of the
// nodes tool schema — confirm against NodesToolSchema.
const NOTIFICATIONS_ACTIONS = [
  "open",
  "dismiss",
  "reply",
] as const;
const CAMERA_FACING = [
  "front",
  "back",
  "both",
] as const;
const LOCATION_ACCURACY = [
  "coarse",
  "balanced",
  "precise",
] as const;
|
||||
// Entries for MEDIA_INVOKE_ACTIONS, kept as a literal so the key/value types
// stay exact.
const MEDIA_INVOKE_ACTION_ENTRIES = {
  "camera.snap": "camera_snap",
  "camera.clip": "camera_clip",
  "photos.latest": "photos_latest",
  "screen.record": "screen_record",
} as const;

/**
 * Maps node commands that return media payloads to the dedicated `nodes` tool
 * action that must be used instead of the generic `invoke` action.
 *
 * The table is built on a prototype-less object. It is indexed with a
 * caller-supplied command name and the result is checked for truthiness; on a
 * plain object literal a name such as "constructor" or "__proto__" would
 * resolve to an inherited `Object.prototype` member and be rejected as if it
 * were a media command. Without a prototype, unknown names yield `undefined`.
 */
const MEDIA_INVOKE_ACTIONS: typeof MEDIA_INVOKE_ACTION_ENTRIES = Object.assign(
  Object.create(null),
  MEDIA_INVOKE_ACTION_ENTRIES,
);
|
||||
const NODE_READ_ACTION_COMMANDS = {
|
||||
camera_list: "camera.list",
|
||||
notifications_list: "notifications.list",
|
||||
|
|
@ -118,6 +125,7 @@ const NodesToolSchema = Type.Object({
|
|||
quality: Type.Optional(Type.Number()),
|
||||
delayMs: Type.Optional(Type.Number()),
|
||||
deviceId: Type.Optional(Type.String()),
|
||||
limit: Type.Optional(Type.Number()),
|
||||
duration: Type.Optional(Type.String()),
|
||||
durationMs: Type.Optional(Type.Number({ maximum: 300_000 })),
|
||||
includeAudio: Type.Optional(Type.Boolean()),
|
||||
|
|
@ -152,6 +160,7 @@ export function createNodesTool(options?: {
|
|||
currentChannelId?: string;
|
||||
currentThreadTs?: string | number;
|
||||
config?: OpenClawConfig;
|
||||
modelHasVision?: boolean;
|
||||
}): AnyAgentTool {
|
||||
const sessionKey = options?.agentSessionKey?.trim() || undefined;
|
||||
const turnSourceChannel = options?.agentChannel?.trim() || undefined;
|
||||
|
|
@ -167,7 +176,7 @@ export function createNodesTool(options?: {
|
|||
label: "Nodes",
|
||||
name: "nodes",
|
||||
description:
|
||||
"Discover and control paired nodes (status/describe/pairing/notify/camera/screen/location/notifications/run/invoke).",
|
||||
"Discover and control paired nodes (status/describe/pairing/notify/camera/photos/screen/location/notifications/run/invoke).",
|
||||
parameters: NodesToolSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const params = args as Record<string, unknown>;
|
||||
|
|
@ -301,7 +310,7 @@ export function createNodesTool(options?: {
|
|||
invalidPayloadMessage: "invalid camera.snap payload",
|
||||
});
|
||||
content.push({ type: "text", text: `MEDIA:${filePath}` });
|
||||
if (payload.base64) {
|
||||
if (options?.modelHasVision && payload.base64) {
|
||||
content.push({
|
||||
type: "image",
|
||||
data: payload.base64,
|
||||
|
|
@ -320,6 +329,103 @@ export function createNodesTool(options?: {
|
|||
const result: AgentToolResult<unknown> = { content, details };
|
||||
return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization);
|
||||
}
|
||||
case "photos_latest": {
  // Requests the latest photos from a node via the "photos.latest" command,
  // writes each returned photo to a temp file, and reports one
  // `MEDIA:<path>` text block per photo. Inline image blocks are added only
  // when `options.modelHasVision` is set.
  const finiteNumberOr = (value: unknown, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;

  const node = readStringParam(params, "node", { required: true });
  const resolvedNode = await resolveNode(gatewayOpts, node);

  // Only a caller-supplied finite limit is floored; the result is then
  // clamped to the range [1, MAX_PHOTOS_LIMIT].
  const requestedLimit =
    typeof params.limit === "number" && Number.isFinite(params.limit)
      ? Math.floor(params.limit)
      : DEFAULT_PHOTOS_LIMIT;
  const limit = Math.max(1, Math.min(requestedLimit, MAX_PHOTOS_LIMIT));
  const maxWidth = finiteNumberOr(params.maxWidth, DEFAULT_PHOTOS_MAX_WIDTH);
  const quality = finiteNumberOr(params.quality, DEFAULT_PHOTOS_QUALITY);

  const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
    nodeId: resolvedNode.nodeId,
    command: "photos.latest",
    params: { limit, maxWidth, quality },
    idempotencyKey: crypto.randomUUID(),
  });

  // Anything other than a plain object payload is treated as "no photos".
  let payload: Record<string, unknown> = {};
  if (raw?.payload && typeof raw.payload === "object" && !Array.isArray(raw.payload)) {
    payload = raw.payload as Record<string, unknown>;
  }
  const photos = Array.isArray(payload.photos) ? payload.photos : [];

  if (photos.length === 0) {
    // Empty result: both content and details are empty arrays.
    const emptyResult: AgentToolResult<unknown> = { content: [], details: [] };
    return await sanitizeToolResultImages(
      emptyResult,
      "nodes:photos_latest",
      imageSanitization,
    );
  }

  const content: AgentToolResult<unknown>["content"] = [];
  const details: Array<Record<string, unknown>> = [];

  // Photos are handled one after another, so content/details keep payload order.
  for (let index = 0; index < photos.length; index += 1) {
    const photoRaw = photos[index];
    const photo = parseCameraSnapPayload(photoRaw);

    const format = photo.format.toLowerCase();
    const isJpeg = format === "jpg" || format === "jpeg";
    if (!isJpeg && format !== "png") {
      throw new Error(`unsupported photos.latest format: ${photo.format}`);
    }

    const filePath = cameraTempPath({
      kind: "snap",
      ext: isJpeg ? "jpg" : "png",
      id: crypto.randomUUID(),
    });
    await writeCameraPayloadToFile({
      filePath,
      payload: photo,
      expectedHost: resolvedNode.remoteIp,
      invalidPayloadMessage: "invalid photos.latest payload",
    });

    content.push({ type: "text", text: `MEDIA:${filePath}` });
    if (options?.modelHasVision && photo.base64) {
      const fallbackMime = isJpeg ? "image/jpeg" : "image/png";
      content.push({
        type: "image",
        data: photo.base64,
        mimeType: imageMimeFromFormat(photo.format) ?? fallbackMime,
      });
    }

    // `createdAt` is read from the raw entry and reported only when it is a string.
    const createdAt =
      photoRaw && typeof photoRaw === "object" && !Array.isArray(photoRaw)
        ? (photoRaw as Record<string, unknown>).createdAt
        : undefined;
    const entry: Record<string, unknown> = {
      index,
      path: filePath,
      width: photo.width,
      height: photo.height,
    };
    if (typeof createdAt === "string") {
      entry.createdAt = createdAt;
    }
    details.push(entry);
  }

  const result: AgentToolResult<unknown> = { content, details };
  return await sanitizeToolResultImages(result, "nodes:photos_latest", imageSanitization);
}
|
||||
case "camera_list":
|
||||
case "notifications_list":
|
||||
case "device_status":
|
||||
|
|
@ -645,6 +751,14 @@ export function createNodesTool(options?: {
|
|||
const node = readStringParam(params, "node", { required: true });
|
||||
const nodeId = await resolveNodeId(gatewayOpts, node);
|
||||
const invokeCommand = readStringParam(params, "invokeCommand", { required: true });
|
||||
const invokeCommandNormalized = invokeCommand.trim().toLowerCase();
|
||||
const dedicatedAction =
|
||||
MEDIA_INVOKE_ACTIONS[invokeCommandNormalized as keyof typeof MEDIA_INVOKE_ACTIONS];
|
||||
if (dedicatedAction) {
|
||||
throw new Error(
|
||||
`invokeCommand "${invokeCommand}" returns media payloads and is blocked to prevent base64 context bloat; use action="${dedicatedAction}"`,
|
||||
);
|
||||
}
|
||||
const invokeParamsJson =
|
||||
typeof params.invokeParamsJson === "string" ? params.invokeParamsJson.trim() : "";
|
||||
let invokeParams: unknown = {};
|
||||
|
|
@ -695,3 +809,8 @@ export function createNodesTool(options?: {
|
|||
},
|
||||
};
|
||||
}
|
||||
|
||||
/** `limit` used by the photos_latest action when the caller gives no finite number. */
const DEFAULT_PHOTOS_LIMIT = 1;

/** Upper bound the photos_latest action clamps `limit` to. */
const MAX_PHOTOS_LIMIT = 20;

/**
 * `maxWidth` forwarded to "photos.latest" when the caller gives no finite number.
 * NOTE(review): presumably pixels — confirm against the node command contract.
 */
const DEFAULT_PHOTOS_MAX_WIDTH = 1600;

/** `quality` forwarded to "photos.latest" when the caller gives no finite number. */
const DEFAULT_PHOTOS_QUALITY = 0.85;
|
||||
|
|
|
|||
Loading…
Reference in New Issue