openclaw/src/gateway/probe.ts

208 lines
5.9 KiB
TypeScript

import { randomUUID } from "node:crypto";
import { formatErrorMessage } from "../infra/errors.js";
import type { SystemPresence } from "../infra/system-presence.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
import { GatewayClient } from "./client.js";
import { READ_SCOPE } from "./method-scopes.js";
import { isLoopbackHost } from "./net.js";
export type GatewayProbeAuth = {
token?: string;
password?: string;
};
export type GatewayProbeClose = {
code: number;
reason: string;
hint?: string;
};
export type GatewayProbeResult = {
ok: boolean;
url: string;
connectLatencyMs: number | null;
error: string | null;
close: GatewayProbeClose | null;
health: unknown;
status: unknown;
presence: SystemPresence[] | null;
configSnapshot: unknown;
};
export const MIN_PROBE_TIMEOUT_MS = 250;
export const MAX_TIMER_DELAY_MS = 2_147_483_647;
export function clampProbeTimeoutMs(timeoutMs: number): number {
return Math.min(MAX_TIMER_DELAY_MS, Math.max(MIN_PROBE_TIMEOUT_MS, timeoutMs));
}
export async function probeGateway(opts: {
url: string;
auth?: GatewayProbeAuth;
timeoutMs: number;
includeDetails?: boolean;
detailLevel?: "none" | "presence" | "full";
tlsFingerprint?: string;
}): Promise<GatewayProbeResult> {
const startedAt = Date.now();
const instanceId = randomUUID();
let connectLatencyMs: number | null = null;
let connectError: string | null = null;
let close: GatewayProbeClose | null = null;
const detailLevel = opts.includeDetails === false ? "none" : (opts.detailLevel ?? "full");
const deviceIdentity = await (async () => {
let hostname: string;
try {
hostname = new URL(opts.url).hostname;
} catch {
return null;
}
// Local authenticated probes should stay device-bound so read/detail RPCs
// are not scope-limited by the shared-auth scope stripping hardening.
if (isLoopbackHost(hostname) && !(opts.auth?.token || opts.auth?.password)) {
return null;
}
try {
const { loadOrCreateDeviceIdentity } = await import("../infra/device-identity.js");
return loadOrCreateDeviceIdentity();
} catch {
// Read-only or restricted environments should still be able to run
// token/password-auth detail probes without crashing on identity persistence.
return null;
}
})();
return await new Promise<GatewayProbeResult>((resolve) => {
let settled = false;
let timer: ReturnType<typeof setTimeout> | null = null;
const clearProbeTimer = () => {
if (timer) {
clearTimeout(timer);
timer = null;
}
};
const armProbeTimer = (onTimeout: () => void) => {
clearProbeTimer();
timer = setTimeout(onTimeout, clampProbeTimeoutMs(opts.timeoutMs));
};
const settle = (result: Omit<GatewayProbeResult, "url">) => {
if (settled) {
return;
}
settled = true;
clearProbeTimer();
client.stop();
resolve({ url: opts.url, ...result });
};
const client = new GatewayClient({
url: opts.url,
token: opts.auth?.token,
password: opts.auth?.password,
tlsFingerprint: opts.tlsFingerprint,
scopes: [READ_SCOPE],
clientName: GATEWAY_CLIENT_NAMES.CLI,
clientVersion: "dev",
mode: GATEWAY_CLIENT_MODES.PROBE,
instanceId,
deviceIdentity,
onConnectError: (err) => {
connectError = formatErrorMessage(err);
},
onClose: (code, reason) => {
close = { code, reason };
},
onHelloOk: async () => {
connectLatencyMs = Date.now() - startedAt;
if (detailLevel === "none") {
settle({
ok: true,
connectLatencyMs,
error: null,
close,
health: null,
status: null,
presence: null,
configSnapshot: null,
});
return;
}
// Once the gateway has accepted the session, a slow follow-up RPC should no longer
// downgrade the probe to "unreachable". Give detail fetching its own budget.
armProbeTimer(() => {
settle({
ok: false,
connectLatencyMs,
error: "timeout",
close,
health: null,
status: null,
presence: null,
configSnapshot: null,
});
});
try {
if (detailLevel === "presence") {
const presence = await client.request("system-presence");
settle({
ok: true,
connectLatencyMs,
error: null,
close,
health: null,
status: null,
presence: Array.isArray(presence) ? (presence as SystemPresence[]) : null,
configSnapshot: null,
});
return;
}
const [health, status, presence, configSnapshot] = await Promise.all([
client.request("health"),
client.request("status"),
client.request("system-presence"),
client.request("config.get", {}),
]);
settle({
ok: true,
connectLatencyMs,
error: null,
close,
health,
status,
presence: Array.isArray(presence) ? (presence as SystemPresence[]) : null,
configSnapshot,
});
} catch (err) {
settle({
ok: false,
connectLatencyMs,
error: formatErrorMessage(err),
close,
health: null,
status: null,
presence: null,
configSnapshot: null,
});
}
},
});
armProbeTimer(() => {
settle({
ok: false,
connectLatencyMs,
error: connectError ? `connect failed: ${connectError}` : "timeout",
close,
health: null,
status: null,
presence: null,
configSnapshot: null,
});
});
client.start();
});
}