fix(gateway): avoid probe false negatives after connect

This commit is contained in:
Vincent Koc 2026-03-23 08:49:31 -07:00
parent 1e5f38a1a8
commit 93df5f613e
2 changed files with 29 additions and 3 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
- Agents/Anthropic: preserve latest assistant thinking and redacted-thinking block ordering during transcript image sanitization so follow-up turns do not trip Anthropic's unmodified-thinking validation. (#52961) Thanks @vincentkoc.
- Gateway/supervision: stop lock conflicts from crash-looping under launchd and systemd by keeping the duplicate process in a retry wait instead of exiting as a failure while another healthy gateway still owns the lock. Fixes #52922. Thanks @vincentkoc.
- Browser/Chrome MCP: wait for existing-session browser tabs to become usable after attach instead of treating the initial Chrome MCP handshake as ready, which reduces user-profile timeouts and repeated consent churn on macOS Chrome attach flows. Fixes #52930. Thanks @vincentkoc.
- Gateway/probe: stop successful gateway handshakes from timing out as unreachable while post-connect detail RPCs are still loading, so slow devices report a reachable gateway with an RPC failure instead of a false-negative "dead gateway" result. Fixes #52927. Thanks @vincentkoc.
## 2026.3.22

View File

@@ -64,12 +64,23 @@ export async function probeGateway(opts: {
return await new Promise<GatewayProbeResult>((resolve) => {
let settled = false;
let timer: ReturnType<typeof setTimeout> | null = null;
const clearProbeTimer = () => {
if (timer) {
clearTimeout(timer);
timer = null;
}
};
const armProbeTimer = (onTimeout: () => void) => {
clearProbeTimer();
timer = setTimeout(onTimeout, clampProbeTimeoutMs(opts.timeoutMs));
};
const settle = (result: Omit<GatewayProbeResult, "url">) => {
if (settled) {
return;
}
settled = true;
clearTimeout(timer);
clearProbeTimer();
client.stop();
resolve({ url: opts.url, ...result });
};
@@ -105,6 +116,20 @@ export async function probeGateway(opts: {
});
return;
}
// Once the gateway has accepted the session, a slow follow-up RPC should no longer
// downgrade the probe to "unreachable". Give detail fetching its own budget.
armProbeTimer(() => {
settle({
ok: false,
connectLatencyMs,
error: "timeout",
close,
health: null,
status: null,
presence: null,
configSnapshot: null,
});
});
try {
if (detailLevel === "presence") {
const presence = await client.request("system-presence");
@@ -151,7 +176,7 @@ export async function probeGateway(opts: {
},
});
const timer = setTimeout(() => {
armProbeTimer(() => {
settle({
ok: false,
connectLatencyMs,
@@ -162,7 +187,7 @@ export async function probeGateway(opts: {
presence: null,
configSnapshot: null,
});
}, clampProbeTimeoutMs(opts.timeoutMs));
});
client.start();
});