diff --git a/CHANGELOG.md b/CHANGELOG.md index d3648b85171..db371508f02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai - Agents/Anthropic: preserve latest assistant thinking and redacted-thinking block ordering during transcript image sanitization so follow-up turns do not trip Anthropic's unmodified-thinking validation. (#52961) Thanks @vincentkoc. - Gateway/supervision: stop lock conflicts from crash-looping under launchd and systemd by keeping the duplicate process in a retry wait instead of exiting as a failure while another healthy gateway still owns the lock. Fixes #52922. Thanks @vincentkoc. - Browser/Chrome MCP: wait for existing-session browser tabs to become usable after attach instead of treating the initial Chrome MCP handshake as ready, which reduces user-profile timeouts and repeated consent churn on macOS Chrome attach flows. Fixes #52930. Thanks @vincentkoc. +- Gateway/probe: stop successful gateway handshakes from timing out as unreachable while post-connect detail RPCs are still loading, so slow devices report a reachable gateway whose detail RPC failed instead of a false-negative dead gateway. Fixes #52927. Thanks @vincentkoc. 
## 2026.3.22 diff --git a/src/gateway/probe.ts b/src/gateway/probe.ts index b285c395c3d..f09ad3c997f 100644 --- a/src/gateway/probe.ts +++ b/src/gateway/probe.ts @@ -64,12 +64,23 @@ export async function probeGateway(opts: { return await new Promise((resolve) => { let settled = false; + let timer: ReturnType<typeof setTimeout> | null = null; + const clearProbeTimer = () => { + if (timer) { + clearTimeout(timer); + timer = null; + } + }; + const armProbeTimer = (onTimeout: () => void) => { + clearProbeTimer(); + timer = setTimeout(onTimeout, clampProbeTimeoutMs(opts.timeoutMs)); + }; const settle = (result: Omit) => { if (settled) { return; } settled = true; - clearTimeout(timer); + clearProbeTimer(); client.stop(); resolve({ url: opts.url, ...result }); }; @@ -105,6 +116,20 @@ export async function probeGateway(opts: { }); return; } + // Once the gateway has accepted the session, a slow follow-up RPC should no longer + // downgrade the probe to "unreachable". Give detail fetching its own budget. + armProbeTimer(() => { + settle({ + ok: false, + connectLatencyMs, + error: "timeout", + close, + health: null, + status: null, + presence: null, + configSnapshot: null, + }); + }); try { if (detailLevel === "presence") { const presence = await client.request("system-presence"); @@ -151,7 +176,7 @@ export async function probeGateway(opts: { }, }); - const timer = setTimeout(() => { + armProbeTimer(() => { settle({ ok: false, connectLatencyMs, @@ -162,7 +187,7 @@ export async function probeGateway(opts: { presence: null, configSnapshot: null, }); - }, clampProbeTimeoutMs(opts.timeoutMs)); + }); client.start(); });