mirror of https://github.com/openclaw/openclaw.git
fix: improve WS handshake reliability on slow-startup environments (#60075)
* fix: import CHANNEL_IDS from leaf module to avoid TDZ on init (#48832)

  schema.ts and validation.ts imported CHANNEL_IDS from channels/registry.js, which re-exports from channels/ids.js but also imports plugins/runtime.js. When the bundler resolves this dependency graph, the re-exported CHANNEL_IDS can be undefined at the point config/validation.ts evaluates (temporal dead zone), causing 'CHANNEL_IDS is not iterable' on startup.

  Fix: import CHANNEL_IDS directly from channels/ids.js (the leaf module with zero heavy dependencies) and normalizeChatChannelId from channels/chat-meta.js.

  Fixes #48832

* fix: improve WS handshake reliability on slow-startup environments (#48736)

  On Windows with large dist bundles (46MB/639 files), heavy synchronous module loading blocks the event loop during CLI startup, preventing timely processing of the connect.challenge frame and causing ~80% handshake timeout failures.

  Changes:
  - Yield the event loop (setImmediate) before starting the WS connection in callGateway to let pending I/O drain after heavy module loading
  - Add an OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS env var override for the client-side connect challenge timeout (the server already has OPENCLAW_HANDSHAKE_TIMEOUT_MS)
  - Include diagnostic timing in challenge timeout error messages (elapsed vs limit) for easier debugging
  - Add tests for the env var override and resolution logic

---------

Co-authored-by: Brad Groux <bradgroux@users.noreply.github.com>
This commit is contained in:
parent
0aa98a8e3b
commit
6e94b047e2
|
|
@ -800,6 +800,11 @@ async function executeGatewayRequestWithScopes<T>(params: {
|
|||
}): Promise<T> {
|
||||
const { opts, scopes, url, token, password, tlsFingerprint, timeoutMs, safeTimerTimeoutMs } =
|
||||
params;
|
||||
// Yield to the event loop before starting the WebSocket connection.
|
||||
// On Windows with large dist bundles, heavy synchronous module loading
|
||||
// can starve the event loop, preventing timely processing of the
|
||||
// connect.challenge frame and causing handshake timeouts (#48736).
|
||||
await new Promise<void>((r) => setImmediate(r));
|
||||
return await new Promise<T>((resolve, reject) => {
|
||||
let settled = false;
|
||||
let ignoreClose = false;
|
||||
|
|
|
|||
|
|
@ -727,12 +727,18 @@ export class GatewayClient {
|
|||
|
||||
/**
 * Arms the connect-challenge timer.
 *
 * Calls `clearConnectChallengeTimeout()` before storing the new timer in
 * `connectTimer`. If the timer fires while `connectSent` is still false and
 * the socket is OPEN, the failure is reported exactly once through
 * `opts.onConnectError` (with elapsed vs. configured time for diagnostics)
 * and the socket is closed with code 1008.
 */
private armConnectChallengeTimeout() {
  const connectChallengeTimeoutMs = resolveGatewayClientConnectChallengeTimeoutMs(this.opts);
  // Captured up front so the error message can report how long the wait actually lasted.
  const armedAt = Date.now();
  this.clearConnectChallengeTimeout();
  this.connectTimer = setTimeout(() => {
    // Nothing to report if connect was already sent or the socket is not open.
    if (this.connectSent || this.ws?.readyState !== WebSocket.OPEN) {
      return;
    }
    const elapsedMs = Date.now() - armedAt;
    // Single notification: the older message without timing details is superseded by this one.
    this.opts.onConnectError?.(
      new Error(
        `gateway connect challenge timeout (waited ${elapsedMs}ms, limit ${connectChallengeTimeoutMs}ms)`,
      ),
    );
    this.ws?.close(1008, "connect challenge timeout");
  }, connectChallengeTimeoutMs);
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { describe, expect, test } from "vitest";
|
|||
import {
|
||||
clampConnectChallengeTimeoutMs,
|
||||
DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS,
|
||||
getConnectChallengeTimeoutMsFromEnv,
|
||||
getPreauthHandshakeTimeoutMsFromEnv,
|
||||
MAX_CONNECT_CHALLENGE_TIMEOUT_MS,
|
||||
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
|
||||
|
|
@ -34,4 +35,30 @@ describe("gateway handshake timeouts", () => {
|
|||
}),
|
||||
).toBe(20);
|
||||
});
|
||||
|
||||
test("getConnectChallengeTimeoutMsFromEnv reads OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS", () => {
  // Variable absent: no override.
  expect(getConnectChallengeTimeoutMsFromEnv({})).toBeUndefined();
  // Well-formed positive number is returned as a number.
  expect(
    getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "15000" }),
  ).toBe(15_000);
  // Non-numeric text is ignored.
  expect(
    getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "garbage" }),
  ).toBeUndefined();
  // Empty, zero and negative values are ignored as well.
  expect(
    getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "" }),
  ).toBeUndefined();
  expect(
    getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "0" }),
  ).toBeUndefined();
  expect(
    getConnectChallengeTimeoutMsFromEnv({ OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS: "-1" }),
  ).toBeUndefined();
});
|
||||
|
||||
test("resolveConnectChallengeTimeoutMs falls back to env override", () => {
  // process.env is shared state: remember the prior value and restore it in `finally`.
  const original = process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
  try {
    process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS = "5000";
    expect(resolveConnectChallengeTimeoutMs()).toBe(5_000);
    // Explicit value still takes precedence over env
    expect(resolveConnectChallengeTimeoutMs(3_000)).toBe(3_000);
    // With neither an explicit value nor an env override, the default applies.
    delete process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
    expect(resolveConnectChallengeTimeoutMs()).toBe(DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS);
  } finally {
    if (original === undefined) {
      delete process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
    } else {
      process.env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS = original;
    }
  }
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,10 +9,28 @@ export function clampConnectChallengeTimeoutMs(timeoutMs: number): number {
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Reads the client-side connect-challenge timeout override from the environment.
 *
 * @param env - Environment map to read from; defaults to `process.env`.
 * @returns The value of `OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS` as a number when it
 *   is set to a finite, strictly positive number; otherwise `undefined`.
 */
export function getConnectChallengeTimeoutMsFromEnv(
  env: NodeJS.ProcessEnv = process.env,
): number | undefined {
  const raw = env.OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS;
  // Unset and empty-string values both mean "no override".
  if (raw) {
    const parsed = Number(raw);
    // Rejects NaN (unparseable text), ±Infinity, zero and negatives.
    if (Number.isFinite(parsed) && parsed > 0) {
      return parsed;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Resolves the connect-challenge timeout in milliseconds.
 *
 * Precedence:
 * 1. a finite explicit `timeoutMs`,
 * 2. the override returned by `getConnectChallengeTimeoutMsFromEnv()`,
 * 3. `DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS`.
 *
 * Explicit and env-provided values are passed through
 * `clampConnectChallengeTimeoutMs`; the default is returned as-is.
 *
 * @param timeoutMs - Caller-supplied timeout; `null`, `undefined` and
 *   non-finite numbers are treated as "not provided".
 * @returns The timeout to use, in milliseconds.
 */
export function resolveConnectChallengeTimeoutMs(timeoutMs?: number | null): number {
  if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
    return clampConnectChallengeTimeoutMs(timeoutMs);
  }
  // No usable explicit value: consult the environment before falling back to the default.
  const envOverride = getConnectChallengeTimeoutMsFromEnv();
  if (envOverride !== undefined) {
    return clampConnectChallengeTimeoutMs(envOverride);
  }
  return DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS;
}
|
||||
|
||||
export function getPreauthHandshakeTimeoutMsFromEnv(env: NodeJS.ProcessEnv = process.env): number {
|
||||
|
|
|
|||
Loading…
Reference in New Issue