fix(gateway): harden supervised lock and browser attach readiness

This commit is contained in:
Vincent Koc 2026-03-23 08:41:17 -07:00
parent b84a130788
commit beadd4c553
4 changed files with 51 additions and 1 deletions

View File

@ -13,6 +13,8 @@ Docs: https://docs.openclaw.ai
- Release/install: Keep previously released bundled plugins and Control UI assets in published openclaw npm installs, and fail release checks when those shipped artifacts are missing. Thanks @vincentkoc.
- Doctor/WhatsApp: stop auto-enable from appending built-in channel ids like `whatsapp` to `plugins.allow`, so `openclaw doctor --fix` no longer writes schema-invalid plugin allowlist entries when repairing built-in channels. Fixes #52931. Thanks @vincentkoc.
- Agents/Anthropic: preserve latest assistant thinking and redacted-thinking block ordering during transcript image sanitization so follow-up turns do not trip Anthropic's unmodified-thinking validation. (#52961) Thanks @vincentkoc.
- Gateway/supervision: stop gateway lock conflicts from causing crash loops under launchd and systemd by keeping the duplicate process in a retry wait, instead of exiting as a failure, while another healthy gateway still owns the lock. Fixes #52922. Thanks @vincentkoc.
- Browser/Chrome MCP: wait for existing-session browser tabs to become usable after attach instead of treating the initial Chrome MCP handshake as ready, which reduces user-profile timeouts and repeated consent churn on macOS Chrome attach flows. Fixes #52930. Thanks @vincentkoc.
## 2026.3.22

View File

@ -17,6 +17,8 @@ export const PROFILE_WS_REACHABILITY_MIN_TIMEOUT_MS = 200;
export const PROFILE_WS_REACHABILITY_MAX_TIMEOUT_MS = 2000;
export const PROFILE_ATTACH_RETRY_TIMEOUT_MS = 1200;
export const PROFILE_POST_RESTART_WS_TIMEOUT_MS = 600;
// Total time budget (ms) for the post-attach readiness wait: how long Chrome MCP tab
// listing is retried after an existing-session attach before the profile is reported unavailable.
export const CHROME_MCP_ATTACH_READY_WINDOW_MS = 8000;
// Pause (ms) between consecutive tab-listing attempts during that readiness wait.
export const CHROME_MCP_ATTACH_READY_POLL_MS = 200;
function normalizeTimeoutMs(value: number | undefined): number | undefined {
if (typeof value !== "number" || !Number.isFinite(value)) {

View File

@ -1,5 +1,7 @@
import fs from "node:fs";
import {
CHROME_MCP_ATTACH_READY_POLL_MS,
CHROME_MCP_ATTACH_READY_WINDOW_MS,
PROFILE_ATTACH_RETRY_TIMEOUT_MS,
PROFILE_POST_RESTART_WS_TIMEOUT_MS,
resolveCdpReachabilityTimeouts,
@ -151,6 +153,25 @@ export function createProfileAvailability({
);
};
/**
 * Waits until the attached Chrome MCP session can list tabs for this profile.
 *
 * Calls `listChromeMcpTabs` repeatedly, pausing up to
 * CHROME_MCP_ATTACH_READY_POLL_MS between attempts, until one call resolves
 * without throwing or CHROME_MCP_ATTACH_READY_WINDOW_MS has elapsed. Any
 * non-throwing result is treated as "ready"; the returned tabs are not inspected.
 *
 * @throws BrowserProfileUnavailableError when no attempt succeeds inside the
 *   window. The message carries the last failure when it is an Error or a string.
 */
const waitForChromeMcpReadyAfterAttach = async (): Promise<void> => {
  const deadlineMs = Date.now() + CHROME_MCP_ATTACH_READY_WINDOW_MS;
  let lastError: unknown;
  while (Date.now() < deadlineMs) {
    try {
      await listChromeMcpTabs(profile.name, profile.userDataDir);
      return;
    } catch (err) {
      // Keep only the most recent failure; it is surfaced in the timeout message.
      lastError = err;
    }
    // Clamp the pause to the time left so the wait never overshoots the window.
    const remainingMs = deadlineMs - Date.now();
    if (remainingMs <= 0) {
      break;
    }
    await new Promise<void>((resolve) =>
      setTimeout(resolve, Math.min(CHROME_MCP_ATTACH_READY_POLL_MS, remainingMs)),
    );
  }
  // Surface the last failure even when a plain string was thrown instead of an Error.
  let detail = "";
  if (lastError instanceof Error) {
    detail = ` Last error: ${lastError.message}`;
  } else if (typeof lastError === "string" && lastError.length > 0) {
    detail = ` Last error: ${lastError}`;
  }
  throw new BrowserProfileUnavailableError(
    `Chrome MCP existing-session attach for profile "${profile.name}" timed out waiting for tabs to become available.` +
      ` Approve the browser attach prompt, keep the browser open, and retry.${detail}`,
  );
};
const ensureBrowserAvailable = async (): Promise<void> => {
await reconcileProfileRuntime();
if (capabilities.usesChromeMcp) {
@ -160,6 +181,7 @@ export function createProfileAvailability({
);
}
await ensureChromeMcpAvailable(profile.name, profile.userDataDir);
await waitForChromeMcpReadyAfterAttach();
return;
}
const current = state();

View File

@ -19,6 +19,7 @@ import { setVerbose } from "../../globals.js";
import { GatewayLockError } from "../../infra/gateway-lock.js";
import { formatPortDiagnostics, inspectPortUsage } from "../../infra/ports.js";
import { cleanStaleGatewayProcessesSync } from "../../infra/restart-stale-pids.js";
import { detectRespawnSupervisor } from "../../infra/supervisor-markers.js";
import { setConsoleSubsystemFilter, setConsoleTimestampPrefix } from "../../logging/console.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { defaultRuntime } from "../../runtime.js";
@ -82,6 +83,8 @@ const GATEWAY_RUN_BOOLEAN_KEYS = [
"rawStream",
] as const;
// Delay (ms) between gateway startup attempts while a supervised process waits
// because another gateway is already running and holds the lock.
const SUPERVISED_GATEWAY_LOCK_RETRY_MS = 5000;
const GATEWAY_AUTH_MODES: readonly GatewayAuthMode[] = [
"none",
"token",
@ -418,7 +421,7 @@ async function runGatewayCommand(opts: GatewayRunOpts) {
}
: undefined;
try {
const startLoop = async () =>
await runGatewayLoop({
runtime: defaultRuntime,
lockPort: port,
@ -429,6 +432,27 @@ async function runGatewayCommand(opts: GatewayRunOpts) {
tailscale: tailscaleOverride,
}),
});
try {
// Detect whether this process was started by a respawning supervisor; a falsy
// result disables the retry behavior below.
const supervisor = detectRespawnSupervisor(process.env);
// Keep trying to start the gateway loop. There is no retry cap: the loop ends only
// when startLoop() returns or a non-retryable error is rethrown.
while (true) {
try {
await startLoop();
break;
} catch (err) {
// Retryable only when the failure is a GatewayLockError whose message
// contains "gateway already running".
const isGatewayAlreadyRunning =
err instanceof GatewayLockError &&
typeof err.message === "string" &&
err.message.includes("gateway already running");
// Unsupervised runs, and every other error, propagate to the outer catch.
if (!supervisor || !isGatewayAlreadyRunning) {
throw err;
}
gatewayLog.warn(
`gateway already running under ${supervisor}; waiting ${SUPERVISED_GATEWAY_LOCK_RETRY_MS}ms before retrying startup`,
);
// Wait instead of throwing, then loop around for another startup attempt.
await new Promise((resolve) => setTimeout(resolve, SUPERVISED_GATEWAY_LOCK_RETRY_MS));
}
}
} catch (err) {
if (
err instanceof GatewayLockError ||