From 90fab484167b50efa4f0db8f26a540d6f9728441 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 24 Mar 2026 00:20:46 +0000 Subject: [PATCH] ci: stabilize sharded channel lanes --- .github/workflows/ci.yml | 3 + src/agents/session-write-lock.test.ts | 26 +++++++- src/agents/session-write-lock.ts | 26 +++++++- src/browser/chrome.default-browser.test.ts | 24 +++++-- ...wser-available.waits-for-cdp-ready.test.ts | 26 ++++++-- src/plugin-sdk/file-lock.ts | 62 ++++++++++++++++--- 6 files changed, 145 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5788b2e3728..c00b329986b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -312,6 +312,9 @@ jobs: # Default heap limits have been too low on Linux CI (V8 OOM near 4GB). echo "OPENCLAW_TEST_WORKERS=2" >> "$GITHUB_ENV" echo "OPENCLAW_TEST_MAX_OLD_SPACE_SIZE_MB=6144" >> "$GITHUB_ENV" + if [ "${{ matrix.task }}" = "channels" ]; then + echo "OPENCLAW_TEST_ISOLATE=1" >> "$GITHUB_ENV" + fi if [ -n "$SHARD_COUNT" ] && [ -n "$SHARD_INDEX" ]; then echo "OPENCLAW_TEST_SHARDS=$SHARD_COUNT" >> "$GITHUB_ENV" echo "OPENCLAW_TEST_SHARD_INDEX=$SHARD_INDEX" >> "$GITHUB_ENV" diff --git a/src/agents/session-write-lock.test.ts b/src/agents/session-write-lock.test.ts index b08f8a404a7..1c4e3bc1fb9 100644 --- a/src/agents/session-write-lock.test.ts +++ b/src/agents/session-write-lock.test.ts @@ -1,7 +1,8 @@ +import fsSync from "node:fs"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { describe, expect, it, vi } from "vitest"; +import { afterEach, describe, expect, it, vi } from "vitest"; // Mock getProcessStartTime so PID-recycling detection works on non-Linux // (macOS, CI runners). isPidAlive is left unmocked. @@ -18,6 +19,7 @@ import { __testing, acquireSessionWriteLock, cleanStaleLockFiles, + resetSessionWriteLockStateForTest, resolveSessionLockMaxHoldFromTimeout, } from "./session-write-lock.js"; @@ -95,6 +97,11 @@ async function expectActiveInProcessLockIsNotReclaimed(params?: { } describe("acquireSessionWriteLock", () => { + afterEach(() => { + resetSessionWriteLockStateForTest(); + vi.restoreAllMocks(); + }); + it("reuses locks across symlinked session paths", async () => { if (process.platform === "win32") { return; @@ -221,6 +228,20 @@ describe("acquireSessionWriteLock", () => { } }); + it("closes file descriptors synchronously during process-exit cleanup", async () => { + await withTempSessionLockFile(async ({ sessionFile, lockPath }) => { + const closeSyncSpy = vi.spyOn(fsSync, "closeSync"); + const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + + __testing.releaseAllLocksSync(); + + expect(closeSyncSpy).toHaveBeenCalledTimes(1); + await expect(fs.access(lockPath)).rejects.toThrow(); + await lock.release(); + closeSyncSpy.mockRestore(); + }); + }); + it("derives max hold from timeout plus grace", () => { expect(resolveSessionLockMaxHoldFromTimeout({ timeoutMs: 600_000 })).toBe(720_000); expect(resolveSessionLockMaxHoldFromTimeout({ timeoutMs: 1_000, minMs: 5_000 })).toBe(121_000); @@ -324,6 +345,9 @@ describe("acquireSessionWriteLock", () => { }); it("reclaims lock files with recycled PIDs", async () => { + if (process.platform !== "linux") { + return; + } await withTempSessionLockFile(async ({ sessionFile, lockPath }) => { // Write a lock with a live PID (current process) but a wrong starttime, // simulating PID recycling: the PID is alive but belongs to a different diff --git a/src/agents/session-write-lock.ts b/src/agents/session-write-lock.ts index 262f9bb011e..f163598d1a9 100644 --- a/src/agents/session-write-lock.ts +++ b/src/agents/session-write-lock.ts @@ -63,6 +63,27 @@ type LockInspectionDetails = Pick< "pid" | "pidAlive" | "createdAt" | "ageMs" | "stale" | "staleReasons" >; +function markFileHandleClosedSync(handle: fs.FileHandle): void { + const mutableHandle = handle as unknown as Record; + for (const key of Reflect.ownKeys(handle)) { + if (typeof key !== "symbol") { + continue; + } + const name = String(key); + if (name === "Symbol(kFd)") { + mutableHandle[key] = -1; + continue; + } + if (name === "Symbol(kRefs)") { + mutableHandle[key] = 0; + continue; + } + if (name === "Symbol(kClosePromise)") { + mutableHandle[key] = undefined; + } + } +} + const HELD_LOCKS = resolveProcessScopedMap(HELD_LOCKS_KEY); function resolveCleanupState(): CleanupState { @@ -178,8 +199,9 @@ async function releaseHeldLock( function releaseAllLocksSync(): void { for (const [sessionFile, held] of HELD_LOCKS) { try { - if (typeof held.handle.close === "function") { - void held.handle.close().catch(() => {}); + if (typeof held.handle.fd === "number" && held.handle.fd >= 0) { + fsSync.closeSync(held.handle.fd); + markFileHandleClosedSync(held.handle); } } catch { // Ignore errors during cleanup - best effort diff --git a/src/browser/chrome.default-browser.test.ts b/src/browser/chrome.default-browser.test.ts index ccfdb2fc19f..fa40528e42a 100644 --- a/src/browser/chrome.default-browser.test.ts +++ b/src/browser/chrome.default-browser.test.ts @@ -1,5 +1,4 @@ -import { describe, expect, it, vi, beforeEach } from "vitest"; -import { resolveBrowserExecutableForPlatform } from "./chrome.executables.js"; +import { beforeEach, describe, expect, it, vi } from "vitest"; vi.mock("node:child_process", () => ({ execFileSync: vi.fn(), @@ -13,8 +12,21 @@ vi.mock("node:fs", () => { default: { existsSync, readFileSync }, }; }); +vi.mock("node:os", () => { + const homedir = vi.fn(); + return { + homedir, + default: { homedir }, + }; +}); import { execFileSync } from "node:child_process"; import * as fs from "node:fs"; +import os from "node:os"; + +async function loadResolveBrowserExecutableForPlatform() { + const mod = await import("./chrome.executables.js"); + return mod.resolveBrowserExecutableForPlatform; +} describe("browser default executable detection", () => { const launchServicesPlist = "com.apple.launchservices.secure.plist"; @@ -47,12 +59,15 @@ describe("browser default executable detection", () => { } beforeEach(() => { + vi.resetModules(); vi.clearAllMocks(); + vi.mocked(os.homedir).mockReturnValue("/Users/test"); }); - it("prefers default Chromium browser on macOS", () => { + it("prefers default Chromium browser on macOS", async () => { mockMacDefaultBrowser("com.google.Chrome", "/Applications/Google Chrome.app"); mockChromeExecutableExists(); + const resolveBrowserExecutableForPlatform = await loadResolveBrowserExecutableForPlatform(); const exe = resolveBrowserExecutableForPlatform( {} as Parameters[0], @@ -63,9 +78,10 @@ describe("browser default executable detection", () => { expect(exe?.kind).toBe("chrome"); }); - it("falls back when default browser is non-Chromium on macOS", () => { + it("falls back when default browser is non-Chromium on macOS", async () => { mockMacDefaultBrowser("com.apple.Safari"); mockChromeExecutableExists(); + const resolveBrowserExecutableForPlatform = await loadResolveBrowserExecutableForPlatform(); const exe = resolveBrowserExecutableForPlatform( {} as Parameters[0], diff --git a/src/browser/server-context.ensure-browser-available.waits-for-cdp-ready.test.ts b/src/browser/server-context.ensure-browser-available.waits-for-cdp-ready.test.ts index b1d3d745331..2589bfbd247 100644 --- a/src/browser/server-context.ensure-browser-available.waits-for-cdp-ready.test.ts +++ b/src/browser/server-context.ensure-browser-available.waits-for-cdp-ready.test.ts @@ -7,6 +7,10 @@ vi.hoisted(() => { }); import "./server-context.chrome-test-harness.js"; +import { + PROFILE_ATTACH_RETRY_TIMEOUT_MS, + PROFILE_HTTP_REACHABILITY_TIMEOUT_MS, +} from "./cdp-timeouts.js"; import * as chromeModule from "./chrome.js"; import type { RunningChrome } from "./chrome.js"; import type { BrowserServerState } from "./server-context.js"; @@ -121,12 +125,22 @@ describe("browser server-context ensureBrowserAvailable", () => { await expect(profile.ensureBrowserAvailable()).resolves.toBeUndefined(); - expect(isChromeReachable).toHaveBeenNthCalledWith(1, "http://127.0.0.1:18800", undefined, { - allowPrivateNetwork: true, - }); - expect(isChromeReachable).toHaveBeenNthCalledWith(2, "http://127.0.0.1:18800", 1000, { - allowPrivateNetwork: true, - }); + expect(isChromeReachable).toHaveBeenNthCalledWith( + 1, + "http://127.0.0.1:18800", + PROFILE_HTTP_REACHABILITY_TIMEOUT_MS, + { + allowPrivateNetwork: true, + }, + ); + expect(isChromeReachable).toHaveBeenNthCalledWith( + 2, + "http://127.0.0.1:18800", + PROFILE_ATTACH_RETRY_TIMEOUT_MS, + { + allowPrivateNetwork: true, + }, + ); expect(launchOpenClawChrome).not.toHaveBeenCalled(); expect(stopOpenClawChrome).not.toHaveBeenCalled(); }); diff --git a/src/plugin-sdk/file-lock.ts b/src/plugin-sdk/file-lock.ts index 6d4500b9915..32228a06971 100644 --- a/src/plugin-sdk/file-lock.ts +++ b/src/plugin-sdk/file-lock.ts @@ -1,3 +1,4 @@ +import fsSync from "node:fs"; import fs from "node:fs/promises"; import path from "node:path"; import { isPidAlive } from "../shared/pid-alive.js"; @@ -27,6 +28,56 @@ type HeldLock = { const HELD_LOCKS_KEY = Symbol.for("openclaw.fileLockHeldLocks"); const HELD_LOCKS = resolveProcessScopedMap(HELD_LOCKS_KEY); +const CLEANUP_REGISTERED_KEY = Symbol.for("openclaw.fileLockCleanupRegistered"); + +function markFileHandleClosedSync(handle: fs.FileHandle): void { + const mutableHandle = handle as unknown as Record; + for (const key of Reflect.ownKeys(handle)) { + if (typeof key !== "symbol") { + continue; + } + const name = String(key); + if (name === "Symbol(kFd)") { + mutableHandle[key] = -1; + continue; + } + if (name === "Symbol(kRefs)") { + mutableHandle[key] = 0; + continue; + } + if (name === "Symbol(kClosePromise)") { + mutableHandle[key] = undefined; + } + } +} + +function releaseAllLocksSync(): void { + for (const [normalizedFile, held] of HELD_LOCKS) { + try { + if (typeof held.handle.fd === "number" && held.handle.fd >= 0) { + fsSync.closeSync(held.handle.fd); + markFileHandleClosedSync(held.handle); + } + } catch { + // Best-effort exit cleanup only. + } + try { + fsSync.rmSync(held.lockPath, { force: true }); + } catch { + // Best-effort exit cleanup only. + } + HELD_LOCKS.delete(normalizedFile); + } +} + +function ensureExitCleanupRegistered(): void { + const proc = process as NodeJS.Process & { [CLEANUP_REGISTERED_KEY]?: boolean }; + if (proc[CLEANUP_REGISTERED_KEY]) { + return; + } + proc[CLEANUP_REGISTERED_KEY] = true; + process.on("exit", releaseAllLocksSync); +} function computeDelayMs(retries: FileLockOptions["retries"], attempt: number): number { const base = Math.min( @@ -101,15 +152,7 @@ async function releaseHeldLock(normalizedFile: string): Promise { } export function resetFileLockStateForTest(): void { - for (const [normalizedFile, held] of HELD_LOCKS) { - try { - void held.handle.close().catch(() => undefined); - } catch { - // Best-effort test cleanup only. - } - void fs.rm(held.lockPath, { force: true }).catch(() => undefined); - HELD_LOCKS.delete(normalizedFile); - } + releaseAllLocksSync(); } /** Acquire a re-entrant process-local file lock backed by a `.lock` sidecar file. */ @@ -117,6 +160,7 @@ export async function acquireFileLock( filePath: string, options: FileLockOptions, ): Promise { + ensureExitCleanupRegistered(); const normalizedFile = await resolveNormalizedFilePath(filePath); const lockPath = `${normalizedFile}.lock`; const held = HELD_LOCKS.get(normalizedFile);