ci: stabilize sharded channel lanes

This commit is contained in:
Peter Steinberger 2026-03-24 00:20:46 +00:00
parent e32148f1dd
commit 90fab48416
6 changed files with 145 additions and 22 deletions

View File

@ -312,6 +312,9 @@ jobs:
# Default heap limits have been too low on Linux CI (V8 OOM near 4GB).
echo "OPENCLAW_TEST_WORKERS=2" >> "$GITHUB_ENV"
echo "OPENCLAW_TEST_MAX_OLD_SPACE_SIZE_MB=6144" >> "$GITHUB_ENV"
if [ "${{ matrix.task }}" = "channels" ]; then
echo "OPENCLAW_TEST_ISOLATE=1" >> "$GITHUB_ENV"
fi
if [ -n "$SHARD_COUNT" ] && [ -n "$SHARD_INDEX" ]; then
echo "OPENCLAW_TEST_SHARDS=$SHARD_COUNT" >> "$GITHUB_ENV"
echo "OPENCLAW_TEST_SHARD_INDEX=$SHARD_INDEX" >> "$GITHUB_ENV"

View File

@ -1,7 +1,8 @@
import fsSync from "node:fs";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
// Mock getProcessStartTime so PID-recycling detection works on non-Linux
// (macOS, CI runners). isPidAlive is left unmocked.
@ -18,6 +19,7 @@ import {
__testing,
acquireSessionWriteLock,
cleanStaleLockFiles,
resetSessionWriteLockStateForTest,
resolveSessionLockMaxHoldFromTimeout,
} from "./session-write-lock.js";
@ -95,6 +97,11 @@ async function expectActiveInProcessLockIsNotReclaimed(params?: {
}
describe("acquireSessionWriteLock", () => {
afterEach(() => {
resetSessionWriteLockStateForTest();
vi.restoreAllMocks();
});
it("reuses locks across symlinked session paths", async () => {
if (process.platform === "win32") {
return;
@ -221,6 +228,20 @@ describe("acquireSessionWriteLock", () => {
}
});
it("closes file descriptors synchronously during process-exit cleanup", async () => {
await withTempSessionLockFile(async ({ sessionFile, lockPath }) => {
const closeSyncSpy = vi.spyOn(fsSync, "closeSync");
const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 });
__testing.releaseAllLocksSync();
expect(closeSyncSpy).toHaveBeenCalledTimes(1);
await expect(fs.access(lockPath)).rejects.toThrow();
await lock.release();
closeSyncSpy.mockRestore();
});
});
it("derives max hold from timeout plus grace", () => {
expect(resolveSessionLockMaxHoldFromTimeout({ timeoutMs: 600_000 })).toBe(720_000);
expect(resolveSessionLockMaxHoldFromTimeout({ timeoutMs: 1_000, minMs: 5_000 })).toBe(121_000);
@ -324,6 +345,9 @@ describe("acquireSessionWriteLock", () => {
});
it("reclaims lock files with recycled PIDs", async () => {
if (process.platform !== "linux") {
return;
}
await withTempSessionLockFile(async ({ sessionFile, lockPath }) => {
// Write a lock with a live PID (current process) but a wrong starttime,
// simulating PID recycling: the PID is alive but belongs to a different

View File

@ -63,6 +63,27 @@ type LockInspectionDetails = Pick<
"pid" | "pidAlive" | "createdAt" | "ageMs" | "stale" | "staleReasons"
>;
function markFileHandleClosedSync(handle: fs.FileHandle): void {
const mutableHandle = handle as unknown as Record<PropertyKey, unknown>;
for (const key of Reflect.ownKeys(handle)) {
if (typeof key !== "symbol") {
continue;
}
const name = String(key);
if (name === "Symbol(kFd)") {
mutableHandle[key] = -1;
continue;
}
if (name === "Symbol(kRefs)") {
mutableHandle[key] = 0;
continue;
}
if (name === "Symbol(kClosePromise)") {
mutableHandle[key] = undefined;
}
}
}
const HELD_LOCKS = resolveProcessScopedMap<HeldLock>(HELD_LOCKS_KEY);
function resolveCleanupState(): CleanupState {
@ -178,8 +199,9 @@ async function releaseHeldLock(
function releaseAllLocksSync(): void {
for (const [sessionFile, held] of HELD_LOCKS) {
try {
if (typeof held.handle.close === "function") {
void held.handle.close().catch(() => {});
if (typeof held.handle.fd === "number" && held.handle.fd >= 0) {
fsSync.closeSync(held.handle.fd);
markFileHandleClosedSync(held.handle);
}
} catch {
// Ignore errors during cleanup - best effort

View File

@ -1,5 +1,4 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import { resolveBrowserExecutableForPlatform } from "./chrome.executables.js";
import { beforeEach, describe, expect, it, vi } from "vitest";
vi.mock("node:child_process", () => ({
execFileSync: vi.fn(),
@ -13,8 +12,21 @@ vi.mock("node:fs", () => {
default: { existsSync, readFileSync },
};
});
vi.mock("node:os", () => {
const homedir = vi.fn();
return {
homedir,
default: { homedir },
};
});
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import os from "node:os";
async function loadResolveBrowserExecutableForPlatform() {
const mod = await import("./chrome.executables.js");
return mod.resolveBrowserExecutableForPlatform;
}
describe("browser default executable detection", () => {
const launchServicesPlist = "com.apple.launchservices.secure.plist";
@ -47,12 +59,15 @@ describe("browser default executable detection", () => {
}
beforeEach(() => {
vi.resetModules();
vi.clearAllMocks();
vi.mocked(os.homedir).mockReturnValue("/Users/test");
});
it("prefers default Chromium browser on macOS", () => {
it("prefers default Chromium browser on macOS", async () => {
mockMacDefaultBrowser("com.google.Chrome", "/Applications/Google Chrome.app");
mockChromeExecutableExists();
const resolveBrowserExecutableForPlatform = await loadResolveBrowserExecutableForPlatform();
const exe = resolveBrowserExecutableForPlatform(
{} as Parameters<typeof resolveBrowserExecutableForPlatform>[0],
@ -63,9 +78,10 @@ describe("browser default executable detection", () => {
expect(exe?.kind).toBe("chrome");
});
it("falls back when default browser is non-Chromium on macOS", () => {
it("falls back when default browser is non-Chromium on macOS", async () => {
mockMacDefaultBrowser("com.apple.Safari");
mockChromeExecutableExists();
const resolveBrowserExecutableForPlatform = await loadResolveBrowserExecutableForPlatform();
const exe = resolveBrowserExecutableForPlatform(
{} as Parameters<typeof resolveBrowserExecutableForPlatform>[0],

View File

@ -7,6 +7,10 @@ vi.hoisted(() => {
});
import "./server-context.chrome-test-harness.js";
import {
PROFILE_ATTACH_RETRY_TIMEOUT_MS,
PROFILE_HTTP_REACHABILITY_TIMEOUT_MS,
} from "./cdp-timeouts.js";
import * as chromeModule from "./chrome.js";
import type { RunningChrome } from "./chrome.js";
import type { BrowserServerState } from "./server-context.js";
@ -121,12 +125,22 @@ describe("browser server-context ensureBrowserAvailable", () => {
await expect(profile.ensureBrowserAvailable()).resolves.toBeUndefined();
expect(isChromeReachable).toHaveBeenNthCalledWith(1, "http://127.0.0.1:18800", undefined, {
allowPrivateNetwork: true,
});
expect(isChromeReachable).toHaveBeenNthCalledWith(2, "http://127.0.0.1:18800", 1000, {
allowPrivateNetwork: true,
});
expect(isChromeReachable).toHaveBeenNthCalledWith(
1,
"http://127.0.0.1:18800",
PROFILE_HTTP_REACHABILITY_TIMEOUT_MS,
{
allowPrivateNetwork: true,
},
);
expect(isChromeReachable).toHaveBeenNthCalledWith(
2,
"http://127.0.0.1:18800",
PROFILE_ATTACH_RETRY_TIMEOUT_MS,
{
allowPrivateNetwork: true,
},
);
expect(launchOpenClawChrome).not.toHaveBeenCalled();
expect(stopOpenClawChrome).not.toHaveBeenCalled();
});

View File

@ -1,3 +1,4 @@
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { isPidAlive } from "../shared/pid-alive.js";
@ -27,6 +28,56 @@ type HeldLock = {
const HELD_LOCKS_KEY = Symbol.for("openclaw.fileLockHeldLocks");
const HELD_LOCKS = resolveProcessScopedMap<HeldLock>(HELD_LOCKS_KEY);
const CLEANUP_REGISTERED_KEY = Symbol.for("openclaw.fileLockCleanupRegistered");
function markFileHandleClosedSync(handle: fs.FileHandle): void {
const mutableHandle = handle as unknown as Record<PropertyKey, unknown>;
for (const key of Reflect.ownKeys(handle)) {
if (typeof key !== "symbol") {
continue;
}
const name = String(key);
if (name === "Symbol(kFd)") {
mutableHandle[key] = -1;
continue;
}
if (name === "Symbol(kRefs)") {
mutableHandle[key] = 0;
continue;
}
if (name === "Symbol(kClosePromise)") {
mutableHandle[key] = undefined;
}
}
}
function releaseAllLocksSync(): void {
for (const [normalizedFile, held] of HELD_LOCKS) {
try {
if (typeof held.handle.fd === "number" && held.handle.fd >= 0) {
fsSync.closeSync(held.handle.fd);
markFileHandleClosedSync(held.handle);
}
} catch {
// Best-effort exit cleanup only.
}
try {
fsSync.rmSync(held.lockPath, { force: true });
} catch {
// Best-effort exit cleanup only.
}
HELD_LOCKS.delete(normalizedFile);
}
}
function ensureExitCleanupRegistered(): void {
const proc = process as NodeJS.Process & { [CLEANUP_REGISTERED_KEY]?: boolean };
if (proc[CLEANUP_REGISTERED_KEY]) {
return;
}
proc[CLEANUP_REGISTERED_KEY] = true;
process.on("exit", releaseAllLocksSync);
}
function computeDelayMs(retries: FileLockOptions["retries"], attempt: number): number {
const base = Math.min(
@ -101,15 +152,7 @@ async function releaseHeldLock(normalizedFile: string): Promise<void> {
}
export function resetFileLockStateForTest(): void {
for (const [normalizedFile, held] of HELD_LOCKS) {
try {
void held.handle.close().catch(() => undefined);
} catch {
// Best-effort test cleanup only.
}
void fs.rm(held.lockPath, { force: true }).catch(() => undefined);
HELD_LOCKS.delete(normalizedFile);
}
releaseAllLocksSync();
}
/** Acquire a re-entrant process-local file lock backed by a `.lock` sidecar file. */
@ -117,6 +160,7 @@ export async function acquireFileLock(
filePath: string,
options: FileLockOptions,
): Promise<FileLockHandle> {
ensureExitCleanupRegistered();
const normalizedFile = await resolveNormalizedFilePath(filePath);
const lockPath = `${normalizedFile}.lock`;
const held = HELD_LOCKS.get(normalizedFile);