From 8c21284c1cbc151ab22b5f0aa724da21e5ea6a28 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Fri, 13 Mar 2026 20:36:47 +0000
Subject: [PATCH] refactor: share stale pid polling fixtures

---
 src/infra/restart-stale-pids.test.ts | 266 +++++++++------------------
 1 file changed, 85 insertions(+), 181 deletions(-)
diff --git a/src/infra/restart-stale-pids.test.ts b/src/infra/restart-stale-pids.test.ts
index f7bf0709d9f..b7589d26e15 100644
--- a/src/infra/restart-stale-pids.test.ts
+++ b/src/infra/restart-stale-pids.test.ts
@@ -42,6 +42,51 @@ function lsofOutput(entries: Array<{ pid: number; cmd: string }>): string {
   return entries.map(({ pid, cmd }) => `p${pid}\nc${cmd}`).join("\n") + "\n";
 }
 
+type MockLsofResult = {
+  error: Error | null;
+  status: number | null;
+  stdout: string;
+  stderr: string;
+};
+
+function createLsofResult(overrides: Partial<MockLsofResult> = {}): MockLsofResult {
+  return {
+    error: null,
+    status: 0,
+    stdout: "",
+    stderr: "",
+    ...overrides,
+  };
+}
+
+function createOpenClawBusyResult(pid: number, overrides: Partial<MockLsofResult> = {}) {
+  return createLsofResult({
+    stdout: lsofOutput([{ pid, cmd: "openclaw-gateway" }]),
+    ...overrides,
+  });
+}
+
+function createErrnoResult(code: string, message: string) {
+  const error = new Error(message) as NodeJS.ErrnoException;
+  error.code = code;
+  return createLsofResult({ error, status: null });
+}
+
+function installInitialBusyPoll(
+  stalePid: number,
+  resolvePoll: (call: number) => MockLsofResult,
+): () => number {
+  let call = 0;
+  mockSpawnSync.mockImplementation(() => {
+    call += 1;
+    if (call === 1) {
+      return createOpenClawBusyResult(stalePid);
+    }
+    return resolvePoll(call);
+  });
+  return () => call;
+}
+
 describe.skipIf(isWindows)("restart-stale-pids", () => {
   beforeEach(() => {
     mockSpawnSync.mockReset();
@@ -201,20 +246,7 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // lsof exits with status 1 when no matching processes are found — this is
       // the canonical "port is free" signal, not an error.
       const stalePid = process.pid + 500;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        // Poll returns status 1 — no listeners
-        return { error: null, status: 1, stdout: "", stderr: "" };
-      });
+      installInitialBusyPoll(stalePid, () => createLsofResult({ status: 1 }));
       vi.spyOn(process, "kill").mockReturnValue(true);
       // Should complete cleanly (port reported free on status 1)
       expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
@@ -225,27 +257,17 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // bad flag, runtime error) must not be mapped to free:true. They are
       // inconclusive and should keep the polling loop running until budget expires.
       const stalePid = process.pid + 501;
-      let call = 0;
       const events: string[] = [];
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, (call) => {
         if (call === 2) {
           // Permission/runtime error — status 2, should NOT be treated as free
           events.push("error-poll");
-          return { error: null, status: 2, stdout: "", stderr: "lsof: permission denied" };
+          return createLsofResult({ status: 2, stderr: "lsof: permission denied" });
         }
         // Eventually port is free
         events.push("free-poll");
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
       });
       vi.spyOn(process, "kill").mockReturnValue(true);
       cleanStaleGatewayProcessesSync();
@@ -263,29 +285,13 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // The fix: pollPortOnce now parses res.stdout directly from the first
       // spawnSync call. Exactly ONE lsof invocation per poll cycle.
       const stalePid = process.pid + 400;
-      let spawnCount = 0;
-      mockSpawnSync.mockImplementation(() => {
-        spawnCount++;
-        if (spawnCount === 1) {
-          // Initial findGatewayPidsOnPortSync — returns stale pid
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        if (spawnCount === 2) {
+      const getCallCount = installInitialBusyPoll(stalePid, (call) => {
+        if (call === 2) {
           // First waitForPortFreeSync poll — status 0, port busy (should parse inline, not spawn again)
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
+          return createOpenClawBusyResult(stalePid);
         }
         // Port free on third call
-        return { error: null, status: 0, stdout: "", stderr: "" };
+        return createLsofResult();
       });
 
       vi.spyOn(process, "kill").mockReturnValue(true);
@@ -294,7 +300,7 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // If pollPortOnce made a second lsof call internally, spawnCount would
       // be at least 4 (initial + 2 polls each doubled). With the fix, each poll
       // is exactly one spawn: initial(1) + busy-poll(1) + free-poll(1) = 3.
-      expect(spawnCount).toBe(3);
+      expect(getCallCount()).toBe(3);
     });
 
     it("lsof status 1 with non-empty openclaw stdout is treated as busy, not free (Linux container edge case)", () => {
@@ -302,34 +308,21 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // lsof can exit 1 AND still emit output for processes it could read.
       // status 1 + non-empty openclaw stdout must not be treated as port-free.
       const stalePid = process.pid + 601;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          // Initial scan: finds stale pid
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      const getCallCount = installInitialBusyPoll(stalePid, (call) => {
         if (call === 2) {
           // status 1 + openclaw pid in stdout — container-restricted lsof reports partial results
-          return {
-            error: null,
+          return createOpenClawBusyResult(stalePid, {
             status: 1,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
             stderr: "lsof: WARNING: can't stat() fuse",
-          };
+          });
         }
         // Third poll: port is genuinely free
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
       });
       vi.spyOn(process, "kill").mockReturnValue(true);
       cleanStaleGatewayProcessesSync();
       // Poll 2 returned busy (not free), so we must have polled at least 3 times
-      expect(call).toBeGreaterThanOrEqual(3);
+      expect(getCallCount()).toBeGreaterThanOrEqual(3);
     });
 
     it("pollPortOnce outer catch returns { free: null, permanent: false } when resolveLsofCommandSync throws", () => {
@@ -382,20 +375,7 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
 
     it("sends SIGTERM to stale pids and returns them", () => {
       const stalePid = process.pid + 100;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        // waitForPortFreeSync polls: port free immediately
-        return { error: null, status: 0, stdout: "", stderr: "" };
-      });
+      installInitialBusyPoll(stalePid, () => createLsofResult());
 
       const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
       const result = cleanStaleGatewayProcessesSync();
@@ -474,24 +454,11 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // immediately on ENOENT rather than spinning the full 2-second budget.
       const stalePid = process.pid + 300;
       const events: string[] = [];
-      let call = 0;
-
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, (call) => {
         // Permanent ENOENT — lsof is not installed
         events.push(`enoent-poll-${call}`);
-        const err = new Error("lsof not found") as NodeJS.ErrnoException;
-        err.code = "ENOENT";
-        return { error: err, status: null, stdout: "", stderr: "" };
+        return createErrnoResult("ENOENT", "lsof not found");
       });
 
       vi.spyOn(process, "kill").mockReturnValue(true);
@@ -506,50 +473,26 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // EPERM occurs when lsof exists but a MAC policy (SELinux/AppArmor) blocks
       // execution. Like ENOENT/EACCES, this is permanent — retrying is pointless.
       const stalePid = process.pid + 305;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        const err = new Error("lsof eperm") as NodeJS.ErrnoException;
-        err.code = "EPERM";
-        return { error: err, status: null, stdout: "", stderr: "" };
-      });
+      const getCallCount = installInitialBusyPoll(stalePid, () =>
+        createErrnoResult("EPERM", "lsof eperm"),
+      );
       vi.spyOn(process, "kill").mockReturnValue(true);
       expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
       // Must bail after exactly 1 EPERM poll — same as ENOENT/EACCES
-      expect(call).toBe(2); // 1 initial find + 1 EPERM poll
+      expect(getCallCount()).toBe(2); // 1 initial find + 1 EPERM poll
     });
 
     it("bails immediately when lsof is permanently unavailable (EACCES) — same as ENOENT", () => {
       // EACCES and EPERM are also permanent conditions — lsof exists but the
       // process has no permission to run it. No point retrying.
       const stalePid = process.pid + 302;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        const err = new Error("lsof permission denied") as NodeJS.ErrnoException;
-        err.code = "EACCES";
-        return { error: err, status: null, stdout: "", stderr: "" };
-      });
+      const getCallCount = installInitialBusyPoll(stalePid, () =>
+        createErrnoResult("EACCES", "lsof permission denied"),
+      );
       vi.spyOn(process, "kill").mockReturnValue(true);
       expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
       // Should have bailed after exactly 1 poll call (the EACCES one)
-      expect(call).toBe(2); // 1 initial find + 1 EACCES poll
+      expect(getCallCount()).toBe(2); // 1 initial find + 1 EACCES poll
     });
 
     it("proceeds with warning when polling budget is exhausted — fake clock, no real 2s wait", () => {
@@ -561,15 +504,10 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       let fakeNow = 0;
       __testing.setDateNowOverride(() => fakeNow);
 
-      mockSpawnSync.mockImplementation(() => {
+      installInitialBusyPoll(stalePid, () => {
         // Advance clock by PORT_FREE_TIMEOUT_MS + 1ms on first poll to trip the deadline.
         fakeNow += 2001;
-        return {
-          error: null,
-          status: 0,
-          stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-          stderr: "",
-        };
+        return createOpenClawBusyResult(stalePid);
       });
 
       vi.spyOn(process, "kill").mockReturnValue(true);
@@ -585,24 +523,13 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // leaving its socket in TIME_WAIT / FIN_WAIT. Skipping the poll would
       // silently recreate the EADDRINUSE race we are fixing.
       const stalePid = process.pid + 304;
-      let call = 0;
       const events: string[] = [];
 
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          // Initial scan: finds stale pid
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, () => {
         // Port is already free on first poll — pid was dead before SIGTERM
         events.push("poll-free");
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
       });
 
       // All SIGTERMs throw ESRCH — pid already gone
@@ -623,27 +550,16 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // would recreate the EADDRINUSE race this PR is designed to prevent.
       const stalePid = process.pid + 301;
       const events: string[] = [];
-      let call = 0;
-
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, (call) => {
         if (call === 2) {
           // Transient: spawnSync timeout (no ENOENT code)
           events.push("transient-error");
-          return { error: new Error("timeout"), status: null, stdout: "", stderr: "" };
+          return createLsofResult({ error: new Error("timeout"), status: null });
         }
         // Port free on the next poll
         events.push("port-free");
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
       });
 
       vi.spyOn(process, "kill").mockReturnValue(true);
@@ -739,30 +655,18 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
       // the port may be held by an unrelated process. From our perspective
       // (we only kill openclaw pids) it is effectively free.
       const stalePid = process.pid + 800;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      const getCallCount = installInitialBusyPoll(stalePid, () => {
         // status 1 + non-openclaw output — should be treated as free:true for our purposes
-        return {
-          error: null,
+        return createLsofResult({
           status: 1,
           stdout: lsofOutput([{ pid: process.pid + 801, cmd: "caddy" }]),
-          stderr: "",
-        };
+        });
       });
       vi.spyOn(process, "kill").mockReturnValue(true);
       // Should complete cleanly — no openclaw pids in status-1 output → free
       expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
       // Completed in exactly 2 calls (initial find + 1 free poll)
-      expect(call).toBe(2);
+      expect(getCallCount()).toBe(2);
     });
   });