refactor: share stale pid polling fixtures

2026-03-13 20:36:47 +00:00 · 2026-03-13 20:36:47 +00:00 · 8c21284c1c
parent bf631b5872
commit 8c21284c1c
1 changed files with 85 additions and 181 deletions
--- a/src/infra/restart-stale-pids.test.ts
+++ b/src/infra/restart-stale-pids.test.ts
@ -42,6 +42,51 @@ function lsofOutput(entries: Array<{ pid: number; cmd: string }>): string {
  return entries.map(({ pid, cmd }) => `p${pid}\nc${cmd}`).join("\n") + "\n";
 }

+type MockLsofResult = {
+  error: Error | null;
+  status: number | null;
+  stdout: string;
+  stderr: string;
+};
+
+function createLsofResult(overrides: Partial<MockLsofResult> = {}): MockLsofResult {
+  return {
+    error: null,
+    status: 0,
+    stdout: "",
+    stderr: "",
+    ...overrides,
+  };
+}
+
+function createOpenClawBusyResult(pid: number, overrides: Partial<MockLsofResult> = {}) {
+  return createLsofResult({
+    stdout: lsofOutput([{ pid, cmd: "openclaw-gateway" }]),
+    ...overrides,
+  });
+}
+
+function createErrnoResult(code: string, message: string) {
+  const error = new Error(message) as NodeJS.ErrnoException;
+  error.code = code;
+  return createLsofResult({ error, status: null });
+}
+
+function installInitialBusyPoll(
+  stalePid: number,
+  resolvePoll: (call: number) => MockLsofResult,
+): () => number {
+  let call = 0;
+  mockSpawnSync.mockImplementation(() => {
+    call += 1;
+    if (call === 1) {
+      return createOpenClawBusyResult(stalePid);
+    }
+    return resolvePoll(call);
+  });
+  return () => call;
+}
+
 describe.skipIf(isWindows)("restart-stale-pids", () => {
  beforeEach(() => {
    mockSpawnSync.mockReset();
@ -201,20 +246,7 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // lsof exits with status 1 when no matching processes are found — this is
      // the canonical "port is free" signal, not an error.
      const stalePid = process.pid + 500;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        // Poll returns status 1 — no listeners
-        return { error: null, status: 1, stdout: "", stderr: "" };
-      });
+      installInitialBusyPoll(stalePid, () => createLsofResult({ status: 1 }));
      vi.spyOn(process, "kill").mockReturnValue(true);
      // Should complete cleanly (port reported free on status 1)
      expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
@ -225,27 +257,17 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // bad flag, runtime error) must not be mapped to free:true. They are
      // inconclusive and should keep the polling loop running until budget expires.
      const stalePid = process.pid + 501;
-      let call = 0;
      const events: string[] = [];
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, (call) => {
        if (call === 2) {
          // Permission/runtime error — status 2, should NOT be treated as free
          events.push("error-poll");
-          return { error: null, status: 2, stdout: "", stderr: "lsof: permission denied" };
+          return createLsofResult({ status: 2, stderr: "lsof: permission denied" });
        }
        // Eventually port is free
        events.push("free-poll");
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
      });
      vi.spyOn(process, "kill").mockReturnValue(true);
      cleanStaleGatewayProcessesSync();
@ -263,29 +285,13 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // The fix: pollPortOnce now parses res.stdout directly from the first
      // spawnSync call. Exactly ONE lsof invocation per poll cycle.
      const stalePid = process.pid + 400;
-      let spawnCount = 0;
-      mockSpawnSync.mockImplementation(() => {
-        spawnCount++;
-        if (spawnCount === 1) {
-          // Initial findGatewayPidsOnPortSync — returns stale pid
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        if (spawnCount === 2) {
+      const getCallCount = installInitialBusyPoll(stalePid, (call) => {
+        if (call === 2) {
          // First waitForPortFreeSync poll — status 0, port busy (should parse inline, not spawn again)
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
+          return createOpenClawBusyResult(stalePid);
        }
        // Port free on third call
-        return { error: null, status: 0, stdout: "", stderr: "" };
+        return createLsofResult();
      });

      vi.spyOn(process, "kill").mockReturnValue(true);
@ -294,7 +300,7 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // If pollPortOnce made a second lsof call internally, spawnCount would
      // be at least 4 (initial + 2 polls each doubled). With the fix, each poll
      // is exactly one spawn: initial(1) + busy-poll(1) + free-poll(1) = 3.
-      expect(spawnCount).toBe(3);
+      expect(getCallCount()).toBe(3);
    });

    it("lsof status 1 with non-empty openclaw stdout is treated as busy, not free (Linux container edge case)", () => {
@ -302,34 +308,21 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // lsof can exit 1 AND still emit output for processes it could read.
      // status 1 + non-empty openclaw stdout must not be treated as port-free.
      const stalePid = process.pid + 601;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          // Initial scan: finds stale pid
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      const getCallCount = installInitialBusyPoll(stalePid, (call) => {
        if (call === 2) {
          // status 1 + openclaw pid in stdout — container-restricted lsof reports partial results
-          return {
-            error: null,
+          return createOpenClawBusyResult(stalePid, {
            status: 1,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
            stderr: "lsof: WARNING: can't stat() fuse",
-          };
+          });
        }
        // Third poll: port is genuinely free
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
      });
      vi.spyOn(process, "kill").mockReturnValue(true);
      cleanStaleGatewayProcessesSync();
      // Poll 2 returned busy (not free), so we must have polled at least 3 times
-      expect(call).toBeGreaterThanOrEqual(3);
+      expect(getCallCount()).toBeGreaterThanOrEqual(3);
    });

    it("pollPortOnce outer catch returns { free: null, permanent: false } when resolveLsofCommandSync throws", () => {
@ -382,20 +375,7 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {

    it("sends SIGTERM to stale pids and returns them", () => {
      const stalePid = process.pid + 100;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        // waitForPortFreeSync polls: port free immediately
-        return { error: null, status: 0, stdout: "", stderr: "" };
-      });
+      installInitialBusyPoll(stalePid, () => createLsofResult());

      const killSpy = vi.spyOn(process, "kill").mockReturnValue(true);
      const result = cleanStaleGatewayProcessesSync();
@ -474,24 +454,11 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // immediately on ENOENT rather than spinning the full 2-second budget.
      const stalePid = process.pid + 300;
      const events: string[] = [];
-      let call = 0;
-
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, (call) => {
        // Permanent ENOENT — lsof is not installed
        events.push(`enoent-poll-${call}`);
-        const err = new Error("lsof not found") as NodeJS.ErrnoException;
-        err.code = "ENOENT";
-        return { error: err, status: null, stdout: "", stderr: "" };
+        return createErrnoResult("ENOENT", "lsof not found");
      });

      vi.spyOn(process, "kill").mockReturnValue(true);
@ -506,50 +473,26 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // EPERM occurs when lsof exists but a MAC policy (SELinux/AppArmor) blocks
      // execution. Like ENOENT/EACCES, this is permanent — retrying is pointless.
      const stalePid = process.pid + 305;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        const err = new Error("lsof eperm") as NodeJS.ErrnoException;
-        err.code = "EPERM";
-        return { error: err, status: null, stdout: "", stderr: "" };
-      });
+      const getCallCount = installInitialBusyPoll(stalePid, () =>
+        createErrnoResult("EPERM", "lsof eperm"),
+      );
      vi.spyOn(process, "kill").mockReturnValue(true);
      expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
      // Must bail after exactly 1 EPERM poll — same as ENOENT/EACCES
-      expect(call).toBe(2); // 1 initial find + 1 EPERM poll
+      expect(getCallCount()).toBe(2); // 1 initial find + 1 EPERM poll
    });

    it("bails immediately when lsof is permanently unavailable (EACCES) — same as ENOENT", () => {
      // EACCES and EPERM are also permanent conditions — lsof exists but the
      // process has no permission to run it. No point retrying.
      const stalePid = process.pid + 302;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
-        const err = new Error("lsof permission denied") as NodeJS.ErrnoException;
-        err.code = "EACCES";
-        return { error: err, status: null, stdout: "", stderr: "" };
-      });
+      const getCallCount = installInitialBusyPoll(stalePid, () =>
+        createErrnoResult("EACCES", "lsof permission denied"),
+      );
      vi.spyOn(process, "kill").mockReturnValue(true);
      expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
      // Should have bailed after exactly 1 poll call (the EACCES one)
-      expect(call).toBe(2); // 1 initial find + 1 EACCES poll
+      expect(getCallCount()).toBe(2); // 1 initial find + 1 EACCES poll
    });

    it("proceeds with warning when polling budget is exhausted — fake clock, no real 2s wait", () => {
@ -561,15 +504,10 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      let fakeNow = 0;
      __testing.setDateNowOverride(() => fakeNow);

-      mockSpawnSync.mockImplementation(() => {
+      installInitialBusyPoll(stalePid, () => {
        // Advance clock by PORT_FREE_TIMEOUT_MS + 1ms on first poll to trip the deadline.
        fakeNow += 2001;
-        return {
-          error: null,
-          status: 0,
-          stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-          stderr: "",
-        };
+        return createOpenClawBusyResult(stalePid);
      });

      vi.spyOn(process, "kill").mockReturnValue(true);
@ -585,24 +523,13 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // leaving its socket in TIME_WAIT / FIN_WAIT. Skipping the poll would
      // silently recreate the EADDRINUSE race we are fixing.
      const stalePid = process.pid + 304;
-      let call = 0;
      const events: string[] = [];

-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          // Initial scan: finds stale pid
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, () => {
        // Port is already free on first poll — pid was dead before SIGTERM
        events.push("poll-free");
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
      });

      // All SIGTERMs throw ESRCH — pid already gone
@ -623,27 +550,16 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // would recreate the EADDRINUSE race this PR is designed to prevent.
      const stalePid = process.pid + 301;
      const events: string[] = [];
-      let call = 0;
-
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          events.push("initial-find");
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      events.push("initial-find");
+      installInitialBusyPoll(stalePid, (call) => {
        if (call === 2) {
          // Transient: spawnSync timeout (no ENOENT code)
          events.push("transient-error");
-          return { error: new Error("timeout"), status: null, stdout: "", stderr: "" };
+          return createLsofResult({ error: new Error("timeout"), status: null });
        }
        // Port free on the next poll
        events.push("port-free");
-        return { error: null, status: 1, stdout: "", stderr: "" };
+        return createLsofResult({ status: 1 });
      });

      vi.spyOn(process, "kill").mockReturnValue(true);
@ -739,30 +655,18 @@ describe.skipIf(isWindows)("restart-stale-pids", () => {
      // the port may be held by an unrelated process. From our perspective
      // (we only kill openclaw pids) it is effectively free.
      const stalePid = process.pid + 800;
-      let call = 0;
-      mockSpawnSync.mockImplementation(() => {
-        call++;
-        if (call === 1) {
-          return {
-            error: null,
-            status: 0,
-            stdout: lsofOutput([{ pid: stalePid, cmd: "openclaw-gateway" }]),
-            stderr: "",
-          };
-        }
+      const getCallCount = installInitialBusyPoll(stalePid, () => {
        // status 1 + non-openclaw output — should be treated as free:true for our purposes
-        return {
-          error: null,
+        return createLsofResult({
          status: 1,
          stdout: lsofOutput([{ pid: process.pid + 801, cmd: "caddy" }]),
-          stderr: "",
-        };
+        });
      });
      vi.spyOn(process, "kill").mockReturnValue(true);
      // Should complete cleanly — no openclaw pids in status-1 output → free
      expect(() => cleanStaleGatewayProcessesSync()).not.toThrow();
      // Completed in exactly 2 calls (initial find + 1 free poll)
-      expect(call).toBe(2);
+      expect(getCallCount()).toBe(2);
    });
  });