fix: recover unloaded macOS launch agents (#43766)

This commit is contained in:
Peter Steinberger 2026-04-05 09:01:05 +01:00
parent 07e7b7177f
commit fd968bfb2d
8 changed files with 276 additions and 15 deletions

View File

@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
- MS Teams: download inline DM images via Graph API and preserve channel reply threading in proactive fallback. (#52212, #55198)
- Agents/Claude CLI: persist explicit `openclaw agent --session-id` runs under a stable session key so follow-ups can reuse the stored CLI binding and resume the same underlying Claude session.
- Agents/CLI backends: invalidate stored CLI session reuse when local CLI login state or the selected auth profile credential changes, so relogin and token rotation stop resuming stale sessions.
- Gateway/macOS: recover installed-but-unloaded LaunchAgents during `openclaw gateway start` and `restart`, while still preferring live unmanaged gateways during restart recovery. (#43766) Thanks @HenryC-3.
- Auth/failover: persist selected fallback overrides before retrying, shorten `auth_permanent` lockouts, and refresh websocket/shared-auth sessions only when real auth changes occur so retries and secret rotations behave predictably. (#60404, #60323, #60387)
- Cron: replay interrupted recurring jobs on the first gateway restart instead of waiting for a second restart. (#60583) Thanks @joelnishanth.
- Plugins/media understanding: enable bundled Groq and Deepgram providers by default so configured transcription models work without extra plugin activation config. (#59982) Thanks @yxjsxy.

View File

@ -132,6 +132,28 @@ describe("runServiceStart config pre-flight (#35862)", () => {
expect(service.restart).not.toHaveBeenCalled();
});
// Config pre-flight must run before the not-loaded recovery hook: with an
// invalid config snapshot, neither the hook nor a service restart may fire.
it("aborts before not-loaded start recovery when config is invalid", async () => {
// A hook that would report a successful recovery if it were ever invoked.
const onNotLoaded = vi.fn(async () => ({
result: "started" as const,
loaded: true,
}));
// Simulate a config file that exists but fails validation.
setConfigSnapshot({
exists: true,
valid: false,
issues: [{ path: "agents.defaults.pdfModel", message: "Unrecognized key" }],
});
// NOTE(review): "__exit__:1" is presumably the error thrown by this suite's
// process-exit stub — confirm against the test setup outside this hunk.
await expect(
runServiceStart({
...createServiceRunArgs(),
onNotLoaded,
}),
).rejects.toThrow("__exit__:1");
expect(onNotLoaded).not.toHaveBeenCalled();
expect(service.restart).not.toHaveBeenCalled();
});
it("proceeds with start when config is valid", async () => {
setConfigSnapshot({ exists: true, valid: true });

View File

@ -218,6 +218,33 @@ describe("runServiceRestart token drift", () => {
expect(service.stop).not.toHaveBeenCalled();
});
// When the service is not loaded and the onNotLoaded hook handles the start,
// runServiceStart should emit the hook's result/message/loaded state as JSON
// and must not go on to restart the service.
it("emits started when a not-loaded start path repairs the service", async () => {
service.isLoaded.mockResolvedValue(false);
await runServiceStart({
serviceNoun: "Gateway",
service,
renderStartHints: () => [],
opts: { json: true },
// Hook reports that it repaired the service and that it is now loaded.
onNotLoaded: async () => ({
result: "started",
message:
"Gateway LaunchAgent was installed but not loaded; re-bootstrapped launchd service.",
loaded: true,
}),
});
// Read back the JSON payload emitted by the action.
const payload = readJsonLog<{
result?: string;
message?: string;
service?: { loaded?: boolean };
}>();
expect(payload.result).toBe("started");
expect(payload.message).toContain("re-bootstrapped");
// The service snapshot must reflect the hook's `loaded` flag, not the
// pre-recovery isLoaded() value.
expect(payload.service?.loaded).toBe(true);
expect(service.restart).not.toHaveBeenCalled();
});
it("runs restart health checks after an unmanaged restart signal", async () => {
const postRestartCheck = vi.fn(async () => {});
service.isLoaded.mockResolvedValue(false);
@ -242,6 +269,36 @@ describe("runServiceRestart token drift", () => {
expect(payload.message).toContain("unmanaged process");
});
// When a not-loaded restart is handled by the onNotLoaded hook with
// `loaded: true`, the post-restart check still runs once, service.restart is
// bypassed, and the emitted JSON reports the recovered loaded state.
it("emits loaded restart state when launchd repair handles a not-loaded restart", async () => {
const postRestartCheck = vi.fn(async () => {});
service.isLoaded.mockResolvedValue(false);
await runServiceRestart({
serviceNoun: "Gateway",
service,
renderStartHints: () => [],
opts: { json: true },
// Hook reports a successful restart via launchd re-bootstrap.
onNotLoaded: async () => ({
result: "restarted",
message:
"Gateway LaunchAgent was installed but not loaded; re-bootstrapped launchd service.",
loaded: true,
}),
postRestartCheck,
});
expect(postRestartCheck).toHaveBeenCalledTimes(1);
expect(service.restart).not.toHaveBeenCalled();
// Read back the JSON payload emitted by the action.
const payload = readJsonLog<{
result?: string;
message?: string;
service?: { loaded?: boolean };
}>();
expect(payload.result).toBe("restarted");
expect(payload.message).toContain("re-bootstrapped");
expect(payload.service?.loaded).toBe(true);
});
it("skips restart health checks when restart is only scheduled", async () => {
const postRestartCheck = vi.fn(async () => {});
service.restart.mockResolvedValue({ outcome: "scheduled" });

View File

@ -31,9 +31,10 @@ type RestartPostCheckContext = {
};
type NotLoadedActionResult = {
result: "stopped" | "restarted";
result: "started" | "stopped" | "restarted";
message?: string;
warnings?: string[];
loaded?: boolean;
};
type NotLoadedActionContext = {
@ -186,17 +187,17 @@ export async function runServiceStart(params: {
service: GatewayService;
renderStartHints: () => string[];
opts?: DaemonLifecycleOptions;
onNotLoaded?: (ctx: NotLoadedActionContext) => Promise<NotLoadedActionResult | null>;
}) {
const json = Boolean(params.opts?.json);
const { stdout, emit, fail } = createDaemonActionContext({ action: "start", json });
const loaded = await resolveServiceLoadedOrFail({
serviceNoun: params.serviceNoun,
service: params.service,
fail,
});
if (
(await resolveServiceLoadedOrFail({
serviceNoun: params.serviceNoun,
service: params.service,
fail,
})) === null
) {
if (loaded === null) {
return;
}
// Pre-flight config validation (#35862) — run for both loaded and not-loaded
@ -210,6 +211,28 @@ export async function runServiceStart(params: {
return;
}
}
if (!loaded) {
try {
const handled = await params.onNotLoaded?.({ json, stdout, fail });
if (handled) {
emit({
ok: true,
result: handled.result,
message: handled.message,
warnings: handled.warnings,
service: buildDaemonServiceSnapshot(params.service, handled.loaded ?? false),
});
if (!json && handled.message) {
defaultRuntime.log(handled.message);
}
return;
}
} catch (err) {
const hints = params.renderStartHints();
fail(`${params.serviceNoun} start failed: ${String(err)}`, hints);
return;
}
}
try {
const startResult = await startGatewayService(params.service, { env: process.env, stdout });
if (startResult.outcome === "missing-install") {
@ -332,6 +355,7 @@ export async function runServiceRestart(params: {
const { stdout, emit, fail } = createDaemonActionContext({ action: "restart", json });
const warnings: string[] = [];
let handledNotLoaded: NotLoadedActionResult | null = null;
let recoveredLoadedState: boolean | null = null;
const emitScheduledRestart = (
restartStatus: ReturnType<typeof describeGatewayServiceRestart>,
serviceLoaded: boolean,
@ -392,6 +416,7 @@ export async function runServiceRestart(params: {
if (handledNotLoaded.warnings?.length) {
warnings.push(...handledNotLoaded.warnings);
}
recoveredLoadedState = handledNotLoaded.loaded ?? null;
}
if (loaded && params.checkTokenDrift) {
@ -437,14 +462,14 @@ export async function runServiceRestart(params: {
}
let restartStatus = describeGatewayServiceRestart(params.serviceNoun, restartResult);
if (restartStatus.scheduled) {
return emitScheduledRestart(restartStatus, loaded);
return emitScheduledRestart(restartStatus, loaded || recoveredLoadedState === true);
}
if (params.postRestartCheck) {
const postRestartResult = await params.postRestartCheck({ json, stdout, warnings, fail });
if (postRestartResult) {
restartStatus = describeGatewayServiceRestart(params.serviceNoun, postRestartResult);
if (restartStatus.scheduled) {
return emitScheduledRestart(restartStatus, loaded);
return emitScheduledRestart(restartStatus, loaded || recoveredLoadedState === true);
}
}
}
@ -455,6 +480,8 @@ export async function runServiceRestart(params: {
} catch {
restarted = true;
}
} else if (recoveredLoadedState !== null) {
restarted = recoveredLoadedState;
}
emit({
ok: true,

View File

@ -27,6 +27,7 @@ const service = {
restart: vi.fn(),
};
const runServiceStart = vi.fn();
const runServiceRestart = vi.fn();
const runServiceStop = vi.fn();
const waitForGatewayHealthyListener = vi.fn();
@ -50,6 +51,8 @@ const probeGateway = vi.fn<
>();
const isRestartEnabled = vi.fn<(config?: { commands?: unknown }) => boolean>(() => true);
const loadConfig = vi.hoisted(() => vi.fn(() => ({})));
const launchAgentPlistExists = vi.hoisted(() => vi.fn());
const repairLaunchAgentBootstrap = vi.hoisted(() => vi.fn());
vi.mock("../../config/config.js", () => ({
loadConfig: () => loadConfig(),
@ -81,6 +84,12 @@ vi.mock("../../daemon/service.js", () => ({
resolveGatewayService: () => service,
}));
vi.mock("../../daemon/launchd.js", () => ({
launchAgentPlistExists: (env: Record<string, string | undefined>) => launchAgentPlistExists(env),
repairLaunchAgentBootstrap: (args: { env?: Record<string, string | undefined> }) =>
repairLaunchAgentBootstrap(args),
}));
vi.mock("./restart-health.js", () => ({
DEFAULT_RESTART_HEALTH_ATTEMPTS: 120,
DEFAULT_RESTART_HEALTH_DELAY_MS: 500,
@ -93,12 +102,13 @@ vi.mock("./restart-health.js", () => ({
vi.mock("./lifecycle-core.js", () => ({
runServiceRestart,
runServiceStart: vi.fn(),
runServiceStart,
runServiceStop,
runServiceUninstall: vi.fn(),
}));
describe("runDaemonRestart health checks", () => {
let runDaemonStart: (opts?: { json?: boolean }) => Promise<void>;
let runDaemonRestart: (opts?: { json?: boolean }) => Promise<boolean>;
let runDaemonStop: (opts?: { json?: boolean }) => Promise<void>;
let envSnapshot: ReturnType<typeof captureEnv>;
@ -127,7 +137,7 @@ describe("runDaemonRestart health checks", () => {
}
beforeAll(async () => {
({ runDaemonRestart, runDaemonStop } = await import("./lifecycle.js"));
({ runDaemonStart, runDaemonRestart, runDaemonStop } = await import("./lifecycle.js"));
});
beforeEach(() => {
@ -135,6 +145,7 @@ describe("runDaemonRestart health checks", () => {
delete process.env.OPENCLAW_CONTAINER_HINT;
service.readCommand.mockReset();
service.restart.mockReset();
runServiceStart.mockReset();
runServiceRestart.mockReset();
runServiceStop.mockReset();
waitForGatewayHealthyListener.mockReset();
@ -149,12 +160,17 @@ describe("runDaemonRestart health checks", () => {
probeGateway.mockReset();
isRestartEnabled.mockReset();
loadConfig.mockReset();
launchAgentPlistExists.mockReset();
repairLaunchAgentBootstrap.mockReset();
service.readCommand.mockResolvedValue({
programArguments: ["openclaw", "gateway", "--port", "18789"],
environment: {},
});
service.restart.mockResolvedValue({ outcome: "completed" });
runServiceStart.mockResolvedValue(undefined);
launchAgentPlistExists.mockResolvedValue(false);
repairLaunchAgentBootstrap.mockResolvedValue({ ok: true });
runServiceRestart.mockImplementation(async (params: RestartParams) => {
const fail = (message: string, hints?: string[]) => {
@ -175,6 +191,12 @@ describe("runDaemonRestart health checks", () => {
healthy: true,
portUsage: { port: 18789, status: "busy", listeners: [], hints: [] },
});
waitForGatewayHealthyRestart.mockResolvedValue({
healthy: true,
staleGatewayPids: [],
runtime: { status: "running" },
portUsage: { port: 18789, status: "busy", listeners: [], hints: [] },
});
probeGateway.mockResolvedValue({
ok: true,
configSnapshot: { commands: { restart: true } },
@ -189,6 +211,19 @@ describe("runDaemonRestart health checks", () => {
vi.restoreAllMocks();
});
// On darwin with a LaunchAgent plist present, the onNotLoaded hook that
// runDaemonStart hands to runServiceStart must call the launchd bootstrap
// repair with the current process environment.
it("re-bootstraps an installed LaunchAgent when start finds it not loaded", async () => {
// Force the macOS code path regardless of the host running the tests.
vi.spyOn(process, "platform", "get").mockReturnValue("darwin");
launchAgentPlistExists.mockResolvedValue(true);
// Stand in for lifecycle-core: just invoke the not-loaded hook, if provided.
runServiceStart.mockImplementation(async (params: { onNotLoaded?: () => Promise<unknown> }) => {
await params.onNotLoaded?.();
});
await runDaemonStart({ json: true });
expect(launchAgentPlistExists).toHaveBeenCalledWith(process.env);
expect(repairLaunchAgentBootstrap).toHaveBeenCalledWith({ env: process.env });
});
it("kills stale gateway pids and retries restart", async () => {
const unhealthy: RestartHealthSnapshot = {
healthy: false,
@ -307,6 +342,48 @@ describe("runDaemonRestart health checks", () => {
expect(service.restart).not.toHaveBeenCalled();
});
// Even with a LaunchAgent plist installed, a single verified gateway listener
// takes priority on restart: it is signalled with SIGUSR1 and the launchd
// bootstrap repair is never attempted.
it("prefers unmanaged restart over launchd repair when a gateway listener is present", async () => {
vi.spyOn(process, "platform", "get").mockReturnValue("darwin");
launchAgentPlistExists.mockResolvedValue(true);
// One live gateway process is listening on the port.
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200]);
mockUnmanagedRestart({ runPostRestartCheck: true });
await runDaemonRestart({ json: true });
expect(signalVerifiedGatewayPidSync).toHaveBeenCalledWith(4200, "SIGUSR1");
expect(repairLaunchAgentBootstrap).not.toHaveBeenCalled();
// The unmanaged path uses the listener health wait, not the
// service-restart health wait.
expect(waitForGatewayHealthyListener).toHaveBeenCalledTimes(1);
expect(waitForGatewayHealthyRestart).not.toHaveBeenCalled();
});
// With no unmanaged gateway listener to signal, a not-loaded restart on darwin
// falls back to re-bootstrapping the installed LaunchAgent and then runs the
// service-restart health wait.
it("re-bootstraps an installed LaunchAgent on restart when no unmanaged listener exists", async () => {
vi.spyOn(process, "platform", "get").mockReturnValue("darwin");
launchAgentPlistExists.mockResolvedValue(true);
// No live gateway process on the port.
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([]);
// Stand in for lifecycle-core: run the not-loaded hook, then the
// post-restart check with a `fail` that throws so failures surface.
runServiceRestart.mockImplementation(
async (params: RestartParams & { onNotLoaded?: () => Promise<unknown> }) => {
await params.onNotLoaded?.();
await params.postRestartCheck?.({
json: Boolean(params.opts?.json),
stdout: process.stdout,
warnings: [],
fail: (message: string) => {
throw new Error(message);
},
});
return true;
},
);
await runDaemonRestart({ json: true });
expect(repairLaunchAgentBootstrap).toHaveBeenCalledWith({ env: process.env });
expect(signalVerifiedGatewayPidSync).not.toHaveBeenCalled();
expect(waitForGatewayHealthyListener).not.toHaveBeenCalled();
expect(waitForGatewayHealthyRestart).toHaveBeenCalledTimes(1);
expect(service.restart).not.toHaveBeenCalled();
});
it("fails unmanaged restart when multiple gateway listeners are present", async () => {
findVerifiedGatewayListenerPidsOnPortSync.mockReturnValue([4200, 4300]);
mockUnmanagedRestart();

View File

@ -1,5 +1,6 @@
import { isRestartEnabled } from "../../config/commands.js";
import { readBestEffortConfig, resolveGatewayPort } from "../../config/config.js";
import { launchAgentPlistExists, repairLaunchAgentBootstrap } from "../../daemon/launchd.js";
import { resolveGatewayService } from "../../daemon/service.js";
import { probeGateway } from "../../gateway/probe.js";
import {
@ -130,6 +131,28 @@ async function restartGatewayWithoutServiceManager(port: number) {
};
}
/**
 * Best-effort recovery for a gateway LaunchAgent that is installed but not
 * loaded.
 *
 * Returns `null` when recovery does not apply or does not succeed: the
 * platform is not darwin, the plist is absent (or the lookup rejects), or the
 * bootstrap repair reports `ok: false` (or rejects). Rejections are swallowed
 * on purpose so the caller receives `null` instead of an exception.
 *
 * @param params.result - Outcome label to report when the repair succeeds.
 * @returns A result carrying `params.result`, `loaded: true`, and a fixed
 *   explanatory message, or `null` if nothing was repaired.
 */
async function repairLaunchAgentIfInstalled(params: { result: "started" | "restarted" }) {
  if (process.platform !== "darwin") {
    return null;
  }

  const env = process.env as Record<string, string | undefined>;

  // A failed plist lookup is treated the same as "not installed".
  const installed = await launchAgentPlistExists(env).catch(() => false);
  if (!installed) {
    return null;
  }

  // A rejected repair is folded into the ordinary "not ok" outcome.
  const outcome = await repairLaunchAgentBootstrap({ env }).catch(() => ({ ok: false }));

  return outcome.ok
    ? ({
        result: params.result,
        loaded: true,
        message:
          "Gateway LaunchAgent was installed but not loaded; re-bootstrapped launchd service.",
      } as const)
    : null;
}
export async function runDaemonUninstall(opts: DaemonLifecycleOptions = {}) {
return await runServiceUninstall({
serviceNoun: "Gateway",
@ -145,6 +168,10 @@ export async function runDaemonStart(opts: DaemonLifecycleOptions = {}) {
serviceNoun: "Gateway",
service: resolveGatewayService(),
renderStartHints: renderGatewayServiceStartHints,
onNotLoaded:
process.platform === "darwin"
? async () => await repairLaunchAgentIfInstalled({ result: "started" })
: undefined,
opts,
});
}
@ -187,8 +214,9 @@ export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promi
const handled = await restartGatewayWithoutServiceManager(restartPort);
if (handled) {
restartedWithoutServiceManager = true;
return handled;
}
return handled;
return await repairLaunchAgentIfInstalled({ result: "restarted" });
},
postRestartCheck: async ({ warnings, fail, stdout }) => {
if (restartedWithoutServiceManager) {

View File

@ -18,6 +18,7 @@ const state = vi.hoisted(() => ({
listOutput: "",
printOutput: "",
bootstrapError: "",
bootstrapCode: 1,
kickstartError: "",
kickstartFailuresRemaining: 0,
dirs: new Set<string>(),
@ -75,7 +76,7 @@ vi.mock("./exec-file.js", () => ({
return { stdout: state.printOutput, stderr: "", code: 0 };
}
if (call[0] === "bootstrap" && state.bootstrapError) {
return { stdout: "", stderr: state.bootstrapError, code: 1 };
return { stdout: "", stderr: state.bootstrapError, code: state.bootstrapCode };
}
if (call[0] === "kickstart" && state.kickstartError && state.kickstartFailuresRemaining > 0) {
state.kickstartFailuresRemaining -= 1;
@ -152,6 +153,7 @@ beforeEach(() => {
state.listOutput = "";
state.printOutput = "";
state.bootstrapError = "";
state.bootstrapCode = 1;
state.kickstartError = "";
state.kickstartFailuresRemaining = 0;
state.dirs.clear();
@ -255,6 +257,48 @@ describe("launchd bootstrap repair", () => {
expect(kickstartIndex).toBeGreaterThanOrEqual(0);
expect(bootstrapIndex).toBeLessThan(kickstartIndex);
});
// A non-zero `launchctl bootstrap` exit of 130 is classified as "already
// loaded": the repair continues to kickstart and reports success.
it("treats bootstrap exit 130 as success", async () => {
// The mocked launchctl only fails bootstrap when bootstrapError is non-empty;
// bootstrapCode selects the exit code it reports.
state.bootstrapError = "Service already loaded";
state.bootstrapCode = 130;
const env: Record<string, string | undefined> = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
const repair = await repairLaunchAgentBootstrap({ env });
expect(repair.ok).toBe(true);
// Kickstart must still run exactly once after the tolerated bootstrap failure.
expect(state.launchctlCalls.filter((call) => call[0] === "kickstart")).toHaveLength(1);
});
// With the default failing exit code (1), a bootstrap error whose text
// contains "already exists in domain" is still classified as "already loaded",
// so the repair proceeds to kickstart and succeeds.
it("treats 'already exists in domain' bootstrap failures as success", async () => {
state.bootstrapError =
"Could not bootstrap service: 5: Input/output error: already exists in domain for gui/501";
const env: Record<string, string | undefined> = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
const repair = await repairLaunchAgentBootstrap({ env });
expect(repair.ok).toBe(true);
// Kickstart must still run exactly once after the tolerated bootstrap failure.
expect(state.launchctlCalls.filter((call) => call[0] === "kickstart")).toHaveLength(1);
});
// Any other bootstrap failure (default exit code 1, unrelated stderr) remains a
// failure: the stderr text is surfaced as `detail` and kickstart is skipped.
it("keeps genuine bootstrap failures as failures", async () => {
state.bootstrapError = "Could not find specified service";
const env: Record<string, string | undefined> = {
HOME: "/Users/test",
OPENCLAW_PROFILE: "default",
};
const repair = await repairLaunchAgentBootstrap({ env });
expect(repair.ok).toBe(false);
expect(repair.detail).toContain("Could not find specified service");
// The repair returns before kickstart when bootstrap genuinely failed.
expect(state.launchctlCalls.some((call) => call[0] === "kickstart")).toBe(false);
});
});
describe("launchd install", () => {

View File

@ -325,7 +325,12 @@ export async function repairLaunchAgentBootstrap(args: {
await execLaunchctl(["enable", `${domain}/${label}`]);
const boot = await execLaunchctl(["bootstrap", domain, plistPath]);
if (boot.code !== 0) {
return { ok: false, detail: (boot.stderr || boot.stdout).trim() || undefined };
const detail = (boot.stderr || boot.stdout).trim();
const normalized = detail.toLowerCase();
const alreadyLoaded = boot.code === 130 || normalized.includes("already exists in domain");
if (!alreadyLoaded) {
return { ok: false, detail: detail || undefined };
}
}
const kick = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]);
if (kick.code !== 0) {