From 7096819f2b4e2349d211d90c87bcb4be0e2d2917 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Wed, 1 Apr 2026 18:29:36 +0530 Subject: [PATCH] fix(acpx): retry queue-owner repair without resume-session (thanks @obviyus) --- CHANGELOG.md | 1 + extensions/acpx/src/runtime.test.ts | 48 +++++++++++++++++++ extensions/acpx/src/runtime.ts | 40 ++++++++++++---- .../acpx/src/test-utils/runtime-fixtures.ts | 14 ++++++ 4 files changed, 94 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f14d18534f9..2b7c873ff5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ Docs: https://docs.openclaw.ai - Agents/failover: unify structured and raw provider error classification so provider-specific `400`/`422` payloads no longer get forced into generic format failures before retry, billing, or compaction logic can inspect them. (#58856) Thanks @aaron-he-zhu. - Auth profiles/store: coerce misplaced SecretRef objects out of plaintext `key` and `token` fields during store load so agents without ACP runtime stop crashing on `.trim()` after upgrade. (#58923) Thanks @openperf. - ACPX/runtime: repair `queue owner unavailable` session recovery by replacing dead named sessions and resuming the backend session when ACPX exposes a stable session id, so the first ACP prompt no longer inherits a dead handle. (#58669) Thanks @neeravmakwana +- ACPX/runtime: retry dead-session queue-owner repair without `--resume-session` when the reported ACPX session id is stale, so recovery still creates a fresh named session instead of failing session init. Thanks @obviyus. ## 2026.3.31 diff --git a/extensions/acpx/src/runtime.test.ts b/extensions/acpx/src/runtime.test.ts index c647a8cfa95..9fe482f2c12 100644 --- a/extensions/acpx/src/runtime.test.ts +++ b/extensions/acpx/src/runtime.test.ts @@ -317,6 +317,54 @@ describe("AcpxRuntime", () => { }); }); + it("falls back to a fresh named session when queue owner resume repair uses a stale session id", async () => { + const previousResumeFailure = process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME; + const previousStatus = process.env.MOCK_ACPX_STATUS_STATUS; + const previousSummary = process.env.MOCK_ACPX_STATUS_SUMMARY; + process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME = "1"; + process.env.MOCK_ACPX_STATUS_STATUS = "dead"; + process.env.MOCK_ACPX_STATUS_SUMMARY = "queue owner unavailable"; + + try { + const { runtime, logPath } = await createMockRuntimeFixture(); + const handle = await runtime.ensureSession({ + sessionKey: "agent:codex:acp:dead-session-stale-resume", + agent: "codex", + mode: "persistent", + }); + + expect(handle.backend).toBe("acpx"); + + const logs = await readMockRuntimeLogEntries(logPath); + const newEntries = logs.filter((entry) => entry.kind === "new"); + expect(newEntries).toHaveLength(2); + const firstArgs = ((newEntries[0]?.args as string[]) ?? []).slice(); + const secondArgs = ((newEntries[1]?.args as string[]) ?? []).slice(); + const firstResumeFlagIndex = firstArgs.indexOf("--resume-session"); + expect(firstResumeFlagIndex).toBeGreaterThanOrEqual(0); + expect(firstArgs[firstResumeFlagIndex + 1]).toBe( + "sid-agent:codex:acp:dead-session-stale-resume", + ); + expect(secondArgs.indexOf("--resume-session")).toBe(-1); + } finally { + if (previousResumeFailure === undefined) { + delete process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME; + } else { + process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME = previousResumeFailure; + } + if (previousStatus === undefined) { + delete process.env.MOCK_ACPX_STATUS_STATUS; + } else { + process.env.MOCK_ACPX_STATUS_STATUS = previousStatus; + } + if (previousSummary === undefined) { + delete process.env.MOCK_ACPX_STATUS_SUMMARY; + } else { + process.env.MOCK_ACPX_STATUS_SUMMARY = previousSummary; + } + } + }); + it("creates a fresh named session after ensure failure when status indicates an unrecoverable failure", async () => { await expectSessionEnsureFallback({ sessionKey: "agent:codex:acp:ensure-fallback-dead-unrecoverable", diff --git a/extensions/acpx/src/runtime.ts b/extensions/acpx/src/runtime.ts index 53bf85eb1a9..bf00185ff3e 100644 --- a/extensions/acpx/src/runtime.ts +++ b/extensions/acpx/src/runtime.ts @@ -367,17 +367,39 @@ export class AcpxRuntime implements AcpRuntime { logContext: string; }): Promise { const resumeSessionId = resolveResumeSessionIdFromDetail(params.detail); + if (!resumeSessionId) { + this.logger?.warn?.( + `acpx ensureSession repairing dead named session with fresh session owner: session=${params.sessionName} cwd=${params.cwd} ${params.logContext}`, + ); + return await this.createNamedSession({ + agent: params.agent, + cwd: params.cwd, + sessionName: params.sessionName, + }); + } this.logger?.warn?.( - resumeSessionId - ? `acpx ensureSession repairing dead named session by resuming backend session: session=${params.sessionName} cwd=${params.cwd} resumeSessionId=${resumeSessionId} ${params.logContext}` - : `acpx ensureSession repairing dead named session with fresh session owner: session=${params.sessionName} cwd=${params.cwd} ${params.logContext}`, + `acpx ensureSession repairing dead named session by resuming backend session: session=${params.sessionName} cwd=${params.cwd} resumeSessionId=${resumeSessionId} ${params.logContext}`, ); - return await this.createNamedSession({ - agent: params.agent, - cwd: params.cwd, - sessionName: params.sessionName, - ...(resumeSessionId ? { resumeSessionId } : {}), - }); + try { + return await this.createNamedSession({ + agent: params.agent, + cwd: params.cwd, + sessionName: params.sessionName, + resumeSessionId, + }); + } catch (error) { + if (!(error instanceof AcpRuntimeError) || error.code !== "ACP_SESSION_INIT_FAILED") { + throw error; + } + this.logger?.warn?.( + `acpx ensureSession dead-session resume repair failed; retrying with fresh session owner: session=${params.sessionName} cwd=${params.cwd} resumeSessionId=${resumeSessionId} error=${summarizeLogText(error.message) || ""} ${params.logContext}`, + ); + return await this.createNamedSession({ + agent: params.agent, + cwd: params.cwd, + sessionName: params.sessionName, + }); + } } private async shouldReplaceEnsuredSession(params: { diff --git a/extensions/acpx/src/test-utils/runtime-fixtures.ts b/extensions/acpx/src/test-utils/runtime-fixtures.ts index 976047b5d77..8d5b66b08f1 100644 --- a/extensions/acpx/src/test-utils/runtime-fixtures.ts +++ b/extensions/acpx/src/test-utils/runtime-fixtures.ts @@ -115,6 +115,19 @@ if (command === "sessions" && args[commandIndex + 1] === "ensure") { if (command === "sessions" && args[commandIndex + 1] === "new") { writeLog({ kind: "new", agent, args, sessionName: ensureName }); + if (process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME === "1" && args.includes("--resume-session")) { + return emitJsonAndExit( + { + jsonrpc: "2.0", + id: null, + error: { + code: -32603, + message: "mock stale resume session", + }, + }, + 1, + ); + } if (process.env.MOCK_ACPX_NEW_EMPTY === "1") { emitJson({ action: "session_created", name: ensureName }); } else { @@ -426,6 +439,7 @@ export async function cleanupMockRuntimeFixtures(): Promise { delete process.env.MOCK_ACPX_ENSURE_ERROR_MESSAGE; delete process.env.MOCK_ACPX_ENSURE_EXIT_1; delete process.env.MOCK_ACPX_ENSURE_STDERR; + delete process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME; delete process.env.MOCK_ACPX_STATUS_STATUS; delete process.env.MOCK_ACPX_STATUS_NO_IDS; delete process.env.MOCK_ACPX_STATUS_SUMMARY;