fix(acpx): retry queue-owner repair without resume-session (thanks @obviyus)

This commit is contained in:
Ayaan Zaidi 2026-04-01 18:29:36 +05:30
parent fc745db76d
commit 7096819f2b
No known key found for this signature in database
4 changed files with 94 additions and 9 deletions

View File

@ -56,6 +56,7 @@ Docs: https://docs.openclaw.ai
- Agents/failover: unify structured and raw provider error classification so provider-specific `400`/`422` payloads no longer get forced into generic format failures before retry, billing, or compaction logic can inspect them. (#58856) Thanks @aaron-he-zhu.
- Auth profiles/store: coerce misplaced SecretRef objects out of plaintext `key` and `token` fields during store load so agents without ACP runtime stop crashing on `.trim()` after upgrade. (#58923) Thanks @openperf.
- ACPX/runtime: repair `queue owner unavailable` session recovery by replacing dead named sessions and resuming the backend session when ACPX exposes a stable session id, so the first ACP prompt no longer inherits a dead handle. (#58669) Thanks @neeravmakwana.
- ACPX/runtime: retry dead-session queue-owner repair without `--resume-session` when the reported ACPX session id is stale, so recovery still creates a fresh named session instead of failing session init. Thanks @obviyus.
## 2026.3.31

View File

@ -317,6 +317,54 @@ describe("AcpxRuntime", () => {
});
});
it("falls back to a fresh named session when queue owner resume repair uses a stale session id", async () => {
  // Mock CLI switches for this scenario: status reports a dead session whose
  // queue owner is unavailable, and `sessions new` fails whenever it is
  // invoked with `--resume-session`.
  const envOverrides = {
    MOCK_ACPX_NEW_FAIL_ON_RESUME: "1",
    MOCK_ACPX_STATUS_STATUS: "dead",
    MOCK_ACPX_STATUS_SUMMARY: "queue owner unavailable",
  };

  // Remember what each variable held before the test so it can be put back
  // exactly, including the "was not set at all" case.
  const savedEnv = new Map<string, string | undefined>();
  for (const [name, value] of Object.entries(envOverrides)) {
    savedEnv.set(name, process.env[name]);
    process.env[name] = value;
  }

  try {
    const { runtime, logPath } = await createMockRuntimeFixture();
    const handle = await runtime.ensureSession({
      sessionKey: "agent:codex:acp:dead-session-stale-resume",
      agent: "codex",
      mode: "persistent",
    });
    expect(handle.backend).toBe("acpx");

    // Two `new` invocations are expected: the resume attempt, then the retry.
    const logs = await readMockRuntimeLogEntries(logPath);
    const newEntries = logs.filter((entry) => entry.kind === "new");
    expect(newEntries).toHaveLength(2);

    const [resumeAttempt, freshAttempt] = newEntries;
    const firstArgs = [...((resumeAttempt?.args as string[]) ?? [])];
    const secondArgs = [...((freshAttempt?.args as string[]) ?? [])];

    // First attempt carries the resume flag followed by the reported session id.
    const firstResumeFlagIndex = firstArgs.indexOf("--resume-session");
    expect(firstResumeFlagIndex).toBeGreaterThanOrEqual(0);
    expect(firstArgs[firstResumeFlagIndex + 1]).toBe(
      "sid-agent:codex:acp:dead-session-stale-resume",
    );

    // Second attempt must not carry the resume flag at all.
    expect(secondArgs.indexOf("--resume-session")).toBe(-1);
  } finally {
    // Assigning `undefined` to process.env would store the string "undefined",
    // so variables that were originally unset are deleted instead.
    for (const [name, previous] of savedEnv) {
      if (previous === undefined) {
        delete process.env[name];
      } else {
        process.env[name] = previous;
      }
    }
  }
});
it("creates a fresh named session after ensure failure when status indicates an unrecoverable failure", async () => {
await expectSessionEnsureFallback({
sessionKey: "agent:codex:acp:ensure-fallback-dead-unrecoverable",

View File

@ -367,17 +367,39 @@ export class AcpxRuntime implements AcpRuntime {
logContext: string;
}): Promise<AcpxJsonObject[]> {
const resumeSessionId = resolveResumeSessionIdFromDetail(params.detail);
if (!resumeSessionId) {
this.logger?.warn?.(
`acpx ensureSession repairing dead named session with fresh session owner: session=${params.sessionName} cwd=${params.cwd} ${params.logContext}`,
);
return await this.createNamedSession({
agent: params.agent,
cwd: params.cwd,
sessionName: params.sessionName,
});
}
this.logger?.warn?.(
resumeSessionId
? `acpx ensureSession repairing dead named session by resuming backend session: session=${params.sessionName} cwd=${params.cwd} resumeSessionId=${resumeSessionId} ${params.logContext}`
: `acpx ensureSession repairing dead named session with fresh session owner: session=${params.sessionName} cwd=${params.cwd} ${params.logContext}`,
`acpx ensureSession repairing dead named session by resuming backend session: session=${params.sessionName} cwd=${params.cwd} resumeSessionId=${resumeSessionId} ${params.logContext}`,
);
return await this.createNamedSession({
agent: params.agent,
cwd: params.cwd,
sessionName: params.sessionName,
...(resumeSessionId ? { resumeSessionId } : {}),
});
try {
return await this.createNamedSession({
agent: params.agent,
cwd: params.cwd,
sessionName: params.sessionName,
resumeSessionId,
});
} catch (error) {
if (!(error instanceof AcpRuntimeError) || error.code !== "ACP_SESSION_INIT_FAILED") {
throw error;
}
this.logger?.warn?.(
`acpx ensureSession dead-session resume repair failed; retrying with fresh session owner: session=${params.sessionName} cwd=${params.cwd} resumeSessionId=${resumeSessionId} error=${summarizeLogText(error.message) || "<empty>"} ${params.logContext}`,
);
return await this.createNamedSession({
agent: params.agent,
cwd: params.cwd,
sessionName: params.sessionName,
});
}
}
private async shouldReplaceEnsuredSession(params: {

View File

@ -115,6 +115,19 @@ if (command === "sessions" && args[commandIndex + 1] === "ensure") {
if (command === "sessions" && args[commandIndex + 1] === "new") {
writeLog({ kind: "new", agent, args, sessionName: ensureName });
if (process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME === "1" && args.includes("--resume-session")) {
return emitJsonAndExit(
{
jsonrpc: "2.0",
id: null,
error: {
code: -32603,
message: "mock stale resume session",
},
},
1,
);
}
if (process.env.MOCK_ACPX_NEW_EMPTY === "1") {
emitJson({ action: "session_created", name: ensureName });
} else {
@ -426,6 +439,7 @@ export async function cleanupMockRuntimeFixtures(): Promise<void> {
delete process.env.MOCK_ACPX_ENSURE_ERROR_MESSAGE;
delete process.env.MOCK_ACPX_ENSURE_EXIT_1;
delete process.env.MOCK_ACPX_ENSURE_STDERR;
delete process.env.MOCK_ACPX_NEW_FAIL_ON_RESUME;
delete process.env.MOCK_ACPX_STATUS_STATUS;
delete process.env.MOCK_ACPX_STATUS_NO_IDS;
delete process.env.MOCK_ACPX_STATUS_SUMMARY;