From 2f86ae71d574f3f17b7001ea6ef6ae78b8b872ff Mon Sep 17 00:00:00 2001 From: Vignesh Natarajan Date: Thu, 5 Mar 2026 19:03:56 -0800 Subject: [PATCH] fix(subagents): recover announce cleanup after kill/complete race --- CHANGELOG.md | 1 + .../subagent-registry.steer-restart.test.ts | 32 +++++++++++++++++++ src/agents/subagent-registry.ts | 13 ++++++++ 3 files changed, 46 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a726bb1b25..f53c2aa5a08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - Onboarding/API key input hardening: strip non-Latin1 Unicode artifacts from normalized secret input (while preserving Latin-1 content and internal spaces) so malformed copied API keys cannot trigger HTTP header `ByteString` construction crashes; adds regression coverage for shared normalization and MiniMax auth header usage. (#24496) Thanks @fa6maalassaf. - Kimi Coding/Anthropic tools compatibility: normalize `anthropic-messages` tool payloads to OpenAI-style `tools[].function` + compatible `tool_choice` when targeting Kimi Coding endpoints, restoring tool-call workflows that regressed after v2026.3.2. (#37038) Thanks @mochimochimochi-hub. - Heartbeat/workspace-path guardrails: append explicit workspace `HEARTBEAT.md` path guidance (and `docs/heartbeat.md` avoidance) to heartbeat prompts so heartbeat runs target workspace checklists reliably across packaged install layouts. (#37037) Thanks @stofancy. +- Subagents/kill-complete announce race: when a late `subagent-complete` lifecycle event arrives after an earlier kill marker, clear stale kill suppression/cleanup flags and re-run announce cleanup so finished runs no longer get silently swallowed. (#37024) Thanks @cmfinlan. - Gateway/remote WS break-glass hostname support: honor `OPENCLAW_ALLOW_INSECURE_PRIVATE_WS=1` for `ws://` hostname URLs (not only private IP literals) across onboarding validation and runtime gateway connection checks, while still rejecting public IP literals and non-unicast IPv6 endpoints. (#36930) Thanks @manju-rn. - Routing/binding lookup scalability: pre-index route bindings by channel/account and avoid full binding-list rescans on channel-account cache rollover, preventing multi-second `resolveAgentRoute` stalls in large binding configurations. (#36915) Thanks @songchenghao. - Browser/session cleanup: track browser tabs opened by session-scoped browser tool runs and close tracked tabs during `sessions.reset`/`sessions.delete` runtime cleanup, preventing orphaned tabs and unbounded browser memory growth after session teardown. (#36666) Thanks @Harnoor6693. diff --git a/src/agents/subagent-registry.steer-restart.test.ts b/src/agents/subagent-registry.steer-restart.test.ts index 9ad20be4719..19ef8228688 100644 --- a/src/agents/subagent-registry.steer-restart.test.ts +++ b/src/agents/subagent-registry.steer-restart.test.ts @@ -447,6 +447,38 @@ describe("subagent registry steer restarts", () => { ); }); + it("recovers announce cleanup when completion arrives after a kill marker", async () => { + const childSessionKey = "agent:main:subagent:kill-race"; + registerRun({ + runId: "run-kill-race", + childSessionKey, + task: "race test", + }); + + expect(mod.markSubagentRunTerminated({ runId: "run-kill-race", reason: "manual kill" })).toBe( + 1, + ); + expect(listMainRuns()[0]?.suppressAnnounceReason).toBe("killed"); + expect(listMainRuns()[0]?.cleanupHandled).toBe(true); + expect(typeof listMainRuns()[0]?.cleanupCompletedAt).toBe("number"); + + emitLifecycleEnd("run-kill-race"); + await flushAnnounce(); + await flushAnnounce(); + + expect(announceSpy).toHaveBeenCalledTimes(1); + const announce = (announceSpy.mock.calls[0]?.[0] ?? {}) as { childRunId?: string }; + expect(announce.childRunId).toBe("run-kill-race"); + + const run = listMainRuns()[0]; + expect(run?.endedReason).toBe("subagent-complete"); + expect(run?.outcome?.status).not.toBe("error"); + expect(run?.suppressAnnounceReason).toBeUndefined(); + expect(run?.cleanupHandled).toBe(true); + expect(typeof run?.cleanupCompletedAt).toBe("number"); + expect(runSubagentEndedHookMock).toHaveBeenCalledTimes(1); + }); + it("retries deferred parent cleanup after a descendant announces", async () => { let parentAttempts = 0; announceSpy.mockImplementation(async (params: unknown) => { diff --git a/src/agents/subagent-registry.ts b/src/agents/subagent-registry.ts index 900aa4752d9..f2d9b81cd5a 100644 --- a/src/agents/subagent-registry.ts +++ b/src/agents/subagent-registry.ts @@ -338,6 +338,19 @@ async function completeSubagentRun(params: { } let mutated = false; + // If a late lifecycle completion arrives after an earlier kill marker, allow + // completion cleanup/announce to run instead of staying permanently suppressed. + if ( + params.reason === SUBAGENT_ENDED_REASON_COMPLETE && + entry.suppressAnnounceReason === "killed" && + (entry.cleanupHandled || typeof entry.cleanupCompletedAt === "number") + ) { + entry.suppressAnnounceReason = undefined; + entry.cleanupHandled = false; + entry.cleanupCompletedAt = undefined; + mutated = true; + } + const endedAt = typeof params.endedAt === "number" ? params.endedAt : Date.now(); if (entry.endedAt !== endedAt) { entry.endedAt = endedAt;