From 5c05347d119e9ea0376e31e9d7c5bf9ee137a93e Mon Sep 17 00:00:00 2001 From: samzong Date: Sun, 22 Mar 2026 03:27:35 +0800 Subject: [PATCH] fix(compaction): make compaction guard content-aware to prevent false cancellations in heartbeat sessions (#42119) Merged via squash. Prepared head SHA: 34296433150546e3da0f8074550a079d093acfbc Co-authored-by: samzong <13782141+samzong@users.noreply.github.com> Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Reviewed-by: @jalehman --- CHANGELOG.md | 1 + src/agents/compaction-real-conversation.ts | 85 ++++++++++++ .../compact.hooks.harness.ts | 57 +++++--- .../pi-embedded-runner/compact.hooks.test.ts | 123 ++++++++++++++++++ src/agents/pi-embedded-runner/compact.ts | 23 +++- .../compaction-safeguard.test.ts | 80 ++++++++++++ .../pi-extensions/compaction-safeguard.ts | 18 ++- 7 files changed, 362 insertions(+), 25 deletions(-) create mode 100644 src/agents/compaction-real-conversation.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index d442cbd6041..f6624fe11cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -410,6 +410,7 @@ Docs: https://docs.openclaw.ai - Control UI/auth: restore one-time legacy `?token=` imports for shared Control UI links while keeping `#token=` preferred, and carry pending query tokens through gateway URL confirmation so compatibility links still authenticate after confirmation. (#43979) Thanks @stim64045-spec. - Plugins/context engines: retry legacy lifecycle calls once without `sessionKey` when older plugins reject that field, memoize legacy mode after the first strict-schema fallback, and preserve non-compat runtime errors without retry. (#44779) thanks @hhhhao28. +- Agents/compaction: treat markup-wrapped heartbeat boilerplate as non-meaningful session history when deciding whether to compact, so heartbeat-only sessions no longer keep compaction alive due to wrapper formatting. (#42119) thanks @samzong. ## 2026.3.11 diff --git a/src/agents/compaction-real-conversation.ts b/src/agents/compaction-real-conversation.ts new file mode 100644 index 00000000000..66d617cd9c8 --- /dev/null +++ b/src/agents/compaction-real-conversation.ts @@ -0,0 +1,85 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { stripHeartbeatToken } from "../auto-reply/heartbeat.js"; +import { isSilentReplyText } from "../auto-reply/tokens.js"; + +export const TOOL_RESULT_REAL_CONVERSATION_LOOKBACK = 20; +const NON_CONVERSATION_BLOCK_TYPES = new Set([ + "toolCall", + "toolUse", + "functionCall", + "thinking", + "reasoning", +]); + +function hasMeaningfulText(text: string): boolean { + const trimmed = text.trim(); + if (!trimmed) { + return false; + } + if (isSilentReplyText(trimmed)) { + return false; + } + const heartbeat = stripHeartbeatToken(trimmed, { mode: "message" }); + if (heartbeat.didStrip) { + return heartbeat.text.trim().length > 0; + } + return true; +} + +export function hasMeaningfulConversationContent(message: AgentMessage): boolean { + const content = (message as { content?: unknown }).content; + if (typeof content === "string") { + return hasMeaningfulText(content); + } + if (!Array.isArray(content)) { + return false; + } + let sawMeaningfulNonTextBlock = false; + for (const block of content) { + if (!block || typeof block !== "object") { + continue; + } + const type = (block as { type?: unknown }).type; + if (type !== "text") { + // Tool-call metadata and internal reasoning blocks do not make a + // heartbeat-only transcript count as real conversation. + if (typeof type === "string" && NON_CONVERSATION_BLOCK_TYPES.has(type)) { + continue; + } + sawMeaningfulNonTextBlock = true; + continue; + } + const text = (block as { text?: unknown }).text; + if (typeof text !== "string") { + continue; + } + if (hasMeaningfulText(text)) { + return true; + } + } + return sawMeaningfulNonTextBlock; +} + +export function isRealConversationMessage( + message: AgentMessage, + messages: AgentMessage[], + index: number, +): boolean { + if (message.role === "user" || message.role === "assistant") { + return hasMeaningfulConversationContent(message); + } + if (message.role !== "toolResult") { + return false; + } + const start = Math.max(0, index - TOOL_RESULT_REAL_CONVERSATION_LOOKBACK); + for (let i = index - 1; i >= start; i -= 1) { + const candidate = messages[i]; + if (!candidate || candidate.role !== "user") { + continue; + } + if (hasMeaningfulConversationContent(candidate)) { + return true; + } + } + return false; +} diff --git a/src/agents/pi-embedded-runner/compact.hooks.harness.ts b/src/agents/pi-embedded-runner/compact.hooks.harness.ts index e065b0105b3..c4a21c734ba 100644 --- a/src/agents/pi-embedded-runner/compact.hooks.harness.ts +++ b/src/agents/pi-embedded-runner/compact.hooks.harness.ts @@ -67,6 +67,18 @@ export const resolveMemorySearchConfigMock = vi.fn(() => ({ })); export const resolveSessionAgentIdMock = vi.fn(() => "main"); export const estimateTokensMock = vi.fn((_message?: unknown) => 10); +export const sessionMessages: unknown[] = [ + { role: "user", content: "hello", timestamp: 1 }, + { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 }, + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "output" }], + isError: false, + timestamp: 3, + }, +]; export const sessionAbortCompactionMock: Mock<(reason?: unknown) => void> = vi.fn(); export const createOpenClawCodingToolsMock = vi.fn(() => []); @@ -134,6 +146,20 @@ export function resetCompactHooksHarnessMocks(): void { resolveSessionAgentIdMock.mockReturnValue("main"); estimateTokensMock.mockReset(); estimateTokensMock.mockReturnValue(10); + sessionMessages.splice( + 0, + sessionMessages.length, + { role: "user", content: "hello", timestamp: 1 }, + { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 }, + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "output" }], + isError: false, + timestamp: 3, + }, + ); sessionAbortCompactionMock.mockReset(); createOpenClawCodingToolsMock.mockReset(); createOpenClawCodingToolsMock.mockReturnValue([]); @@ -142,6 +168,7 @@ export function resetCompactHooksHarnessMocks(): void { export async function loadCompactHooksHarness(): Promise<{ compactEmbeddedPiSessionDirect: typeof import("./compact.js").compactEmbeddedPiSessionDirect; compactEmbeddedPiSession: typeof import("./compact.js").compactEmbeddedPiSession; + __testing: typeof import("./compact.js").__testing; onSessionTranscriptUpdate: typeof import("../../sessions/transcript-events.js").onSessionTranscriptUpdate; }> { resetCompactHooksHarnessMocks(); @@ -176,18 +203,11 @@ export async function loadCompactHooksHarness(): Promise<{ createAgentSession: vi.fn(async () => { const session = { sessionId: "session-1", - messages: [ - { role: "user", content: "hello", timestamp: 1 }, - { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 }, - { - role: "toolResult", - toolCallId: "t1", - toolName: "exec", - content: [{ type: "text", text: "output" }], - isError: false, - timestamp: 3, - }, - ], + messages: sessionMessages.map((message) => + typeof structuredClone === "function" + ? structuredClone(message) + : JSON.parse(JSON.stringify(message)), + ), agent: { replaceMessages: vi.fn((messages: unknown[]) => { session.messages = [...(messages as typeof session.messages)]; @@ -358,10 +378,15 @@ export async function loadCompactHooksHarness(): Promise<{ resolveChannelCapabilities: vi.fn(() => undefined), })); - vi.doMock("../../utils/message-channel.js", () => ({ - INTERNAL_MESSAGE_CHANNEL: "webchat", - normalizeMessageChannel: vi.fn(() => undefined), - })); + vi.doMock("../../utils/message-channel.js", async () => { + const actual = await vi.importActual( + "../../utils/message-channel.js", + ); + return { + ...actual, + normalizeMessageChannel: vi.fn(() => undefined), + }; + }); vi.doMock("../pi-embedded-helpers.js", () => ({ ensureSessionHeader: vi.fn(async () => {}), diff --git a/src/agents/pi-embedded-runner/compact.hooks.test.ts b/src/agents/pi-embedded-runner/compact.hooks.test.ts index f8f486f230f..59b58002588 100644 --- a/src/agents/pi-embedded-runner/compact.hooks.test.ts +++ b/src/agents/pi-embedded-runner/compact.hooks.test.ts @@ -1,3 +1,4 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { getApiProvider, unregisterApiProviders } from "@mariozechner/pi-ai"; import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import { getCustomApiRegistrySourceId } from "../custom-api-registry.js"; @@ -16,12 +17,14 @@ import { resetCompactHooksHarnessMocks, sanitizeSessionHistoryMock, sessionAbortCompactionMock, + sessionMessages, sessionCompactImpl, triggerInternalHook, } from "./compact.hooks.harness.js"; let compactEmbeddedPiSessionDirect: typeof import("./compact.js").compactEmbeddedPiSessionDirect; let compactEmbeddedPiSession: typeof import("./compact.js").compactEmbeddedPiSession; +let compactTesting: typeof import("./compact.js").__testing; let onSessionTranscriptUpdate: typeof import("../../sessions/transcript-events.js").onSessionTranscriptUpdate; const TEST_SESSION_ID = "session-1"; @@ -108,6 +111,7 @@ beforeAll(async () => { const loaded = await loadCompactHooksHarness(); compactEmbeddedPiSessionDirect = loaded.compactEmbeddedPiSessionDirect; compactEmbeddedPiSession = loaded.compactEmbeddedPiSession; + compactTesting = loaded.__testing; onSessionTranscriptUpdate = loaded.onSessionTranscriptUpdate; }); @@ -154,6 +158,20 @@ describe("compactEmbeddedPiSessionDirect hooks", () => { estimateTokensMock.mockReset(); estimateTokensMock.mockReturnValue(10); sessionAbortCompactionMock.mockReset(); + sessionMessages.splice( + 0, + sessionMessages.length, + { role: "user", content: "hello", timestamp: 1 }, + { role: "assistant", content: [{ type: "text", text: "hi" }], timestamp: 2 }, + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "output" }], + isError: false, + timestamp: 3, + }, + ); unregisterApiProviders(getCustomApiRegistrySourceId("ollama")); }); @@ -490,6 +508,111 @@ describe("compactEmbeddedPiSessionDirect hooks", () => { }); }); + it("skips compaction when the transcript only contains boilerplate replies and tool output", async () => { + sessionMessages.splice( + 0, + sessionMessages.length, + { role: "user", content: "HEARTBEAT_OK", timestamp: 1 }, + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "checked" }], + isError: false, + timestamp: 2, + }, + ); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: "agent:main:session-1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp", + customInstructions: "focus on decisions", + }); + + expect(result).toMatchObject({ + ok: true, + compacted: false, + reason: "no real conversation messages", + }); + expect(sessionCompactImpl).not.toHaveBeenCalled(); + }); + + it("skips compaction when the transcript only contains heartbeat boilerplate and reasoning blocks", async () => { + sessionMessages.splice( + 0, + sessionMessages.length, + { role: "user", content: "HEARTBEAT_OK", timestamp: 1 }, + { + role: "assistant", + content: [{ type: "thinking", thinking: "checking" }], + timestamp: 2, + }, + ); + + const result = await compactEmbeddedPiSessionDirect({ + sessionId: "session-1", + sessionKey: "agent:main:session-1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp", + customInstructions: "focus on decisions", + }); + + expect(result).toMatchObject({ + ok: true, + compacted: false, + reason: "no real conversation messages", + }); + expect(sessionCompactImpl).not.toHaveBeenCalled(); + }); + + it("does not treat assistant-only tool-call blocks as meaningful conversation", () => { + expect( + compactTesting.hasMeaningfulConversationContent({ + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "exec", arguments: {} }], + } as AgentMessage), + ).toBe(false); + }); + + it("counts tool output as real only when a meaningful user ask exists in the lookback window", () => { + const heartbeatToolResultWindow = [ + { role: "user", content: "HEARTBEAT_OK" }, + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "checked" }], + }, + ] as AgentMessage[]; + expect( + compactTesting.hasRealConversationContent( + heartbeatToolResultWindow[1], + heartbeatToolResultWindow, + 1, + ), + ).toBe(false); + + const realAskToolResultWindow = [ + { role: "assistant", content: "NO_REPLY" }, + { role: "user", content: "please inspect the failing PR" }, + { + role: "toolResult", + toolCallId: "t2", + toolName: "exec", + content: [{ type: "text", text: "checked" }], + }, + ] as AgentMessage[]; + expect( + compactTesting.hasRealConversationContent( + realAskToolResultWindow[2], + realAskToolResultWindow, + 2, + ), + ).toBe(true); + }); + it("registers the Ollama api provider before compaction", async () => { resolveModelMock.mockReturnValue({ model: { diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index dd5806421a0..e27df822cd7 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -38,6 +38,10 @@ import { resolveSessionAgentId, resolveSessionAgentIds } from "../agent-scope.js import type { ExecElevatedDefaults } from "../bash-tools.js"; import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../bootstrap-files.js"; import { listChannelSupportedActions, resolveChannelMessageToolHints } from "../channel-tools.js"; +import { + hasMeaningfulConversationContent, + isRealConversationMessage, +} from "../compaction-real-conversation.js"; import { resolveContextWindowInfo } from "../context-window-guard.js"; import { ensureCustomApiRegistered } from "../custom-api-registry.js"; import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js"; @@ -169,8 +173,12 @@ type CompactionMessageMetrics = { contributors: Array<{ role: string; chars: number; tool?: string }>; }; -function hasRealConversationContent(msg: AgentMessage): boolean { - return msg.role === "user" || msg.role === "assistant" || msg.role === "toolResult"; +function hasRealConversationContent( + msg: AgentMessage, + messages: AgentMessage[], + index: number, +): boolean { + return isRealConversationMessage(msg, messages, index); } function createCompactionDiagId(): string { @@ -962,7 +970,11 @@ export async function compactEmbeddedPiSessionDirect( ); } - if (!session.messages.some(hasRealConversationContent)) { + if ( + !session.messages.some((message, index, messages) => + hasRealConversationContent(message, messages, index), + ) + ) { log.info( `[compaction] skipping — no real conversation messages (sessionKey=${params.sessionKey ?? params.sessionId})`, ); @@ -1281,3 +1293,8 @@ export async function compactEmbeddedPiSession( }), ); } + +export const __testing = { + hasRealConversationContent, + hasMeaningfulConversationContent, +} as const; diff --git a/src/agents/pi-extensions/compaction-safeguard.test.ts b/src/agents/pi-extensions/compaction-safeguard.test.ts index 897e49cb655..382331a4696 100644 --- a/src/agents/pi-extensions/compaction-safeguard.test.ts +++ b/src/agents/pi-extensions/compaction-safeguard.test.ts @@ -1795,6 +1795,86 @@ describe("compaction-safeguard double-compaction guard", () => { expect(result).toEqual({ cancel: true }); expect(getApiKeyMock).toHaveBeenCalled(); }); + + it("treats tool results as real conversation only when linked to a meaningful user ask", async () => { + expect( + __testing.isRealConversationMessage( + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "done" }], + } as AgentMessage, + [ + { role: "user", content: "HEARTBEAT_OK" } as AgentMessage, + { + role: "toolResult", + toolCallId: "t1", + toolName: "exec", + content: [{ type: "text", text: "done" }], + } as AgentMessage, + ], + 1, + ), + ).toBe(false); + + expect( + __testing.isRealConversationMessage( + { + role: "toolResult", + toolCallId: "t2", + toolName: "exec", + content: [{ type: "text", text: "done" }], + } as AgentMessage, + [ + { role: "user", content: "please inspect the repo" } as AgentMessage, + { + role: "toolResult", + toolCallId: "t2", + toolName: "exec", + content: [{ type: "text", text: "done" }], + } as AgentMessage, + ], + 1, + ), + ).toBe(true); + }); + + it("does not treat assistant-only tool calls as meaningful conversation", () => { + expect( + __testing.hasMeaningfulConversationContent({ + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "exec", arguments: {} }], + } as AgentMessage), + ).toBe(false); + }); + + it("does not treat reasoning-only assistant blocks as meaningful conversation", () => { + expect( + __testing.hasMeaningfulConversationContent({ + role: "assistant", + content: [{ type: "thinking", thinking: "checking" }], + } as AgentMessage), + ).toBe(false); + + expect( + __testing.hasMeaningfulConversationContent({ + role: "assistant", + content: [{ type: "reasoning", summary: [] }], + } as unknown as AgentMessage), + ).toBe(false); + }); + + it("treats markup-wrapped heartbeat tokens as boilerplate", () => { + expect( + __testing.hasMeaningfulConversationContent( + castAgentMessage({ + role: "assistant", + content: "HEARTBEAT_OK", + }), + ), + ).toBe(false); + }); }); async function expectWorkspaceSummaryEmptyForAgentsAlias( diff --git a/src/agents/pi-extensions/compaction-safeguard.ts b/src/agents/pi-extensions/compaction-safeguard.ts index d96fa4412ff..5b7d4f082f0 100644 --- a/src/agents/pi-extensions/compaction-safeguard.ts +++ b/src/agents/pi-extensions/compaction-safeguard.ts @@ -6,6 +6,10 @@ import { extractSections } from "../../auto-reply/reply/post-compaction-context. import { openBoundaryFile } from "../../infra/boundary-file-read.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { extractKeywords, isQueryStopWordToken } from "../../memory/query-expansion.js"; +import { + hasMeaningfulConversationContent, + isRealConversationMessage, +} from "../compaction-real-conversation.js"; import { BASE_CHUNK_RATIO, type CompactionSummarizationInstructions, @@ -183,10 +187,6 @@ function formatToolFailuresSection(failures: ToolFailure[]): string { return `\n\n## Tool Failures\n${lines.join("\n")}`; } -function isRealConversationMessage(message: AgentMessage): boolean { - return message.role === "user" || message.role === "assistant" || message.role === "toolResult"; -} - function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[]; @@ -774,8 +774,12 @@ async function readWorkspaceContextForSummary(): Promise { export default function compactionSafeguardExtension(api: ExtensionAPI): void { api.on("session_before_compact", async (event, ctx) => { const { preparation, customInstructions: eventInstructions, signal } = event; - const hasRealSummarizable = preparation.messagesToSummarize.some(isRealConversationMessage); - const hasRealTurnPrefix = preparation.turnPrefixMessages.some(isRealConversationMessage); + const hasRealSummarizable = preparation.messagesToSummarize.some((message, index, messages) => + isRealConversationMessage(message, messages, index), + ); + const hasRealTurnPrefix = preparation.turnPrefixMessages.some((message, index, messages) => + isRealConversationMessage(message, messages, index), + ); if (!hasRealSummarizable && !hasRealTurnPrefix) { // When there are no summarizable messages AND no real turn-prefix content, // cancelling compaction leaves context unchanged but the SDK re-triggers @@ -1124,6 +1128,8 @@ export const __testing = { computeAdaptiveChunkRatio, isOversizedForSummary, readWorkspaceContextForSummary, + hasMeaningfulConversationContent, + isRealConversationMessage, BASE_CHUNK_RATIO, MIN_CHUNK_RATIO, SAFETY_MARGIN,