mirror of https://github.com/openclaw/openclaw.git
agent: preemptive context overflow detection during tool loops (#29371)
Merged via squash.
Prepared head SHA: 19661b8fb1
Co-authored-by: keshav55 <3821985+keshav55@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
This commit is contained in:
parent
76500c7a78
commit
3aa4199ef0
|
|
@ -105,6 +105,8 @@ Docs: https://docs.openclaw.ai
|
|||
- Agents/usage tracking: stop forcing `supportsUsageInStreaming: false` on non-native OpenAI-completions providers so compatible backends report token usage and cost again instead of showing all zeros. (#46500) Fixes #46142. Thanks @ademczuk.
|
||||
- Plugins/subagents: preserve gateway-owned plugin subagent access across runtime, tool, and embedded-runner load paths so gateway plugin tools and context engines can still spawn and manage subagents after the loader cache split. (#46648) Thanks @jalehman.
|
||||
- Control UI/overview: keep the language dropdown aligned with the persisted locale during dashboard startup so refreshing the page does not fall back to English before locale hydration completes. (#48019) Thanks @git-jxj.
|
||||
- Agents/compaction: rerun transcript repair after `session.compact()` so orphaned `tool_result` blocks cannot survive compaction and break later Anthropic requests. (#16095) Thanks @claw-sylphx.
|
||||
- Agents/compaction: trigger overflow recovery from the tool-result guard once post-compaction context still exceeds the safe threshold, so long tool loops compact before the next model call hard-fails. (#29371) Thanks @keshav55.
|
||||
|
||||
## 2026.3.13
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import type { Message } from "grammy/types";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildTelegramThreadParams,
|
||||
|
|
@ -404,8 +405,59 @@ describe("hasBotMention", () => {
|
|||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it("matches mention followed by punctuation", () => {
|
||||
expect(
|
||||
hasBotMention(
|
||||
{
|
||||
text: "@gaian, what's up?",
|
||||
chat: { id: 1, type: "supergroup" },
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
} as any,
|
||||
"gaian",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("matches mention followed by space", () => {
|
||||
expect(
|
||||
hasBotMention(
|
||||
{
|
||||
text: "@gaian how are you",
|
||||
chat: { id: 1, type: "supergroup" },
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
} as any,
|
||||
"gaian",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("does not match substring of a longer username", () => {
|
||||
expect(
|
||||
hasBotMention(
|
||||
{
|
||||
text: "@gaianchat_bot hello",
|
||||
chat: { id: 1, type: "supergroup" },
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
} as any,
|
||||
"gaian",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not match when mention is a prefix of another word", () => {
|
||||
expect(
|
||||
hasBotMention(
|
||||
{
|
||||
text: "@gaianbot do something",
|
||||
chat: { id: 1, type: "supergroup" },
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
} as any,
|
||||
"gaian",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
describe("expandTextLinks", () => {
|
||||
it("returns text unchanged when no entities are provided", () => {
|
||||
expect(expandTextLinks("Hello world")).toBe("Hello world");
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
|
|||
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
|
||||
import {
|
||||
CONTEXT_LIMIT_TRUNCATION_NOTICE,
|
||||
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
|
||||
PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
|
||||
installToolResultContextGuard,
|
||||
} from "./tool-result-context-guard.js";
|
||||
|
|
@ -268,4 +269,63 @@ describe("installToolResultContextGuard", () => {
|
|||
expect(oldResult.details).toBeUndefined();
|
||||
expect(newResult.details).toBeUndefined();
|
||||
});
|
||||
|
||||
it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
|
||||
installToolResultContextGuard({
|
||||
agent,
|
||||
// contextBudgetChars = 1000 * 4 * 0.75 = 3000
|
||||
// preemptiveOverflowChars = 1000 * 4 * 0.9 = 3600
|
||||
contextWindowTokens: 1_000,
|
||||
});
|
||||
|
||||
// Large user message (non-compactable) pushes context past 90% threshold.
|
||||
const contextForNextCall = [makeUser("u".repeat(3_700)), makeToolResult("call_1", "small")];
|
||||
|
||||
await expect(
|
||||
agent.transformContext?.(contextForNextCall, new AbortController().signal),
|
||||
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
|
||||
});
|
||||
|
||||
it("does not throw when context is under 90% after tool-result compaction", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
|
||||
installToolResultContextGuard({
|
||||
agent,
|
||||
contextWindowTokens: 1_000,
|
||||
});
|
||||
|
||||
// Context well under the 3600-char preemptive threshold.
|
||||
const contextForNextCall = [makeUser("u".repeat(1_000)), makeToolResult("call_1", "small")];
|
||||
|
||||
await expect(
|
||||
agent.transformContext?.(contextForNextCall, new AbortController().signal),
|
||||
).resolves.not.toThrow();
|
||||
});
|
||||
|
||||
it("compacts tool results before checking the preemptive overflow threshold", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
|
||||
installToolResultContextGuard({
|
||||
agent,
|
||||
contextWindowTokens: 1_000,
|
||||
});
|
||||
|
||||
// Large user message + large tool result. The guard should compact the tool
|
||||
// result first, then check the overflow threshold. Even after compaction the
|
||||
// user content alone pushes past 90%, so the overflow error fires.
|
||||
const contextForNextCall = [
|
||||
makeUser("u".repeat(3_700)),
|
||||
makeToolResult("call_old", "x".repeat(2_000)),
|
||||
];
|
||||
|
||||
await expect(
|
||||
agent.transformContext?.(contextForNextCall, new AbortController().signal),
|
||||
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
|
||||
|
||||
// Tool result should have been compacted before the overflow check.
|
||||
const toolResultText = getToolResultText(contextForNextCall[1]);
|
||||
expect(toolResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -14,6 +14,9 @@ import {
|
|||
// Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
// Cap any single tool result at this share of the estimated context window
// (used when computing maxSingleToolResultChars below).
const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
// High-water mark: if context exceeds this ratio after tool-result compaction,
// trigger full session compaction via the existing overflow recovery cascade.
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;

// Visible marker for tool output that was cut down to fit the context limit;
// the suffix form below prefixes it with a newline so it lands on its own line.
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "[truncated: output exceeded context limit]";
const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
||||
|
|
@ -21,6 +24,9 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
|
|||
// Replacement text installed in place of a tool result whose output the guard
// dropped to free context.
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
  "[compacted: tool output removed to free context]";

// Message of the error thrown when estimated context still exceeds the
// preemptive threshold after tool-result compaction.
// NOTE(review): overflow recovery elsewhere appears to key off this error —
// confirm before rewording the string.
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
  "Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
|
||||
|
||||
type GuardableTransformContext = (
|
||||
messages: AgentMessage[],
|
||||
signal: AbortSignal,
|
||||
|
|
@ -196,6 +202,10 @@ export function installToolResultContextGuard(params: {
|
|||
contextWindowTokens * TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE * SINGLE_TOOL_RESULT_CONTEXT_SHARE,
|
||||
),
|
||||
);
|
||||
const preemptiveOverflowChars = Math.max(
|
||||
contextBudgetChars,
|
||||
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
|
||||
);
|
||||
|
||||
// Agent.transformContext is private in pi-coding-agent, so access it via a
|
||||
// narrow runtime view to keep callsites type-safe while preserving behavior.
|
||||
|
|
@ -214,6 +224,18 @@ export function installToolResultContextGuard(params: {
|
|||
maxSingleToolResultChars,
|
||||
});
|
||||
|
||||
// After tool-result compaction, check if context still exceeds the high-water mark.
|
||||
// If it does, non-tool-result content dominates and only full LLM-based session
|
||||
// compaction can reduce context size. Throwing a context overflow error triggers
|
||||
// the existing overflow recovery cascade in run.ts.
|
||||
const postEnforcementChars = estimateContextChars(
|
||||
contextMessages,
|
||||
createMessageCharEstimateCache(),
|
||||
);
|
||||
if (postEnforcementChars > preemptiveOverflowChars) {
|
||||
throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
|
||||
}
|
||||
|
||||
return contextMessages;
|
||||
}) as GuardableTransformContext;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue