From f91271594deee1ef63f3d71db91012b7915a94bb Mon Sep 17 00:00:00 2001 From: Josh Lehman Date: Mon, 9 Mar 2026 08:29:03 -0700 Subject: [PATCH] fix: carry live overflow token counts --- CHANGELOG.md | 1 + .../pi-embedded-helpers.isbillingerrormessage.test.ts | 6 +++--- .../run.overflow-compaction.mocks.shared.ts | 4 +--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9820a7aa1e8..563e7e6b132 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -236,6 +236,7 @@ Docs: https://docs.openclaw.ai - Memory/Gemini: normalize returned Gemini embeddings across direct query, direct batch, and async batch paths so memory search uses consistent vector handling for Gemini too. (#43409) Thanks @gumadeiras. - Agents/failover: recognize additional serialized network errno strings plus `EHOSTDOWN` and `EPIPE` structured codes so transient transport failures trigger timeout failover more reliably. (#42830) Thanks @jnMetaCode. - Telegram/model picker: make inline model button selections persist the chosen session model correctly, clear overrides when selecting the configured default, and include effective fallback models in `/models` button validation. (#40105) Thanks @avirweb. +- Agents/embedded runner: carry provider-observed overflow token counts into compaction so overflow retries and diagnostics use the rejected live prompt size instead of only transcript estimates. (#40357) Thanks @rabsef-bicrym. 
## 2026.3.7 diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index cd49ecb8be2..b71ad3a7d78 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -469,9 +469,9 @@ describe("extractObservedOverflowTokenCount", () => { '400 {"type":"error","error":{"message":"prompt is too long: 277403 tokens > 200000 maximum"}}', ), ).toBe(277403); - expect(extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens")).toBe( - 12000, - ); + expect( + extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens"), + ).toBe(12000); expect( extractObservedOverflowTokenCount( "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.", diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts index 96e92bea55f..3e3d4a83461 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts @@ -110,9 +110,7 @@ vi.mock("../pi-embedded-helpers.js", () => ({ formatBillingErrorMessage: vi.fn(() => ""), classifyFailoverReason: vi.fn(() => null), extractObservedOverflowTokenCount: vi.fn((msg?: string) => { - const match = msg?.match( - /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i, - ); + const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i); return match?.[1] ? Number(match[1].replaceAll(",", "")) : undefined; }), formatAssistantErrorText: vi.fn(() => ""),