From f91271594deee1ef63f3d71db91012b7915a94bb Mon Sep 17 00:00:00 2001 From: Josh Lehman Date: Mon, 9 Mar 2026 08:29:03 -0700 Subject: [PATCH] fix: carry live overflow token counts --- CHANGELOG.md | 1 + .../pi-embedded-helpers.isbillingerrormessage.test.ts | 6 +++--- .../run.overflow-compaction.mocks.shared.ts | 4 +--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9820a7aa1e8..563e7e6b132 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -236,6 +236,7 @@ Docs: https://docs.openclaw.ai - Memory/Gemini: normalize returned Gemini embeddings across direct query, direct batch, and async batch paths so memory search uses consistent vector handling for Gemini too. (#43409) Thanks @gumadeiras. - Agents/failover: recognize additional serialized network errno strings plus `EHOSTDOWN` and `EPIPE` structured codes so transient transport failures trigger timeout failover more reliably. (#42830) Thanks @jnMetaCode. - Telegram/model picker: make inline model button selections persist the chosen session model correctly, clear overrides when selecting the configured default, and include effective fallback models in `/models` button validation. (#40105) Thanks @avirweb. +- Agents/embedded runner: carry provider-observed overflow token counts into compaction so overflow retries and diagnostics use the rejected live prompt size instead of only transcript estimates. (#40357) Thanks @rabsef-bicrym. 
## 2026.3.7 diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index cd49ecb8be2..b71ad3a7d78 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -469,9 +469,9 @@ describe("extractObservedOverflowTokenCount", () => { '400 {"type":"error","error":{"message":"prompt is too long: 277403 tokens > 200000 maximum"}}', ), ).toBe(277403); - expect(extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens")).toBe( - 12000, - ); + expect( + extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens"), + ).toBe(12000); expect( extractObservedOverflowTokenCount( "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.", diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts index 96e92bea55f..3e3d4a83461 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts @@ -110,9 +110,7 @@ vi.mock("../pi-embedded-helpers.js", () => ({ formatBillingErrorMessage: vi.fn(() => ""), classifyFailoverReason: vi.fn(() => null), extractObservedOverflowTokenCount: vi.fn((msg?: string) => { - const match = msg?.match( - /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i, - ); + const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i); return match?.[1] ? Number(match[1].replaceAll(",", "")) : undefined; }), formatAssistantErrorText: vi.fn(() => ""),