From b202ac2ad1a257e560ba79a3034597b2d7c38116 Mon Sep 17 00:00:00 2001 From: Andrew Demczuk Date: Sun, 15 Mar 2026 00:34:04 +0100 Subject: [PATCH] revert: restore supportsUsageInStreaming=false default for non-native endpoints Reverts #46500. Breaks Ollama, LM Studio, TGI, LocalAI, Mistral API - these backends reject stream_options with 400/422. This reverts commit bb06dc7cc9e71fbac29d7888d64323db2acec7ca. --- CHANGELOG.md | 1 - src/agents/model-compat.test.ts | 27 ++++++++++++++------------- src/agents/model-compat.ts | 29 +++++++++++++---------------- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 375cac0aa5c..a0da6d6e8cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,6 @@ Docs: https://docs.openclaw.ai - Slack/interactive replies: preserve `channelData.slack.blocks` through live DM delivery and preview-finalized edits so Block Kit button and select directives render instead of falling back to raw text. Thanks @vincentkoc. - CI/channel test routing: move the built-in channel suites into `test:channels` and keep them out of `test:extensions`, so extension CI no longer fails after the channel migration while targeted test routing still sends Slack, Signal, and iMessage suites to the right lane. (#46066) Thanks @scoootscooob. -- Agents/usage tracking: stop forcing `supportsUsageInStreaming: false` on non-native openai-completions endpoints so providers like DashScope, DeepSeek, and other OpenAI-compatible backends report token usage and cost instead of showing all zeros. (#46142) - Node/startup: remove leftover debug `console.log("node host PATH: ...")` that printed the resolved PATH on every `openclaw node run` invocation. (#46411) - Control UI/dashboard: preserve structured gateway shutdown reasons across restart disconnects so config-triggered restarts no longer fall back to `disconnected (1006): no reason`. (#46532) Thanks @vincentkoc. 
- Feishu/topic threads: fetch full thread context, including prior bot replies, when starting a topic-thread session so follow-up turns in Feishu topics keep the right conversation state. Thanks @Coobiw. diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index 3ae2e1b99fe..56b9c16203c 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -86,6 +86,14 @@ function expectSupportsDeveloperRoleForcedOff(overrides?: Partial<Model>): const normalized = normalizeModelCompat(model as Model); expect(supportsDeveloperRole(normalized)).toBe(false); } + +function expectSupportsUsageInStreamingForcedOff(overrides?: Partial<Model>): void { + const model = { ...baseModel(), ...overrides }; + delete (model as { compat?: unknown }).compat; + const normalized = normalizeModelCompat(model as Model); + expect(supportsUsageInStreaming(normalized)).toBe(false); +} + function expectResolvedForwardCompat( model: Model | undefined, expected: { provider: string; id: string }, @@ -211,16 +219,11 @@ describe("normalizeModelCompat", () => { }); }); - it("leaves supportsUsageInStreaming at default for generic custom openai-completions provider", () => { - const model = { - ...baseModel(), + it("forces supportsUsageInStreaming off for generic custom openai-completions provider", () => { + expectSupportsUsageInStreamingForcedOff({ provider: "custom-cpa", baseUrl: "https://cpa.example.com/v1", - }; - delete (model as { compat?: unknown }).compat; - const normalized = normalizeModelCompat(model as Model); - // supportsUsageInStreaming is no longer forced off — pi-ai's default (true) applies - expect(supportsUsageInStreaming(normalized)).toBeUndefined(); + }); }); it("forces supportsDeveloperRole off for Qwen proxy via openai-completions", () => { @@ -270,7 +273,7 @@ describe("normalizeModelCompat", () => { expect(supportsUsageInStreaming(normalized)).toBe(true); }); - it("forces supportsDeveloperRole off but leaves supportsUsageInStreaming unset for 
non-native endpoints", () => { + it("still forces flags off when not explicitly set by user", () => { const model = { ...baseModel(), provider: "custom-cpa", @@ -279,8 +282,7 @@ describe("normalizeModelCompat", () => { delete (model as { compat?: unknown }).compat; const normalized = normalizeModelCompat(model); expect(supportsDeveloperRole(normalized)).toBe(false); - // supportsUsageInStreaming is no longer forced off — pi-ai default applies - expect(supportsUsageInStreaming(normalized)).toBeUndefined(); + expect(supportsUsageInStreaming(normalized)).toBe(false); }); it("does not mutate caller model when forcing supportsDeveloperRole off", () => { @@ -295,8 +297,7 @@ describe("normalizeModelCompat", () => { expect(supportsDeveloperRole(model)).toBeUndefined(); expect(supportsUsageInStreaming(model)).toBeUndefined(); expect(supportsDeveloperRole(normalized)).toBe(false); - // supportsUsageInStreaming is not set by normalizeModelCompat — pi-ai default applies - expect(supportsUsageInStreaming(normalized)).toBeUndefined(); + expect(supportsUsageInStreaming(normalized)).toBe(false); }); it("does not override explicit compat false", () => { diff --git a/src/agents/model-compat.ts b/src/agents/model-compat.ts index c2837f6b83d..72deb0c655f 100644 --- a/src/agents/model-compat.ts +++ b/src/agents/model-compat.ts @@ -52,16 +52,11 @@ export function normalizeModelCompat(model: Model): Model { return model; } - // The `developer` role is an OpenAI-native behavior that most compatible - // backends reject. Force it off for non-native endpoints unless the user - // has explicitly opted in via their model config. - // - // `supportsUsageInStreaming` is NOT forced off — most OpenAI-compatible - // backends (DashScope, DeepSeek, Groq, Together, etc.) handle - // `stream_options: { include_usage: true }` correctly, and disabling it - // silently breaks usage/cost tracking for all non-native providers. 
- // Users can still opt out with `compat.supportsUsageInStreaming: false` - // if their backend rejects the parameter. + // The `developer` role and stream usage chunks are OpenAI-native behaviors. + // Many OpenAI-compatible backends reject `developer` and/or emit usage-only + // chunks that break strict parsers expecting choices[0]. For non-native + // openai-completions endpoints, force both compat flags off — unless the + // user has explicitly opted in via their model config. const compat = model.compat ?? undefined; // When baseUrl is empty the pi-ai library defaults to api.openai.com, so // leave compat unchanged and let default native behavior apply. @@ -70,22 +65,24 @@ export function normalizeModelCompat(model: Model): Model { return model; } - // Respect explicit user overrides. + // Respect explicit user overrides: if the user has set a compat flag to + // true in their model definition, they know their endpoint supports it. const forcedDeveloperRole = compat?.supportsDeveloperRole === true; + const forcedUsageStreaming = compat?.supportsUsageInStreaming === true; - if (forcedDeveloperRole) { + if (forcedDeveloperRole && forcedUsageStreaming) { return model; } - // Only force supportsDeveloperRole off. Leave supportsUsageInStreaming - // at whatever the user set or pi-ai's default (true). + // Return a new object — do not mutate the caller's model reference. return { ...model, compat: compat ? { ...compat, - supportsDeveloperRole: false, + supportsDeveloperRole: forcedDeveloperRole || false, + supportsUsageInStreaming: forcedUsageStreaming || false, } - : { supportsDeveloperRole: false }, + : { supportsDeveloperRole: false, supportsUsageInStreaming: false }, } as typeof model; }