From c1f828e7b2aa25fdaa763271cf07c312043723e2 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sat, 14 Feb 2026 00:57:42 +0100
Subject: [PATCH] fix: harden native ollama stream routing + options
 (openclaw#11853)

Thanks @BrokenFinger98.
---
 CHANGELOG.md                                 |  1 +
 src/agents/ollama-stream.test.ts             | 59 +++++++++++++++++++-
 src/agents/ollama-stream.ts                  | 17 +++++-
 src/agents/pi-embedded-runner/run/attempt.ts |  9 ++-
 4 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36164bb7da6..f00ddfbf888 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -69,6 +69,7 @@ Docs: https://docs.openclaw.ai
 - Config: keep legacy audio transcription migration strict by rejecting non-string/unsafe command tokens while still migrating valid custom script executables. (#5042) Thanks @shayan919293.
 - Status/Sessions: stop clamping derived `totalTokens` to context-window size, keep prompt-token snapshots wired through session accounting, and surface context usage as unknown when fresh snapshot data is missing to avoid false 100% reports. (#15114) Thanks @echoVic.
 - Providers/MiniMax: switch implicit MiniMax API-key provider from `openai-completions` to `anthropic-messages` with the correct Anthropic-compatible base URL, fixing `invalid role: developer (2013)` errors on MiniMax M2.5. (#15275) Thanks @lailoo.
+- Ollama/Agents: use resolved model/provider base URLs for native `/api/chat` streaming (including aliased providers), normalize `/v1` endpoints, and forward abort + `maxTokens` stream options for reliable cancellation and token caps. (#11853) Thanks @BrokenFinger98.
 
 ## 2026.2.12
 
diff --git a/src/agents/ollama-stream.test.ts b/src/agents/ollama-stream.test.ts
index 3d46eadcc21..3517d01a06d 100644
--- a/src/agents/ollama-stream.test.ts
+++ b/src/agents/ollama-stream.test.ts
@@ -1,5 +1,6 @@
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import {
+  createOllamaStreamFn,
   convertToOllamaMessages,
   buildAssistantMessage,
   parseNdjsonStream,
@@ -228,3 +229,59 @@ describe("parseNdjsonStream", () => {
     expect(chunks[2].message.tool_calls).toBeUndefined();
   });
 });
+
+describe("createOllamaStreamFn", () => {
+  it("normalizes /v1 baseUrl and maps maxTokens + signal", async () => {
+    const originalFetch = globalThis.fetch;
+    const fetchMock = vi.fn(async () => {
+      const payload = [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ].join("\n");
+      return new Response(`${payload}\n`, {
+        status: 200,
+        headers: { "Content-Type": "application/x-ndjson" },
+      });
+    });
+    globalThis.fetch = fetchMock as unknown as typeof fetch;
+
+    try {
+      const streamFn = createOllamaStreamFn("http://ollama-host:11434/v1/");
+      const signal = new AbortController().signal;
+      const stream = streamFn(
+        {
+          id: "qwen3:32b",
+          api: "ollama",
+          provider: "custom-ollama",
+          contextWindow: 131072,
+        } as unknown as Parameters<ReturnType<typeof createOllamaStreamFn>>[0],
+        {
+          messages: [{ role: "user", content: "hello" }],
+        } as unknown as Parameters<ReturnType<typeof createOllamaStreamFn>>[1],
+        {
+          maxTokens: 123,
+          signal,
+        } as unknown as Parameters<ReturnType<typeof createOllamaStreamFn>>[2],
+      );
+
+      const events = [];
+      for await (const event of stream) {
+        events.push(event);
+      }
+      expect(events.at(-1)?.type).toBe("done");
+
+      expect(fetchMock).toHaveBeenCalledTimes(1);
+      const [url, requestInit] = fetchMock.mock.calls[0] as unknown as [string, RequestInit];
+      expect(url).toBe("http://ollama-host:11434/api/chat");
+      expect(requestInit.signal).toBe(signal);
+
+      const requestBody = JSON.parse(String(requestInit.body)) as {
+        options: { num_ctx?: number; num_predict?: number };
+      };
+      expect(requestBody.options.num_ctx).toBe(131072);
+      expect(requestBody.options.num_predict).toBe(123);
+    } finally {
+      globalThis.fetch = originalFetch;
+    }
+  });
+});
diff --git a/src/agents/ollama-stream.ts b/src/agents/ollama-stream.ts
index 74ed20751e5..76029e67cea 100644
--- a/src/agents/ollama-stream.ts
+++ b/src/agents/ollama-stream.ts
@@ -10,6 +10,8 @@ import type {
 import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
 import { randomUUID } from "node:crypto";
 
+export const OLLAMA_NATIVE_BASE_URL = "http://127.0.0.1:11434";
+
 // ── Ollama /api/chat request types ──────────────────────────────────────────
 
 interface OllamaChatRequest {
@@ -273,8 +275,15 @@ export async function* parseNdjsonStream(
 
 // ── Main StreamFn factory ───────────────────────────────────────────────────
 
+function resolveOllamaChatUrl(baseUrl: string): string {
+  const trimmed = baseUrl.trim().replace(/\/+$/, "");
+  const normalizedBase = trimmed.replace(/\/v1$/i, "");
+  const apiBase = normalizedBase || OLLAMA_NATIVE_BASE_URL;
+  return `${apiBase}/api/chat`;
+}
+
 export function createOllamaStreamFn(baseUrl: string): StreamFn {
-  const chatUrl = `${baseUrl.replace(/\/+$/, "")}/api/chat`;
+  const chatUrl = resolveOllamaChatUrl(baseUrl);
 
   return (model, context, options) => {
     const stream = createAssistantMessageEventStream();
@@ -294,6 +303,9 @@ export function createOllamaStreamFn(baseUrl: string): StreamFn {
     if (typeof options?.temperature === "number") {
       ollamaOptions.temperature = options.temperature;
     }
+    if (typeof options?.maxTokens === "number") {
+      ollamaOptions.num_predict = options.maxTokens;
+    }
 
     const body: OllamaChatRequest = {
       model: model.id,
@@ -315,6 +327,7 @@ export function createOllamaStreamFn(baseUrl: string): StreamFn {
         method: "POST",
         headers,
         body: JSON.stringify(body),
+        signal: options?.signal,
       });
 
       if (!response.ok) {
@@ -404,5 +417,3 @@ export function createOllamaStreamFn(baseUrl: string): StreamFn {
     return stream;
   };
 }
-
-export const OLLAMA_NATIVE_BASE_URL = "http://127.0.0.1:11434";
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index bc869c9c3bf..7b91249a4bb 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -588,8 +588,13 @@ export async function runEmbeddedAttempt(
   // Ollama native API: bypass SDK's streamSimple and use direct /api/chat calls
   // for reliable streaming + tool calling support (#11828).
   if (params.model.api === "ollama") {
-    const providerConfig = params.config?.models?.providers?.ollama;
-    const ollamaBaseUrl = providerConfig?.baseUrl ?? OLLAMA_NATIVE_BASE_URL;
+    // Use the resolved model baseUrl first so custom provider aliases work.
+    const providerConfig = params.config?.models?.providers?.[params.model.provider];
+    const modelBaseUrl =
+      typeof params.model.baseUrl === "string" ? params.model.baseUrl.trim() : "";
+    const providerBaseUrl =
+      typeof providerConfig?.baseUrl === "string" ? providerConfig.baseUrl.trim() : "";
+    const ollamaBaseUrl = modelBaseUrl || providerBaseUrl || OLLAMA_NATIVE_BASE_URL;
     activeSession.agent.streamFn = createOllamaStreamFn(ollamaBaseUrl);
   } else {
     // Force a stable streamFn reference so vitest can reliably mock @mariozechner/pi-ai.
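
Note: the routing fix centers on resolveOllamaChatUrl, which trims whitespace
and trailing slashes, strips an OpenAI-compatible `/v1` suffix, and falls back
to the local default before appending the native `/api/chat` path. A minimal
standalone TypeScript sketch of that behavior (the helper body is copied from
the patch above; the sample URLs are illustrative, not fixtures from the test
suite):

    const OLLAMA_NATIVE_BASE_URL = "http://127.0.0.1:11434";

    function resolveOllamaChatUrl(baseUrl: string): string {
      const trimmed = baseUrl.trim().replace(/\/+$/, "");       // drop trailing slashes
      const normalizedBase = trimmed.replace(/\/v1$/i, "");     // drop OpenAI-compat /v1
      const apiBase = normalizedBase || OLLAMA_NATIVE_BASE_URL; // empty -> local default
      return `${apiBase}/api/chat`;
    }

    resolveOllamaChatUrl("http://ollama-host:11434/v1/"); // -> http://ollama-host:11434/api/chat
    resolveOllamaChatUrl("http://ollama-host:11434");     // -> http://ollama-host:11434/api/chat
    resolveOllamaChatUrl("   ");                          // -> http://127.0.0.1:11434/api/chat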
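
Note: the attempt.ts change also establishes a precedence chain for the native
base URL: an explicit model-level baseUrl wins (so custom provider aliases
route correctly), then the aliased provider's configured baseUrl, then the
local default. A condensed sketch of that selection logic under simplified
config shapes (pickOllamaBaseUrl, ProviderConfig, and ModelRef are hypothetical
names for illustration, not identifiers from the codebase):

    interface ProviderConfig {
      baseUrl?: string;
    }

    interface ModelRef {
      provider: string;
      baseUrl?: unknown; // unvalidated config input, hence the typeof guard
    }

    function pickOllamaBaseUrl(
      model: ModelRef,
      providers: Record<string, ProviderConfig | undefined>,
    ): string {
      // Model-level baseUrl first, so aliased providers route correctly.
      const modelBaseUrl = typeof model.baseUrl === "string" ? model.baseUrl.trim() : "";
      // Look up by model.provider instead of a hardcoded "ollama" key.
      const providerBaseUrl = providers[model.provider]?.baseUrl?.trim() ?? "";
      return modelBaseUrl || providerBaseUrl || "http://127.0.0.1:11434";
    }

    pickOllamaBaseUrl(
      { provider: "custom-ollama", baseUrl: "http://ollama-host:11434/v1" },
      { "custom-ollama": { baseUrl: "http://fallback:11434" } },
    ); // -> "http://ollama-host:11434/v1" (then normalized by resolveOllamaChatUrl)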