From 3070fafec14867a305f6ac729927b41cc9af4be3 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 6 Mar 2026 19:31:07 -0500 Subject: [PATCH] fix(venice): switch default model to kimi-k2-5 (#38423) * Docs: refresh Venice default model guidance * Venice: switch default model to Kimi K2.5 * Changelog: credit Venice default refresh --- CHANGELOG.md | 1 + docs/providers/venice.md | 141 +++++++++++++---------- src/agents/venice-models.ts | 2 +- src/commands/onboard-auth.config-core.ts | 2 +- 4 files changed, 81 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40b9027f7f9..993e6ffe3a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -216,6 +216,7 @@ Docs: https://docs.openclaw.ai - Feishu/media downloads: drop invalid timeout fields from SDK method calls now that client-level `httpTimeoutMs` applies to requests. (#38267) Thanks @ant1eicher and @thewilloftheshadow. - PI embedded runner/Feishu docs: propagate sender identity into embedded attempts so Feishu doc auto-grant restores requester access for embedded-runner executions. (#32915) thanks @cszhouwei. - Agents/usage normalization: normalize missing or partial assistant usage snapshots before compaction accounting so `openclaw agent --json` no longer crashes when provider payloads omit `totalTokens` or related usage fields. (#34977) thanks @sp-hk2ldn. +- Venice/default model refresh: switch the built-in Venice default to `kimi-k2-5`, update onboarding aliasing, and refresh Venice provider docs/recommendations to match the current private and anonymized catalog. (from #12964) Fixes #20156. Thanks @sabrinaaquino and @vincentkoc. ## 2026.3.2 diff --git a/docs/providers/venice.md b/docs/providers/venice.md index 6517e9909b2..520cf22d82b 100644 --- a/docs/providers/venice.md +++ b/docs/providers/venice.md @@ -23,16 +23,16 @@ Venice AI provides privacy-focused AI inference with support for uncensored mode Venice offers two privacy levels — understanding this is key to choosing your model: -| Mode | Description | Models | -| -------------- | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- | -| **Private** | Fully private. Prompts/responses are **never stored or logged**. Ephemeral. | Llama, Qwen, DeepSeek, Venice Uncensored, etc. | -| **Anonymized** | Proxied through Venice with metadata stripped. The underlying provider (OpenAI, Anthropic) sees anonymized requests. | Claude, GPT, Gemini, Grok, Kimi, MiniMax | +| Mode | Description | Models | +| -------------- | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- | +| **Private** | Fully private. Prompts/responses are **never stored or logged**. Ephemeral. | Llama, Qwen, DeepSeek, Kimi, MiniMax, Venice Uncensored, etc. | +| **Anonymized** | Proxied through Venice with metadata stripped. The underlying provider (OpenAI, Anthropic, Google, xAI) sees anonymized requests. | Claude, GPT, Gemini, Grok | ## Features - **Privacy-focused**: Choose between "private" (fully private) and "anonymized" (proxied) modes - **Uncensored models**: Access to models without content restrictions -- **Major model access**: Use Claude, GPT-5.2, Gemini, Grok via Venice's anonymized proxy +- **Major model access**: Use Claude, GPT, Gemini, and Grok via Venice's anonymized proxy - **OpenAI-compatible API**: Standard `/v1` endpoints for easy integration - **Streaming**: ✅ Supported on all models - **Function calling**: ✅ Supported on select models (check model capabilities) @@ -79,23 +79,23 @@ openclaw onboard --non-interactive \ ### 3. Verify Setup ```bash -openclaw agent --model venice/llama-3.3-70b --message "Hello, are you working?" +openclaw agent --model venice/kimi-k2-5 --message "Hello, are you working?" ``` ## Model Selection After setup, OpenClaw shows all available Venice models. Pick based on your needs: -- **Default model**: `venice/llama-3.3-70b` for private, balanced performance. -- **High-capability option**: `venice/claude-opus-45` for hard jobs. +- **Default model**: `venice/kimi-k2-5` for strong private reasoning plus vision. +- **High-capability option**: `venice/claude-opus-4-6` for the strongest anonymized Venice path. - **Privacy**: Choose "private" models for fully private inference. - **Capability**: Choose "anonymized" models to access Claude, GPT, Gemini via Venice's proxy. Change your default model anytime: ```bash -openclaw models set venice/claude-opus-45 -openclaw models set venice/llama-3.3-70b +openclaw models set venice/kimi-k2-5 +openclaw models set venice/claude-opus-4-6 ``` List all available models: @@ -112,53 +112,68 @@ openclaw models list | grep venice ## Which Model Should I Use? -| Use Case | Recommended Model | Why | -| ---------------------------- | -------------------------------- | ----------------------------------- | -| **General chat** | `llama-3.3-70b` | Good all-around, fully private | -| **High-capability option** | `claude-opus-45` | Higher quality for hard tasks | -| **Privacy + Claude quality** | `claude-opus-45` | Best reasoning via anonymized proxy | -| **Coding** | `qwen3-coder-480b-a35b-instruct` | Code-optimized, 262k context | -| **Vision tasks** | `qwen3-vl-235b-a22b` | Best private vision model | -| **Uncensored** | `venice-uncensored` | No content restrictions | -| **Fast + cheap** | `qwen3-4b` | Lightweight, still capable | -| **Complex reasoning** | `deepseek-v3.2` | Strong reasoning, private | +| Use Case | Recommended Model | Why | +| -------------------------- | -------------------------------- | -------------------------------------------- | +| **General chat (default)** | `kimi-k2-5` | Strong private reasoning plus vision | +| **Best overall quality** | `claude-opus-4-6` | Strongest anonymized Venice option | +| **Privacy + coding** | `qwen3-coder-480b-a35b-instruct` | Private coding model with large context | +| **Private vision** | `kimi-k2-5` | Vision support without leaving private mode | +| **Fast + cheap** | `qwen3-4b` | Lightweight reasoning model | +| **Complex private tasks** | `deepseek-v3.2` | Strong reasoning, but no Venice tool support | +| **Uncensored** | `venice-uncensored` | No content restrictions | -## Available Models (25 Total) +## Available Models (41 Total) -### Private Models (15) — Fully Private, No Logging +### Private Models (26) — Fully Private, No Logging -| Model ID | Name | Context (tokens) | Features | -| -------------------------------- | ----------------------- | ---------------- | ----------------------- | -| `llama-3.3-70b` | Llama 3.3 70B | 131k | General | -| `llama-3.2-3b` | Llama 3.2 3B | 131k | Fast, lightweight | -| `hermes-3-llama-3.1-405b` | Hermes 3 Llama 3.1 405B | 131k | Complex tasks | -| `qwen3-235b-a22b-thinking-2507` | Qwen3 235B Thinking | 131k | Reasoning | -| `qwen3-235b-a22b-instruct-2507` | Qwen3 235B Instruct | 131k | General | -| `qwen3-coder-480b-a35b-instruct` | Qwen3 Coder 480B | 262k | Code | -| `qwen3-next-80b` | Qwen3 Next 80B | 262k | General | -| `qwen3-vl-235b-a22b` | Qwen3 VL 235B | 262k | Vision | -| `qwen3-4b` | Venice Small (Qwen3 4B) | 32k | Fast, reasoning | -| `deepseek-v3.2` | DeepSeek V3.2 | 163k | Reasoning | -| `venice-uncensored` | Venice Uncensored | 32k | Uncensored | -| `mistral-31-24b` | Venice Medium (Mistral) | 131k | Vision | -| `google-gemma-3-27b-it` | Gemma 3 27B Instruct | 202k | Vision | -| `openai-gpt-oss-120b` | OpenAI GPT OSS 120B | 131k | General | -| `zai-org-glm-4.7` | GLM 4.7 | 202k | Reasoning, multilingual | +| Model ID | Name | Context | Features | +| -------------------------------------- | ----------------------------------- | ------- | -------------------------- | +| `kimi-k2-5` | Kimi K2.5 | 256k | Default, reasoning, vision | +| `kimi-k2-thinking` | Kimi K2 Thinking | 256k | Reasoning | +| `llama-3.3-70b` | Llama 3.3 70B | 128k | General | +| `llama-3.2-3b` | Llama 3.2 3B | 128k | General | +| `hermes-3-llama-3.1-405b` | Hermes 3 Llama 3.1 405B | 128k | General, tools disabled | +| `qwen3-235b-a22b-thinking-2507` | Qwen3 235B Thinking | 128k | Reasoning | +| `qwen3-235b-a22b-instruct-2507` | Qwen3 235B Instruct | 128k | General | +| `qwen3-coder-480b-a35b-instruct` | Qwen3 Coder 480B | 256k | Coding | +| `qwen3-coder-480b-a35b-instruct-turbo` | Qwen3 Coder 480B Turbo | 256k | Coding | +| `qwen3-5-35b-a3b` | Qwen3.5 35B A3B | 256k | Reasoning, vision | +| `qwen3-next-80b` | Qwen3 Next 80B | 256k | General | +| `qwen3-vl-235b-a22b` | Qwen3 VL 235B (Vision) | 256k | Vision | +| `qwen3-4b` | Venice Small (Qwen3 4B) | 32k | Fast, reasoning | +| `deepseek-v3.2` | DeepSeek V3.2 | 160k | Reasoning, tools disabled | +| `venice-uncensored` | Venice Uncensored (Dolphin-Mistral) | 32k | Uncensored, tools disabled | +| `mistral-31-24b` | Venice Medium (Mistral) | 128k | Vision | +| `google-gemma-3-27b-it` | Google Gemma 3 27B Instruct | 198k | Vision | +| `openai-gpt-oss-120b` | OpenAI GPT OSS 120B | 128k | General | +| `nvidia-nemotron-3-nano-30b-a3b` | NVIDIA Nemotron 3 Nano 30B | 128k | General | +| `olafangensan-glm-4.7-flash-heretic` | GLM 4.7 Flash Heretic | 128k | Reasoning | +| `zai-org-glm-4.6` | GLM 4.6 | 198k | General | +| `zai-org-glm-4.7` | GLM 4.7 | 198k | Reasoning | +| `zai-org-glm-4.7-flash` | GLM 4.7 Flash | 128k | Reasoning | +| `zai-org-glm-5` | GLM 5 | 198k | Reasoning | +| `minimax-m21` | MiniMax M2.1 | 198k | Reasoning | +| `minimax-m25` | MiniMax M2.5 | 198k | Reasoning | -### Anonymized Models (10) — Via Venice Proxy +### Anonymized Models (15) — Via Venice Proxy -| Model ID | Original | Context (tokens) | Features | -| ------------------------ | ----------------- | ---------------- | ----------------- | -| `claude-opus-45` | Claude Opus 4.5 | 202k | Reasoning, vision | -| `claude-sonnet-45` | Claude Sonnet 4.5 | 202k | Reasoning, vision | -| `openai-gpt-52` | GPT-5.2 | 262k | Reasoning | -| `openai-gpt-52-codex` | GPT-5.2 Codex | 262k | Reasoning, vision | -| `gemini-3-pro-preview` | Gemini 3 Pro | 202k | Reasoning, vision | -| `gemini-3-flash-preview` | Gemini 3 Flash | 262k | Reasoning, vision | -| `grok-41-fast` | Grok 4.1 Fast | 262k | Reasoning, vision | -| `grok-code-fast-1` | Grok Code Fast 1 | 262k | Reasoning, code | -| `kimi-k2-thinking` | Kimi K2 Thinking | 262k | Reasoning | -| `minimax-m21` | MiniMax M2.5 | 202k | Reasoning | +| Model ID | Name | Context | Features | +| ------------------------------- | ------------------------------ | ------- | ------------------------- | +| `claude-opus-4-6` | Claude Opus 4.6 (via Venice) | 1M | Reasoning, vision | +| `claude-opus-4-5` | Claude Opus 4.5 (via Venice) | 198k | Reasoning, vision | +| `claude-sonnet-4-6` | Claude Sonnet 4.6 (via Venice) | 1M | Reasoning, vision | +| `claude-sonnet-4-5` | Claude Sonnet 4.5 (via Venice) | 198k | Reasoning, vision | +| `openai-gpt-54` | GPT-5.4 (via Venice) | 1M | Reasoning, vision | +| `openai-gpt-53-codex` | GPT-5.3 Codex (via Venice) | 400k | Reasoning, vision, coding | +| `openai-gpt-52` | GPT-5.2 (via Venice) | 256k | Reasoning | +| `openai-gpt-52-codex` | GPT-5.2 Codex (via Venice) | 256k | Reasoning, vision, coding | +| `openai-gpt-4o-2024-11-20` | GPT-4o (via Venice) | 128k | Vision | +| `openai-gpt-4o-mini-2024-07-18` | GPT-4o Mini (via Venice) | 128k | Vision | +| `gemini-3-1-pro-preview` | Gemini 3.1 Pro (via Venice) | 1M | Reasoning, vision | +| `gemini-3-pro-preview` | Gemini 3 Pro (via Venice) | 198k | Reasoning, vision | +| `gemini-3-flash-preview` | Gemini 3 Flash (via Venice) | 256k | Reasoning, vision | +| `grok-41-fast` | Grok 4.1 Fast (via Venice) | 1M | Reasoning, vision | +| `grok-code-fast-1` | Grok Code Fast 1 (via Venice) | 256k | Reasoning, coding | ## Model Discovery @@ -194,11 +209,11 @@ Venice uses a credit-based system. Check [venice.ai/pricing](https://venice.ai/p ## Usage Examples ```bash -# Use default private model -openclaw agent --model venice/llama-3.3-70b --message "Quick health check" +# Use the default private model +openclaw agent --model venice/kimi-k2-5 --message "Quick health check" -# Use Claude via Venice (anonymized) -openclaw agent --model venice/claude-opus-45 --message "Summarize this task" +# Use Claude Opus via Venice (anonymized) +openclaw agent --model venice/claude-opus-4-6 --message "Summarize this task" # Use uncensored model openclaw agent --model venice/venice-uncensored --message "Draft options" @@ -234,7 +249,7 @@ Venice API is at `https://api.venice.ai/api/v1`. Ensure your network allows HTTP ```json5 { env: { VENICE_API_KEY: "vapi_..." }, - agents: { defaults: { model: { primary: "venice/llama-3.3-70b" } } }, + agents: { defaults: { model: { primary: "venice/kimi-k2-5" } } }, models: { mode: "merge", providers: { @@ -244,13 +259,13 @@ Venice API is at `https://api.venice.ai/api/v1`. Ensure your network allows HTTP api: "openai-completions", models: [ { - id: "llama-3.3-70b", - name: "Llama 3.3 70B", - reasoning: false, - input: ["text"], + id: "kimi-k2-5", + name: "Kimi K2.5", + reasoning: true, + input: ["text", "image"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 65536, }, ], }, diff --git a/src/agents/venice-models.ts b/src/agents/venice-models.ts index 47dcf4d2f4d..2e6dae6bac9 100644 --- a/src/agents/venice-models.ts +++ b/src/agents/venice-models.ts @@ -5,7 +5,7 @@ import { createSubsystemLogger } from "../logging/subsystem.js"; const log = createSubsystemLogger("venice-models"); export const VENICE_BASE_URL = "https://api.venice.ai/api/v1"; -export const VENICE_DEFAULT_MODEL_ID = "llama-3.3-70b"; +export const VENICE_DEFAULT_MODEL_ID = "kimi-k2-5"; export const VENICE_DEFAULT_MODEL_REF = `venice/${VENICE_DEFAULT_MODEL_ID}`; // Venice uses credit-based pricing, not per-token costs. diff --git a/src/commands/onboard-auth.config-core.ts b/src/commands/onboard-auth.config-core.ts index 18d106c7d7f..103343d5914 100644 --- a/src/commands/onboard-auth.config-core.ts +++ b/src/commands/onboard-auth.config-core.ts @@ -305,7 +305,7 @@ export function applyVeniceProviderConfig(cfg: OpenClawConfig): OpenClawConfig { const models = { ...cfg.agents?.defaults?.models }; models[VENICE_DEFAULT_MODEL_REF] = { ...models[VENICE_DEFAULT_MODEL_REF], - alias: models[VENICE_DEFAULT_MODEL_REF]?.alias ?? "Llama 3.3 70B", + alias: models[VENICE_DEFAULT_MODEL_REF]?.alias ?? "Kimi K2.5", }; const veniceModels = VENICE_MODEL_CATALOG.map(buildVeniceModelDefinition);