import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";

const log = createSubsystemLogger("chutes-models");

/** Base URL used for all Chutes API requests issued from this module. */
export const CHUTES_BASE_URL = "https://llm.chutes.ai/v1";

/** Model id used as the default Chutes model. */
export const CHUTES_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-TEE";

/** Default model reference in `chutes/<model id>` form. */
export const CHUTES_DEFAULT_MODEL_REF = `chutes/${CHUTES_DEFAULT_MODEL_ID}`;

// Fallbacks applied to discovered entries whose context/output limits are missing or zero.
const CHUTES_DEFAULT_CONTEXT_WINDOW = 128000;
const CHUTES_DEFAULT_MAX_TOKENS = 4096;

/**
 * Static model catalog. `discoverChutesModels` returns it (mapped through
 * `buildChutesModelDefinition`) whenever live discovery is skipped or fails.
 */
export const CHUTES_MODEL_CATALOG: ModelDefinitionConfig[] = [
  {
    id: "Qwen/Qwen3-32B",
    name: "Qwen/Qwen3-32B",
    reasoning: true,
    input: ["text"],
    contextWindow: 40960,
    maxTokens: 40960,
    cost: { input: 0.08, output: 0.24, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/Mistral-Nemo-Instruct-2407",
    name: "unsloth/Mistral-Nemo-Instruct-2407",
    reasoning: false,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.02, output: 0.04, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-V3-0324-TEE",
    name: "deepseek-ai/DeepSeek-V3-0324-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 65536,
    cost: { input: 0.25, output: 1, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-235B-A22B-Instruct-2507-TEE",
    name: "Qwen/Qwen3-235B-A22B-Instruct-2507-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 65536,
    cost: { input: 0.08, output: 0.55, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "openai/gpt-oss-120b-TEE",
    name: "openai/gpt-oss-120b-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 65536,
    cost: { input: 0.05, output: 0.45, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "chutesai/Mistral-Small-3.1-24B-Instruct-2503",
    name: "chutesai/Mistral-Small-3.1-24B-Instruct-2503",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.03, output: 0.11, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-V3.2-TEE",
    name: "deepseek-ai/DeepSeek-V3.2-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 65536,
    cost: { input: 0.28, output: 0.42, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-4.7-TEE",
    name: "zai-org/GLM-4.7-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 65535,
    cost: { input: 0.4, output: 2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "moonshotai/Kimi-K2.5-TEE",
    name: "moonshotai/Kimi-K2.5-TEE",
    reasoning: true,
    input: ["text", "image"],
    contextWindow: 262144,
    maxTokens: 65535,
    cost: { input: 0.45, output: 2.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/gemma-3-27b-it",
    name: "unsloth/gemma-3-27b-it",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 128000,
    maxTokens: 65536,
    cost: { input: 0.04, output: 0.15, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "XiaomiMiMo/MiMo-V2-Flash-TEE",
    name: "XiaomiMiMo/MiMo-V2-Flash-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 65536,
    cost: { input: 0.09, output: 0.29, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "chutesai/Mistral-Small-3.2-24B-Instruct-2506",
    name: "chutesai/Mistral-Small-3.2-24B-Instruct-2506",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.06, output: 0.18, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-R1-0528-TEE",
    name: "deepseek-ai/DeepSeek-R1-0528-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 65536,
    cost: { input: 0.45, output: 2.15, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-5-TEE",
    name: "zai-org/GLM-5-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 65535,
    cost: { input: 0.95, output: 3.15, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-V3.1-TEE",
    name: "deepseek-ai/DeepSeek-V3.1-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 65536,
    cost: { input: 0.2, output: 0.8, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-V3.1-Terminus-TEE",
    name: "deepseek-ai/DeepSeek-V3.1-Terminus-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 65536,
    cost: { input: 0.23, output: 0.9, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/gemma-3-4b-it",
    name: "unsloth/gemma-3-4b-it",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 96000,
    maxTokens: 96000,
    cost: { input: 0.01, output: 0.03, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "MiniMaxAI/MiniMax-M2.5-TEE",
    name: "MiniMaxAI/MiniMax-M2.5-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 196608,
    maxTokens: 65536,
    cost: { input: 0.3, output: 1.1, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "tngtech/DeepSeek-TNG-R1T2-Chimera",
    name: "tngtech/DeepSeek-TNG-R1T2-Chimera",
    reasoning: true,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 163840,
    cost: { input: 0.25, output: 0.85, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-Coder-Next-TEE",
    name: "Qwen/Qwen3-Coder-Next-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 65536,
    cost: { input: 0.12, output: 0.75, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "NousResearch/Hermes-4-405B-FP8-TEE",
    name: "NousResearch/Hermes-4-405B-FP8-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 65536,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-V3",
    name: "deepseek-ai/DeepSeek-V3",
    reasoning: false,
    input: ["text"],
    contextWindow: 163840,
    maxTokens: 163840,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "openai/gpt-oss-20b",
    name: "openai/gpt-oss-20b",
    reasoning: true,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.04, output: 0.15, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/Llama-3.2-3B-Instruct",
    name: "unsloth/Llama-3.2-3B-Instruct",
    reasoning: false,
    input: ["text"],
    contextWindow: 128000,
    maxTokens: 4096,
    cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/Mistral-Small-24B-Instruct-2501",
    name: "unsloth/Mistral-Small-24B-Instruct-2501",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 32768,
    maxTokens: 32768,
    cost: { input: 0.07, output: 0.3, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-4.7-FP8",
    name: "zai-org/GLM-4.7-FP8",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 65535,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-4.6-TEE",
    name: "zai-org/GLM-4.6-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 65536,
    cost: { input: 0.4, output: 1.7, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3.5-397B-A17B-TEE",
    name: "Qwen/Qwen3.5-397B-A17B-TEE",
    reasoning: true,
    input: ["text", "image"],
    contextWindow: 262144,
    maxTokens: 65536,
    cost: { input: 0.55, output: 3.5, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen2.5-72B-Instruct",
    name: "Qwen/Qwen2.5-72B-Instruct",
    reasoning: false,
    input: ["text"],
    contextWindow: 32768,
    maxTokens: 32768,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "NousResearch/DeepHermes-3-Mistral-24B-Preview",
    name: "NousResearch/DeepHermes-3-Mistral-24B-Preview",
    reasoning: false,
    input: ["text"],
    contextWindow: 32768,
    maxTokens: 32768,
    cost: { input: 0.02, output: 0.1, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-Next-80B-A3B-Instruct",
    name: "Qwen/Qwen3-Next-80B-A3B-Instruct",
    reasoning: false,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.1, output: 0.8, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-4.6-FP8",
    name: "zai-org/GLM-4.6-FP8",
    reasoning: true,
    input: ["text"],
    contextWindow: 202752,
    maxTokens: 65535,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-235B-A22B-Thinking-2507",
    name: "Qwen/Qwen3-235B-A22B-Thinking-2507",
    reasoning: true,
    input: ["text"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.11, output: 0.6, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    name: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    reasoning: true,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.03, output: 0.11, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "tngtech/R1T2-Chimera-Speed",
    name: "tngtech/R1T2-Chimera-Speed",
    reasoning: true,
    input: ["text"],
    contextWindow: 131072,
    maxTokens: 65536,
    cost: { input: 0.22, output: 0.6, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "zai-org/GLM-4.6V",
    name: "zai-org/GLM-4.6V",
    reasoning: true,
    input: ["text", "image"],
    contextWindow: 131072,
    maxTokens: 65536,
    cost: { input: 0.3, output: 0.9, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen2.5-VL-32B-Instruct",
    name: "Qwen/Qwen2.5-VL-32B-Instruct",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 16384,
    maxTokens: 16384,
    cost: { input: 0.05, output: 0.22, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-VL-235B-A22B-Instruct",
    name: "Qwen/Qwen3-VL-235B-A22B-Instruct",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 262144,
    maxTokens: 262144,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-14B",
    name: "Qwen/Qwen3-14B",
    reasoning: true,
    input: ["text"],
    contextWindow: 40960,
    maxTokens: 40960,
    cost: { input: 0.05, output: 0.22, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen2.5-Coder-32B-Instruct",
    name: "Qwen/Qwen2.5-Coder-32B-Instruct",
    reasoning: false,
    input: ["text"],
    contextWindow: 32768,
    maxTokens: 32768,
    cost: { input: 0.03, output: 0.11, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3-30B-A3B",
    name: "Qwen/Qwen3-30B-A3B",
    reasoning: true,
    input: ["text"],
    contextWindow: 40960,
    maxTokens: 40960,
    cost: { input: 0.06, output: 0.22, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/gemma-3-12b-it",
    name: "unsloth/gemma-3-12b-it",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.03, output: 0.1, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "unsloth/Llama-3.2-1B-Instruct",
    name: "unsloth/Llama-3.2-1B-Instruct",
    reasoning: false,
    input: ["text"],
    contextWindow: 128000,
    maxTokens: 4096,
    cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16-TEE",
    name: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16-TEE",
    reasoning: true,
    input: ["text"],
    contextWindow: 128000,
    maxTokens: 4096,
    cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "NousResearch/Hermes-4-14B",
    name: "NousResearch/Hermes-4-14B",
    reasoning: true,
    input: ["text"],
    contextWindow: 40960,
    maxTokens: 40960,
    cost: { input: 0.01, output: 0.05, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "Qwen/Qwen3Guard-Gen-0.6B",
    name: "Qwen/Qwen3Guard-Gen-0.6B",
    reasoning: false,
    input: ["text"],
    contextWindow: 128000,
    maxTokens: 4096,
    cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
  },
  {
    id: "rednote-hilab/dots.ocr",
    name: "rednote-hilab/dots.ocr",
    reasoning: false,
    input: ["text", "image"],
    contextWindow: 131072,
    maxTokens: 131072,
    cost: { input: 0.01, output: 0.01, cacheRead: 0, cacheWrite: 0 },
  },
];

/**
 * Returns a copy of a catalog entry with `compat.supportsUsageInStreaming`
 * set to `false`. Any `compat` object already on the entry is replaced.
 *
 * @param model - Entry taken from `CHUTES_MODEL_CATALOG`.
 * @returns A new model definition; the input object is not mutated.
 */
export function buildChutesModelDefinition(
  model: (typeof CHUTES_MODEL_CATALOG)[number],
): ModelDefinitionConfig {
  return {
    ...model,
    compat: {
      supportsUsageInStreaming: false,
    },
  };
}

/** Fields this module reads from one element of the `/models` response `data` array. */
interface ChutesModelEntry {
  id: string;
  name?: string;
  supported_features?: string[];
  input_modalities?: string[];
  context_length?: number;
  max_output_length?: number;
  pricing?: {
    prompt?: number;
    completion?: number;
  };
  [key: string]: unknown;
}

/** Envelope of the `/models` response; only `data` is consumed. */
interface OpenAIListModelsResponse {
  data?: ChutesModelEntry[];
}

// Cached discovery results expire after five minutes.
const CACHE_TTL = 5 * 60 * 1000;
// Upper bound on the number of distinct cache keys (tokens) kept at once.
const CACHE_MAX_ENTRIES = 100;

/** A cached discovery result plus the timestamp (ms since epoch) at which it was stored. */
interface CacheEntry {
  models: ModelDefinitionConfig[];
  time: number;
}

// Keyed by the trimmed access token; the empty string is the unauthenticated key.
const modelCache = new Map<string, CacheEntry>();

/**
 * Removes every cache entry whose age is at least `CACHE_TTL`.
 *
 * @param now - Reference time in milliseconds; defaults to `Date.now()`.
 */
function pruneExpiredCacheEntries(now: number = Date.now()): void {
  for (const [key, entry] of modelCache.entries()) {
    if (now - entry.time >= CACHE_TTL) {
      modelCache.delete(key);
    }
  }
}

/**
 * Stores `models` under `tokenKey` and returns the same array.
 *
 * Expired entries are pruned first. If the key is new and the cache is still
 * at `CACHE_MAX_ENTRIES`, the first key in Map iteration order (the earliest
 * inserted one) is evicted to make room.
 *
 * @param tokenKey - Cache key (trimmed access token, or `""`).
 * @param models - Model list to cache.
 * @returns The `models` argument, unchanged.
 */
function cacheAndReturn(
  tokenKey: string,
  models: ModelDefinitionConfig[],
): ModelDefinitionConfig[] {
  const now = Date.now();
  pruneExpiredCacheEntries(now);

  if (!modelCache.has(tokenKey) && modelCache.size >= CACHE_MAX_ENTRIES) {
    const oldest = modelCache.keys().next();
    if (!oldest.done) {
      modelCache.delete(oldest.value);
    }
  }

  modelCache.set(tokenKey, { models, time: now });
  return models;
}

/**
 * Fetches the model list from `${CHUTES_BASE_URL}/models` and converts it to
 * model definitions, falling back to the static catalog on any failure.
 *
 * - A non-expired cache entry for the trimmed token is returned without a request.
 * - When `NODE_ENV` is `"test"` or `VITEST` is `"true"`, the static catalog is
 *   returned immediately and nothing is cached.
 * - A 401 response to an authenticated request triggers one retry without the
 *   `Authorization` header; that result is cached under the empty key.
 * - Non-OK responses, an empty or missing `data` array, and thrown errors all
 *   yield (and cache) the static catalog. This function does not reject for
 *   errors raised inside the fetch/parse path.
 *
 * @param accessToken - Optional bearer token; surrounding whitespace is ignored.
 * @returns Discovered model definitions, or the static catalog as a fallback.
 */
export async function discoverChutesModels(
  accessToken?: string,
): Promise<ModelDefinitionConfig[]> {
  const trimmedKey = accessToken?.trim() ?? "";

  const now = Date.now();
  pruneExpiredCacheEntries(now);
  const cached = modelCache.get(trimmedKey);
  if (cached) {
    return cached.models;
  }

  if (process.env.NODE_ENV === "test" || process.env.VITEST === "true") {
    return CHUTES_MODEL_CATALOG.map(buildChutesModelDefinition);
  }

  // Key the result is cached under; reset to "" when the request is retried without auth.
  let effectiveKey = trimmedKey;
  const staticCatalog = () =>
    cacheAndReturn(effectiveKey, CHUTES_MODEL_CATALOG.map(buildChutesModelDefinition));

  const headers: Record<string, string> = {};
  if (trimmedKey) {
    headers.Authorization = `Bearer ${trimmedKey}`;
  }

  try {
    let response = await fetch(`${CHUTES_BASE_URL}/models`, {
      signal: AbortSignal.timeout(10_000),
      headers,
    });

    // The token was rejected: retry once without it and cache under the unauthenticated key.
    if (response.status === 401 && trimmedKey) {
      effectiveKey = "";
      response = await fetch(`${CHUTES_BASE_URL}/models`, {
        signal: AbortSignal.timeout(10_000),
      });
    }

    if (!response.ok) {
      // 401 and 503 fall back without a warning; every other status is logged.
      if (response.status !== 401 && response.status !== 503) {
        log.warn(`GET /v1/models failed: HTTP ${response.status}, using static catalog`);
      }
      return staticCatalog();
    }

    const body = (await response.json()) as OpenAIListModelsResponse;
    const data = body?.data;
    if (!Array.isArray(data) || data.length === 0) {
      log.warn("No models in response, using static catalog");
      return staticCatalog();
    }

    const seen = new Set<string>();
    const models: ModelDefinitionConfig[] = [];

    for (const entry of data) {
      const id = typeof entry?.id === "string" ? entry.id.trim() : "";
      // Skip entries without a usable id and keep only the first occurrence of each id.
      if (!id || seen.has(id)) {
        continue;
      }
      seen.add(id);

      // Reasoning is taken from the advertised feature list, or else from id substrings.
      const lowerId = id.toLowerCase();
      const isReasoning =
        entry.supported_features?.includes("reasoning") ||
        lowerId.includes("r1") ||
        lowerId.includes("thinking") ||
        lowerId.includes("reason") ||
        lowerId.includes("tee");

      // Only "text" and "image" modalities are kept; a missing list is treated as ["text"].
      const input: Array<"text" | "image"> = (entry.input_modalities || ["text"]).filter(
        (i): i is "text" | "image" => i === "text" || i === "image",
      );

      models.push({
        id,
        name: id,
        reasoning: isReasoning,
        input,
        cost: {
          input: entry.pricing?.prompt || 0,
          output: entry.pricing?.completion || 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        // `||` is deliberate here: a reported limit of 0 also falls back to the default.
        contextWindow: entry.context_length || CHUTES_DEFAULT_CONTEXT_WINDOW,
        maxTokens: entry.max_output_length || CHUTES_DEFAULT_MAX_TOKENS,
        compat: {
          supportsUsageInStreaming: false,
        },
      });
    }

    return cacheAndReturn(
      effectiveKey,
      models.length > 0 ? models : CHUTES_MODEL_CATALOG.map(buildChutesModelDefinition),
    );
  } catch (error) {
    log.warn(`Discovery failed: ${String(error)}, using static catalog`);
    return staticCatalog();
  }
}