diff --git a/src/agents/tools/image-tool.test.ts b/src/agents/tools/image-tool.test.ts index 149b1aeb9d6..af4402005bb 100644 --- a/src/agents/tools/image-tool.test.ts +++ b/src/agents/tools/image-tool.test.ts @@ -302,6 +302,14 @@ const moonshotProvider = { function installImageUnderstandingProviderStubs(...providers: MediaUnderstandingProvider[]) { imageProviderHarness.setProviders(providers); + __testing.setProviderDepsForTest({ + buildProviderRegistry: (overrides?: Record) => + imageProviderHarness.buildProviderRegistry(overrides), + getMediaUnderstandingProvider: ( + id: string, + registry: Map, + ) => imageProviderHarness.getMediaUnderstandingProvider(id, registry), + }); } function makeModelDefinition(id: string, input: Array<"text" | "image">): ModelDefinitionConfig { @@ -410,6 +418,7 @@ describe("image tool implicit imageModel config", () => { afterEach(() => { imageProviderHarness.reset(); + __testing.setProviderDepsForTest(); }); it("stays disabled without auth when no pairing is possible", async () => { @@ -860,6 +869,7 @@ describe("image tool MiniMax VLM routing", () => { afterEach(() => { imageProviderHarness.reset(); + __testing.setProviderDepsForTest(); }); async function createMinimaxVlmFixture(baseResp: { status_code: number; status_msg: string }) { diff --git a/src/agents/tools/image-tool.ts b/src/agents/tools/image-tool.ts index 0797db4cdc8..c344bc6b1e9 100644 --- a/src/agents/tools/image-tool.ts +++ b/src/agents/tools/image-tool.ts @@ -3,6 +3,7 @@ import type { OpenClawConfig } from "../../config/config.js"; import { getMediaUnderstandingProvider } from "../../media-understanding/provider-registry.js"; import { buildProviderRegistry } from "../../media-understanding/runner.js"; import { loadWebMedia } from "../../media/web-media.js"; +import type { MediaUnderstandingProvider } from "../../plugin-sdk/media-understanding.js"; import { resolveUserPath } from "../../utils.js"; import { isMinimaxVlmProvider } from "../minimax-vlm.js"; import { @@ -38,10 +39,24 @@ const ANTHROPIC_IMAGE_PRIMARY = "anthropic/claude-opus-4-6"; const ANTHROPIC_IMAGE_FALLBACK = "anthropic/claude-opus-4-5"; const DEFAULT_MAX_IMAGES = 20; +const imageToolProviderDeps = { + buildProviderRegistry, + getMediaUnderstandingProvider, +}; + export const __testing = { decodeDataUrl, coerceImageAssistantText, resolveImageToolMaxTokens, + setProviderDepsForTest(overrides?: { + buildProviderRegistry?: typeof buildProviderRegistry; + getMediaUnderstandingProvider?: typeof getMediaUnderstandingProvider; + }) { + imageToolProviderDeps.buildProviderRegistry = + overrides?.buildProviderRegistry ?? buildProviderRegistry; + imageToolProviderDeps.getMediaUnderstandingProvider = + overrides?.getMediaUnderstandingProvider ?? getMediaUnderstandingProvider; + }, } as const; function resolveImageToolMaxTokens(modelMaxTokens: number | undefined, requestedMaxTokens = 4096) { @@ -139,13 +154,16 @@ async function runImagePrompt(params: { }> { const effectiveCfg = applyImageModelConfigDefaults(params.cfg, params.imageModelConfig); const providerCfg: OpenClawConfig = effectiveCfg ?? {}; - const providerRegistry = buildProviderRegistry(undefined, providerCfg); + const providerRegistry = imageToolProviderDeps.buildProviderRegistry(undefined, providerCfg); const result = await runWithImageModelFallback({ cfg: effectiveCfg, modelOverride: params.modelOverride, run: async (provider, modelId) => { - const imageProvider = getMediaUnderstandingProvider(provider, providerRegistry); + const imageProvider = imageToolProviderDeps.getMediaUnderstandingProvider( + provider, + providerRegistry as Map, + ); if (!imageProvider) { throw new Error(`No media-understanding provider registered for ${provider}`); }