fix: restore image-tool generic provider fallback (#54858) (thanks @MonkeyLeeT)

* Image tool: restore generic provider fallback

* Image tool: cover multi-image generic fallback

* test: tighten minimax-portal image fallback coverage

* fix: restore image-tool generic provider fallback (#54858) (thanks @MonkeyLeeT)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
Ted Li 2026-03-25 21:37:43 -07:00 committed by GitHub
parent 8efc6e001e
commit 76ff0d9298
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 158 additions and 49 deletions

View File

@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
- Telegram/pairing: ignore self-authored DM `message` updates so bot-pinned status cards and similar service updates do not trigger bogus pairing requests or re-enter inbound dispatch. (#54530) thanks @huntharo
- iMessage: stop leaking inline `[[reply_to:...]]` tags into delivered text by sending `reply_to` as RPC metadata and stripping stray directive tags from outbound messages. (#39512) Thanks @mvanhorn.
- Agents/embedded replies: surface mid-turn 429 and overload failures when embedded runs end without a user-visible reply, while preserving successful media-only replies that still use legacy `mediaUrl`. (#50930) Thanks @infichen.
- Agents/image tool: restore the generic image-runtime fallback when no provider-specific media-understanding provider is registered, so image analysis works again for providers like `openrouter` and `minimax-portal`. (#54858) Thanks @MonkeyLeeT.
- Agents/compaction: trigger timeout recovery compaction before retrying high-context LLM timeouts so embedded runs stop repeating oversized requests. (#46417) thanks @joeykrug.
- Microsoft Teams/config: accept the existing `welcomeCard`, `groupWelcomeCard`, `promptStarters`, and feedback/reflection keys in strict config validation so already-supported Teams runtime settings stop failing schema checks. (#54679) Thanks @gumclaw.
- CLI/plugins: make routed commands use the same auto-enabled bundled-channel snapshot as gateway startup, so configured bundled channels like Slack load without requiring a prior config rewrite. (#54809) Thanks @neeravmakwana.

View File

@@ -222,45 +222,46 @@ function stubMinimaxFetch(baseResp: { status_code: number; status_msg: string },
}
function stubOpenAiCompletionsOkFetch(text = "ok") {
const fetch = vi.fn().mockResolvedValue(
new Response(
new ReadableStream<Uint8Array>({
start(controller) {
const encoder = new TextEncoder();
const chunks = [
`data: ${JSON.stringify({
id: "chatcmpl-moonshot-test",
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: "kimi-k2.5",
choices: [
{
index: 0,
delta: { role: "assistant", content: text },
finish_reason: null,
},
],
})}\n\n`,
`data: ${JSON.stringify({
id: "chatcmpl-moonshot-test",
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: "kimi-k2.5",
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
})}\n\n`,
"data: [DONE]\n\n",
];
for (const chunk of chunks) {
controller.enqueue(encoder.encode(chunk));
}
controller.close();
const fetch = vi.fn().mockImplementation(
async () =>
new Response(
new ReadableStream<Uint8Array>({
start(controller) {
const encoder = new TextEncoder();
const chunks = [
`data: ${JSON.stringify({
id: "chatcmpl-moonshot-test",
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: "kimi-k2.5",
choices: [
{
index: 0,
delta: { role: "assistant", content: text },
finish_reason: null,
},
],
})}\n\n`,
`data: ${JSON.stringify({
id: "chatcmpl-moonshot-test",
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: "kimi-k2.5",
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
})}\n\n`,
"data: [DONE]\n\n",
];
for (const chunk of chunks) {
controller.enqueue(encoder.encode(chunk));
}
controller.close();
},
}),
{
status: 200,
headers: { "content-type": "text/event-stream" },
},
}),
{
status: 200,
headers: { "content-type": "text/event-stream" },
},
),
),
);
global.fetch = withFetchPreconnect(fetch);
return fetch;
@@ -705,6 +706,110 @@ describe("image tool implicit imageModel config", () => {
});
});
it("falls back to the generic image runtime when openrouter has no media provider registration", async () => {
  await withTempAgentDir(async (agentDir) => {
    // openrouter has no media-understanding provider registered, so the tool
    // must route the request through the generic image runtime fallback.
    const fetchMock = stubOpenAiCompletionsOkFetch("ok openrouter");
    const config: OpenClawConfig = {
      agents: {
        defaults: {
          model: { primary: "openrouter/google/gemini-2.5-flash-lite" },
          imageModel: { primary: "openrouter/google/gemini-2.5-flash-lite" },
        },
      },
      models: {
        providers: {
          openrouter: {
            api: "openai-completions",
            baseUrl: "https://openrouter.ai/api/v1",
            apiKey: "openrouter-test",
            models: [makeModelDefinition("google/gemini-2.5-flash-lite", ["text", "image"])],
          },
        },
      },
    };
    const imageTool = requireImageTool(createImageTool({ config, agentDir }));
    const execResult = await imageTool.execute("t1", {
      prompt: "Describe the image.",
      image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
    });
    // Exactly one upstream call, and the streamed text surfaces in the reply.
    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(execResult.content).toEqual(
      expect.arrayContaining([expect.objectContaining({ type: "text", text: "ok openrouter" })]),
    );
  });
});
it("falls back to the generic multi-image runtime when openrouter has no media provider registration", async () => {
  await withTempAgentDir(async (agentDir) => {
    // Two images exercise the multi-image branch of the generic fallback.
    const fetchMock = stubOpenAiCompletionsOkFetch("ok multi");
    const config: OpenClawConfig = {
      agents: {
        defaults: {
          model: { primary: "openrouter/google/gemini-2.5-flash-lite" },
          imageModel: { primary: "openrouter/google/gemini-2.5-flash-lite" },
        },
      },
      models: {
        providers: {
          openrouter: {
            api: "openai-completions",
            baseUrl: "https://openrouter.ai/api/v1",
            apiKey: "openrouter-test",
            models: [makeModelDefinition("google/gemini-2.5-flash-lite", ["text", "image"])],
          },
        },
      },
    };
    const imageTool = requireImageTool(createImageTool({ config, agentDir }));
    const pixel = `data:image/png;base64,${ONE_PIXEL_PNG_B64}`;
    const execResult = await imageTool.execute("t1", {
      prompt: "Describe the images.",
      images: [pixel, pixel],
    });
    // Both images should go out in a single upstream request.
    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(execResult.content).toEqual(
      expect.arrayContaining([expect.objectContaining({ type: "text", text: "ok multi" })]),
    );
  });
});
it("falls back to the generic image runtime when minimax-portal has no media provider registration", async () => {
  await withTempAgentDir(async (agentDir) => {
    installImageUnderstandingProviderStubs();
    // Seed an OAuth auth profile so the minimax-portal provider can authenticate.
    await writeAuthProfiles(agentDir, {
      version: 1,
      profiles: {
        "minimax-portal:default": {
          type: "oauth",
          provider: "minimax-portal",
          access: "oauth-test",
          refresh: "refresh-test",
          expires: Date.now() + 60_000,
        },
      },
    });
    const fetchMock = stubMinimaxOkFetch();
    const config: OpenClawConfig = {
      agents: {
        defaults: {
          model: { primary: "minimax-portal/MiniMax-M2.7" },
          imageModel: { primary: "minimax-portal/MiniMax-VL-01" },
        },
      },
    };
    const imageTool = requireImageTool(createImageTool({ config, agentDir }));
    await expectImageToolExecOk(imageTool, `data:image/png;base64,${ONE_PIXEL_PNG_B64}`);
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });
});
it("exposes an Anthropic-safe image schema without union keywords", async () => {
await withMinimaxImageToolFromTempAgentDir(async (tool) => {
const violations = findSchemaUnionKeywords(tool.parameters, "image.parameters");

View File

@@ -3,7 +3,11 @@ import type { OpenClawConfig } from "../../config/config.js";
import { getMediaUnderstandingProvider } from "../../media-understanding/provider-registry.js";
import { buildProviderRegistry } from "../../media-understanding/runner.js";
import { loadWebMedia } from "../../media/web-media.js";
import type { MediaUnderstandingProvider } from "../../plugin-sdk/media-understanding.js";
import {
describeImageWithModel,
describeImagesWithModel,
type MediaUnderstandingProvider,
} from "../../plugin-sdk/media-understanding.js";
import { resolveUserPath } from "../../utils.js";
import { isMinimaxVlmProvider } from "../minimax-vlm.js";
import {
@@ -164,11 +168,12 @@ async function runImagePrompt(params: {
provider,
providerRegistry as Map<string, MediaUnderstandingProvider>,
);
if (!imageProvider) {
throw new Error(`No media-understanding provider registered for ${provider}`);
}
if (params.images.length > 1 && imageProvider.describeImages) {
const described = await imageProvider.describeImages({
if (
params.images.length > 1 &&
(imageProvider?.describeImages || !imageProvider?.describeImage)
) {
const describeImages = imageProvider?.describeImages ?? describeImagesWithModel;
const described = await describeImages({
images: params.images.map((image, index) => ({
buffer: image.buffer,
fileName: `image-${index + 1}`,
@@ -184,12 +189,10 @@ async function runImagePrompt(params: {
});
return { text: described.text, provider, model: described.model ?? modelId };
}
if (!imageProvider.describeImage) {
throw new Error(`Provider does not support image analysis: ${provider}`);
}
const describeImage = imageProvider?.describeImage ?? describeImageWithModel;
if (params.images.length === 1) {
const image = params.images[0];
const described = await imageProvider.describeImage({
const described = await describeImage({
buffer: image.buffer,
fileName: "image-1",
mime: image.mimeType,
@@ -206,7 +209,7 @@ async function runImagePrompt(params: {
const parts: string[] = [];
for (const [index, image] of params.images.entries()) {
const described = await imageProvider.describeImage({
const described = await describeImage({
buffer: image.buffer,
fileName: `image-${index + 1}`,
mime: image.mimeType,