mirror of https://github.com/openclaw/openclaw.git
fix: restore image-tool generic provider fallback (#54858) (thanks @MonkeyLeeT)
* Image tool: restore generic provider fallback * Image tool: cover multi-image generic fallback * test: tighten minimax-portal image fallback coverage * fix: restore image-tool generic provider fallback (#54858) (thanks @MonkeyLeeT) --------- Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
parent
8efc6e001e
commit
76ff0d9298
|
|
@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Telegram/pairing: ignore self-authored DM `message` updates so bot-pinned status cards and similar service updates do not trigger bogus pairing requests or re-enter inbound dispatch. (#54530) Thanks @huntharo.
|
||||
- iMessage: stop leaking inline `[[reply_to:...]]` tags into delivered text by sending `reply_to` as RPC metadata and stripping stray directive tags from outbound messages. (#39512) Thanks @mvanhorn.
|
||||
- Agents/embedded replies: surface mid-turn 429 and overload failures when embedded runs end without a user-visible reply, while preserving successful media-only replies that still use legacy `mediaUrl`. (#50930) Thanks @infichen.
|
||||
- Agents/image tool: restore the generic image-runtime fallback when no provider-specific media-understanding provider is registered, so image analysis works again for providers like `openrouter` and `minimax-portal`. (#54858) Thanks @MonkeyLeeT.
|
||||
- Agents/compaction: trigger timeout recovery compaction before retrying high-context LLM timeouts so embedded runs stop repeating oversized requests. (#46417) Thanks @joeykrug.
|
||||
- Microsoft Teams/config: accept the existing `welcomeCard`, `groupWelcomeCard`, `promptStarters`, and feedback/reflection keys in strict config validation so already-supported Teams runtime settings stop failing schema checks. (#54679) Thanks @gumclaw.
|
||||
- CLI/plugins: make routed commands use the same auto-enabled bundled-channel snapshot as gateway startup, so configured bundled channels like Slack load without requiring a prior config rewrite. (#54809) Thanks @neeravmakwana.
|
||||
|
|
|
|||
|
|
@ -222,45 +222,46 @@ function stubMinimaxFetch(baseResp: { status_code: number; status_msg: string },
|
|||
}
|
||||
|
||||
function stubOpenAiCompletionsOkFetch(text = "ok") {
|
||||
const fetch = vi.fn().mockResolvedValue(
|
||||
new Response(
|
||||
new ReadableStream<Uint8Array>({
|
||||
start(controller) {
|
||||
const encoder = new TextEncoder();
|
||||
const chunks = [
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl-moonshot-test",
|
||||
object: "chat.completion.chunk",
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: "kimi-k2.5",
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: { role: "assistant", content: text },
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
})}\n\n`,
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl-moonshot-test",
|
||||
object: "chat.completion.chunk",
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: "kimi-k2.5",
|
||||
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
||||
})}\n\n`,
|
||||
"data: [DONE]\n\n",
|
||||
];
|
||||
for (const chunk of chunks) {
|
||||
controller.enqueue(encoder.encode(chunk));
|
||||
}
|
||||
controller.close();
|
||||
const fetch = vi.fn().mockImplementation(
|
||||
async () =>
|
||||
new Response(
|
||||
new ReadableStream<Uint8Array>({
|
||||
start(controller) {
|
||||
const encoder = new TextEncoder();
|
||||
const chunks = [
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl-moonshot-test",
|
||||
object: "chat.completion.chunk",
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: "kimi-k2.5",
|
||||
choices: [
|
||||
{
|
||||
index: 0,
|
||||
delta: { role: "assistant", content: text },
|
||||
finish_reason: null,
|
||||
},
|
||||
],
|
||||
})}\n\n`,
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl-moonshot-test",
|
||||
object: "chat.completion.chunk",
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
model: "kimi-k2.5",
|
||||
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
||||
})}\n\n`,
|
||||
"data: [DONE]\n\n",
|
||||
];
|
||||
for (const chunk of chunks) {
|
||||
controller.enqueue(encoder.encode(chunk));
|
||||
}
|
||||
controller.close();
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "content-type": "text/event-stream" },
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "content-type": "text/event-stream" },
|
||||
},
|
||||
),
|
||||
),
|
||||
);
|
||||
global.fetch = withFetchPreconnect(fetch);
|
||||
return fetch;
|
||||
|
|
@ -705,6 +706,110 @@ describe("image tool implicit imageModel config", () => {
|
|||
});
|
||||
});
|
||||
|
||||
it("falls back to the generic image runtime when openrouter has no media provider registration", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const fetch = stubOpenAiCompletionsOkFetch("ok openrouter");
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "openrouter/google/gemini-2.5-flash-lite" },
|
||||
imageModel: { primary: "openrouter/google/gemini-2.5-flash-lite" },
|
||||
},
|
||||
},
|
||||
models: {
|
||||
providers: {
|
||||
openrouter: {
|
||||
api: "openai-completions",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
apiKey: "openrouter-test",
|
||||
models: [makeModelDefinition("google/gemini-2.5-flash-lite", ["text", "image"])],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const tool = requireImageTool(createImageTool({ config: cfg, agentDir }));
|
||||
const result = await tool.execute("t1", {
|
||||
prompt: "Describe the image.",
|
||||
image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
|
||||
});
|
||||
|
||||
expect(fetch).toHaveBeenCalledTimes(1);
|
||||
expect(result.content).toEqual(
|
||||
expect.arrayContaining([expect.objectContaining({ type: "text", text: "ok openrouter" })]),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to the generic multi-image runtime when openrouter has no media provider registration", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
const fetch = stubOpenAiCompletionsOkFetch("ok multi");
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "openrouter/google/gemini-2.5-flash-lite" },
|
||||
imageModel: { primary: "openrouter/google/gemini-2.5-flash-lite" },
|
||||
},
|
||||
},
|
||||
models: {
|
||||
providers: {
|
||||
openrouter: {
|
||||
api: "openai-completions",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
apiKey: "openrouter-test",
|
||||
models: [makeModelDefinition("google/gemini-2.5-flash-lite", ["text", "image"])],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const tool = requireImageTool(createImageTool({ config: cfg, agentDir }));
|
||||
const result = await tool.execute("t1", {
|
||||
prompt: "Describe the images.",
|
||||
images: [
|
||||
`data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
|
||||
`data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
|
||||
],
|
||||
});
|
||||
|
||||
expect(fetch).toHaveBeenCalledTimes(1);
|
||||
expect(result.content).toEqual(
|
||||
expect.arrayContaining([expect.objectContaining({ type: "text", text: "ok multi" })]),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to the generic image runtime when minimax-portal has no media provider registration", async () => {
|
||||
await withTempAgentDir(async (agentDir) => {
|
||||
installImageUnderstandingProviderStubs();
|
||||
await writeAuthProfiles(agentDir, {
|
||||
version: 1,
|
||||
profiles: {
|
||||
"minimax-portal:default": {
|
||||
type: "oauth",
|
||||
provider: "minimax-portal",
|
||||
access: "oauth-test",
|
||||
refresh: "refresh-test",
|
||||
expires: Date.now() + 60_000,
|
||||
},
|
||||
},
|
||||
});
|
||||
const fetch = stubMinimaxOkFetch();
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "minimax-portal/MiniMax-M2.7" },
|
||||
imageModel: { primary: "minimax-portal/MiniMax-VL-01" },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const tool = requireImageTool(createImageTool({ config: cfg, agentDir }));
|
||||
await expectImageToolExecOk(tool, `data:image/png;base64,${ONE_PIXEL_PNG_B64}`);
|
||||
expect(fetch).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("exposes an Anthropic-safe image schema without union keywords", async () => {
|
||||
await withMinimaxImageToolFromTempAgentDir(async (tool) => {
|
||||
const violations = findSchemaUnionKeywords(tool.parameters, "image.parameters");
|
||||
|
|
|
|||
|
|
@ -3,7 +3,11 @@ import type { OpenClawConfig } from "../../config/config.js";
|
|||
import { getMediaUnderstandingProvider } from "../../media-understanding/provider-registry.js";
|
||||
import { buildProviderRegistry } from "../../media-understanding/runner.js";
|
||||
import { loadWebMedia } from "../../media/web-media.js";
|
||||
import type { MediaUnderstandingProvider } from "../../plugin-sdk/media-understanding.js";
|
||||
import {
|
||||
describeImageWithModel,
|
||||
describeImagesWithModel,
|
||||
type MediaUnderstandingProvider,
|
||||
} from "../../plugin-sdk/media-understanding.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import { isMinimaxVlmProvider } from "../minimax-vlm.js";
|
||||
import {
|
||||
|
|
@ -164,11 +168,12 @@ async function runImagePrompt(params: {
|
|||
provider,
|
||||
providerRegistry as Map<string, MediaUnderstandingProvider>,
|
||||
);
|
||||
if (!imageProvider) {
|
||||
throw new Error(`No media-understanding provider registered for ${provider}`);
|
||||
}
|
||||
if (params.images.length > 1 && imageProvider.describeImages) {
|
||||
const described = await imageProvider.describeImages({
|
||||
if (
|
||||
params.images.length > 1 &&
|
||||
(imageProvider?.describeImages || !imageProvider?.describeImage)
|
||||
) {
|
||||
const describeImages = imageProvider?.describeImages ?? describeImagesWithModel;
|
||||
const described = await describeImages({
|
||||
images: params.images.map((image, index) => ({
|
||||
buffer: image.buffer,
|
||||
fileName: `image-${index + 1}`,
|
||||
|
|
@ -184,12 +189,10 @@ async function runImagePrompt(params: {
|
|||
});
|
||||
return { text: described.text, provider, model: described.model ?? modelId };
|
||||
}
|
||||
if (!imageProvider.describeImage) {
|
||||
throw new Error(`Provider does not support image analysis: ${provider}`);
|
||||
}
|
||||
const describeImage = imageProvider?.describeImage ?? describeImageWithModel;
|
||||
if (params.images.length === 1) {
|
||||
const image = params.images[0];
|
||||
const described = await imageProvider.describeImage({
|
||||
const described = await describeImage({
|
||||
buffer: image.buffer,
|
||||
fileName: "image-1",
|
||||
mime: image.mimeType,
|
||||
|
|
@ -206,7 +209,7 @@ async function runImagePrompt(params: {
|
|||
|
||||
const parts: string[] = [];
|
||||
for (const [index, image] of params.images.entries()) {
|
||||
const described = await imageProvider.describeImage({
|
||||
const described = await describeImage({
|
||||
buffer: image.buffer,
|
||||
fileName: `image-${index + 1}`,
|
||||
mime: image.mimeType,
|
||||
|
|
|
|||
Loading…
Reference in New Issue