mirror of https://github.com/openclaw/openclaw.git
fix(tools): restore Firecrawl config schema and add FIRECRAWL_BASE_URL env fallback
This commit is contained in:
parent
61d171ab0b
commit
25245b8139
|
|
@ -0,0 +1,135 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
|
||||
import { installWebFetchSsrfHarness } from "./web-fetch.test-harness.js";
|
||||
import { createWebFetchTool } from "./web-tools.js";
|
||||
|
||||
vi.mock("./web-fetch-utils.js", async () => {
|
||||
const actual =
|
||||
await vi.importActual<typeof import("./web-fetch-utils.js")>("./web-fetch-utils.js");
|
||||
return {
|
||||
...actual,
|
||||
extractReadableContent: vi.fn().mockResolvedValue({ text: "", title: undefined }),
|
||||
};
|
||||
});
|
||||
|
||||
installWebFetchSsrfHarness();
|
||||
|
||||
/**
 * Extracts the URL string from a fetch-style request argument.
 *
 * @param input - A URL string, a `URL` instance, or a request-like object.
 * @returns The string itself, the serialized `URL`, the object's string `url`
 *   property, or `""` when none of those is available.
 */
function requestUrl(input: RequestInfo | URL): string {
  if (input instanceof URL) {
    return input.toString();
  }
  if (typeof input !== "string") {
    // Request-like object: only trust `url` when it is actually a string.
    return "url" in input && typeof input.url === "string" ? input.url : "";
  }
  return input;
}
|
||||
|
||||
/**
 * Builds a successful JSON `Response` in the shape these tests use to stand in
 * for a Firecrawl scrape result.
 *
 * @param markdown - Markdown placed in `data.markdown`.
 * @param url - Value reported as `data.metadata.sourceURL`.
 * @returns A 200 response with a JSON content type.
 */
function firecrawlResponse(markdown: string, url: string): Response {
  const payload = {
    success: true,
    data: {
      markdown,
      metadata: { title: "Firecrawl", sourceURL: url, statusCode: 200 },
    },
  };
  const init: ResponseInit = {
    status: 200,
    headers: { "Content-Type": "application/json" },
  };
  return new Response(JSON.stringify(payload), init);
}
|
||||
|
||||
/**
 * Builds a 200 `text/html` response whose document has an empty head and body.
 *
 * @returns The empty-page response served for every non-Firecrawl request in
 *   these tests.
 */
function htmlResponse(): Response {
  const emptyDocument = "<!doctype html><html><head></head><body></body></html>";
  const headers = { "Content-Type": "text/html; charset=utf-8" };
  return new Response(emptyDocument, { status: 200, headers });
}
|
||||
|
||||
/**
 * Creates the web fetch tool under test, unsandboxed, with `cacheTtlMinutes`
 * set to 0.
 *
 * @param fetchOverrides - Extra `tools.web.fetch` settings; spread after the
 *   default so they take precedence over it.
 * @returns Whatever `createWebFetchTool` returns for that configuration.
 */
function createFetchTool(fetchOverrides: Record<string, unknown> = {}) {
  const fetchConfig = { cacheTtlMinutes: 0, ...fetchOverrides };
  return createWebFetchTool({
    config: { tools: { web: { fetch: fetchConfig } } },
    sandboxed: false,
  });
}
|
||||
|
||||
// Covers where the Firecrawl base URL comes from when `firecrawl.baseUrl` is not
// configured: the FIRECRAWL_BASE_URL environment variable first, then the default.
describe("web_fetch firecrawl base URL fallback", () => {
  // Captured once when the suite is collected so each test can put the variable back.
  const priorFirecrawlBaseUrl = process.env.FIRECRAWL_BASE_URL;

  afterEach(() => {
    if (typeof priorFirecrawlBaseUrl !== "string") {
      delete process.env.FIRECRAWL_BASE_URL;
      return;
    }
    process.env.FIRECRAWL_BASE_URL = priorFirecrawlBaseUrl;
  });

  it("uses FIRECRAWL_BASE_URL env var when firecrawl.baseUrl is unset", async () => {
    const scrapeEndpoint = "https://firecrawl-env.example/v2/scrape";
    process.env.FIRECRAWL_BASE_URL = "https://firecrawl-env.example";

    // Only the env-provided scrape endpoint answers with Firecrawl content;
    // every other request receives the empty HTML page.
    const fetchSpy = vi.fn(async (input: RequestInfo | URL) => {
      const target = requestUrl(input);
      return target.startsWith(scrapeEndpoint)
        ? firecrawlResponse("from env", target)
        : htmlResponse();
    });
    global.fetch = withFetchPreconnect(fetchSpy);

    const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
    const result = await tool?.execute?.("call", { url: "https://example.com/from-env" });
    const details = result?.details as { extractor?: string; text?: string };

    expect(details.extractor).toBe("firecrawl");
    expect(details.text).toContain("from env");

    const hitScrapeEndpoint = fetchSpy.mock.calls.some(([input]) =>
      requestUrl(input).startsWith(scrapeEndpoint),
    );
    expect(hitScrapeEndpoint).toBe(true);
  });

  it("falls back to DEFAULT_FIRECRAWL_BASE_URL when config and env are unset", async () => {
    const scrapeEndpoint = "https://api.firecrawl.dev/v2/scrape";
    delete process.env.FIRECRAWL_BASE_URL;

    // Only the default scrape endpoint answers with Firecrawl content;
    // every other request receives the empty HTML page.
    const fetchSpy = vi.fn(async (input: RequestInfo | URL) => {
      const target = requestUrl(input);
      return target.startsWith(scrapeEndpoint)
        ? firecrawlResponse("from default", target)
        : htmlResponse();
    });
    global.fetch = withFetchPreconnect(fetchSpy);

    const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
    const result = await tool?.execute?.("call", { url: "https://example.com/from-default" });
    const details = result?.details as { extractor?: string; text?: string };

    expect(details.extractor).toBe("firecrawl");
    expect(details.text).toContain("from default");

    const hitScrapeEndpoint = fetchSpy.mock.calls.some(([input]) =>
      requestUrl(input).startsWith(scrapeEndpoint),
    );
    expect(hitScrapeEndpoint).toBe(true);
  });
});
|
||||
|
|
@ -161,11 +161,13 @@ function resolveFirecrawlEnabled(params: {
|
|||
}
|
||||
|
||||
/**
 * Resolves the base URL used for Firecrawl requests.
 *
 * Precedence, first non-empty value wins:
 *   1. `firecrawl.baseUrl` from the tool config
 *   2. the `FIRECRAWL_BASE_URL` environment variable
 *   3. `DEFAULT_FIRECRAWL_BASE_URL`
 *
 * Config and env values are trimmed, so blank or whitespace-only strings are
 * treated as unset.
 *
 * @param firecrawl - Optional Firecrawl section of the fetch config.
 * @returns The resolved base URL.
 */
function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string {
  const fromConfig =
    firecrawl && "baseUrl" in firecrawl && typeof firecrawl.baseUrl === "string"
      ? firecrawl.baseUrl.trim()
      : "";
  const fromEnv =
    typeof process.env.FIRECRAWL_BASE_URL === "string" ? process.env.FIRECRAWL_BASE_URL.trim() : "";
  // `||` rather than `??` on purpose: an empty string must fall through to the next source.
  return fromConfig || fromEnv || DEFAULT_FIRECRAWL_BASE_URL;
}
|
||||
|
||||
function resolveFirecrawlOnlyMainContent(firecrawl?: FirecrawlFetchConfig): boolean {
|
||||
|
|
|
|||
|
|
@ -144,8 +144,10 @@ async function captureToolErrorMessage(params: {
|
|||
|
||||
describe("web_fetch extraction fallbacks", () => {
|
||||
const priorFetch = global.fetch;
|
||||
const priorFirecrawlBaseUrl = process.env.FIRECRAWL_BASE_URL;
|
||||
|
||||
beforeEach(() => {
|
||||
delete process.env.FIRECRAWL_BASE_URL;
|
||||
vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation(async (hostname) => {
|
||||
const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
|
||||
const addresses = ["93.184.216.34", "93.184.216.35"];
|
||||
|
|
@ -159,6 +161,11 @@ describe("web_fetch extraction fallbacks", () => {
|
|||
|
||||
// Undo per-test global state: the fetch override, the FIRECRAWL_BASE_URL
// environment variable, stubbed env values, and vitest mocks/spies.
afterEach(() => {
  global.fetch = priorFetch;

  if (typeof priorFirecrawlBaseUrl !== "string") {
    // The variable was not set before the suite ran, so remove it again.
    delete process.env.FIRECRAWL_BASE_URL;
  } else {
    process.env.FIRECRAWL_BASE_URL = priorFirecrawlBaseUrl;
  }

  vi.unstubAllEnvs();
  vi.restoreAllMocks();
});
|
||||
|
|
@ -297,6 +304,66 @@ describe("web_fetch extraction fallbacks", () => {
|
|||
expect(details.text).toContain("firecrawl content");
|
||||
});
|
||||
|
||||
// With no `firecrawl.baseUrl` in config, the scrape request should be sent to
// the host named by the FIRECRAWL_BASE_URL environment variable.
it("uses FIRECRAWL_BASE_URL env var when firecrawl.baseUrl is unset", async () => {
  process.env.FIRECRAWL_BASE_URL = "https://firecrawl-env.example";

  const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
    const target = resolveRequestUrl(input);
    if (!target.includes("firecrawl-env.example")) {
      // Anything that is not the env-configured host gets an empty HTML page.
      return Promise.resolve(
        htmlResponse("<!doctype html><html><head></head><body></body></html>", target),
      ) as Promise<Response>;
    }
    return Promise.resolve(
      firecrawlResponse("firecrawl env fallback", target),
    ) as Promise<Response>;
  });

  const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
  const result = await tool?.execute?.("call", { url: "https://example.com/empty-env" });
  const details = result?.details as { extractor?: string; text?: string };

  expect(details.extractor).toBe("firecrawl");
  expect(details.text).toContain("firecrawl env fallback");

  const hitEnvEndpoint = fetchSpy.mock.calls.some(([input]) =>
    resolveRequestUrl(input).startsWith("https://firecrawl-env.example/v2/scrape"),
  );
  expect(hitEnvEndpoint).toBe(true);
});
|
||||
|
||||
// With neither config nor environment providing a base URL, the scrape request
// should be sent to the default Firecrawl host.
it("falls back to DEFAULT_FIRECRAWL_BASE_URL when config and env are unset", async () => {
  delete process.env.FIRECRAWL_BASE_URL;

  const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
    const target = resolveRequestUrl(input);
    if (!target.includes("api.firecrawl.dev")) {
      // Anything that is not the default Firecrawl host gets an empty HTML page.
      return Promise.resolve(
        htmlResponse("<!doctype html><html><head></head><body></body></html>", target),
      ) as Promise<Response>;
    }
    return Promise.resolve(
      firecrawlResponse("firecrawl default fallback", target),
    ) as Promise<Response>;
  });

  const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
  const result = await tool?.execute?.("call", { url: "https://example.com/empty-default" });
  const details = result?.details as { extractor?: string; text?: string };

  expect(details.extractor).toBe("firecrawl");
  expect(details.text).toContain("firecrawl default fallback");

  const hitDefaultEndpoint = fetchSpy.mock.calls.some(([input]) =>
    resolveRequestUrl(input).startsWith("https://api.firecrawl.dev/v2/scrape"),
  );
  expect(hitDefaultEndpoint).toBe(true);
});
|
||||
|
||||
it("normalizes firecrawl Authorization header values", async () => {
|
||||
const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
|
||||
const url = resolveRequestUrl(input);
|
||||
|
|
|
|||
|
|
@ -51,6 +51,27 @@ describe("config schema regressions", () => {
|
|||
expect(res.ok).toBe(true);
|
||||
});
|
||||
|
||||
// Regression guard: a fully populated `tools.web.fetch.firecrawl` section must
// pass config validation.
it("accepts tools.web.fetch.firecrawl config block", () => {
  const firecrawl = {
    enabled: true,
    apiKey: "firecrawl-test",
    baseUrl: "https://firecrawl.example",
    onlyMainContent: false,
    maxAgeMs: 60000,
    timeoutSeconds: 45,
  };

  const res = validateConfigObject({ tools: { web: { fetch: { firecrawl } } } });

  expect(res.ok).toBe(true);
});
|
||||
|
||||
it("accepts safe iMessage remoteHost", () => {
|
||||
const res = validateConfigObject({
|
||||
channels: {
|
||||
|
|
|
|||
Loading…
Reference in New Issue