This commit is contained in:
Hudson 2026-03-15 22:51:54 +00:00 committed by GitHub
commit 8d5a9249ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 227 additions and 2 deletions

View File

@ -0,0 +1,135 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
import { installWebFetchSsrfHarness } from "./web-fetch.test-harness.js";
import { createWebFetchTool } from "./web-tools.js";
// Replace only `extractReadableContent` in the utils module; every other export is the
// real implementation. The stub always resolves to an empty extraction result.
vi.mock("./web-fetch-utils.js", async () => {
  const realModule =
    await vi.importActual<typeof import("./web-fetch-utils.js")>("./web-fetch-utils.js");
  const extractReadableContent = vi.fn().mockResolvedValue({ text: "", title: undefined });
  return { ...realModule, extractReadableContent };
});

// Installs the shared SSRF test harness for this file (defined in ./web-fetch.test-harness.js).
installWebFetchSsrfHarness();
/**
 * Turns a `fetch` input into a plain URL string.
 *
 * @param input - A URL string, a `URL` instance, or a request-like object.
 * @returns The string itself, the serialized `URL`, the object's string `url` property,
 *   or `""` when no string URL can be read from the input.
 */
function requestUrl(input: RequestInfo | URL): string {
  if (input instanceof URL) {
    return input.toString();
  }
  if (typeof input !== "string") {
    // Request-like object: only trust `url` when it is actually a string.
    return "url" in input && typeof input.url === "string" ? input.url : "";
  }
  return input;
}
/**
 * Builds a 200 JSON `Response` shaped like a successful Firecrawl scrape reply.
 *
 * @param markdown - Value placed in `data.markdown`.
 * @param url - Value placed in `data.metadata.sourceURL`.
 * @returns A `Response` with `Content-Type: application/json` and the serialized payload.
 */
function firecrawlResponse(markdown: string, url: string): Response {
  const payload = {
    success: true,
    data: {
      markdown,
      metadata: { title: "Firecrawl", sourceURL: url, statusCode: 200 },
    },
  };
  const init: ResponseInit = {
    status: 200,
    headers: { "Content-Type": "application/json" },
  };
  return new Response(JSON.stringify(payload), init);
}
/**
 * Builds a 200 HTML `Response` whose document has an empty `<head>` and `<body>`.
 *
 * @returns A `Response` with `Content-Type: text/html; charset=utf-8`.
 */
function htmlResponse(): Response {
  const emptyDocument = "<!doctype html><html><head></head><body></body></html>";
  const init: ResponseInit = {
    status: 200,
    headers: { "Content-Type": "text/html; charset=utf-8" },
  };
  return new Response(emptyDocument, init);
}
/**
 * Creates a non-sandboxed web fetch tool for these tests.
 *
 * @param fetchOverrides - Extra `tools.web.fetch` settings; spread after the defaults, so
 *   they win over `cacheTtlMinutes: 0`.
 * @returns Whatever `createWebFetchTool` returns for the assembled config.
 */
function createFetchTool(fetchOverrides: Record<string, unknown> = {}) {
  const fetchConfig = { cacheTtlMinutes: 0, ...fetchOverrides };
  return createWebFetchTool({
    config: { tools: { web: { fetch: fetchConfig } } },
    sandboxed: false,
  });
}
// Covers how the Firecrawl base URL is chosen when `firecrawl.baseUrl` is absent from config:
// first the FIRECRAWL_BASE_URL env var, then the built-in default host.
describe("web_fetch firecrawl base URL fallback", () => {
  // Captured once when the suite is defined so each test's env mutation can be undone.
  const originalBaseUrl = process.env.FIRECRAWL_BASE_URL;

  afterEach(() => {
    if (typeof originalBaseUrl !== "string") {
      delete process.env.FIRECRAWL_BASE_URL;
      return;
    }
    process.env.FIRECRAWL_BASE_URL = originalBaseUrl;
  });

  it("uses FIRECRAWL_BASE_URL env var when firecrawl.baseUrl is unset", async () => {
    const scrapeEndpoint = "https://firecrawl-env.example/v2/scrape";
    process.env.FIRECRAWL_BASE_URL = "https://firecrawl-env.example";

    // Only the env-configured scrape endpoint answers with Firecrawl JSON; every other
    // request receives the empty HTML page.
    const fetchSpy = vi.fn(async (input: RequestInfo | URL) => {
      const target = requestUrl(input);
      return target.startsWith(scrapeEndpoint)
        ? firecrawlResponse("from env", target)
        : htmlResponse();
    });
    global.fetch = withFetchPreconnect(fetchSpy);

    const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
    const result = await tool?.execute?.("call", { url: "https://example.com/from-env" });
    const details = result?.details as { extractor?: string; text?: string };

    expect(details.extractor).toBe("firecrawl");
    expect(details.text).toContain("from env");
    const hitScrapeEndpoint = fetchSpy.mock.calls.some(([input]) =>
      requestUrl(input).startsWith(scrapeEndpoint),
    );
    expect(hitScrapeEndpoint).toBe(true);
  });

  it("falls back to DEFAULT_FIRECRAWL_BASE_URL when config and env are unset", async () => {
    const scrapeEndpoint = "https://api.firecrawl.dev/v2/scrape";
    delete process.env.FIRECRAWL_BASE_URL;

    // Only the default-host scrape endpoint answers with Firecrawl JSON; every other
    // request receives the empty HTML page.
    const fetchSpy = vi.fn(async (input: RequestInfo | URL) => {
      const target = requestUrl(input);
      return target.startsWith(scrapeEndpoint)
        ? firecrawlResponse("from default", target)
        : htmlResponse();
    });
    global.fetch = withFetchPreconnect(fetchSpy);

    const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
    const result = await tool?.execute?.("call", { url: "https://example.com/from-default" });
    const details = result?.details as { extractor?: string; text?: string };

    expect(details.extractor).toBe("firecrawl");
    expect(details.text).toContain("from default");
    const hitScrapeEndpoint = fetchSpy.mock.calls.some(([input]) =>
      requestUrl(input).startsWith(scrapeEndpoint),
    );
    expect(hitScrapeEndpoint).toBe(true);
  });
});

View File

@ -161,11 +161,13 @@ function resolveFirecrawlEnabled(params: {
}
/**
 * Resolves the base URL used for Firecrawl requests.
 *
 * Precedence (first non-empty value after trimming wins):
 *   1. `firecrawl.baseUrl` from config, when it is a string;
 *   2. the `FIRECRAWL_BASE_URL` environment variable;
 *   3. `DEFAULT_FIRECRAWL_BASE_URL`.
 *
 * @param firecrawl - Optional Firecrawl fetch config.
 * @returns The resolved base URL string.
 */
function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string {
  const fromConfig =
    firecrawl && "baseUrl" in firecrawl && typeof firecrawl.baseUrl === "string"
      ? firecrawl.baseUrl.trim()
      : "";
  const fromEnv =
    typeof process.env.FIRECRAWL_BASE_URL === "string" ? process.env.FIRECRAWL_BASE_URL.trim() : "";
  // `||` rather than `??` on purpose: blank or whitespace-only values must fall through
  // to the next source instead of being returned as an empty base URL.
  return fromConfig || fromEnv || DEFAULT_FIRECRAWL_BASE_URL;
}
function resolveFirecrawlOnlyMainContent(firecrawl?: FirecrawlFetchConfig): boolean {

View File

@ -144,8 +144,10 @@ async function captureToolErrorMessage(params: {
describe("web_fetch extraction fallbacks", () => {
const priorFetch = global.fetch;
const priorFirecrawlBaseUrl = process.env.FIRECRAWL_BASE_URL;
beforeEach(() => {
delete process.env.FIRECRAWL_BASE_URL;
vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation(async (hostname) => {
const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
const addresses = ["93.184.216.34", "93.184.216.35"];
@ -159,6 +161,11 @@ describe("web_fetch extraction fallbacks", () => {
// Undo per-test global state: the fetch replacement, the FIRECRAWL_BASE_URL env var,
// any env stubs, and all mocks/spies.
afterEach(() => {
  global.fetch = priorFetch;
  if (typeof priorFirecrawlBaseUrl !== "string") {
    // The variable was not set when the suite was defined, so make sure it is gone again.
    delete process.env.FIRECRAWL_BASE_URL;
  } else {
    process.env.FIRECRAWL_BASE_URL = priorFirecrawlBaseUrl;
  }
  vi.unstubAllEnvs();
  vi.restoreAllMocks();
});
@ -297,6 +304,66 @@ describe("web_fetch extraction fallbacks", () => {
expect(details.text).toContain("firecrawl content");
});
it("uses FIRECRAWL_BASE_URL env var when firecrawl.baseUrl is unset", async () => {
  // The base URL comes from the environment only; the tool config below sets just an API key.
  process.env.FIRECRAWL_BASE_URL = "https://firecrawl-env.example";

  const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
    const target = resolveRequestUrl(input);
    // Requests that do not mention the env-configured host get an HTML page with an empty body.
    if (!target.includes("firecrawl-env.example")) {
      return Promise.resolve(
        htmlResponse("<!doctype html><html><head></head><body></body></html>", target),
      ) as Promise<Response>;
    }
    return Promise.resolve(
      firecrawlResponse("firecrawl env fallback", target),
    ) as Promise<Response>;
  });

  const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
  const result = await tool?.execute?.("call", { url: "https://example.com/empty-env" });
  const details = result?.details as { extractor?: string; text?: string };

  expect(details.extractor).toBe("firecrawl");
  expect(details.text).toContain("firecrawl env fallback");
  // At least one outgoing request must have targeted the scrape endpoint on the env host.
  const hitEnvScrape = fetchSpy.mock.calls.some(([input]) =>
    resolveRequestUrl(input).startsWith("https://firecrawl-env.example/v2/scrape"),
  );
  expect(hitEnvScrape).toBe(true);
});
it("falls back to DEFAULT_FIRECRAWL_BASE_URL when config and env are unset", async () => {
  // Neither the env var nor the tool config supplies a base URL in this scenario.
  delete process.env.FIRECRAWL_BASE_URL;

  const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
    const target = resolveRequestUrl(input);
    // Requests that do not mention the default host get an HTML page with an empty body.
    if (!target.includes("api.firecrawl.dev")) {
      return Promise.resolve(
        htmlResponse("<!doctype html><html><head></head><body></body></html>", target),
      ) as Promise<Response>;
    }
    return Promise.resolve(
      firecrawlResponse("firecrawl default fallback", target),
    ) as Promise<Response>;
  });

  const tool = createFetchTool({ firecrawl: { apiKey: "firecrawl-test" } });
  const result = await tool?.execute?.("call", { url: "https://example.com/empty-default" });
  const details = result?.details as { extractor?: string; text?: string };

  expect(details.extractor).toBe("firecrawl");
  expect(details.text).toContain("firecrawl default fallback");
  // At least one outgoing request must have targeted the scrape endpoint on the default host.
  const hitDefaultScrape = fetchSpy.mock.calls.some(([input]) =>
    resolveRequestUrl(input).startsWith("https://api.firecrawl.dev/v2/scrape"),
  );
  expect(hitDefaultScrape).toBe(true);
});
it("normalizes firecrawl Authorization header values", async () => {
const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
const url = resolveRequestUrl(input);

View File

@ -51,6 +51,27 @@ describe("config schema regressions", () => {
expect(res.ok).toBe(true);
});
it("accepts tools.web.fetch.firecrawl config block", () => {
  // Sets every firecrawl option used in this regression case and expects validation to pass.
  const validation = validateConfigObject({
    tools: {
      web: {
        fetch: {
          firecrawl: {
            enabled: true,
            apiKey: "firecrawl-test",
            baseUrl: "https://firecrawl.example",
            onlyMainContent: false,
            maxAgeMs: 60000,
            timeoutSeconds: 45,
          },
        },
      },
    },
  });

  expect(validation.ok).toBe(true);
});
it("accepts safe iMessage remoteHost", () => {
const res = validateConfigObject({
channels: {