From 25245b81393e6dd19f4ef2bd7611207adbdaaf2c Mon Sep 17 00:00:00 2001
From: Hudson <258693705+hudson-rivera@users.noreply.github.com>
Date: Sun, 15 Mar 2026 18:17:31 -0400
Subject: [PATCH] fix(tools): restore Firecrawl config schema and add
 FIRECRAWL_BASE_URL env fallback

---
Reviewer notes (this section is ignored by `git am`):

* After this patch resolveFirecrawlBaseUrl() picks the first non-empty,
  trimmed value of: firecrawl.baseUrl from config, then the
  FIRECRAWL_BASE_URL environment variable, then DEFAULT_FIRECRAWL_BASE_URL.
* NOTE(review): the subject mentions restoring the Firecrawl config schema,
  but the diffstat contains no schema source file; only a regression test
  for tools.web.fetch.firecrawl is added here. Confirm the schema change
  itself lands elsewhere.
* NOTE(review): web-fetch.base-url.test.ts assigns global.fetch in each test
  and its afterEach only restores FIRECRAWL_BASE_URL. Presumably
  installWebFetchSsrfHarness() takes care of fetch; verify.
* NOTE(review): web-tools.fetch.test.ts restores FIRECRAWL_BASE_URL by hand
  right before vi.unstubAllEnvs(); vi.stubEnv() in the tests would make the
  manual save/restore unnecessary.
* NOTE(review): the HTML bodies passed to htmlResponse()/new Response() are
  empty strings; confirm that is the intended fixture.

 src/agents/tools/web-fetch.base-url.test.ts  | 135 +++++++++++++++++++
 src/agents/tools/web-fetch.ts                |   6 +-
 src/agents/tools/web-tools.fetch.test.ts     |  67 +++++++++
 src/config/config.schema-regressions.test.ts |  21 +++
 4 files changed, 227 insertions(+), 2 deletions(-)
 create mode 100644 src/agents/tools/web-fetch.base-url.test.ts

diff --git a/src/agents/tools/web-fetch.base-url.test.ts b/src/agents/tools/web-fetch.base-url.test.ts
new file mode 100644
index 00000000000..c7b6702aefe
--- /dev/null
+++ b/src/agents/tools/web-fetch.base-url.test.ts
@@ -0,0 +1,135 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
+import { installWebFetchSsrfHarness } from "./web-fetch.test-harness.js";
+import { createWebFetchTool } from "./web-tools.js";
+
+vi.mock("./web-fetch-utils.js", async () => {
+  const actual =
+    await vi.importActual<typeof import("./web-fetch-utils.js")>("./web-fetch-utils.js");
+  return {
+    ...actual,
+    extractReadableContent: vi.fn().mockResolvedValue({ text: "", title: undefined }),
+  };
+});
+
+installWebFetchSsrfHarness();
+
+function requestUrl(input: RequestInfo | URL): string {
+  if (typeof input === "string") {
+    return input;
+  }
+  if (input instanceof URL) {
+    return input.toString();
+  }
+  if ("url" in input && typeof input.url === "string") {
+    return input.url;
+  }
+  return "";
+}
+
+function firecrawlResponse(markdown: string, url: string): Response {
+  return new Response(
+    JSON.stringify({
+      success: true,
+      data: {
+        markdown,
+        metadata: { title: "Firecrawl", sourceURL: url, statusCode: 200 },
+      },
+    }),
+    {
+      status: 200,
+      headers: { "Content-Type": "application/json" },
+    },
+  );
+}
+
+function htmlResponse(): Response {
+  return new Response("", {
+    status: 200,
+    headers: { "Content-Type": "text/html; charset=utf-8" },
+  });
+}
+
+function createFetchTool(fetchOverrides: Record<string, unknown> = {}) {
+  return createWebFetchTool({
+    config: {
+      tools: {
+        web: {
+          fetch: {
+            cacheTtlMinutes: 0,
+            ...fetchOverrides,
+          },
+        },
+      },
+    },
+    sandboxed: false,
+  });
+}
+
+describe("web_fetch firecrawl base URL fallback", () => {
+  const priorFirecrawlBaseUrl = process.env.FIRECRAWL_BASE_URL;
+
+  afterEach(() => {
+    if (typeof priorFirecrawlBaseUrl === "string") {
+      process.env.FIRECRAWL_BASE_URL = priorFirecrawlBaseUrl;
+    } else {
+      delete process.env.FIRECRAWL_BASE_URL;
+    }
+  });
+
+  it("uses FIRECRAWL_BASE_URL env var when firecrawl.baseUrl is unset", async () => {
+    process.env.FIRECRAWL_BASE_URL = "https://firecrawl-env.example";
+
+    const fetchSpy = vi.fn(async (input: RequestInfo | URL) => {
+      const url = requestUrl(input);
+      if (url.startsWith("https://firecrawl-env.example/v2/scrape")) {
+        return firecrawlResponse("from env", url);
+      }
+      return htmlResponse();
+    });
+    global.fetch = withFetchPreconnect(fetchSpy);
+
+    const tool = createFetchTool({
+      firecrawl: { apiKey: "firecrawl-test" },
+    });
+
+    const result = await tool?.execute?.("call", { url: "https://example.com/from-env" });
+    const details = result?.details as { extractor?: string; text?: string };
+
+    expect(details.extractor).toBe("firecrawl");
+    expect(details.text).toContain("from env");
+    expect(
+      fetchSpy.mock.calls.some(([input]) =>
+        requestUrl(input).startsWith("https://firecrawl-env.example/v2/scrape"),
+      ),
+    ).toBe(true);
+  });
+
+  it("falls back to DEFAULT_FIRECRAWL_BASE_URL when config and env are unset", async () => {
+    delete process.env.FIRECRAWL_BASE_URL;
+
+    const fetchSpy = vi.fn(async (input: RequestInfo | URL) => {
+      const url = requestUrl(input);
+      if (url.startsWith("https://api.firecrawl.dev/v2/scrape")) {
+        return firecrawlResponse("from default", url);
+      }
+      return htmlResponse();
+    });
+    global.fetch = withFetchPreconnect(fetchSpy);
+
+    const tool = createFetchTool({
+      firecrawl: { apiKey: "firecrawl-test" },
+    });
+
+    const result = await tool?.execute?.("call", { url: "https://example.com/from-default" });
+    const details = result?.details as { extractor?: string; text?: string };
+
+    expect(details.extractor).toBe("firecrawl");
+    expect(details.text).toContain("from default");
+    expect(
+      fetchSpy.mock.calls.some(([input]) =>
+        requestUrl(input).startsWith("https://api.firecrawl.dev/v2/scrape"),
+      ),
+    ).toBe(true);
+  });
+});
diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts
index f4cc88e2d83..b754999ac1c 100644
--- a/src/agents/tools/web-fetch.ts
+++ b/src/agents/tools/web-fetch.ts
@@ -161,11 +161,13 @@ function resolveFirecrawlEnabled(params: {
 }
 
 function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string {
-  const raw =
+  const fromConfig =
     firecrawl && "baseUrl" in firecrawl && typeof firecrawl.baseUrl === "string"
       ? firecrawl.baseUrl.trim()
       : "";
-  return raw || DEFAULT_FIRECRAWL_BASE_URL;
+  const fromEnv =
+    typeof process.env.FIRECRAWL_BASE_URL === "string" ? process.env.FIRECRAWL_BASE_URL.trim() : "";
+  return fromConfig || fromEnv || DEFAULT_FIRECRAWL_BASE_URL;
 }
 
 function resolveFirecrawlOnlyMainContent(firecrawl?: FirecrawlFetchConfig): boolean {
diff --git a/src/agents/tools/web-tools.fetch.test.ts b/src/agents/tools/web-tools.fetch.test.ts
index e9bfabbee7a..08d2e193fb3 100644
--- a/src/agents/tools/web-tools.fetch.test.ts
+++ b/src/agents/tools/web-tools.fetch.test.ts
@@ -144,8 +144,10 @@ async function captureToolErrorMessage(params: {
 
 describe("web_fetch extraction fallbacks", () => {
   const priorFetch = global.fetch;
+  const priorFirecrawlBaseUrl = process.env.FIRECRAWL_BASE_URL;
 
   beforeEach(() => {
+    delete process.env.FIRECRAWL_BASE_URL;
     vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation(async (hostname) => {
       const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
       const addresses = ["93.184.216.34", "93.184.216.35"];
@@ -159,6 +161,11 @@ describe("web_fetch extraction fallbacks", () => {
 
   afterEach(() => {
     global.fetch = priorFetch;
+    if (typeof priorFirecrawlBaseUrl === "string") {
+      process.env.FIRECRAWL_BASE_URL = priorFirecrawlBaseUrl;
+    } else {
+      delete process.env.FIRECRAWL_BASE_URL;
+    }
     vi.unstubAllEnvs();
     vi.restoreAllMocks();
   });
@@ -297,6 +304,66 @@ describe("web_fetch extraction fallbacks", () => {
     expect(details.text).toContain("firecrawl content");
   });
 
+  it("uses FIRECRAWL_BASE_URL env var when firecrawl.baseUrl is unset", async () => {
+    process.env.FIRECRAWL_BASE_URL = "https://firecrawl-env.example";
+    const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
+      const url = resolveRequestUrl(input);
+      if (url.includes("firecrawl-env.example")) {
+        return Promise.resolve(
+          firecrawlResponse("firecrawl env fallback", url),
+        ) as Promise<Response>;
+      }
+      return Promise.resolve(
+        htmlResponse("", url),
+      ) as Promise<Response>;
+    });
+
+    const tool = createFetchTool({
+      firecrawl: { apiKey: "firecrawl-test" },
+    });
+
+    const result = await tool?.execute?.("call", { url: "https://example.com/empty-env" });
+    const details = result?.details as { extractor?: string; text?: string };
+
+    expect(details.extractor).toBe("firecrawl");
+    expect(details.text).toContain("firecrawl env fallback");
+    expect(
+      fetchSpy.mock.calls.some(([input]) =>
+        resolveRequestUrl(input).startsWith("https://firecrawl-env.example/v2/scrape"),
+      ),
+    ).toBe(true);
+  });
+
+  it("falls back to DEFAULT_FIRECRAWL_BASE_URL when config and env are unset", async () => {
+    delete process.env.FIRECRAWL_BASE_URL;
+    const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
+      const url = resolveRequestUrl(input);
+      if (url.includes("api.firecrawl.dev")) {
+        return Promise.resolve(
+          firecrawlResponse("firecrawl default fallback", url),
+        ) as Promise<Response>;
+      }
+      return Promise.resolve(
+        htmlResponse("", url),
+      ) as Promise<Response>;
+    });
+
+    const tool = createFetchTool({
+      firecrawl: { apiKey: "firecrawl-test" },
+    });
+
+    const result = await tool?.execute?.("call", { url: "https://example.com/empty-default" });
+    const details = result?.details as { extractor?: string; text?: string };
+
+    expect(details.extractor).toBe("firecrawl");
+    expect(details.text).toContain("firecrawl default fallback");
+    expect(
+      fetchSpy.mock.calls.some(([input]) =>
+        resolveRequestUrl(input).startsWith("https://api.firecrawl.dev/v2/scrape"),
+      ),
+    ).toBe(true);
+  });
+
   it("normalizes firecrawl Authorization header values", async () => {
     const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
       const url = resolveRequestUrl(input);
diff --git a/src/config/config.schema-regressions.test.ts b/src/config/config.schema-regressions.test.ts
index 7a6053fd01c..09a504ab913 100644
--- a/src/config/config.schema-regressions.test.ts
+++ b/src/config/config.schema-regressions.test.ts
@@ -51,6 +51,27 @@ describe("config schema regressions", () => {
     expect(res.ok).toBe(true);
   });
 
+  it("accepts tools.web.fetch.firecrawl config block", () => {
+    const res = validateConfigObject({
+      tools: {
+        web: {
+          fetch: {
+            firecrawl: {
+              enabled: true,
+              apiKey: "firecrawl-test",
+              baseUrl: "https://firecrawl.example",
+              onlyMainContent: false,
+              maxAgeMs: 60000,
+              timeoutSeconds: 45,
+            },
+          },
+        },
+      },
+    });
+
+    expect(res.ok).toBe(true);
+  });
+
   it("accepts safe iMessage remoteHost", () => {
     const res = validateConfigObject({
       channels: {