// openclaw/extensions/firecrawl/src/firecrawl-tools.test.ts
//
// 427 lines
// 13 KiB
// TypeScript

import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import {
DEFAULT_FIRECRAWL_BASE_URL,
DEFAULT_FIRECRAWL_MAX_AGE_MS,
DEFAULT_FIRECRAWL_SCRAPE_TIMEOUT_SECONDS,
DEFAULT_FIRECRAWL_SEARCH_TIMEOUT_SECONDS,
resolveFirecrawlApiKey,
resolveFirecrawlBaseUrl,
resolveFirecrawlMaxAgeMs,
resolveFirecrawlOnlyMainContent,
resolveFirecrawlScrapeTimeoutSeconds,
resolveFirecrawlSearchConfig,
resolveFirecrawlSearchTimeoutSeconds,
} from "./config.js";
// Spies standing in for the Firecrawl client entry points. They are created
// through `vi.hoisted` so they can be referenced from the `vi.mock` factory.
const { runFirecrawlSearch, runFirecrawlScrape } = vi.hoisted(() => {
  // Default search behaviour: resolve with the params it was called with.
  const searchSpy = vi.fn(async (params: Record<string, unknown>) => params);
  // Default scrape behaviour: resolve with the params wrapped in an `ok` envelope.
  const scrapeSpy = vi.fn(async (params: Record<string, unknown>) => {
    return { ok: true, params };
  });
  return { runFirecrawlSearch: searchSpy, runFirecrawlScrape: scrapeSpy };
});
// Replace the client module's run functions with the hoisted spies so the
// tool tests can observe the arguments that get forwarded to the client.
vi.mock("./firecrawl-client.js", () => {
  return { runFirecrawlSearch, runFirecrawlScrape };
});
describe("firecrawl tools", () => {
// Module shapes of the units under test. The modules are loaded dynamically
// in `beforeAll`, so only their types are referenced here.
type SearchProviderModule = typeof import("./firecrawl-search-provider.js");
type SearchToolModule = typeof import("./firecrawl-search-tool.js");
type ScrapeToolModule = typeof import("./firecrawl-scrape-tool.js");
type FirecrawlClientModule = typeof import("./firecrawl-client.js");

let createFirecrawlWebSearchProvider: SearchProviderModule["createFirecrawlWebSearchProvider"];
let createFirecrawlSearchTool: SearchToolModule["createFirecrawlSearchTool"];
let createFirecrawlScrapeTool: ScrapeToolModule["createFirecrawlScrapeTool"];
// Helpers the client exports under `__testing`, read from the actual module.
let firecrawlClientTesting: FirecrawlClientModule["__testing"];
// Load the units under test once for the whole suite.
beforeAll(async () => {
  // Clear the module registry so the imports below are evaluated fresh.
  vi.resetModules();

  const searchProviderModule = await import("./firecrawl-search-provider.js");
  createFirecrawlWebSearchProvider = searchProviderModule.createFirecrawlWebSearchProvider;

  const searchToolModule = await import("./firecrawl-search-tool.js");
  createFirecrawlSearchTool = searchToolModule.createFirecrawlSearchTool;

  const scrapeToolModule = await import("./firecrawl-scrape-tool.js");
  createFirecrawlScrapeTool = scrapeToolModule.createFirecrawlScrapeTool;

  // The mock factory only defines the two run functions, so the `__testing`
  // helpers have to come from the actual client module.
  const actualClientModule = await vi.importActual<typeof import("./firecrawl-client.js")>(
    "./firecrawl-client.js",
  );
  firecrawlClientTesting = actualClientModule.__testing;
});
// Return both spies and the environment to a known state before every test.
beforeEach(() => {
  // Same defaults the spies were created with: search echoes its params,
  // scrape wraps them in an `ok` envelope.
  const echoParams = async (params: Record<string, unknown>) => params;
  const wrapParams = async (params: Record<string, unknown>) => ({
    ok: true,
    params,
  });

  runFirecrawlSearch.mockReset();
  runFirecrawlSearch.mockImplementation(echoParams);

  runFirecrawlScrape.mockReset();
  runFirecrawlScrape.mockImplementation(wrapParams);

  // Undo any `vi.stubEnv` calls left over from the previous test.
  vi.unstubAllEnvs();
});
// Provider identity, credential path, and the config produced on selection.
it("exposes selection metadata and enables the plugin in config", () => {
  const provider = createFirecrawlWebSearchProvider();

  // `applySelectionConfig` is optional on the provider; fail loudly if absent.
  if (!provider.applySelectionConfig) {
    throw new Error("Expected applySelectionConfig to be defined");
  }
  const selectedConfig = provider.applySelectionConfig({});

  expect(provider.id).toBe("firecrawl");
  expect(provider.credentialPath).toBe("plugins.entries.firecrawl.config.webSearch.apiKey");

  const firecrawlEntry = selectedConfig.plugins?.entries?.firecrawl;
  expect(firecrawlEntry?.enabled).toBe(true);
});
// A successful scrape payload is turned into a result carrying URL, status,
// extractor name, text, and a truncation flag.
it("parses scrape payloads into wrapped external-content results", () => {
  const requestedUrl = "https://example.com/start";
  const reportedUrl = "https://example.com/final";

  const parsed = firecrawlClientTesting.parseFirecrawlScrapePayload({
    payload: {
      success: true,
      data: {
        markdown: "# Hello\n\nWorld",
        metadata: {
          title: "Example page",
          sourceURL: reportedUrl,
          statusCode: 200,
        },
      },
    },
    url: requestedUrl,
    extractMode: "text",
    maxChars: 1000,
  });

  // The URL from metadata.sourceURL, not the requested one, is expected here.
  expect(parsed.finalUrl).toBe(reportedUrl);
  expect(parsed.status).toBe(200);
  expect(parsed.extractor).toBe("firecrawl");

  const renderedText = String(parsed.text);
  expect(renderedText).toContain("Hello");
  expect(renderedText).toContain("World");

  expect(parsed.truncated).toBe(false);
});
// Payload shape: `data` is a plain array of result objects.
it("extracts search items from flexible Firecrawl payload shapes", () => {
  const docsUrl = "https://docs.example.com/path";

  const resolved = firecrawlClientTesting.resolveSearchItems({
    success: true,
    data: [
      {
        title: "Docs",
        url: docsUrl,
        description: "Reference docs",
        markdown: "Body",
      },
    ],
  });

  // `markdown` is expected back as `content`, and `siteName` as the URL's host.
  const expectedItems = [
    {
      title: "Docs",
      url: docsUrl,
      description: "Reference docs",
      content: "Body",
      published: undefined,
      siteName: "docs.example.com",
    },
  ];
  expect(resolved).toEqual(expectedItems);
});
// Payload shape: results are nested under `data.web`.
it("extracts search items from Firecrawl v2 data.web payloads", () => {
  const pageUrl = "https://openai.com/api/";
  const pageTitle = "API Platform - OpenAI";
  const pageDescription = "Build on the OpenAI API platform.";

  const resolved = firecrawlClientTesting.resolveSearchItems({
    success: true,
    data: {
      web: [
        {
          title: pageTitle,
          url: pageUrl,
          description: pageDescription,
          markdown: "# API Platform",
          // `position` has no counterpart in the expected item below.
          position: 1,
        },
      ],
    },
  });

  const expectedItems = [
    {
      title: pageTitle,
      url: pageUrl,
      description: pageDescription,
      content: "# API Platform",
      published: undefined,
      siteName: "openai.com",
    },
  ];
  expect(resolved).toEqual(expectedItems);
});
// The provider-created tool forwards `query`/`count` plus the config as `cfg`.
it("maps generic provider args into firecrawl search params", async () => {
  const provider = createFirecrawlWebSearchProvider();
  const searchTool = provider.createTool({
    config: { test: true },
  } as never);
  if (!searchTool) {
    throw new Error("Expected tool definition");
  }

  const outcome = await searchTool.execute({
    query: "openclaw docs",
    count: 4,
  });

  // The default search spy resolves with its own params, so the same shape is
  // expected both as the call argument and as the tool's result.
  const forwardedParams = {
    cfg: { test: true },
    query: "openclaw docs",
    count: 4,
  };
  expect(runFirecrawlSearch).toHaveBeenCalledWith(forwardedParams);
  expect(outcome).toEqual(forwardedParams);
});
// The dedicated search tool is expected to drop blank `sources`/`categories`
// entries before calling the client.
it("normalizes optional search parameters before invoking Firecrawl", async () => {
  // For this single call, wrap the forwarded params in an `ok` envelope; the
  // final assertion looks for that envelope under `details`.
  runFirecrawlSearch.mockImplementationOnce(async (params: Record<string, unknown>) => {
    return { ok: true, params };
  });

  const searchTool = createFirecrawlSearchTool({
    config: { env: "test" },
  } as never);

  const outcome = await searchTool.execute("call-1", {
    query: "web search",
    count: 6,
    timeoutSeconds: 12,
    sources: ["web", "", "news"],
    categories: ["research", ""],
    scrapeResults: true,
  });

  const normalizedParams = {
    cfg: { env: "test" },
    query: "web search",
    count: 6,
    timeoutSeconds: 12,
    sources: ["web", "news"],
    categories: ["research"],
    scrapeResults: true,
  };
  expect(runFirecrawlSearch).toHaveBeenCalledWith(normalizedParams);
  expect(outcome).toMatchObject({
    details: {
      ok: true,
      params: normalizedParams,
    },
  });
});
// With no `extractMode` supplied, the scrape tool is expected to send "markdown".
it("maps scrape params and defaults extract mode to markdown", async () => {
  const targetUrl = "https://docs.openclaw.ai";

  const scrapeTool = createFirecrawlScrapeTool({
    config: { env: "test" },
  } as never);

  const outcome = await scrapeTool.execute("call-1", {
    url: targetUrl,
    maxChars: 1500,
    onlyMainContent: false,
    maxAgeMs: 5000,
    proxy: "stealth",
    storeInCache: false,
    timeoutSeconds: 22,
  });

  const forwardedParams = {
    cfg: { env: "test" },
    url: targetUrl,
    extractMode: "markdown",
    maxChars: 1500,
    onlyMainContent: false,
    maxAgeMs: 5000,
    proxy: "stealth",
    storeInCache: false,
    timeoutSeconds: 22,
  };
  expect(runFirecrawlScrape).toHaveBeenCalledWith(forwardedParams);
  // The default scrape spy resolves with `{ ok, params }`, which the tool is
  // expected to expose under `details`.
  expect(outcome).toMatchObject({
    details: {
      ok: true,
      params: forwardedParams,
    },
  });
});
// An explicit "text" mode is kept, an unrecognised proxy value is replaced by
// `undefined`, and every omitted option is forwarded as `undefined`.
it("passes text mode through and ignores invalid proxy values", async () => {
  const targetUrl = "https://docs.openclaw.ai";

  const scrapeTool = createFirecrawlScrapeTool({
    config: { env: "test" },
  } as never);

  await scrapeTool.execute("call-2", {
    url: targetUrl,
    extractMode: "text",
    proxy: "invalid",
  });

  const forwardedParams = {
    cfg: { env: "test" },
    url: targetUrl,
    extractMode: "text",
    maxChars: undefined,
    onlyMainContent: undefined,
    maxAgeMs: undefined,
    proxy: undefined,
    storeInCache: undefined,
    timeoutSeconds: undefined,
  };
  expect(runFirecrawlScrape).toHaveBeenCalledWith(forwardedParams);
});
// When both locations are populated, the plugin entry's `webSearch` values
// are expected to win over `tools.web.search.firecrawl`.
it("prefers plugin webSearch config over legacy tool search config", () => {
  const pluginApiKey = "plugin-key";
  const pluginBaseUrl = "https://plugin.firecrawl.test";

  const cfg = {
    plugins: {
      entries: {
        firecrawl: {
          config: {
            webSearch: {
              apiKey: pluginApiKey,
              baseUrl: pluginBaseUrl,
            },
          },
        },
      },
    },
    tools: {
      web: {
        search: {
          firecrawl: {
            apiKey: "legacy-key",
            baseUrl: "https://legacy.firecrawl.test",
          },
        },
      },
    },
  } as OpenClawConfig;

  const resolvedSearchConfig = resolveFirecrawlSearchConfig(cfg);
  expect(resolvedSearchConfig).toEqual({
    apiKey: pluginApiKey,
    baseUrl: pluginBaseUrl,
  });

  expect(resolveFirecrawlApiKey(cfg)).toBe(pluginApiKey);
  expect(resolveFirecrawlBaseUrl(cfg)).toBe(pluginBaseUrl);
});
// Without a config: key and base URL come from the environment, the remaining
// settings from the exported defaults.
it("falls back to environment and defaults for fetch config values", () => {
  const envApiKey = "env-key";
  const envBaseUrl = "https://env.firecrawl.test";
  vi.stubEnv("FIRECRAWL_API_KEY", envApiKey);
  vi.stubEnv("FIRECRAWL_BASE_URL", envBaseUrl);

  // Environment-backed values.
  expect(resolveFirecrawlApiKey()).toBe(envApiKey);
  expect(resolveFirecrawlBaseUrl()).toBe(envBaseUrl);

  // Built-in defaults.
  expect(resolveFirecrawlOnlyMainContent()).toBe(true);
  expect(resolveFirecrawlMaxAgeMs()).toBe(DEFAULT_FIRECRAWL_MAX_AGE_MS);
  expect(resolveFirecrawlScrapeTimeoutSeconds()).toBe(DEFAULT_FIRECRAWL_SCRAPE_TIMEOUT_SECONDS);
  expect(resolveFirecrawlSearchTimeoutSeconds()).toBe(DEFAULT_FIRECRAWL_SEARCH_TIMEOUT_SECONDS);

  // NOTE(review): this only proves an empty config does not yield the default
  // URL; presumably it resolves to the stubbed env URL — confirm and consider
  // asserting that value directly.
  const emptyConfig = {} as OpenClawConfig;
  expect(resolveFirecrawlBaseUrl(emptyConfig)).not.toBe(DEFAULT_FIRECRAWL_BASE_URL);
});
// Config values under `tools.web.fetch.firecrawl` are honoured, and explicit
// fractional overrides are expected to come back without their fraction.
it("respects positive numeric overrides for scrape and cache behavior", () => {
  const configuredMaxAgeMs = 1234;
  const configuredTimeoutSeconds = 42;

  const cfg = {
    tools: {
      web: {
        fetch: {
          firecrawl: {
            onlyMainContent: false,
            maxAgeMs: configuredMaxAgeMs,
            timeoutSeconds: configuredTimeoutSeconds,
          },
        },
      },
    },
  } as OpenClawConfig;

  // Values read from the config.
  expect(resolveFirecrawlOnlyMainContent(cfg)).toBe(false);
  expect(resolveFirecrawlMaxAgeMs(cfg)).toBe(configuredMaxAgeMs);

  // An explicit override is used instead of the configured value: 77.9 -> 77.
  expect(resolveFirecrawlMaxAgeMs(cfg, 77.9)).toBe(77);

  expect(resolveFirecrawlScrapeTimeoutSeconds(cfg)).toBe(configuredTimeoutSeconds);
  expect(resolveFirecrawlScrapeTimeoutSeconds(cfg, 19.8)).toBe(19);

  // The search timeout resolver is called with the override alone.
  expect(resolveFirecrawlSearchTimeoutSeconds(9.7)).toBe(9);
});
// Payload shape: results under `data.results`, using alternative field names
// (`sourceURL`, `snippet`, `metadata.title`, `metadata.publishedDate`).
it("normalizes mixed search payload shapes into search items", () => {
  const postUrl = "https://www.example.com/post";
  const postMarkdown = "# Title\nBody";

  const resolved = firecrawlClientTesting.resolveSearchItems({
    data: {
      results: [
        {
          sourceURL: postUrl,
          snippet: "Snippet text",
          markdown: postMarkdown,
          metadata: {
            title: "Example title",
            publishedDate: "2026-03-22",
          },
        },
        // An entry with an empty URL is expected to be left out of the output.
        {
          url: "",
        },
      ],
    },
  });

  // Note the expected `siteName` has no leading "www.".
  const expectedItems = [
    {
      title: "Example title",
      url: postUrl,
      description: "Snippet text",
      content: postMarkdown,
      published: "2026-03-22",
      siteName: "example.com",
    },
  ];
  expect(resolved).toEqual(expectedItems);
});
// A body longer than `maxChars` must be flagged as truncated, while title and
// warning are still carried through.
it("parses scrape payloads, extracts text, and marks truncation", () => {
  const pageUrl = "https://docs.example.com/page";
  const charLimit = 12;

  const parsed = firecrawlClientTesting.parseFirecrawlScrapePayload({
    payload: {
      data: {
        markdown: "# Hello\n\nThis is a long body for scraping.",
        metadata: {
          title: "Example page",
          sourceURL: pageUrl,
          statusCode: 200,
        },
      },
      warning: "cached result",
    },
    url: pageUrl,
    extractMode: "text",
    maxChars: charLimit,
  });

  expect(parsed.finalUrl).toBe(pageUrl);
  expect(parsed.status).toBe(200);
  expect(parsed.extractMode).toBe("text");

  // Truncation is signalled by the flag and by a raw length above the limit.
  expect(parsed.truncated).toBe(true);
  expect(parsed.rawLength).toBeGreaterThan(charLimit);

  expect(String(parsed.text)).toContain("Hello");
  expect(String(parsed.title)).toContain("Example page");
  expect(String(parsed.warning)).toContain("cached result");
});
// An empty `data` object must be rejected with a specific error message.
it("throws when scrape payload has no usable content", () => {
  const parseEmptyPayload = () =>
    firecrawlClientTesting.parseFirecrawlScrapePayload({
      payload: {
        data: {},
      },
      url: "https://docs.example.com/page",
      extractMode: "markdown",
      maxChars: 100,
    });

  expect(parseEmptyPayload).toThrow("Firecrawl scrape returned no content.");
});
});