From bb3e5654870baeec102142a3260dcc9945c14ae3 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sun, 22 Mar 2026 15:00:56 -0700 Subject: [PATCH] docs(tools): restructure web tools IA and rewrite web.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Navigation restructure: - "Browser" group -> "Web Browser" - New "Web Tools" group containing Web Fetch, Web Search, and all 7 search provider sub-pages - Other tools (btw, diffs, etc.) stay at top level New page: - tools/web-fetch.md: dedicated web_fetch reference with Steps, config, Firecrawl fallback, limits Rewritten page: - tools/web.md: now "Web Search" -- focused search overview with Steps quick-start, CardGroup provider picker, Tabs for key storage, provider comparison table, auto-detection, parameters, examples. Removed all inline provider setup (lives in sub-pages) and web_fetch content (now in dedicated page). Final sidebar: Tools ├── Web Browser (browser, login, troubleshooting) ├── Web Tools │ ├── Web Fetch │ ├── Web Search │ └── Brave / Firecrawl / Gemini / Grok / Kimi / Perplexity / Tavily ├── btw, diffs, exec, ... 
--- docs/docs.json | 35 +-- docs/tools/web-fetch.md | 135 +++++++++++ docs/tools/web.md | 507 +++++++++++----------------------------- 3 files changed, 290 insertions(+), 387 deletions(-) create mode 100644 docs/tools/web-fetch.md diff --git a/docs/docs.json b/docs/docs.json index 7d3628401b0..422c0842e74 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -84,6 +84,10 @@ "source": "/perplexity", "destination": "/tools/perplexity-search" }, + { + "source": "/web-fetch", + "destination": "/tools/web-fetch" + }, { "source": "/gemini-search", "destination": "/tools/gemini-search" @@ -1104,7 +1108,7 @@ "pages": [ "tools/apply-patch", { - "group": "Browser", + "group": "Web Browser", "pages": [ "tools/browser", "tools/browser-login", @@ -1112,6 +1116,20 @@ "tools/browser-wsl2-windows-remote-cdp-troubleshooting" ] }, + { + "group": "Web Tools", + "pages": [ + "tools/web-fetch", + "tools/web", + "tools/brave-search", + "tools/firecrawl", + "tools/gemini-search", + "tools/grok-search", + "tools/kimi-search", + "tools/perplexity-search", + "tools/tavily" + ] + }, "tools/btw", "tools/diffs", "tools/elevated", @@ -1122,20 +1140,7 @@ "tools/loop-detection", "tools/pdf", "tools/reactions", - "tools/thinking", - "tools/web", - { - "group": "Web Search", - "pages": [ - "tools/brave-search", - "tools/firecrawl", - "tools/gemini-search", - "tools/grok-search", - "tools/kimi-search", - "tools/perplexity-search", - "tools/tavily" - ] - } + "tools/thinking" ] }, { diff --git a/docs/tools/web-fetch.md b/docs/tools/web-fetch.md new file mode 100644 index 00000000000..8eb23517f04 --- /dev/null +++ b/docs/tools/web-fetch.md @@ -0,0 +1,135 @@ +--- +summary: "web_fetch tool -- HTTP fetch with readable content extraction" +read_when: + - You want to fetch a URL and extract readable content + - You need to configure web_fetch or its Firecrawl fallback + - You want to understand web_fetch limits and caching +title: "Web Fetch" +sidebarTitle: "Web Fetch" +--- + +# Web Fetch + +The `web_fetch` 
tool does a plain HTTP GET and extracts readable content +(HTML to markdown or text). It does **not** execute JavaScript. + +For JS-heavy sites or login-protected pages, use the +[Web Browser](/tools/browser) instead. + +## Quick start + +`web_fetch` is **enabled by default** -- no configuration needed. The agent can +call it immediately: + +```javascript +await web_fetch({ url: "https://example.com/article" }); +``` + +## Tool parameters + +| Parameter | Type | Description | +| ------------- | -------- | ---------------------------------------- | +| `url` | `string` | URL to fetch (required, http/https only) | +| `extractMode` | `string` | `"markdown"` (default) or `"text"` | +| `maxChars` | `number` | Truncate output to this many chars | + +## How it works + + + + Sends an HTTP GET with a Chrome-like User-Agent and `Accept-Language` + header. Blocks private/internal hostnames and re-checks redirects. + + + Runs Readability (main-content extraction) on the HTML response. + + + If Readability fails and Firecrawl is configured, retries through the + Firecrawl API with bot-circumvention mode. + + + Results are cached for 15 minutes (configurable) to reduce repeated + fetches of the same URL. 
+ + + +## Config + +```json5 +{ + tools: { + web: { + fetch: { + enabled: true, // default: true + maxChars: 50000, // max output chars + maxCharsCap: 50000, // hard cap for maxChars param + maxResponseBytes: 2000000, // max download size before truncation + timeoutSeconds: 30, + cacheTtlMinutes: 15, + maxRedirects: 3, + readability: true, // use Readability extraction + userAgent: "Mozilla/5.0 ...", // override User-Agent + }, + }, + }, +} +``` + +## Firecrawl fallback + +If Readability extraction fails, `web_fetch` can fall back to +[Firecrawl](/tools/firecrawl) for bot-circumvention and better extraction: + +```json5 +{ + tools: { + web: { + fetch: { + firecrawl: { + enabled: true, + apiKey: "fc-...", // optional if FIRECRAWL_API_KEY is set + baseUrl: "https://api.firecrawl.dev", + onlyMainContent: true, + maxAgeMs: 86400000, // cache duration (1 day) + timeoutSeconds: 60, + }, + }, + }, + }, +} +``` + +`tools.web.fetch.firecrawl.apiKey` supports SecretRef objects. + + + If Firecrawl is enabled and its SecretRef is unresolved with no + `FIRECRAWL_API_KEY` env fallback, gateway startup fails fast. 
+ + +## Limits and safety + +- `maxChars` is clamped to `tools.web.fetch.maxCharsCap` +- Response body is capped at `maxResponseBytes` before parsing; oversized + responses are truncated with a warning +- Private/internal hostnames are blocked +- Redirects are checked and limited by `maxRedirects` +- `web_fetch` is best-effort -- some sites need the [Web Browser](/tools/browser) + +## Tool profiles + +If you use tool profiles or allowlists, add `web_fetch` or `group:web`: + +```json5 +{ + tools: { + allow: ["web_fetch"], + // or: allow: ["group:web"] (includes both web_fetch and web_search) + }, +} +``` + +## Related + +- [Web Search](/tools/web) -- search the web with multiple providers +- [Web Browser](/tools/browser) -- full browser automation for JS-heavy sites +- [Firecrawl](/tools/firecrawl) -- Firecrawl search and scrape tools diff --git a/docs/tools/web.md b/docs/tools/web.md index 36be613eb4e..fc70bb0b90f 100644 --- a/docs/tools/web.md +++ b/docs/tools/web.md @@ -1,328 +1,117 @@ --- -summary: "Web search + fetch tools (Brave, Firecrawl, Gemini, Grok, Kimi, Perplexity, and Tavily providers)" +summary: "web_search tool -- search the web with Brave, Firecrawl, Gemini, Grok, Kimi, Perplexity, or Tavily" read_when: - - You want to enable web_search or web_fetch - - You need provider API key setup - - You want to use Gemini with Google Search grounding -title: "Web Tools" + - You want to enable or configure web_search + - You need to choose a search provider + - You want to understand auto-detection and provider fallback +title: "Web Search" +sidebarTitle: "Web Search" --- -# Web tools +# Web Search -OpenClaw ships two lightweight web tools: +The `web_search` tool searches the web using your configured provider and +returns results. Results are cached by query for 15 minutes (configurable). -- `web_search` — Search the web using Brave Search API, Firecrawl Search, Gemini with Google Search grounding, Grok, Kimi, Perplexity Search API, or Tavily Search API. 
-- `web_fetch` — HTTP fetch + readable extraction (HTML → markdown/text). + + `web_search` is a lightweight HTTP tool, not browser automation. For + JS-heavy sites or logins, use the [Web Browser](/tools/browser). For + fetching a specific URL, use [Web Fetch](/tools/web-fetch). + -These are **not** browser automation. For JS-heavy sites or logins, use the -[Browser tool](/tools/browser). +## Quick start -## How it works + + + Pick a provider and get an API key. See the provider pages below for + sign-up links. + + + ```bash + openclaw configure --section web + ``` + This stores the key and sets the provider. You can also set an env var + (e.g. `BRAVE_API_KEY`) and skip this step. + + + The agent can now call `web_search`: -- `web_search` calls your configured provider and returns results. -- Results are cached by query for 15 minutes (configurable). -- `web_fetch` does a plain HTTP GET and extracts readable content - (HTML → markdown/text). It does **not** execute JavaScript. -- `web_fetch` is enabled by default (unless explicitly disabled). -- The bundled Firecrawl plugin also adds `firecrawl_search` and `firecrawl_scrape` when enabled. -- The bundled Tavily plugin also adds `tavily_search` and `tavily_extract` when enabled. + ```javascript + await web_search({ query: "OpenClaw plugin SDK" }); + ``` -See [Brave Search setup](/tools/brave-search), [Perplexity Search setup](/tools/perplexity-search), and [Tavily Search setup](/tools/tavily) for provider-specific details. 
+ + -## Choosing a search provider - -| Provider | Result shape | Setup guide | -| ------------------------------------------ | ---------------------------------- | ------------------------------------------- | -| [**Brave Search**](/tools/brave-search) | Structured results with snippets | `BRAVE_API_KEY` | -| [**Firecrawl**](/tools/firecrawl) | Structured results with snippets | `FIRECRAWL_API_KEY` | -| [**Gemini**](/tools/gemini-search) | AI-synthesized answers + citations | `GEMINI_API_KEY` | -| [**Grok**](/tools/grok-search) | AI-synthesized answers + citations | `XAI_API_KEY` | -| [**Kimi**](/tools/kimi-search) | AI-synthesized answers + citations | `KIMI_API_KEY` / `MOONSHOT_API_KEY` | -| [**Perplexity**](/tools/perplexity-search) | Structured results with snippets | `PERPLEXITY_API_KEY` / `OPENROUTER_API_KEY` | -| [**Tavily**](/tools/tavily) | Structured results with snippets | `TAVILY_API_KEY` | - -### Auto-detection - -The table above is alphabetical. If no `provider` is explicitly set, runtime auto-detection checks providers in this order: - -1. **Brave** — `BRAVE_API_KEY` env var or `plugins.entries.brave.config.webSearch.apiKey` -2. **Gemini** — `GEMINI_API_KEY` env var or `plugins.entries.google.config.webSearch.apiKey` -3. **Grok** — `XAI_API_KEY` env var or `plugins.entries.xai.config.webSearch.apiKey` -4. **Kimi** — `KIMI_API_KEY` / `MOONSHOT_API_KEY` env var or `plugins.entries.moonshot.config.webSearch.apiKey` -5. **Perplexity** — `PERPLEXITY_API_KEY`, `OPENROUTER_API_KEY`, or `plugins.entries.perplexity.config.webSearch.apiKey` -6. **Firecrawl** — `FIRECRAWL_API_KEY` env var or `plugins.entries.firecrawl.config.webSearch.apiKey` -7. **Tavily** — `TAVILY_API_KEY` env var or `plugins.entries.tavily.config.webSearch.apiKey` - -If no keys are found, it falls back to Brave (you'll get a missing-key error prompting you to configure one). - -Runtime SecretRef behavior: - -- Web tool SecretRefs are resolved atomically at gateway startup/reload. 
-- In auto-detect mode, OpenClaw resolves only the selected provider key. Non-selected provider SecretRefs stay inactive until selected. -- If the selected provider SecretRef is unresolved and no provider env fallback exists, startup/reload fails fast. - -## Setting up web search - -Run `openclaw configure --section web` to choose a provider and store your API key. For detailed setup, see the provider-specific pages: +## Choosing a provider - Structured results with snippets and LLM context mode + Structured results with snippets. Supports `llm-context` mode, country/language filters. Free tier available. - - Search + scraping with content extraction + + Structured results. Best paired with `firecrawl_search` and `firecrawl_scrape` for deep extraction. - AI-synthesized answers via Google Search grounding + AI-synthesized answers with citations via Google Search grounding. - AI-synthesized answers via xAI web grounding + AI-synthesized answers with citations via xAI web grounding. - AI-synthesized answers via Moonshot web search + AI-synthesized answers with citations via Moonshot web search. - Structured results with content extraction controls + Structured results with content extraction controls and domain filtering. - Search depth, topic filtering, and URL extraction + Structured results with search depth, topic filtering, and `tavily_extract` for URL extraction. -Provider-specific web search config lives under `plugins.entries..config.webSearch.*`. 
+### Provider comparison -### Where to store the key +| Provider | Result style | Filters | API key | +| -------------------------------------- | -------------------------- | ------------------------------------------------ | ------------------------------------------- | +| [Brave](/tools/brave-search) | Structured snippets | Country, language, time, `llm-context` mode | `BRAVE_API_KEY` | +| [Firecrawl](/tools/firecrawl) | Structured snippets | Via `firecrawl_search` tool | `FIRECRAWL_API_KEY` | +| [Gemini](/tools/gemini-search) | AI-synthesized + citations | -- | `GEMINI_API_KEY` | +| [Grok](/tools/grok-search) | AI-synthesized + citations | -- | `XAI_API_KEY` | +| [Kimi](/tools/kimi-search) | AI-synthesized + citations | -- | `KIMI_API_KEY` / `MOONSHOT_API_KEY` | +| [Perplexity](/tools/perplexity-search) | Structured snippets | Country, language, time, domains, content limits | `PERPLEXITY_API_KEY` / `OPENROUTER_API_KEY` | +| [Tavily](/tools/tavily) | Structured snippets | Via `tavily_search` tool | `TAVILY_API_KEY` | -**Via config:** run `openclaw configure --section web`. It stores the key under the provider-specific config path: +## Auto-detection -- Brave: `plugins.entries.brave.config.webSearch.apiKey` -- Firecrawl: `plugins.entries.firecrawl.config.webSearch.apiKey` -- Gemini: `plugins.entries.google.config.webSearch.apiKey` -- Grok: `plugins.entries.xai.config.webSearch.apiKey` -- Kimi: `plugins.entries.moonshot.config.webSearch.apiKey` -- Perplexity: `plugins.entries.perplexity.config.webSearch.apiKey` -- Tavily: `plugins.entries.tavily.config.webSearch.apiKey` +If no `provider` is set, OpenClaw checks for API keys in this order and uses +the first one found: -All of these fields also support SecretRef objects. +1. **Brave** -- `BRAVE_API_KEY` or `plugins.entries.brave.config.webSearch.apiKey` +2. **Gemini** -- `GEMINI_API_KEY` or `plugins.entries.google.config.webSearch.apiKey` +3. 
**Grok** -- `XAI_API_KEY` or `plugins.entries.xai.config.webSearch.apiKey` +4. **Kimi** -- `KIMI_API_KEY` / `MOONSHOT_API_KEY` or `plugins.entries.moonshot.config.webSearch.apiKey` +5. **Perplexity** -- `PERPLEXITY_API_KEY` / `OPENROUTER_API_KEY` or `plugins.entries.perplexity.config.webSearch.apiKey` +6. **Firecrawl** -- `FIRECRAWL_API_KEY` or `plugins.entries.firecrawl.config.webSearch.apiKey` +7. **Tavily** -- `TAVILY_API_KEY` or `plugins.entries.tavily.config.webSearch.apiKey` -**Via environment:** set provider env vars in the Gateway process environment: +If no keys are found, it falls back to Brave (you will get a missing-key error +prompting you to configure one). -- Brave: `BRAVE_API_KEY` -- Firecrawl: `FIRECRAWL_API_KEY` -- Gemini: `GEMINI_API_KEY` -- Grok: `XAI_API_KEY` -- Kimi: `KIMI_API_KEY` or `MOONSHOT_API_KEY` -- Perplexity: `PERPLEXITY_API_KEY` or `OPENROUTER_API_KEY` -- Tavily: `TAVILY_API_KEY` + + All provider key fields support SecretRef objects. In auto-detect mode, + OpenClaw resolves only the selected provider key -- non-selected SecretRefs + stay inactive. + -For a gateway install, put these in `~/.openclaw/.env` (or your service environment). See [Env vars](/help/faq#how-does-openclaw-load-environment-variables). 
- -### Config examples - -**Brave Search:** - -```json5 -{ - plugins: { - entries: { - brave: { - config: { - webSearch: { - apiKey: "YOUR_BRAVE_API_KEY", // optional if BRAVE_API_KEY is set // pragma: allowlist secret - }, - }, - }, - }, - }, - tools: { - web: { - search: { - enabled: true, - provider: "brave", - }, - }, - }, -} -``` - -**Firecrawl Search:** - -```json5 -{ - plugins: { - entries: { - firecrawl: { - enabled: true, - }, - }, - }, - tools: { - web: { - search: { - enabled: true, - provider: "firecrawl", - }, - }, - }, - plugins: { - entries: { - firecrawl: { - enabled: true, - config: { - webSearch: { - apiKey: "fc-...", // optional if FIRECRAWL_API_KEY is set - baseUrl: "https://api.firecrawl.dev", - }, - }, - }, - }, - }, -} -``` - -When you choose Firecrawl in onboarding or `openclaw configure --section web`, OpenClaw enables the bundled Firecrawl plugin automatically so `web_search`, `firecrawl_search`, and `firecrawl_scrape` are all available. - -**Tavily Search:** - -```json5 -{ - plugins: { - entries: { - tavily: { - enabled: true, - config: { - webSearch: { - apiKey: "tvly-...", // optional if TAVILY_API_KEY is set - baseUrl: "https://api.tavily.com", - }, - }, - }, - }, - }, - tools: { - web: { - search: { - enabled: true, - provider: "tavily", - }, - }, - }, -} -``` - -When you choose Tavily in onboarding or `openclaw configure --section web`, OpenClaw enables the bundled Tavily plugin automatically so `web_search`, `tavily_search`, and `tavily_extract` are all available. - -**Brave LLM Context mode:** - -```json5 -{ - plugins: { - entries: { - brave: { - config: { - webSearch: { - apiKey: "YOUR_BRAVE_API_KEY", // optional if BRAVE_API_KEY is set // pragma: allowlist secret - mode: "llm-context", - }, - }, - }, - }, - }, - tools: { - web: { - search: { - enabled: true, - provider: "brave", - }, - }, - }, -} -``` - -`llm-context` returns extracted page chunks for grounding instead of standard Brave snippets. 
-In this mode, `country` and `language` / `search_lang` still work, but `ui_lang`, -`freshness`, `date_after`, and `date_before` are rejected. - -**Perplexity Search:** - -```json5 -{ - plugins: { - entries: { - perplexity: { - config: { - webSearch: { - apiKey: "pplx-...", // optional if PERPLEXITY_API_KEY is set - }, - }, - }, - }, - }, - tools: { - web: { - search: { - enabled: true, - provider: "perplexity", - }, - }, - }, -} -``` - -**Perplexity via OpenRouter / Sonar compatibility:** - -```json5 -{ - plugins: { - entries: { - perplexity: { - config: { - webSearch: { - apiKey: "", // optional if OPENROUTER_API_KEY is set - baseUrl: "https://openrouter.ai/api/v1", - model: "perplexity/sonar-pro", - }, - }, - }, - }, - }, - tools: { - web: { - search: { - enabled: true, - provider: "perplexity", - }, - }, - }, -} -``` - -## web_search - -Search the web using your configured provider. - -### Requirements - -- `tools.web.search.enabled` must not be `false` (default: enabled) -- API key for your chosen provider: - - **Brave**: `BRAVE_API_KEY` or `plugins.entries.brave.config.webSearch.apiKey` - - **Firecrawl**: `FIRECRAWL_API_KEY` or `plugins.entries.firecrawl.config.webSearch.apiKey` - - **Gemini**: `GEMINI_API_KEY` or `plugins.entries.google.config.webSearch.apiKey` - - **Grok**: `XAI_API_KEY` or `plugins.entries.xai.config.webSearch.apiKey` - - **Kimi**: `KIMI_API_KEY`, `MOONSHOT_API_KEY`, or `plugins.entries.moonshot.config.webSearch.apiKey` - - **Perplexity**: `PERPLEXITY_API_KEY`, `OPENROUTER_API_KEY`, or `plugins.entries.perplexity.config.webSearch.apiKey` - - **Tavily**: `TAVILY_API_KEY` or `plugins.entries.tavily.config.webSearch.apiKey` -- All provider key fields above support SecretRef objects. 
- -### Config +## Config ```json5 { tools: { web: { search: { - enabled: true, - apiKey: "BRAVE_API_KEY_HERE", // optional if BRAVE_API_KEY is set + enabled: true, // default: true + provider: "brave", // or omit for auto-detection maxResults: 5, timeoutSeconds: 30, cacheTtlMinutes: 15, @@ -332,19 +121,54 @@ Search the web using your configured provider. } ``` -### Tool parameters +Provider-specific config (API keys, base URLs, modes) lives under +`plugins.entries.<plugin>.config.webSearch.*`. See the provider pages for +examples. -Parameters depend on the selected provider. +### Storing API keys -Perplexity's OpenRouter / Sonar compatibility path supports only `query` and `freshness`. -If you set `plugins.entries.perplexity.config.webSearch.baseUrl` / `model`, use `OPENROUTER_API_KEY`, or configure an `sk-or-...` key under `plugins.entries.perplexity.config.webSearch.apiKey`, Search API-only filters return explicit errors. + + + Run `openclaw configure --section web` or set the key directly: + + ```json5 + { + plugins: { + entries: { + brave: { + config: { + webSearch: { + apiKey: "YOUR_KEY", // pragma: allowlist secret + }, + }, + }, + }, + }, + } + ``` + + + + Set the provider env var in the Gateway process environment: + + ```bash + export BRAVE_API_KEY="YOUR_KEY" + ``` + + For a gateway install, put it in `~/.openclaw/.env`. + See [Env vars](/help/faq#how-does-openclaw-load-environment-variables). + + + + +## Tool parameters | Parameter | Description | | --------------------- | ----------------------------------------------------- | | `query` | Search query (required) | | `count` | Results to return (1-10, default: 5) | -| `country` | 2-letter ISO country code (e.g., "US", "DE") | -| `language` | ISO 639-1 language code (e.g., "en", "de") | +| `country` | 2-letter ISO country code (e.g. "US", "DE") | +| `language` | ISO 639-1 language code (e.g. 
"en", "de") | | `freshness` | Time filter: `day`, `week`, `month`, or `year` | | `date_after` | Results after this date (YYYY-MM-DD) | | `date_before` | Results before this date (YYYY-MM-DD) | @@ -353,114 +177,53 @@ If you set `plugins.entries.perplexity.config.webSearch.baseUrl` / `model`, use | `max_tokens` | Total content budget, default 25000 (Perplexity only) | | `max_tokens_per_page` | Per-page token limit, default 2048 (Perplexity only) | -Firecrawl `web_search` supports `query` and `count`. For Firecrawl-specific controls like `sources`, `categories`, result scraping, or scrape timeout, use `firecrawl_search` from the bundled Firecrawl plugin. + + Not all parameters work with all providers. Brave `llm-context` mode + rejects `ui_lang`, `freshness`, `date_after`, and `date_before`. + Firecrawl and Tavily only support `query` and `count` through `web_search` + -- use their dedicated tools for advanced options. + -Tavily `web_search` supports `query` and `count` (up to 20 results). For Tavily-specific controls like `search_depth`, `topic`, `include_answer`, or domain filters, use `tavily_search` from the bundled Tavily plugin. For URL content extraction, use `tavily_extract`. See [Tavily](/tools/tavily) for details. 
- -**Examples:** +## Examples ```javascript +// Basic search +await web_search({ query: "OpenClaw plugin SDK" }); + // German-specific search -await web_search({ - query: "TV online schauen", - country: "DE", - language: "de", -}); +await web_search({ query: "TV online schauen", country: "DE", language: "de" }); // Recent results (past week) -await web_search({ - query: "TMBG interview", - freshness: "week", -}); +await web_search({ query: "AI developments", freshness: "week" }); -// Date range search +// Date range await web_search({ - query: "AI developments", + query: "climate research", date_after: "2024-01-01", date_before: "2024-06-30", }); // Domain filtering (Perplexity only) -await web_search({ - query: "climate research", - domain_filter: ["nature.com", "science.org", ".edu"], -}); - -// Exclude domains (Perplexity only) await web_search({ query: "product reviews", domain_filter: ["-reddit.com", "-pinterest.com"], }); - -// More content extraction (Perplexity only) -await web_search({ - query: "detailed AI research", - max_tokens: 50000, - max_tokens_per_page: 4096, -}); ``` -When Brave `llm-context` mode is enabled, `ui_lang`, `freshness`, `date_after`, and -`date_before` are not supported. Use Brave `web` mode for those filters. +## Tool profiles -## web_fetch - -Fetch a URL and extract readable content. - -### web_fetch requirements - -- `tools.web.fetch.enabled` must not be `false` (default: enabled) -- Optional Firecrawl fallback: set `tools.web.fetch.firecrawl.apiKey` or `FIRECRAWL_API_KEY`. -- `tools.web.fetch.firecrawl.apiKey` supports SecretRef objects. 
- -### web_fetch config +If you use tool profiles or allowlists, add `web_search` or `group:web`: ```json5 { tools: { - web: { - fetch: { - enabled: true, - maxChars: 50000, - maxCharsCap: 50000, - maxResponseBytes: 2000000, - timeoutSeconds: 30, - cacheTtlMinutes: 15, - maxRedirects: 3, - userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", - readability: true, - firecrawl: { - enabled: true, - apiKey: "FIRECRAWL_API_KEY_HERE", // optional if FIRECRAWL_API_KEY is set - baseUrl: "https://api.firecrawl.dev", - onlyMainContent: true, - maxAgeMs: 86400000, // ms (1 day) - timeoutSeconds: 60, - }, - }, - }, + allow: ["web_search"], + // or: allow: ["group:web"] (includes both web_search and web_fetch) }, } ``` -### web_fetch tool parameters +## Related -- `url` (required, http/https only) -- `extractMode` (`markdown` | `text`) -- `maxChars` (truncate long pages) - -Notes: - -- `web_fetch` uses Readability (main-content extraction) first, then Firecrawl (if configured). If both fail, the tool returns an error. -- Firecrawl requests use bot-circumvention mode and cache results by default. -- Firecrawl SecretRefs are resolved only when Firecrawl is active (`tools.web.fetch.enabled !== false` and `tools.web.fetch.firecrawl.enabled !== false`). -- If Firecrawl is active and its SecretRef is unresolved with no `FIRECRAWL_API_KEY` fallback, startup/reload fails fast. -- `web_fetch` sends a Chrome-like User-Agent and `Accept-Language` by default; override `userAgent` if needed. -- `web_fetch` blocks private/internal hostnames and re-checks redirects (limit with `maxRedirects`). -- `maxChars` is clamped to `tools.web.fetch.maxCharsCap`. -- `web_fetch` caps the downloaded response body size to `tools.web.fetch.maxResponseBytes` before parsing; oversized responses are truncated and include a warning. -- `web_fetch` is best-effort extraction; some sites will need the browser tool. 
-- See [Firecrawl](/tools/firecrawl) for key setup and service details. -- Responses are cached (default 15 minutes) to reduce repeated fetches. -- If you use tool profiles/allowlists, add `web_search`/`web_fetch` or `group:web`. -- If the API key is missing, `web_search` returns a short setup hint with a docs link. +- [Web Fetch](/tools/web-fetch) -- fetch a URL and extract readable content +- [Web Browser](/tools/browser) -- full browser automation for JS-heavy sites