From ff61343d76933c2d7bc01a13db87a2554514e0d0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 15 Mar 2026 08:44:02 -0700 Subject: [PATCH] fix: harden mention pattern regex compilation --- CHANGELOG.md | 1 + docs/channels/group-messages.md | 4 +- docs/channels/groups.md | 2 +- docs/gateway/configuration-reference.md | 4 +- docs/gateway/configuration.md | 2 +- src/auto-reply/inbound.test.ts | 19 +++- src/auto-reply/reply/mentions.ts | 124 +++++++++++++++++------- src/channels/dock.ts | 8 +- src/channels/plugins/types.core.ts | 5 + src/channels/plugins/whatsapp-shared.ts | 4 + src/config/schema.help.ts | 2 +- src/infra/exec-approval-forwarder.ts | 7 +- src/logging/redact.ts | 6 +- src/plugin-sdk/whatsapp.ts | 1 + src/security/config-regex.ts | 78 +++++++++++++++ src/security/safe-regex.test.ts | 14 ++- src/security/safe-regex.ts | 49 ++++++++-- 17 files changed, 265 insertions(+), 65 deletions(-) create mode 100644 src/security/config-regex.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bcd43d2b62..5fa449296ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Group mention gating: reject invalid and unsafe nested-repetition `mentionPatterns`, reuse the shared safe config-regex compiler across mention stripping and detection, and cache strip-time regex compilation so noisy groups avoid repeated recompiles. - Control UI/chat sessions: show human-readable labels in the grouped session dropdown again, keep unique scoped fallbacks when metadata is missing, and disambiguate duplicate labels only when needed. (#45130) thanks @luzhidong. - Slack/interactive replies: preserve `channelData.slack.blocks` through live DM delivery and preview-finalized edits so Block Kit button and select directives render instead of falling back to raw text. Thanks @vincentkoc. - Feishu/topic threads: fetch full thread context, including prior bot replies, when starting a topic-thread session so follow-up turns in Feishu topics keep the right conversation state. Thanks @Coobiw. diff --git a/docs/channels/group-messages.md b/docs/channels/group-messages.md index e6a00ab5c5e..078ae9e7845 100644 --- a/docs/channels/group-messages.md +++ b/docs/channels/group-messages.md @@ -13,7 +13,7 @@ Note: `agents.list[].groupChat.mentionPatterns` is now used by Telegram/Discord/ ## What’s implemented (2025-12-03) -- Activation modes: `mention` (default) or `always`. `mention` requires a ping (real WhatsApp @-mentions via `mentionedJids`, regex patterns, or the bot’s E.164 anywhere in the text). `always` wakes the agent on every message but it should reply only when it can add meaningful value; otherwise it returns the silent token `NO_REPLY`. Defaults can be set in config (`channels.whatsapp.groups`) and overridden per group via `/activation`. When `channels.whatsapp.groups` is set, it also acts as a group allowlist (include `"*"` to allow all). +- Activation modes: `mention` (default) or `always`. `mention` requires a ping (real WhatsApp @-mentions via `mentionedJids`, safe regex patterns, or the bot’s E.164 anywhere in the text). `always` wakes the agent on every message but it should reply only when it can add meaningful value; otherwise it returns the silent token `NO_REPLY`. Defaults can be set in config (`channels.whatsapp.groups`) and overridden per group via `/activation`. When `channels.whatsapp.groups` is set, it also acts as a group allowlist (include `"*"` to allow all). - Group policy: `channels.whatsapp.groupPolicy` controls whether group messages are accepted (`open|disabled|allowlist`). `allowlist` uses `channels.whatsapp.groupAllowFrom` (fallback: explicit `channels.whatsapp.allowFrom`). Default is `allowlist` (blocked until you add senders). - Per-group sessions: session keys look like `agent::whatsapp:group:` so commands such as `/verbose on` or `/think high` (sent as standalone messages) are scoped to that group; personal DM state is untouched. Heartbeats are skipped for group threads. - Context injection: **pending-only** group messages (default 50) that _did not_ trigger a run are prefixed under `[Chat messages since your last reply - for context]`, with the triggering line under `[Current message - respond to this]`. Messages already in the session are not re-injected. @@ -50,7 +50,7 @@ Add a `groupChat` block to `~/.openclaw/openclaw.json` so display-name pings wor Notes: -- The regexes are case-insensitive; they cover a display-name ping like `@openclaw` and the raw number with or without `+`/spaces. +- The regexes are case-insensitive and use the same safe-regex guardrails as other config regex surfaces; invalid patterns and unsafe nested repetition are ignored. - WhatsApp still sends canonical mentions via `mentionedJids` when someone taps the contact, so the number fallback is rarely needed but is a useful safety net. ### Activation command (owner-only) diff --git a/docs/channels/groups.md b/docs/channels/groups.md index 3f9df076454..a6bd8621784 100644 --- a/docs/channels/groups.md +++ b/docs/channels/groups.md @@ -243,7 +243,7 @@ Replying to a bot message counts as an implicit mention (when the channel suppor Notes: -- `mentionPatterns` are case-insensitive regexes. +- `mentionPatterns` are case-insensitive safe regex patterns; invalid patterns and unsafe nested-repetition forms are ignored. - Surfaces that provide explicit mentions still pass; patterns are a fallback. - Per-agent override: `agents.list[].groupChat.mentionPatterns` (useful when multiple agents share a group). - Mention gating is only enforced when mention detection is possible (native mentions or `mentionPatterns` are configured). diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index 7bb7fb5824f..b87ad930161 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -655,12 +655,12 @@ See the full channel index: [Channels](/channels). ### Group chat mention gating -Group messages default to **require mention** (metadata mention or regex patterns). Applies to WhatsApp, Telegram, Discord, Google Chat, and iMessage group chats. +Group messages default to **require mention** (metadata mention or safe regex patterns). Applies to WhatsApp, Telegram, Discord, Google Chat, and iMessage group chats. **Mention types:** - **Metadata mentions**: Native platform @-mentions. Ignored in WhatsApp self-chat mode. -- **Text patterns**: Regex patterns in `agents.list[].groupChat.mentionPatterns`. Always checked. +- **Text patterns**: Safe regex patterns in `agents.list[].groupChat.mentionPatterns`. Invalid patterns and unsafe nested repetition are ignored. - Mention gating is enforced only when detection is possible (native mentions or at least one pattern). ```json5 diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 0f1dd65cbbc..9a047cab857 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -170,7 +170,7 @@ When validation fails: ``` - **Metadata mentions**: native @-mentions (WhatsApp tap-to-mention, Telegram @bot, etc.) - - **Text patterns**: regex patterns in `mentionPatterns` + - **Text patterns**: safe regex patterns in `mentionPatterns` - See [full reference](/gateway/configuration-reference#group-chat-mention-gating) for per-channel overrides and self-chat mode. diff --git a/src/auto-reply/inbound.test.ts b/src/auto-reply/inbound.test.ts index 4d624ecabd1..77ff61e814e 100644 --- a/src/auto-reply/inbound.test.ts +++ b/src/auto-reply/inbound.test.ts @@ -17,6 +17,7 @@ import { buildMentionRegexes, matchesMentionPatterns, normalizeMentionText, + stripMentions, } from "./reply/mentions.js"; import { initSessionState } from "./reply/session.js"; import { applyTemplate, type MsgContext, type TemplateContext } from "./templating.js"; @@ -394,10 +395,10 @@ describe("initSessionState BodyStripped", () => { }); describe("mention helpers", () => { - it("builds regexes and skips invalid patterns", () => { + it("builds regexes and skips invalid or unsafe patterns", () => { const regexes = buildMentionRegexes({ messages: { - groupChat: { mentionPatterns: ["\\bopenclaw\\b", "(invalid"] }, + groupChat: { mentionPatterns: ["\\bopenclaw\\b", "(invalid", "(a+)+$"] }, }, }); expect(regexes).toHaveLength(1); @@ -435,6 +436,20 @@ describe("mention helpers", () => { expect(matchesMentionPatterns("workbot: hi", regexes)).toBe(true); expect(matchesMentionPatterns("global: hi", regexes)).toBe(false); }); + + it("strips safe mention patterns and ignores unsafe ones", () => { + const stripped = stripMentions("openclaw " + "a".repeat(28) + "!", {} as MsgContext, { + messages: { + groupChat: { mentionPatterns: ["\\bopenclaw\\b", "(a+)+$"] }, + }, + }); + expect(stripped).toBe(`${"a".repeat(28)}!`); + }); + + it("strips provider mention regexes without config compilation", () => { + const stripped = stripMentions("<@12345> hello", { Provider: "discord" } as MsgContext, {}); + expect(stripped).toBe("hello"); + }); }); describe("resolveGroupRequireMention", () => { diff --git a/src/auto-reply/reply/mentions.ts b/src/auto-reply/reply/mentions.ts index ca20905efae..714e599e38a 100644 --- a/src/auto-reply/reply/mentions.ts +++ b/src/auto-reply/reply/mentions.ts @@ -2,6 +2,8 @@ import { resolveAgentConfig } from "../../agents/agent-scope.js"; import { getChannelDock } from "../../channels/dock.js"; import { normalizeChannelId } from "../../channels/plugins/index.js"; import type { OpenClawConfig } from "../../config/config.js"; +import { createSubsystemLogger } from "../../logging/subsystem.js"; +import { compileConfigRegexes, type ConfigRegexRejectReason } from "../../security/config-regex.js"; import { escapeRegExp } from "../../utils.js"; import type { MsgContext } from "../templating.js"; @@ -21,8 +23,12 @@ function deriveMentionPatterns(identity?: { name?: string; emoji?: string }) { } const BACKSPACE_CHAR = "\u0008"; -const mentionRegexCompileCache = new Map(); +const mentionMatchRegexCompileCache = new Map(); +const mentionStripRegexCompileCache = new Map(); const MAX_MENTION_REGEX_COMPILE_CACHE_KEYS = 512; +const mentionPatternWarningCache = new Set(); +const MAX_MENTION_PATTERN_WARNING_KEYS = 512; +const log = createSubsystemLogger("mentions"); export const CURRENT_MESSAGE_MARKER = "[Current message - respond to this]"; @@ -37,6 +43,64 @@ function normalizeMentionPatterns(patterns: string[]): string[] { return patterns.map(normalizeMentionPattern); } +function warnRejectedMentionPattern( + pattern: string, + flags: string, + reason: ConfigRegexRejectReason, +) { + const key = `${flags}::${reason}::${pattern}`; + if (mentionPatternWarningCache.has(key)) { + return; + } + mentionPatternWarningCache.add(key); + if (mentionPatternWarningCache.size > MAX_MENTION_PATTERN_WARNING_KEYS) { + mentionPatternWarningCache.clear(); + mentionPatternWarningCache.add(key); + } + log.warn("Ignoring unsupported group mention pattern", { + pattern, + flags, + reason, + }); +} + +function cacheMentionRegexes( + cache: Map, + cacheKey: string, + regexes: RegExp[], +): RegExp[] { + cache.set(cacheKey, regexes); + if (cache.size > MAX_MENTION_REGEX_COMPILE_CACHE_KEYS) { + cache.clear(); + cache.set(cacheKey, regexes); + } + return [...regexes]; +} + +function compileMentionPatternsCached(params: { + patterns: string[]; + flags: string; + cache: Map; + warnRejected: boolean; +}): RegExp[] { + if (params.patterns.length === 0) { + return []; + } + const cacheKey = `${params.flags}\u001e${params.patterns.join("\u001f")}`; + const cached = params.cache.get(cacheKey); + if (cached) { + return [...cached]; + } + + const compiled = compileConfigRegexes(params.patterns, params.flags); + if (params.warnRejected) { + for (const rejected of compiled.rejected) { + warnRejectedMentionPattern(rejected.pattern, rejected.flags, rejected.reason); + } + } + return cacheMentionRegexes(params.cache, cacheKey, compiled.regexes); +} + function resolveMentionPatterns(cfg: OpenClawConfig | undefined, agentId?: string): string[] { if (!cfg) { return []; @@ -56,29 +120,12 @@ function resolveMentionPatterns(cfg: OpenClawConfig | undefined, agentId?: strin export function buildMentionRegexes(cfg: OpenClawConfig | undefined, agentId?: string): RegExp[] { const patterns = normalizeMentionPatterns(resolveMentionPatterns(cfg, agentId)); - if (patterns.length === 0) { - return []; - } - const cacheKey = patterns.join("\u001f"); - const cached = mentionRegexCompileCache.get(cacheKey); - if (cached) { - return [...cached]; - } - const compiled = patterns - .map((pattern) => { - try { - return new RegExp(pattern, "i"); - } catch { - return null; - } - }) - .filter((value): value is RegExp => Boolean(value)); - mentionRegexCompileCache.set(cacheKey, compiled); - if (mentionRegexCompileCache.size > MAX_MENTION_REGEX_COMPILE_CACHE_KEYS) { - mentionRegexCompileCache.clear(); - mentionRegexCompileCache.set(cacheKey, compiled); - } - return [...compiled]; + return compileMentionPatternsCached({ + patterns, + flags: "i", + cache: mentionMatchRegexCompileCache, + warnRejected: true, + }); } export function normalizeMentionText(text: string): string { @@ -153,17 +200,24 @@ export function stripMentions( let result = text; const providerId = ctx.Provider ? normalizeChannelId(ctx.Provider) : null; const providerMentions = providerId ? getChannelDock(providerId)?.mentions : undefined; - const patterns = normalizeMentionPatterns([ - ...resolveMentionPatterns(cfg, agentId), - ...(providerMentions?.stripPatterns?.({ ctx, cfg, agentId }) ?? []), - ]); - for (const p of patterns) { - try { - const re = new RegExp(p, "gi"); - result = result.replace(re, " "); - } catch { - // ignore invalid regex - } + const configRegexes = compileMentionPatternsCached({ + patterns: normalizeMentionPatterns(resolveMentionPatterns(cfg, agentId)), + flags: "gi", + cache: mentionStripRegexCompileCache, + warnRejected: true, + }); + const providerRegexes = + providerMentions?.stripRegexes?.({ ctx, cfg, agentId }) ?? + compileMentionPatternsCached({ + patterns: normalizeMentionPatterns( + providerMentions?.stripPatterns?.({ ctx, cfg, agentId }) ?? [], + ), + flags: "gi", + cache: mentionStripRegexCompileCache, + warnRejected: false, + }); + for (const re of [...configRegexes, ...providerRegexes]) { + result = result.replace(re, " "); } if (providerMentions?.stripMentions) { result = providerMentions.stripMentions({ diff --git a/src/channels/dock.ts b/src/channels/dock.ts index e080d513c16..2e63583ca1b 100644 --- a/src/channels/dock.ts +++ b/src/channels/dock.ts @@ -58,7 +58,7 @@ import type { } from "./plugins/types.js"; import { resolveWhatsAppGroupIntroHint, - resolveWhatsAppMentionStripPatterns, + resolveWhatsAppMentionStripRegexes, } from "./plugins/whatsapp-shared.js"; import { CHAT_CHANNEL_ORDER, type ChatChannelId, getChatChannelMeta } from "./registry.js"; @@ -303,7 +303,7 @@ const DOCKS: Record = { resolveGroupIntroHint: resolveWhatsAppGroupIntroHint, }, mentions: { - stripPatterns: ({ ctx }) => resolveWhatsAppMentionStripPatterns(ctx), + stripRegexes: ({ ctx }) => resolveWhatsAppMentionStripRegexes(ctx), }, threading: { buildToolContext: ({ context, hasRepliedRef }) => { @@ -346,7 +346,7 @@ const DOCKS: Record = { resolveToolPolicy: resolveDiscordGroupToolPolicy, }, mentions: { - stripPatterns: () => ["<@!?\\d+>"], + stripRegexes: () => [/<@!?\d+>/g], }, threading: { resolveReplyToMode: ({ cfg }) => cfg.channels?.discord?.replyToMode ?? "off", @@ -484,7 +484,7 @@ const DOCKS: Record = { resolveToolPolicy: resolveSlackGroupToolPolicy, }, mentions: { - stripPatterns: () => ["<@[^>]+>"], + stripRegexes: () => [/<@[^>]+>/g], }, threading: { resolveReplyToMode: ({ cfg, accountId, chatType }) => diff --git a/src/channels/plugins/types.core.ts b/src/channels/plugins/types.core.ts index 3bf3c07ddc6..fef8b010ca5 100644 --- a/src/channels/plugins/types.core.ts +++ b/src/channels/plugins/types.core.ts @@ -209,6 +209,11 @@ export type ChannelSecurityContext = { }; export type ChannelMentionAdapter = { + stripRegexes?: (params: { + ctx: MsgContext; + cfg: OpenClawConfig | undefined; + agentId?: string; + }) => RegExp[]; stripPatterns?: (params: { ctx: MsgContext; cfg: OpenClawConfig | undefined; diff --git a/src/channels/plugins/whatsapp-shared.ts b/src/channels/plugins/whatsapp-shared.ts index 3a51e2263bd..c8db1d068c8 100644 --- a/src/channels/plugins/whatsapp-shared.ts +++ b/src/channels/plugins/whatsapp-shared.ts @@ -20,6 +20,10 @@ export function resolveWhatsAppMentionStripPatterns(ctx: { To?: string | null }) return [escaped, `@${escaped}`]; } +export function resolveWhatsAppMentionStripRegexes(ctx: { To?: string | null }): RegExp[] { + return resolveWhatsAppMentionStripPatterns(ctx).map((pattern) => new RegExp(pattern, "g")); +} + type WhatsAppChunker = NonNullable; type WhatsAppSendMessage = PluginRuntimeChannel["whatsapp"]["sendMessageWhatsApp"]; type WhatsAppSendPoll = PluginRuntimeChannel["whatsapp"]["sendPollWhatsApp"]; diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index a4e2e125528..0d03f9574b1 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -1346,7 +1346,7 @@ export const FIELD_HELP: Record = { "messages.groupChat": "Group-message handling controls including mention triggers and history window sizing. Keep mention patterns narrow so group channels do not trigger on every message.", "messages.groupChat.mentionPatterns": - "Regex-like patterns used to detect explicit mentions/trigger phrases in group chats. Use precise patterns to reduce false positives in high-volume channels.", + "Safe case-insensitive regex patterns used to detect explicit mentions/trigger phrases in group chats. Use precise patterns to reduce false positives in high-volume channels; invalid or unsafe nested-repetition patterns are ignored.", "messages.groupChat.historyLimit": "Maximum number of prior group messages loaded as context per turn for group sessions. Use higher values for richer continuity, or lower values for faster and cheaper responses.", "messages.queue": diff --git a/src/infra/exec-approval-forwarder.ts b/src/infra/exec-approval-forwarder.ts index de3a54a4c77..5d197d6ae62 100644 --- a/src/infra/exec-approval-forwarder.ts +++ b/src/infra/exec-approval-forwarder.ts @@ -9,7 +9,8 @@ import type { } from "../config/types.approvals.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { normalizeAccountId, parseAgentSessionKey } from "../routing/session-key.js"; -import { compileSafeRegex, testRegexWithBoundedInput } from "../security/safe-regex.js"; +import { compileConfigRegex } from "../security/config-regex.js"; +import { testRegexWithBoundedInput } from "../security/safe-regex.js"; import { isDeliverableMessageChannel, normalizeMessageChannel, @@ -63,8 +64,8 @@ function matchSessionFilter(sessionKey: string, patterns: string[]): boolean { if (sessionKey.includes(pattern)) { return true; } - const regex = compileSafeRegex(pattern); - return regex ? testRegexWithBoundedInput(regex, sessionKey) : false; + const compiled = compileConfigRegex(pattern); + return compiled?.regex ? testRegexWithBoundedInput(compiled.regex, sessionKey) : false; }); } diff --git a/src/logging/redact.ts b/src/logging/redact.ts index 7e47ac0b663..42266f71eec 100644 --- a/src/logging/redact.ts +++ b/src/logging/redact.ts @@ -1,5 +1,5 @@ import type { OpenClawConfig } from "../config/config.js"; -import { compileSafeRegex } from "../security/safe-regex.js"; +import { compileConfigRegex } from "../security/config-regex.js"; import { resolveNodeRequireFromMeta } from "./node-require.js"; import { replacePatternBounded } from "./redact-bounded.js"; @@ -55,9 +55,9 @@ function parsePattern(raw: string): RegExp | null { const match = raw.match(/^\/(.+)\/([gimsuy]*)$/); if (match) { const flags = match[2].includes("g") ? match[2] : `${match[2]}g`; - return compileSafeRegex(match[1], flags); + return compileConfigRegex(match[1], flags)?.regex ?? null; } - return compileSafeRegex(raw, "gi"); + return compileConfigRegex(raw, "gi")?.regex ?? null; } function resolvePatterns(value?: string[]): RegExp[] { diff --git a/src/plugin-sdk/whatsapp.ts b/src/plugin-sdk/whatsapp.ts index 0227322f868..ea6465e8faa 100644 --- a/src/plugin-sdk/whatsapp.ts +++ b/src/plugin-sdk/whatsapp.ts @@ -38,6 +38,7 @@ export { export { createWhatsAppOutboundBase, resolveWhatsAppGroupIntroHint, + resolveWhatsAppMentionStripRegexes, resolveWhatsAppMentionStripPatterns, } from "../channels/plugins/whatsapp-shared.js"; export { resolveWhatsAppHeartbeatRecipients } from "../channels/plugins/whatsapp-heartbeat.js"; diff --git a/src/security/config-regex.ts b/src/security/config-regex.ts new file mode 100644 index 00000000000..76e8d0e86c7 --- /dev/null +++ b/src/security/config-regex.ts @@ -0,0 +1,78 @@ +import { + compileSafeRegexDetailed, + type SafeRegexCompileResult, + type SafeRegexRejectReason, +} from "./safe-regex.js"; + +export type ConfigRegexRejectReason = Exclude; + +export type CompiledConfigRegex = + | { + regex: RegExp; + pattern: string; + flags: string; + reason: null; + } + | { + regex: null; + pattern: string; + flags: string; + reason: ConfigRegexRejectReason; + }; + +function normalizeRejectReason(result: SafeRegexCompileResult): ConfigRegexRejectReason | null { + if (result.reason === null || result.reason === "empty") { + return null; + } + return result.reason; +} + +export function compileConfigRegex(pattern: string, flags = ""): CompiledConfigRegex | null { + const result = compileSafeRegexDetailed(pattern, flags); + if (result.reason === "empty") { + return null; + } + return { + regex: result.regex, + pattern: result.source, + flags: result.flags, + reason: normalizeRejectReason(result), + } as CompiledConfigRegex; +} + +export function compileConfigRegexes( + patterns: string[], + flags = "", +): { + regexes: RegExp[]; + rejected: Array<{ + pattern: string; + flags: string; + reason: ConfigRegexRejectReason; + }>; +} { + const regexes: RegExp[] = []; + const rejected: Array<{ + pattern: string; + flags: string; + reason: ConfigRegexRejectReason; + }> = []; + + for (const pattern of patterns) { + const compiled = compileConfigRegex(pattern, flags); + if (!compiled) { + continue; + } + if (compiled.regex) { + regexes.push(compiled.regex); + continue; + } + rejected.push({ + pattern: compiled.pattern, + flags: compiled.flags, + reason: compiled.reason, + }); + } + + return { regexes, rejected }; +} diff --git a/src/security/safe-regex.test.ts b/src/security/safe-regex.test.ts index 460149ad8ce..d4d3d650d91 100644 --- a/src/security/safe-regex.test.ts +++ b/src/security/safe-regex.test.ts @@ -1,5 +1,10 @@ import { describe, expect, it } from "vitest"; -import { compileSafeRegex, hasNestedRepetition, testRegexWithBoundedInput } from "./safe-regex.js"; +import { + compileSafeRegex, + compileSafeRegexDetailed, + hasNestedRepetition, + testRegexWithBoundedInput, +} from "./safe-regex.js"; describe("safe regex", () => { it("flags nested repetition patterns", () => { @@ -28,6 +33,13 @@ describe("safe regex", () => { expect("TOKEN=abcd1234".replace(re as RegExp, "***")).toBe("***"); }); + it("returns structured reject reasons", () => { + expect(compileSafeRegexDetailed(" ").reason).toBe("empty"); + expect(compileSafeRegexDetailed("(a+)+$").reason).toBe("unsafe-nested-repetition"); + expect(compileSafeRegexDetailed("(invalid").reason).toBe("invalid-regex"); + expect(compileSafeRegexDetailed("^agent:main$").reason).toBeNull(); + }); + it("checks bounded regex windows for long inputs", () => { expect( testRegexWithBoundedInput(/^agent:main:discord:/, `agent:main:discord:${"x".repeat(5000)}`), diff --git a/src/security/safe-regex.ts b/src/security/safe-regex.ts index ffa34509130..e197929c4a4 100644 --- a/src/security/safe-regex.ts +++ b/src/security/safe-regex.ts @@ -30,7 +30,23 @@ type PatternToken = const SAFE_REGEX_CACHE_MAX = 256; const SAFE_REGEX_TEST_WINDOW = 2048; -const safeRegexCache = new Map(); +export type SafeRegexRejectReason = "empty" | "unsafe-nested-repetition" | "invalid-regex"; + +export type SafeRegexCompileResult = + | { + regex: RegExp; + source: string; + flags: string; + reason: null; + } + | { + regex: null; + source: string; + flags: string; + reason: SafeRegexRejectReason; + }; + +const safeRegexCache = new Map(); function createParseFrame(): ParseFrame { return { @@ -302,31 +318,44 @@ export function hasNestedRepetition(source: string): boolean { return analyzeTokensForNestedRepetition(tokenizePattern(source)); } -export function compileSafeRegex(source: string, flags = ""): RegExp | null { +export function compileSafeRegexDetailed(source: string, flags = ""): SafeRegexCompileResult { const trimmed = source.trim(); if (!trimmed) { - return null; + return { regex: null, source: trimmed, flags, reason: "empty" }; } const cacheKey = `${flags}::${trimmed}`; if (safeRegexCache.has(cacheKey)) { - return safeRegexCache.get(cacheKey) ?? null; + return ( + safeRegexCache.get(cacheKey) ?? { + regex: null, + source: trimmed, + flags, + reason: "invalid-regex", + } + ); } - let compiled: RegExp | null = null; - if (!hasNestedRepetition(trimmed)) { + let result: SafeRegexCompileResult; + if (hasNestedRepetition(trimmed)) { + result = { regex: null, source: trimmed, flags, reason: "unsafe-nested-repetition" }; + } else { try { - compiled = new RegExp(trimmed, flags); + result = { regex: new RegExp(trimmed, flags), source: trimmed, flags, reason: null }; } catch { - compiled = null; + result = { regex: null, source: trimmed, flags, reason: "invalid-regex" }; } } - safeRegexCache.set(cacheKey, compiled); + safeRegexCache.set(cacheKey, result); if (safeRegexCache.size > SAFE_REGEX_CACHE_MAX) { const oldestKey = safeRegexCache.keys().next().value; if (oldestKey) { safeRegexCache.delete(oldestKey); } } - return compiled; + return result; +} + +export function compileSafeRegex(source: string, flags = ""): RegExp | null { + return compileSafeRegexDetailed(source, flags).regex; }