fix: harden mention pattern regex compilation

This commit is contained in:
Peter Steinberger 2026-03-15 08:44:02 -07:00
parent e4c61723cd
commit ff61343d76
No known key found for this signature in database
17 changed files with 265 additions and 65 deletions

View File

@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Group mention gating: reject invalid and unsafe nested-repetition `mentionPatterns`, reuse the shared safe config-regex compiler across mention stripping and detection, and cache strip-time regex compilation so noisy groups avoid repeated recompiles.
- Control UI/chat sessions: show human-readable labels in the grouped session dropdown again, keep unique scoped fallbacks when metadata is missing, and disambiguate duplicate labels only when needed. (#45130) thanks @luzhidong.
- Slack/interactive replies: preserve `channelData.slack.blocks` through live DM delivery and preview-finalized edits so Block Kit button and select directives render instead of falling back to raw text. Thanks @vincentkoc.
- Feishu/topic threads: fetch full thread context, including prior bot replies, when starting a topic-thread session so follow-up turns in Feishu topics keep the right conversation state. Thanks @Coobiw.

View File

@ -13,7 +13,7 @@ Note: `agents.list[].groupChat.mentionPatterns` is now used by Telegram/Discord/
## Whats implemented (2025-12-03)
- Activation modes: `mention` (default) or `always`. `mention` requires a ping (real WhatsApp @-mentions via `mentionedJids`, regex patterns, or the bots E.164 anywhere in the text). `always` wakes the agent on every message but it should reply only when it can add meaningful value; otherwise it returns the silent token `NO_REPLY`. Defaults can be set in config (`channels.whatsapp.groups`) and overridden per group via `/activation`. When `channels.whatsapp.groups` is set, it also acts as a group allowlist (include `"*"` to allow all).
- Activation modes: `mention` (default) or `always`. `mention` requires a ping (real WhatsApp @-mentions via `mentionedJids`, safe regex patterns, or the bots E.164 anywhere in the text). `always` wakes the agent on every message but it should reply only when it can add meaningful value; otherwise it returns the silent token `NO_REPLY`. Defaults can be set in config (`channels.whatsapp.groups`) and overridden per group via `/activation`. When `channels.whatsapp.groups` is set, it also acts as a group allowlist (include `"*"` to allow all).
- Group policy: `channels.whatsapp.groupPolicy` controls whether group messages are accepted (`open|disabled|allowlist`). `allowlist` uses `channels.whatsapp.groupAllowFrom` (fallback: explicit `channels.whatsapp.allowFrom`). Default is `allowlist` (blocked until you add senders).
- Per-group sessions: session keys look like `agent:<agentId>:whatsapp:group:<jid>` so commands such as `/verbose on` or `/think high` (sent as standalone messages) are scoped to that group; personal DM state is untouched. Heartbeats are skipped for group threads.
- Context injection: **pending-only** group messages (default 50) that _did not_ trigger a run are prefixed under `[Chat messages since your last reply - for context]`, with the triggering line under `[Current message - respond to this]`. Messages already in the session are not re-injected.
@ -50,7 +50,7 @@ Add a `groupChat` block to `~/.openclaw/openclaw.json` so display-name pings wor
Notes:
- The regexes are case-insensitive; they cover a display-name ping like `@openclaw` and the raw number with or without `+`/spaces.
- The regexes are case-insensitive and use the same safe-regex guardrails as other config regex surfaces; invalid patterns and unsafe nested repetition are ignored.
- WhatsApp still sends canonical mentions via `mentionedJids` when someone taps the contact, so the number fallback is rarely needed but is a useful safety net.
### Activation command (owner-only)

View File

@ -243,7 +243,7 @@ Replying to a bot message counts as an implicit mention (when the channel suppor
Notes:
- `mentionPatterns` are case-insensitive regexes.
- `mentionPatterns` are case-insensitive safe regex patterns; invalid patterns and unsafe nested-repetition forms are ignored.
- Surfaces that provide explicit mentions still pass; patterns are a fallback.
- Per-agent override: `agents.list[].groupChat.mentionPatterns` (useful when multiple agents share a group).
- Mention gating is only enforced when mention detection is possible (native mentions or `mentionPatterns` are configured).

View File

@ -655,12 +655,12 @@ See the full channel index: [Channels](/channels).
### Group chat mention gating
Group messages default to **require mention** (metadata mention or regex patterns). Applies to WhatsApp, Telegram, Discord, Google Chat, and iMessage group chats.
Group messages default to **require mention** (metadata mention or safe regex patterns). Applies to WhatsApp, Telegram, Discord, Google Chat, and iMessage group chats.
**Mention types:**
- **Metadata mentions**: Native platform @-mentions. Ignored in WhatsApp self-chat mode.
- **Text patterns**: Regex patterns in `agents.list[].groupChat.mentionPatterns`. Always checked.
- **Text patterns**: Safe regex patterns in `agents.list[].groupChat.mentionPatterns`. Invalid patterns and unsafe nested repetition are ignored.
- Mention gating is enforced only when detection is possible (native mentions or at least one pattern).
```json5

View File

@ -170,7 +170,7 @@ When validation fails:
```
- **Metadata mentions**: native @-mentions (WhatsApp tap-to-mention, Telegram @bot, etc.)
- **Text patterns**: regex patterns in `mentionPatterns`
- **Text patterns**: safe regex patterns in `mentionPatterns`
- See [full reference](/gateway/configuration-reference#group-chat-mention-gating) for per-channel overrides and self-chat mode.
</Accordion>

View File

@ -17,6 +17,7 @@ import {
buildMentionRegexes,
matchesMentionPatterns,
normalizeMentionText,
stripMentions,
} from "./reply/mentions.js";
import { initSessionState } from "./reply/session.js";
import { applyTemplate, type MsgContext, type TemplateContext } from "./templating.js";
@ -394,10 +395,10 @@ describe("initSessionState BodyStripped", () => {
});
describe("mention helpers", () => {
it("builds regexes and skips invalid patterns", () => {
it("builds regexes and skips invalid or unsafe patterns", () => {
const regexes = buildMentionRegexes({
messages: {
groupChat: { mentionPatterns: ["\\bopenclaw\\b", "(invalid"] },
groupChat: { mentionPatterns: ["\\bopenclaw\\b", "(invalid", "(a+)+$"] },
},
});
expect(regexes).toHaveLength(1);
@ -435,6 +436,20 @@ describe("mention helpers", () => {
expect(matchesMentionPatterns("workbot: hi", regexes)).toBe(true);
expect(matchesMentionPatterns("global: hi", regexes)).toBe(false);
});
it("strips safe mention patterns and ignores unsafe ones", () => {
const stripped = stripMentions("openclaw " + "a".repeat(28) + "!", {} as MsgContext, {
messages: {
groupChat: { mentionPatterns: ["\\bopenclaw\\b", "(a+)+$"] },
},
});
expect(stripped).toBe(`${"a".repeat(28)}!`);
});
it("strips provider mention regexes without config compilation", () => {
const stripped = stripMentions("<@12345> hello", { Provider: "discord" } as MsgContext, {});
expect(stripped).toBe("hello");
});
});
describe("resolveGroupRequireMention", () => {

View File

@ -2,6 +2,8 @@ import { resolveAgentConfig } from "../../agents/agent-scope.js";
import { getChannelDock } from "../../channels/dock.js";
import { normalizeChannelId } from "../../channels/plugins/index.js";
import type { OpenClawConfig } from "../../config/config.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { compileConfigRegexes, type ConfigRegexRejectReason } from "../../security/config-regex.js";
import { escapeRegExp } from "../../utils.js";
import type { MsgContext } from "../templating.js";
@ -21,8 +23,12 @@ function deriveMentionPatterns(identity?: { name?: string; emoji?: string }) {
}
const BACKSPACE_CHAR = "\u0008";
const mentionRegexCompileCache = new Map<string, RegExp[]>();
const mentionMatchRegexCompileCache = new Map<string, RegExp[]>();
const mentionStripRegexCompileCache = new Map<string, RegExp[]>();
const MAX_MENTION_REGEX_COMPILE_CACHE_KEYS = 512;
const mentionPatternWarningCache = new Set<string>();
const MAX_MENTION_PATTERN_WARNING_KEYS = 512;
const log = createSubsystemLogger("mentions");
export const CURRENT_MESSAGE_MARKER = "[Current message - respond to this]";
@ -37,6 +43,64 @@ function normalizeMentionPatterns(patterns: string[]): string[] {
return patterns.map(normalizeMentionPattern);
}
function warnRejectedMentionPattern(
pattern: string,
flags: string,
reason: ConfigRegexRejectReason,
) {
const key = `${flags}::${reason}::${pattern}`;
if (mentionPatternWarningCache.has(key)) {
return;
}
mentionPatternWarningCache.add(key);
if (mentionPatternWarningCache.size > MAX_MENTION_PATTERN_WARNING_KEYS) {
mentionPatternWarningCache.clear();
mentionPatternWarningCache.add(key);
}
log.warn("Ignoring unsupported group mention pattern", {
pattern,
flags,
reason,
});
}
function cacheMentionRegexes(
cache: Map<string, RegExp[]>,
cacheKey: string,
regexes: RegExp[],
): RegExp[] {
cache.set(cacheKey, regexes);
if (cache.size > MAX_MENTION_REGEX_COMPILE_CACHE_KEYS) {
cache.clear();
cache.set(cacheKey, regexes);
}
return [...regexes];
}
function compileMentionPatternsCached(params: {
patterns: string[];
flags: string;
cache: Map<string, RegExp[]>;
warnRejected: boolean;
}): RegExp[] {
if (params.patterns.length === 0) {
return [];
}
const cacheKey = `${params.flags}\u001e${params.patterns.join("\u001f")}`;
const cached = params.cache.get(cacheKey);
if (cached) {
return [...cached];
}
const compiled = compileConfigRegexes(params.patterns, params.flags);
if (params.warnRejected) {
for (const rejected of compiled.rejected) {
warnRejectedMentionPattern(rejected.pattern, rejected.flags, rejected.reason);
}
}
return cacheMentionRegexes(params.cache, cacheKey, compiled.regexes);
}
function resolveMentionPatterns(cfg: OpenClawConfig | undefined, agentId?: string): string[] {
if (!cfg) {
return [];
@ -56,29 +120,12 @@ function resolveMentionPatterns(cfg: OpenClawConfig | undefined, agentId?: strin
export function buildMentionRegexes(cfg: OpenClawConfig | undefined, agentId?: string): RegExp[] {
const patterns = normalizeMentionPatterns(resolveMentionPatterns(cfg, agentId));
if (patterns.length === 0) {
return [];
}
const cacheKey = patterns.join("\u001f");
const cached = mentionRegexCompileCache.get(cacheKey);
if (cached) {
return [...cached];
}
const compiled = patterns
.map((pattern) => {
try {
return new RegExp(pattern, "i");
} catch {
return null;
}
})
.filter((value): value is RegExp => Boolean(value));
mentionRegexCompileCache.set(cacheKey, compiled);
if (mentionRegexCompileCache.size > MAX_MENTION_REGEX_COMPILE_CACHE_KEYS) {
mentionRegexCompileCache.clear();
mentionRegexCompileCache.set(cacheKey, compiled);
}
return [...compiled];
return compileMentionPatternsCached({
patterns,
flags: "i",
cache: mentionMatchRegexCompileCache,
warnRejected: true,
});
}
export function normalizeMentionText(text: string): string {
@ -153,17 +200,24 @@ export function stripMentions(
let result = text;
const providerId = ctx.Provider ? normalizeChannelId(ctx.Provider) : null;
const providerMentions = providerId ? getChannelDock(providerId)?.mentions : undefined;
const patterns = normalizeMentionPatterns([
...resolveMentionPatterns(cfg, agentId),
...(providerMentions?.stripPatterns?.({ ctx, cfg, agentId }) ?? []),
]);
for (const p of patterns) {
try {
const re = new RegExp(p, "gi");
result = result.replace(re, " ");
} catch {
// ignore invalid regex
}
const configRegexes = compileMentionPatternsCached({
patterns: normalizeMentionPatterns(resolveMentionPatterns(cfg, agentId)),
flags: "gi",
cache: mentionStripRegexCompileCache,
warnRejected: true,
});
const providerRegexes =
providerMentions?.stripRegexes?.({ ctx, cfg, agentId }) ??
compileMentionPatternsCached({
patterns: normalizeMentionPatterns(
providerMentions?.stripPatterns?.({ ctx, cfg, agentId }) ?? [],
),
flags: "gi",
cache: mentionStripRegexCompileCache,
warnRejected: false,
});
for (const re of [...configRegexes, ...providerRegexes]) {
result = result.replace(re, " ");
}
if (providerMentions?.stripMentions) {
result = providerMentions.stripMentions({

View File

@ -58,7 +58,7 @@ import type {
} from "./plugins/types.js";
import {
resolveWhatsAppGroupIntroHint,
resolveWhatsAppMentionStripPatterns,
resolveWhatsAppMentionStripRegexes,
} from "./plugins/whatsapp-shared.js";
import { CHAT_CHANNEL_ORDER, type ChatChannelId, getChatChannelMeta } from "./registry.js";
@ -303,7 +303,7 @@ const DOCKS: Record<ChatChannelId, ChannelDock> = {
resolveGroupIntroHint: resolveWhatsAppGroupIntroHint,
},
mentions: {
stripPatterns: ({ ctx }) => resolveWhatsAppMentionStripPatterns(ctx),
stripRegexes: ({ ctx }) => resolveWhatsAppMentionStripRegexes(ctx),
},
threading: {
buildToolContext: ({ context, hasRepliedRef }) => {
@ -346,7 +346,7 @@ const DOCKS: Record<ChatChannelId, ChannelDock> = {
resolveToolPolicy: resolveDiscordGroupToolPolicy,
},
mentions: {
stripPatterns: () => ["<@!?\\d+>"],
stripRegexes: () => [/<@!?\d+>/g],
},
threading: {
resolveReplyToMode: ({ cfg }) => cfg.channels?.discord?.replyToMode ?? "off",
@ -484,7 +484,7 @@ const DOCKS: Record<ChatChannelId, ChannelDock> = {
resolveToolPolicy: resolveSlackGroupToolPolicy,
},
mentions: {
stripPatterns: () => ["<@[^>]+>"],
stripRegexes: () => [/<@[^>]+>/g],
},
threading: {
resolveReplyToMode: ({ cfg, accountId, chatType }) =>

View File

@ -209,6 +209,11 @@ export type ChannelSecurityContext<ResolvedAccount = unknown> = {
};
export type ChannelMentionAdapter = {
stripRegexes?: (params: {
ctx: MsgContext;
cfg: OpenClawConfig | undefined;
agentId?: string;
}) => RegExp[];
stripPatterns?: (params: {
ctx: MsgContext;
cfg: OpenClawConfig | undefined;

View File

@ -20,6 +20,10 @@ export function resolveWhatsAppMentionStripPatterns(ctx: { To?: string | null })
return [escaped, `@${escaped}`];
}
export function resolveWhatsAppMentionStripRegexes(ctx: { To?: string | null }): RegExp[] {
return resolveWhatsAppMentionStripPatterns(ctx).map((pattern) => new RegExp(pattern, "g"));
}
type WhatsAppChunker = NonNullable<ChannelOutboundAdapter["chunker"]>;
type WhatsAppSendMessage = PluginRuntimeChannel["whatsapp"]["sendMessageWhatsApp"];
type WhatsAppSendPoll = PluginRuntimeChannel["whatsapp"]["sendPollWhatsApp"];

View File

@ -1346,7 +1346,7 @@ export const FIELD_HELP: Record<string, string> = {
"messages.groupChat":
"Group-message handling controls including mention triggers and history window sizing. Keep mention patterns narrow so group channels do not trigger on every message.",
"messages.groupChat.mentionPatterns":
"Regex-like patterns used to detect explicit mentions/trigger phrases in group chats. Use precise patterns to reduce false positives in high-volume channels.",
"Safe case-insensitive regex patterns used to detect explicit mentions/trigger phrases in group chats. Use precise patterns to reduce false positives in high-volume channels; invalid or unsafe nested-repetition patterns are ignored.",
"messages.groupChat.historyLimit":
"Maximum number of prior group messages loaded as context per turn for group sessions. Use higher values for richer continuity, or lower values for faster and cheaper responses.",
"messages.queue":

View File

@ -9,7 +9,8 @@ import type {
} from "../config/types.approvals.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { normalizeAccountId, parseAgentSessionKey } from "../routing/session-key.js";
import { compileSafeRegex, testRegexWithBoundedInput } from "../security/safe-regex.js";
import { compileConfigRegex } from "../security/config-regex.js";
import { testRegexWithBoundedInput } from "../security/safe-regex.js";
import {
isDeliverableMessageChannel,
normalizeMessageChannel,
@ -63,8 +64,8 @@ function matchSessionFilter(sessionKey: string, patterns: string[]): boolean {
if (sessionKey.includes(pattern)) {
return true;
}
const regex = compileSafeRegex(pattern);
return regex ? testRegexWithBoundedInput(regex, sessionKey) : false;
const compiled = compileConfigRegex(pattern);
return compiled?.regex ? testRegexWithBoundedInput(compiled.regex, sessionKey) : false;
});
}

View File

@ -1,5 +1,5 @@
import type { OpenClawConfig } from "../config/config.js";
import { compileSafeRegex } from "../security/safe-regex.js";
import { compileConfigRegex } from "../security/config-regex.js";
import { resolveNodeRequireFromMeta } from "./node-require.js";
import { replacePatternBounded } from "./redact-bounded.js";
@ -55,9 +55,9 @@ function parsePattern(raw: string): RegExp | null {
const match = raw.match(/^\/(.+)\/([gimsuy]*)$/);
if (match) {
const flags = match[2].includes("g") ? match[2] : `${match[2]}g`;
return compileSafeRegex(match[1], flags);
return compileConfigRegex(match[1], flags)?.regex ?? null;
}
return compileSafeRegex(raw, "gi");
return compileConfigRegex(raw, "gi")?.regex ?? null;
}
function resolvePatterns(value?: string[]): RegExp[] {

View File

@ -38,6 +38,7 @@ export {
export {
createWhatsAppOutboundBase,
resolveWhatsAppGroupIntroHint,
resolveWhatsAppMentionStripRegexes,
resolveWhatsAppMentionStripPatterns,
} from "../channels/plugins/whatsapp-shared.js";
export { resolveWhatsAppHeartbeatRecipients } from "../channels/plugins/whatsapp-heartbeat.js";

View File

@ -0,0 +1,78 @@
import {
compileSafeRegexDetailed,
type SafeRegexCompileResult,
type SafeRegexRejectReason,
} from "./safe-regex.js";
export type ConfigRegexRejectReason = Exclude<SafeRegexRejectReason, "empty">;
export type CompiledConfigRegex =
| {
regex: RegExp;
pattern: string;
flags: string;
reason: null;
}
| {
regex: null;
pattern: string;
flags: string;
reason: ConfigRegexRejectReason;
};
function normalizeRejectReason(result: SafeRegexCompileResult): ConfigRegexRejectReason | null {
if (result.reason === null || result.reason === "empty") {
return null;
}
return result.reason;
}
export function compileConfigRegex(pattern: string, flags = ""): CompiledConfigRegex | null {
const result = compileSafeRegexDetailed(pattern, flags);
if (result.reason === "empty") {
return null;
}
return {
regex: result.regex,
pattern: result.source,
flags: result.flags,
reason: normalizeRejectReason(result),
} as CompiledConfigRegex;
}
export function compileConfigRegexes(
patterns: string[],
flags = "",
): {
regexes: RegExp[];
rejected: Array<{
pattern: string;
flags: string;
reason: ConfigRegexRejectReason;
}>;
} {
const regexes: RegExp[] = [];
const rejected: Array<{
pattern: string;
flags: string;
reason: ConfigRegexRejectReason;
}> = [];
for (const pattern of patterns) {
const compiled = compileConfigRegex(pattern, flags);
if (!compiled) {
continue;
}
if (compiled.regex) {
regexes.push(compiled.regex);
continue;
}
rejected.push({
pattern: compiled.pattern,
flags: compiled.flags,
reason: compiled.reason,
});
}
return { regexes, rejected };
}

View File

@ -1,5 +1,10 @@
import { describe, expect, it } from "vitest";
import { compileSafeRegex, hasNestedRepetition, testRegexWithBoundedInput } from "./safe-regex.js";
import {
compileSafeRegex,
compileSafeRegexDetailed,
hasNestedRepetition,
testRegexWithBoundedInput,
} from "./safe-regex.js";
describe("safe regex", () => {
it("flags nested repetition patterns", () => {
@ -28,6 +33,13 @@ describe("safe regex", () => {
expect("TOKEN=abcd1234".replace(re as RegExp, "***")).toBe("***");
});
it("returns structured reject reasons", () => {
expect(compileSafeRegexDetailed(" ").reason).toBe("empty");
expect(compileSafeRegexDetailed("(a+)+$").reason).toBe("unsafe-nested-repetition");
expect(compileSafeRegexDetailed("(invalid").reason).toBe("invalid-regex");
expect(compileSafeRegexDetailed("^agent:main$").reason).toBeNull();
});
it("checks bounded regex windows for long inputs", () => {
expect(
testRegexWithBoundedInput(/^agent:main:discord:/, `agent:main:discord:${"x".repeat(5000)}`),

View File

@ -30,7 +30,23 @@ type PatternToken =
const SAFE_REGEX_CACHE_MAX = 256;
const SAFE_REGEX_TEST_WINDOW = 2048;
const safeRegexCache = new Map<string, RegExp | null>();
export type SafeRegexRejectReason = "empty" | "unsafe-nested-repetition" | "invalid-regex";
export type SafeRegexCompileResult =
| {
regex: RegExp;
source: string;
flags: string;
reason: null;
}
| {
regex: null;
source: string;
flags: string;
reason: SafeRegexRejectReason;
};
const safeRegexCache = new Map<string, SafeRegexCompileResult>();
function createParseFrame(): ParseFrame {
return {
@ -302,31 +318,44 @@ export function hasNestedRepetition(source: string): boolean {
return analyzeTokensForNestedRepetition(tokenizePattern(source));
}
export function compileSafeRegex(source: string, flags = ""): RegExp | null {
export function compileSafeRegexDetailed(source: string, flags = ""): SafeRegexCompileResult {
const trimmed = source.trim();
if (!trimmed) {
return null;
return { regex: null, source: trimmed, flags, reason: "empty" };
}
const cacheKey = `${flags}::${trimmed}`;
if (safeRegexCache.has(cacheKey)) {
return safeRegexCache.get(cacheKey) ?? null;
return (
safeRegexCache.get(cacheKey) ?? {
regex: null,
source: trimmed,
flags,
reason: "invalid-regex",
}
);
}
let compiled: RegExp | null = null;
if (!hasNestedRepetition(trimmed)) {
let result: SafeRegexCompileResult;
if (hasNestedRepetition(trimmed)) {
result = { regex: null, source: trimmed, flags, reason: "unsafe-nested-repetition" };
} else {
try {
compiled = new RegExp(trimmed, flags);
result = { regex: new RegExp(trimmed, flags), source: trimmed, flags, reason: null };
} catch {
compiled = null;
result = { regex: null, source: trimmed, flags, reason: "invalid-regex" };
}
}
safeRegexCache.set(cacheKey, compiled);
safeRegexCache.set(cacheKey, result);
if (safeRegexCache.size > SAFE_REGEX_CACHE_MAX) {
const oldestKey = safeRegexCache.keys().next().value;
if (oldestKey) {
safeRegexCache.delete(oldestKey);
}
}
return compiled;
return result;
}
export function compileSafeRegex(source: string, flags = ""): RegExp | null {
return compileSafeRegexDetailed(source, flags).regex;
}