From 3ce48aff660a0dca487fb195132d53e6e0e404ed Mon Sep 17 00:00:00 2001 From: Tak Hoffman <781889+Takhoffman@users.noreply.github.com> Date: Sat, 28 Mar 2026 20:53:29 -0500 Subject: [PATCH] Memory: add configurable FTS5 tokenizer for CJK text support (openclaw#56707) Verified: - pnpm build - pnpm check - pnpm test -- extensions/memory-core/src/memory/manager-search.test.ts packages/memory-host-sdk/src/host/query-expansion.test.ts - pnpm test -- extensions/memory-core/src/memory/index.test.ts -t "reindexes when extraPaths change" - pnpm test -- src/config/schema.base.generated.test.ts - pnpm test -- src/media-understanding/image.test.ts - pnpm test Co-authored-by: Mitsuyuki Osabe <24588751+carrotRakko@users.noreply.github.com> --- CHANGELOG.md | 1 + .../src/memory/manager-search.test.ts | 88 +++++++++++++++++++ .../memory-core/src/memory/manager-search.ts | 81 +++++++++++++++-- .../src/memory/manager-sync-ops.ts | 10 ++- extensions/memory-core/src/memory/manager.ts | 5 +- .../memory-host-sdk/src/host/memory-schema.ts | 5 +- .../src/host/query-expansion.test.ts | 45 ++++++++++ .../src/host/query-expansion.ts | 46 +++++++--- src/agents/memory-search.ts | 7 ++ src/config/schema.base.generated.ts | 36 ++++++++ src/config/types.tools.ts | 4 + src/config/zod-schema.agent-runtime.ts | 6 ++ 12 files changed, 310 insertions(+), 24 deletions(-) create mode 100644 extensions/memory-core/src/memory/manager-search.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index ffb9fd32630..005e61875f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai - Memory/QMD: honor `memory.qmd.update.embedInterval` even when regular QMD update cadence is disabled or slower by arming a dedicated embed-cadence maintenance timer, while avoiding redundant timers when regular updates are already frequent enough. (#37326) Thanks @barronlroth. 
- Agents/memory flush: keep daily memory flush files append-only during embedded attempts so compaction writes do not overwrite earlier notes. (#53725) Thanks @HPluseven. - Web UI/markdown: stop bare auto-links from swallowing adjacent CJK text while preserving valid mixed-script path and query characters in rendered links. (#48410) Thanks @jnuyao. +- Memory/FTS: add configurable trigram tokenization plus short-CJK substring fallback so memory search can find Chinese, Japanese, and Korean text without breaking mixed long-and-short queries. (#56707) Thanks @carrotRakko. ## 2026.3.28 diff --git a/extensions/memory-core/src/memory/manager-search.test.ts b/extensions/memory-core/src/memory/manager-search.test.ts new file mode 100644 index 00000000000..32eee675820 --- /dev/null +++ b/extensions/memory-core/src/memory/manager-search.test.ts @@ -0,0 +1,88 @@ +import { + ensureMemoryIndexSchema, + requireNodeSqlite, +} from "openclaw/plugin-sdk/memory-core-host-engine-storage"; +import { describe, expect, it } from "vitest"; +import { bm25RankToScore, buildFtsQuery } from "./hybrid.js"; +import { searchKeyword } from "./manager-search.js"; + +describe("searchKeyword trigram fallback", () => { + const { DatabaseSync } = requireNodeSqlite(); + + function createTrigramDb() { + const db = new DatabaseSync(":memory:"); + ensureMemoryIndexSchema({ + db, + embeddingCacheTable: "embedding_cache", + cacheEnabled: false, + ftsTable: "chunks_fts", + ftsEnabled: true, + ftsTokenizer: "trigram", + }); + return db; + } + + async function runSearch(params: { + rows: Array<{ id: string; path: string; text: string }>; + query: string; + }) { + const db = createTrigramDb(); + try { + const insert = db.prepare( + "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)", + ); + for (const row of params.rows) { + insert.run(row.text, row.id, row.path, "memory", "mock-embed", 1, 1); + } + return await searchKeyword({ + db, + ftsTable: "chunks_fts",
providerModel: "mock-embed", + query: params.query, + ftsTokenizer: "trigram", + limit: 10, + snippetMaxChars: 200, + sourceFilter: { sql: "", params: [] }, + buildFtsQuery, + bm25RankToScore, + }); + } finally { + db.close(); + } + } + + it("finds short Chinese queries with substring fallback", async () => { + const results = await runSearch({ + rows: [{ id: "1", path: "memory/zh.md", text: "今天玩成语接龙游戏" }], + query: "成语", + }); + expect(results.map((row) => row.id)).toContain("1"); + expect(results[0]?.textScore).toBe(1); + }); + + it("finds short Japanese and Korean queries with substring fallback", async () => { + const japaneseResults = await runSearch({ + rows: [{ id: "jp", path: "memory/jp.md", text: "今日はしりとり大会" }], + query: "しり とり", + }); + expect(japaneseResults.map((row) => row.id)).toEqual(["jp"]); + + const koreanResults = await runSearch({ + rows: [{ id: "ko", path: "memory/ko.md", text: "오늘 끝말잇기 게임을 했다" }], + query: "끝말", + }); + expect(koreanResults.map((row) => row.id)).toEqual(["ko"]); + }); + + it("keeps MATCH semantics for long trigram terms while requiring short CJK substrings", async () => { + const results = await runSearch({ + rows: [ + { id: "match", path: "memory/good.md", text: "今天玩成语接龙游戏" }, + { id: "partial", path: "memory/partial.md", text: "今天玩成语接龙" }, + ], + query: "成语接龙 游戏", + }); + expect(results.map((row) => row.id)).toEqual(["match"]); + expect(results[0]?.textScore).toBeGreaterThan(0); + }); +}); diff --git a/extensions/memory-core/src/memory/manager-search.ts b/extensions/memory-core/src/memory/manager-search.ts index 14f981491e6..039118c5361 100644 --- a/extensions/memory-core/src/memory/manager-search.ts +++ b/extensions/memory-core/src/memory/manager-search.ts @@ -7,6 +7,8 @@ import { const vectorToBlob = (embedding: number[]): Buffer => Buffer.from(new Float32Array(embedding).buffer); +const FTS_QUERY_TOKEN_RE = /[\p{L}\p{N}_]+/gu; +const SHORT_CJK_TRIGRAM_RE = /[\u3040-\u30ff\u3400-\u9fff\uac00-\ud7af\u3131-\u3163]/u; export 
type SearchSource = string; @@ -20,6 +22,55 @@ export type SearchRowResult = { source: SearchSource; }; +function escapeLikePattern(term: string): string { + return term.replaceAll("\\", "\\\\").replaceAll("%", "\\%").replaceAll("_", "\\_"); +} + +function buildMatchQueryFromTerms(terms: string[]): string | null { + if (terms.length === 0) { + return null; + } + const quoted = terms.map((term) => `"${term.replaceAll('"', "")}"`); + return quoted.join(" AND "); +} + +function planKeywordSearch(params: { + query: string; + ftsTokenizer?: "unicode61" | "trigram"; + buildFtsQuery: (raw: string) => string | null; +}): { matchQuery: string | null; substringTerms: string[] } { + if (params.ftsTokenizer !== "trigram") { + return { + matchQuery: params.buildFtsQuery(params.query), + substringTerms: [], + }; + } + + const tokens = + params.query + .match(FTS_QUERY_TOKEN_RE) + ?.map((token) => token.trim()) + .filter(Boolean) ?? []; + if (tokens.length === 0) { + return { matchQuery: null, substringTerms: [] }; + } + + const matchTerms: string[] = []; + const substringTerms: string[] = []; + for (const token of tokens) { + if (SHORT_CJK_TRIGRAM_RE.test(token) && Array.from(token).length < 3) { + substringTerms.push(token); + continue; + } + matchTerms.push(token); + } + + return { + matchQuery: buildMatchQueryFromTerms(matchTerms), + substringTerms, + }; +} + export async function searchVector(params: { db: DatabaseSync; vectorTable: string; @@ -141,6 +192,7 @@ export async function searchKeyword(params: { ftsTable: string; providerModel: string | undefined; query: string; + ftsTokenizer?: "unicode61" | "trigram"; limit: number; snippetMaxChars: number; sourceFilter: { sql: string; params: SearchSource[] }; @@ -150,25 +202,42 @@ export async function searchKeyword(params: { if (params.limit <= 0) { return []; } - const ftsQuery = params.buildFtsQuery(params.query); - if (!ftsQuery) { + const plan = planKeywordSearch({ + query: params.query, + ftsTokenizer: 
params.ftsTokenizer, + buildFtsQuery: params.buildFtsQuery, + }); + if (!plan.matchQuery && plan.substringTerms.length === 0) { return []; } // When providerModel is undefined (FTS-only mode), search all models const modelClause = params.providerModel ? " AND model = ?" : ""; const modelParams = params.providerModel ? [params.providerModel] : []; + const substringClause = plan.substringTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join(""); + const substringParams = plan.substringTerms.map((term) => `%${escapeLikePattern(term)}%`); + const whereClause = plan.matchQuery + ? `${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}` + : `1=1${substringClause}${modelClause}${params.sourceFilter.sql}`; + const queryParams = [ + ...(plan.matchQuery ? [plan.matchQuery] : []), + ...substringParams, + ...modelParams, + ...params.sourceFilter.params, + params.limit, + ]; + const rankExpression = plan.matchQuery ? `bm25(${params.ftsTable})` : "0"; const rows = params.db .prepare( `SELECT id, path, source, start_line, end_line, text,\n` + - ` bm25(${params.ftsTable}) AS rank\n` + + ` ${rankExpression} AS rank\n` + ` FROM ${params.ftsTable}\n` + - ` WHERE ${params.ftsTable} MATCH ?${modelClause}${params.sourceFilter.sql}\n` + + ` WHERE ${whereClause}\n` + ` ORDER BY rank ASC\n` + ` LIMIT ?`, ) - .all(ftsQuery, ...modelParams, ...params.sourceFilter.params, params.limit) as Array<{ + .all(...queryParams) as Array<{ id: string; path: string; source: SearchSource; @@ -179,7 +248,7 @@ export async function searchKeyword(params: { }>; return rows.map((row) => { - const textScore = params.bm25RankToScore(row.rank); + const textScore = plan.matchQuery ? 
params.bm25RankToScore(row.rank) : 1; return { id: row.id, path: row.path, diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts index 55f9e822c85..a994072e5b1 100644 --- a/extensions/memory-core/src/memory/manager-sync-ops.ts +++ b/extensions/memory-core/src/memory/manager-sync-ops.ts @@ -56,6 +56,7 @@ type MemoryIndexMeta = { chunkTokens: number; chunkOverlap: number; vectorDims?: number; + ftsTokenizer?: string; }; type MemorySyncProgressState = { @@ -362,6 +363,7 @@ export abstract class MemoryManagerSyncOps { cacheEnabled: this.cache.enabled, ftsTable: FTS_TABLE, ftsEnabled: this.fts.enabled, + ftsTokenizer: this.settings.store.fts.tokenizer, }); this.fts.available = result.ftsAvailable; if (result.ftsError) { @@ -1028,7 +1030,8 @@ export abstract class MemoryManagerSyncOps { meta.scopeHash !== configuredScopeHash || meta.chunkTokens !== this.settings.chunking.tokens || meta.chunkOverlap !== this.settings.chunking.overlap || - (vectorReady && !meta?.vectorDims); + (vectorReady && !meta?.vectorDims) || + (meta.ftsTokenizer ?? 
"unicode61") !== this.settings.store.fts.tokenizer; try { if (needsFullReindex) { if ( @@ -1220,6 +1223,7 @@ export abstract class MemoryManagerSyncOps { scopeHash: this.resolveConfiguredScopeHash(), chunkTokens: this.settings.chunking.tokens, chunkOverlap: this.settings.chunking.overlap, + ftsTokenizer: this.settings.store.fts.tokenizer, }; if (!nextMeta) { throw new Error("Failed to compute memory index metadata for reindexing."); @@ -1292,6 +1296,7 @@ export abstract class MemoryManagerSyncOps { scopeHash: this.resolveConfiguredScopeHash(), chunkTokens: this.settings.chunking.tokens, chunkOverlap: this.settings.chunking.overlap, + ftsTokenizer: this.settings.store.fts.tokenizer, }; if (this.vector.available && this.vector.dims) { nextMeta.vectorDims = this.vector.dims; @@ -1306,9 +1311,10 @@ export abstract class MemoryManagerSyncOps { this.db.exec(`DELETE FROM chunks`); if (this.fts.enabled && this.fts.available) { try { - this.db.exec(`DELETE FROM ${FTS_TABLE}`); + this.db.exec(`DROP TABLE IF EXISTS ${FTS_TABLE}`); } catch {} } + this.ensureSchema(); this.dropVectorTable(); this.vector.dims = undefined; this.sessionsDirtyFiles.clear(); diff --git a/extensions/memory-core/src/memory/manager.ts b/extensions/memory-core/src/memory/manager.ts index 654a6869ec4..07b9c24d436 100644 --- a/extensions/memory-core/src/memory/manager.ts +++ b/extensions/memory-core/src/memory/manager.ts @@ -352,7 +352,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem // Extract keywords for better FTS matching on conversational queries // e.g., "that thing we discussed about the API" → ["discussed", "API"] - const keywords = extractKeywords(cleaned); + const keywords = extractKeywords(cleaned, { + ftsTokenizer: this.settings.store.fts.tokenizer, + }); const searchTerms = keywords.length > 0 ? 
keywords : [cleaned]; // Search with each keyword and merge results @@ -488,6 +490,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem ftsTable: FTS_TABLE, providerModel, query, + ftsTokenizer: this.settings.store.fts.tokenizer, limit, snippetMaxChars: SNIPPET_MAX_CHARS, sourceFilter, diff --git a/packages/memory-host-sdk/src/host/memory-schema.ts b/packages/memory-host-sdk/src/host/memory-schema.ts index cdc7f2e0451..582cddeee1c 100644 --- a/packages/memory-host-sdk/src/host/memory-schema.ts +++ b/packages/memory-host-sdk/src/host/memory-schema.ts @@ -6,6 +6,7 @@ export function ensureMemoryIndexSchema(params: { cacheEnabled: boolean; ftsTable: string; ftsEnabled: boolean; + ftsTokenizer?: "unicode61" | "trigram"; }): { ftsAvailable: boolean; ftsError?: string } { params.db.exec(` CREATE TABLE IF NOT EXISTS meta ( @@ -58,6 +59,8 @@ export function ensureMemoryIndexSchema(params: { let ftsError: string | undefined; if (params.ftsEnabled) { try { + const tokenizer = params.ftsTokenizer ?? "unicode61"; + const tokenizeClause = tokenizer === "trigram" ? 
`, tokenize='trigram case_sensitive 0'` : ""; params.db.exec( `CREATE VIRTUAL TABLE IF NOT EXISTS ${params.ftsTable} USING fts5(\n` + ` text,\n` + @@ -67,7 +70,7 @@ export function ensureMemoryIndexSchema(params: { ` model UNINDEXED,\n` + ` start_line UNINDEXED,\n` + ` end_line UNINDEXED\n` + - `);`, + `${tokenizeClause});`, ); ftsAvailable = true; } catch (err) { diff --git a/packages/memory-host-sdk/src/host/query-expansion.test.ts b/packages/memory-host-sdk/src/host/query-expansion.test.ts index ac535b438e8..f1e9bff520e 100644 --- a/packages/memory-host-sdk/src/host/query-expansion.test.ts +++ b/packages/memory-host-sdk/src/host/query-expansion.test.ts @@ -174,6 +174,51 @@ describe("extractKeywords", () => { const testCount = keywords.filter((k) => k === "test").length; expect(testCount).toBe(1); }); + + describe("with trigram tokenizer", () => { + const trigramOpts = { ftsTokenizer: "trigram" as const }; + + it("emits whole CJK block instead of unigrams in trigram mode", () => { + const defaultKeywords = extractKeywords("之前讨论的那个方案"); + const trigramKeywords = extractKeywords("之前讨论的那个方案", trigramOpts); + // Default mode produces bigrams + expect(defaultKeywords).toContain("讨论"); + expect(defaultKeywords).toContain("方案"); + // Trigram mode emits the whole contiguous CJK block (FTS5 trigram + // requires >= 3 chars per term; individual characters return no results) + expect(trigramKeywords).toContain("之前讨论的那个方案"); + expect(trigramKeywords).not.toContain("讨论"); + expect(trigramKeywords).not.toContain("方案"); + }); + + it("skips Japanese kanji bigrams in trigram mode", () => { + const defaultKeywords = extractKeywords("経済政策について"); + const trigramKeywords = extractKeywords("経済政策について", trigramOpts); + // Default mode adds kanji bigrams: 経済, 済政, 政策 + expect(defaultKeywords).toContain("経済"); + expect(defaultKeywords).toContain("済政"); + expect(defaultKeywords).toContain("政策"); + // Trigram mode keeps the full kanji block but skips bigram splitting + 
expect(trigramKeywords).toContain("経済政策"); + expect(trigramKeywords).not.toContain("済政"); + }); + + it("still filters stop words in trigram mode", () => { + const keywords = extractKeywords("これ それ そして どう", trigramOpts); + expect(keywords).not.toContain("これ"); + expect(keywords).not.toContain("それ"); + expect(keywords).not.toContain("そして"); + expect(keywords).not.toContain("どう"); + }); + + it("does not affect English keyword extraction", () => { + const keywords = extractKeywords("that thing we discussed about the API", trigramOpts); + expect(keywords).toContain("discussed"); + expect(keywords).toContain("api"); + expect(keywords).not.toContain("that"); + expect(keywords).not.toContain("the"); + }); + }); }); describe("expandQueryForFts", () => { diff --git a/packages/memory-host-sdk/src/host/query-expansion.ts b/packages/memory-host-sdk/src/host/query-expansion.ts index 0bbff2674de..5ce120f1453 100644 --- a/packages/memory-host-sdk/src/host/query-expansion.ts +++ b/packages/memory-host-sdk/src/host/query-expansion.ts @@ -670,7 +670,8 @@ function isValidKeyword(token: string): boolean { * For Chinese, we do character-based splitting since we don't have a proper segmenter. * For English, we split on whitespace and punctuation. 
*/ -function tokenize(text: string): string[] { +function tokenize(text: string, opts?: { ftsTokenizer?: "unicode61" | "trigram" }): string[] { + const useTrigram = opts?.ftsTokenizer === "trigram"; const tokens: string[] = []; const normalized = text.toLowerCase().trim(); @@ -686,8 +687,10 @@ function tokenize(text: string): string[] { for (const part of jpParts) { if (/^[\u4e00-\u9fff]+$/.test(part)) { tokens.push(part); - for (let i = 0; i < part.length - 1; i++) { - tokens.push(part[i] + part[i + 1]); + if (!useTrigram) { + for (let i = 0; i < part.length - 1; i++) { + tokens.push(part[i] + part[i + 1]); + } } } else { tokens.push(part); @@ -695,13 +698,21 @@ function tokenize(text: string): string[] { } } else if (/[\u4e00-\u9fff]/.test(segment)) { // Check if segment contains CJK characters (Chinese) - // For Chinese, extract character n-grams (unigrams and bigrams) const chars = Array.from(segment).filter((c) => /[\u4e00-\u9fff]/.test(c)); - // Add individual characters - tokens.push(...chars); - // Add bigrams for better phrase matching - for (let i = 0; i < chars.length - 1; i++) { - tokens.push(chars[i] + chars[i + 1]); + if (useTrigram) { + // In trigram mode, push the whole contiguous CJK block (mirroring the + // Japanese kanji path). SQLite's trigram FTS requires at least 3 characters + // per query term — individual characters silently return no results. 
+ const block = chars.join(""); + if (block.length > 0) { + tokens.push(block); + } + } else { + // Default mode: unigrams + bigrams for phrase matching + tokens.push(...chars); + for (let i = 0; i < chars.length - 1; i++) { + tokens.push(chars[i] + chars[i + 1]); + } } } else if (/[\uac00-\ud7af\u3131-\u3163]/.test(segment)) { // For Korean (Hangul syllables and jamo), keep the word as-is unless it is @@ -732,8 +743,11 @@ function tokenize(text: string): string[] { * - "之前讨论的那个方案" → ["讨论", "方案"] * - "what was the solution for the bug" → ["solution", "bug"] */ -export function extractKeywords(query: string): string[] { - const tokens = tokenize(query); +export function extractKeywords( + query: string, + opts?: { ftsTokenizer?: "unicode61" | "trigram" }, +): string[] { + const tokens = tokenize(query, opts); const keywords: string[] = []; const seen = new Set(); @@ -764,13 +778,16 @@ export function extractKeywords(query: string): string[] { * @param query - User's original query * @returns Object with original query and extracted keywords */ -export function expandQueryForFts(query: string): { +export function expandQueryForFts( + query: string, + opts?: { ftsTokenizer?: "unicode61" | "trigram" }, +): { original: string; keywords: string[]; expanded: string; } { const original = query.trim(); - const keywords = extractKeywords(original); + const keywords = extractKeywords(original, opts); // Build expanded query: original terms OR extracted keywords // This ensures both exact matches and keyword matches are found @@ -792,6 +809,7 @@ export type LlmQueryExpander = (query: string) => Promise; export async function expandQueryWithLlm( query: string, llmExpander?: LlmQueryExpander, + opts?: { ftsTokenizer?: "unicode61" | "trigram" }, ): Promise { // If LLM expander is provided, try it first if (llmExpander) { @@ -806,5 +824,5 @@ export async function expandQueryWithLlm( } // Fall back to local keyword extraction - return extractKeywords(query); + return 
extractKeywords(query, opts); } diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index 747945c5d36..bf32ad33a53 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -43,6 +43,9 @@ export type ResolvedMemorySearchConfig = { store: { driver: "sqlite"; path: string; + fts: { + tokenizer: "unicode61" | "trigram"; + }; vector: { enabled: boolean; extensionPath?: string; @@ -206,9 +209,13 @@ function mergeConfig( extensionPath: overrides?.store?.vector?.extensionPath ?? defaults?.store?.vector?.extensionPath, }; + const fts = { + tokenizer: overrides?.store?.fts?.tokenizer ?? defaults?.store?.fts?.tokenizer ?? "unicode61", + }; const store = { driver: overrides?.store?.driver ?? defaults?.store?.driver ?? "sqlite", path: resolveStorePath(agentId, overrides?.store?.path ?? defaults?.store?.path), + fts, vector, }; const chunking = { diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 2ef625aa34e..c230f5ca683 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -2033,6 +2033,24 @@ export const GENERATED_BASE_CONFIG_SCHEMA = { path: { type: "string", }, + fts: { + type: "object", + properties: { + tokenizer: { + anyOf: [ + { + type: "string", + const: "unicode61", + }, + { + type: "string", + const: "trigram", + }, + ], + }, + }, + additionalProperties: false, + }, vector: { type: "object", properties: { @@ -3596,6 +3614,24 @@ export const GENERATED_BASE_CONFIG_SCHEMA = { path: { type: "string", }, + fts: { + type: "object", + properties: { + tokenizer: { + anyOf: [ + { + type: "string", + const: "unicode61", + }, + { + type: "string", + const: "trigram", + }, + ], + }, + }, + additionalProperties: false, + }, vector: { type: "object", properties: { diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 3325ebd59af..2bb63000ec4 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -379,6 +379,10 @@ export type 
MemorySearchConfig = { store?: { driver?: "sqlite"; path?: string; + fts?: { + /** FTS5 tokenizer (default: "unicode61"). Use "trigram" for CJK text support. */ + tokenizer?: "unicode61" | "trigram"; + }; vector?: { /** Enable sqlite-vec extension for vector search (default: true). */ enabled?: boolean; diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index fa231127705..13a4e78cb7b 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -653,6 +653,12 @@ export const MemorySearchSchema = z .object({ driver: z.literal("sqlite").optional(), path: z.string().optional(), + fts: z + .object({ + tokenizer: z.union([z.literal("unicode61"), z.literal("trigram")]).optional(), + }) + .strict() + .optional(), vector: z .object({ enabled: z.boolean().optional(),