From 3ce48aff660a0dca487fb195132d53e6e0e404ed Mon Sep 17 00:00:00 2001
From: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
Date: Sat, 28 Mar 2026 20:53:29 -0500
Subject: [PATCH] Memory: add configurable FTS5 tokenizer for CJK text support
 (openclaw#56707)

Verified:
- pnpm build
- pnpm check
- pnpm test -- extensions/memory-core/src/memory/manager-search.test.ts packages/memory-host-sdk/src/host/query-expansion.test.ts
- pnpm test -- extensions/memory-core/src/memory/index.test.ts -t "reindexes when extraPaths change"
- pnpm test -- src/config/schema.base.generated.test.ts
- pnpm test -- src/media-understanding/image.test.ts
- pnpm test

Co-authored-by: Mitsuyuki Osabe <24588751+carrotRakko@users.noreply.github.com>
---
 CHANGELOG.md                                  |  1 +
 .../src/memory/manager-search.test.ts         | 88 +++++++++++++++++++
 .../memory-core/src/memory/manager-search.ts  | 81 +++++++++++++++--
 .../src/memory/manager-sync-ops.ts            | 10 ++-
 extensions/memory-core/src/memory/manager.ts  |  5 +-
 .../memory-host-sdk/src/host/memory-schema.ts |  5 +-
 .../src/host/query-expansion.test.ts          | 45 ++++++++++
 .../src/host/query-expansion.ts               | 46 +++++++---
 src/agents/memory-search.ts                   |  7 ++
 src/config/schema.base.generated.ts           | 36 ++++++++
 src/config/types.tools.ts                     |  4 +
 src/config/zod-schema.agent-runtime.ts        |  6 ++
 12 files changed, 310 insertions(+), 24 deletions(-)
 create mode 100644 extensions/memory-core/src/memory/manager-search.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ffb9fd32630..005e61875f3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
 - Memory/QMD: honor `memory.qmd.update.embedInterval` even when regular QMD update cadence is disabled or slower by arming a dedicated embed-cadence maintenance timer, while avoiding redundant timers when regular updates are already frequent enough. (#37326) Thanks @barronlroth.
 - Agents/memory flush: keep daily memory flush files append-only during embedded attempts so compaction writes do not overwrite earlier notes. (#53725) Thanks @HPluseven.
 - Web UI/markdown: stop bare auto-links from swallowing adjacent CJK text while preserving valid mixed-script path and query characters in rendered links. (#48410) Thanks @jnuyao.
+- Memory/FTS: add configurable trigram tokenization plus short-CJK substring fallback so memory search can find Chinese, Japanese, and Korean text without breaking mixed long-and-short queries. (#56707) Thanks @carrotRakko.
 
 ## 2026.3.28
 
diff --git a/extensions/memory-core/src/memory/manager-search.test.ts b/extensions/memory-core/src/memory/manager-search.test.ts
new file mode 100644
index 00000000000..32eee675820
--- /dev/null
+++ b/extensions/memory-core/src/memory/manager-search.test.ts
@@ -0,0 +1,88 @@
+import {
+  ensureMemoryIndexSchema,
+  requireNodeSqlite,
+} from "openclaw/plugin-sdk/memory-core-host-engine-storage";
+import { describe, expect, it } from "vitest";
+import { bm25RankToScore, buildFtsQuery } from "./hybrid.js";
+import { searchKeyword } from "./manager-search.js";
+
+describe("searchKeyword trigram fallback", () => {
+  const { DatabaseSync } = requireNodeSqlite();
+
+  function createTrigramDb() {
+    const db = new DatabaseSync(":memory:");
+    ensureMemoryIndexSchema({
+      db,
+      embeddingCacheTable: "embedding_cache",
+      cacheEnabled: false,
+      ftsTable: "chunks_fts",
+      ftsEnabled: true,
+      ftsTokenizer: "trigram",
+    });
+    return db;
+  }
+
+  async function runSearch(params: {
+    rows: Array<{ id: string; path: string; text: string }>;
+    query: string;
+  }) {
+    const db = createTrigramDb();
+    try {
+      const insert = db.prepare(
+        "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
+      );
+      for (const row of params.rows) {
+        insert.run(row.text, row.id, row.path, "memory", "mock-embed", 1, 1);
+      }
+      return await searchKeyword({
+        db,
+        ftsTable: "chunks_fts",
+        providerModel: "mock-embed",
+        query: params.query,
+        ftsTokenizer: "trigram",
+        limit: 10,
+        snippetMaxChars: 200,
+        sourceFilter: { sql: "", params: [] },
+        buildFtsQuery,
+        bm25RankToScore,
+      });
+    } finally {
+      db.close();
+    }
+  }
+
+  it("finds short Chinese queries with substring fallback", async () => {
+    const results = await runSearch({
+      rows: [{ id: "1", path: "memory/zh.md", text: "今天玩成语接龙游戏" }],
+      query: "成语",
+    });
+    expect(results.map((row) => row.id)).toContain("1");
+    expect(results[0]?.textScore).toBe(1);
+  });
+
+  it("finds short Japanese and Korean queries with substring fallback", async () => {
+    const japaneseResults = await runSearch({
+      rows: [{ id: "jp", path: "memory/jp.md", text: "今日はしりとり大会" }],
+      query: "しり とり",
+    });
+    expect(japaneseResults.map((row) => row.id)).toEqual(["jp"]);
+
+    const koreanResults = await runSearch({
+      rows: [{ id: "ko", path: "memory/ko.md", text: "오늘 끝말잇기 게임을 했다" }],
+      query: "끝말",
+    });
+    expect(koreanResults.map((row) => row.id)).toEqual(["ko"]);
+  });
+
+  it("keeps MATCH semantics for long trigram terms while requiring short CJK substrings", async () => {
+    const results = await runSearch({
+      rows: [
+        { id: "match", path: "memory/good.md", text: "今天玩成语接龙游戏" },
+        { id: "partial", path: "memory/partial.md", text: "今天玩成语接龙" },
+      ],
+      query: "成语接龙 游戏",
+    });
+    expect(results.map((row) => row.id)).toEqual(["match"]);
+    expect(results[0]?.textScore).toBeGreaterThan(0);
+  });
+});
diff --git a/extensions/memory-core/src/memory/manager-search.ts b/extensions/memory-core/src/memory/manager-search.ts
index 14f981491e6..039118c5361 100644
--- a/extensions/memory-core/src/memory/manager-search.ts
+++ b/extensions/memory-core/src/memory/manager-search.ts
@@ -7,6 +7,8 @@ import {
 
 const vectorToBlob = (embedding: number[]): Buffer =>
   Buffer.from(new Float32Array(embedding).buffer);
+const FTS_QUERY_TOKEN_RE = /[\p{L}\p{N}_]+/gu;
+const TRIGRAM_MIN_CODE_POINTS = 3; // FTS5 trigram MATCH cannot match terms shorter than this
 
 export type SearchSource = string;
 
@@ -20,6 +22,55 @@ export type SearchRowResult = {
   source: SearchSource;
 };
 
+/** Escapes `\`, `%` and `_` so `term` is matched literally by `LIKE ? ESCAPE '\'`. */
+function escapeLikePattern(term: string): string {
+  return term.replaceAll("\\", "\\\\").replaceAll("%", "\\%").replaceAll("_", "\\_");
+}
+
+/** ANDs the terms together as quoted FTS5 phrases; returns null when there are none. */
+function buildMatchQueryFromTerms(terms: string[]): string | null {
+  if (terms.length === 0) {
+    return null;
+  }
+  return terms.map((term) => `"${term.replaceAll('"', "")}"`).join(" AND ");
+}
+
+/**
+ * Splits a keyword query into an FTS5 MATCH expression plus literal substring terms.
+ *
+ * Non-trigram tokenizers delegate entirely to `buildFtsQuery`. In trigram mode a term
+ * shorter than three code points can never satisfy MATCH (and would zero out the whole
+ * AND query), so every such token, CJK or not, is returned in `substringTerms` for a
+ * LIKE filter while the longer tokens stay in `matchQuery`.
+ */
+function planKeywordSearch(params: {
+  query: string;
+  ftsTokenizer?: "unicode61" | "trigram";
+  buildFtsQuery: (raw: string) => string | null;
+}): { matchQuery: string | null; substringTerms: string[] } {
+  if (params.ftsTokenizer !== "trigram") {
+    return { matchQuery: params.buildFtsQuery(params.query), substringTerms: [] };
+  }
+
+  // Regex matches are never empty and never contain whitespace, so no trim/filter needed.
+  const tokens = params.query.match(FTS_QUERY_TOKEN_RE) ?? [];
+  const matchTerms: string[] = [];
+  const substringTerms: string[] = [];
+  for (const token of tokens) {
+    // Count code points, not UTF-16 units, so astral characters are measured correctly.
+    if (Array.from(token).length < TRIGRAM_MIN_CODE_POINTS) {
+      substringTerms.push(token);
+    } else {
+      matchTerms.push(token);
+    }
+  }
+
+  return {
+    matchQuery: buildMatchQueryFromTerms(matchTerms),
+    substringTerms,
+  };
+}
+
 export async function searchVector(params: {
   db: DatabaseSync;
   vectorTable: string;
@@ -141,6 +192,7 @@ export async function searchKeyword(params: {
   ftsTable: string;
   providerModel: string | undefined;
   query: string;
+  ftsTokenizer?: "unicode61" | "trigram";
   limit: number;
   snippetMaxChars: number;
   sourceFilter: { sql: string; params: SearchSource[] };
@@ -150,25 +202,42 @@ export async function searchKeyword(params: {
   if (params.limit <= 0) {
     return [];
   }
-  const ftsQuery = params.buildFtsQuery(params.query);
-  if (!ftsQuery) {
+  const plan = planKeywordSearch({
+    query: params.query,
+    ftsTokenizer: params.ftsTokenizer,
+    buildFtsQuery: params.buildFtsQuery,
+  });
+  if (!plan.matchQuery && plan.substringTerms.length === 0) {
     return [];
   }
 
   // When providerModel is undefined (FTS-only mode), search all models
   const modelClause = params.providerModel ? " AND model = ?" : "";
   const modelParams = params.providerModel ? [params.providerModel] : [];
+  const substringClause = plan.substringTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join("");
+  const substringParams = plan.substringTerms.map((term) => `%${escapeLikePattern(term)}%`);
+  const whereClause = plan.matchQuery
+    ? `${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}`
+    : `1=1${substringClause}${modelClause}${params.sourceFilter.sql}`;
+  const queryParams = [
+    ...(plan.matchQuery ? [plan.matchQuery] : []),
+    ...substringParams,
+    ...modelParams,
+    ...params.sourceFilter.params,
+    params.limit,
+  ];
+  const rankExpression = plan.matchQuery ? `bm25(${params.ftsTable})` : "0";
 
   const rows = params.db
     .prepare(
       `SELECT id, path, source, start_line, end_line, text,\n` +
-        `       bm25(${params.ftsTable}) AS rank\n` +
+        `       ${rankExpression} AS rank\n` +
         `  FROM ${params.ftsTable}\n` +
-        ` WHERE ${params.ftsTable} MATCH ?${modelClause}${params.sourceFilter.sql}\n` +
+        ` WHERE ${whereClause}\n` +
         ` ORDER BY rank ASC\n` +
         ` LIMIT ?`,
     )
-    .all(ftsQuery, ...modelParams, ...params.sourceFilter.params, params.limit) as Array<{
+    .all(...queryParams) as Array<{
     id: string;
     path: string;
     source: SearchSource;
@@ -179,7 +248,7 @@ export async function searchKeyword(params: {
   }>;
 
   return rows.map((row) => {
-    const textScore = params.bm25RankToScore(row.rank);
+    const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
     return {
       id: row.id,
       path: row.path,
diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts
index 55f9e822c85..a994072e5b1 100644
--- a/extensions/memory-core/src/memory/manager-sync-ops.ts
+++ b/extensions/memory-core/src/memory/manager-sync-ops.ts
@@ -56,6 +56,7 @@ type MemoryIndexMeta = {
   chunkTokens: number;
   chunkOverlap: number;
   vectorDims?: number;
+  ftsTokenizer?: string;
 };
 
 type MemorySyncProgressState = {
@@ -362,6 +363,7 @@ export abstract class MemoryManagerSyncOps {
       cacheEnabled: this.cache.enabled,
       ftsTable: FTS_TABLE,
       ftsEnabled: this.fts.enabled,
+      ftsTokenizer: this.settings.store.fts.tokenizer,
     });
     this.fts.available = result.ftsAvailable;
     if (result.ftsError) {
@@ -1028,7 +1030,8 @@ export abstract class MemoryManagerSyncOps {
       meta.scopeHash !== configuredScopeHash ||
       meta.chunkTokens !== this.settings.chunking.tokens ||
       meta.chunkOverlap !== this.settings.chunking.overlap ||
-      (vectorReady && !meta?.vectorDims);
+      (vectorReady && !meta?.vectorDims) ||
+      (meta.ftsTokenizer ?? "unicode61") !== this.settings.store.fts.tokenizer;
     try {
       if (needsFullReindex) {
         if (
@@ -1220,6 +1223,7 @@ export abstract class MemoryManagerSyncOps {
         scopeHash: this.resolveConfiguredScopeHash(),
         chunkTokens: this.settings.chunking.tokens,
         chunkOverlap: this.settings.chunking.overlap,
+        ftsTokenizer: this.settings.store.fts.tokenizer,
       };
       if (!nextMeta) {
         throw new Error("Failed to compute memory index metadata for reindexing.");
@@ -1292,6 +1296,7 @@ export abstract class MemoryManagerSyncOps {
       scopeHash: this.resolveConfiguredScopeHash(),
       chunkTokens: this.settings.chunking.tokens,
       chunkOverlap: this.settings.chunking.overlap,
+      ftsTokenizer: this.settings.store.fts.tokenizer,
     };
     if (this.vector.available && this.vector.dims) {
       nextMeta.vectorDims = this.vector.dims;
@@ -1306,9 +1311,10 @@ export abstract class MemoryManagerSyncOps {
     this.db.exec(`DELETE FROM chunks`);
     if (this.fts.enabled && this.fts.available) {
       try {
-        this.db.exec(`DELETE FROM ${FTS_TABLE}`);
+        this.db.exec(`DROP TABLE IF EXISTS ${FTS_TABLE}`);
       } catch {}
     }
+    this.ensureSchema();
     this.dropVectorTable();
     this.vector.dims = undefined;
     this.sessionsDirtyFiles.clear();
diff --git a/extensions/memory-core/src/memory/manager.ts b/extensions/memory-core/src/memory/manager.ts
index 654a6869ec4..07b9c24d436 100644
--- a/extensions/memory-core/src/memory/manager.ts
+++ b/extensions/memory-core/src/memory/manager.ts
@@ -352,7 +352,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
 
       // Extract keywords for better FTS matching on conversational queries
       // e.g., "that thing we discussed about the API" → ["discussed", "API"]
-      const keywords = extractKeywords(cleaned);
+      const keywords = extractKeywords(cleaned, {
+        ftsTokenizer: this.settings.store.fts.tokenizer,
+      });
       const searchTerms = keywords.length > 0 ? keywords : [cleaned];
 
       // Search with each keyword and merge results
@@ -488,6 +490,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
       ftsTable: FTS_TABLE,
       providerModel,
       query,
+      ftsTokenizer: this.settings.store.fts.tokenizer,
       limit,
       snippetMaxChars: SNIPPET_MAX_CHARS,
       sourceFilter,
diff --git a/packages/memory-host-sdk/src/host/memory-schema.ts b/packages/memory-host-sdk/src/host/memory-schema.ts
index cdc7f2e0451..582cddeee1c 100644
--- a/packages/memory-host-sdk/src/host/memory-schema.ts
+++ b/packages/memory-host-sdk/src/host/memory-schema.ts
@@ -6,6 +6,7 @@ export function ensureMemoryIndexSchema(params: {
   cacheEnabled: boolean;
   ftsTable: string;
   ftsEnabled: boolean;
+  ftsTokenizer?: "unicode61" | "trigram";
 }): { ftsAvailable: boolean; ftsError?: string } {
   params.db.exec(`
     CREATE TABLE IF NOT EXISTS meta (
@@ -58,6 +59,8 @@ export function ensureMemoryIndexSchema(params: {
   let ftsError: string | undefined;
   if (params.ftsEnabled) {
     try {
+      const tokenizer = params.ftsTokenizer ?? "unicode61";
+      const tokenizeClause = tokenizer === "trigram" ? `, tokenize='trigram case_sensitive 0'` : "";
       params.db.exec(
         `CREATE VIRTUAL TABLE IF NOT EXISTS ${params.ftsTable} USING fts5(\n` +
           `  text,\n` +
@@ -67,7 +70,7 @@ export function ensureMemoryIndexSchema(params: {
           `  model UNINDEXED,\n` +
           `  start_line UNINDEXED,\n` +
           `  end_line UNINDEXED\n` +
-          `);`,
+          `${tokenizeClause});`,
       );
       ftsAvailable = true;
     } catch (err) {
diff --git a/packages/memory-host-sdk/src/host/query-expansion.test.ts b/packages/memory-host-sdk/src/host/query-expansion.test.ts
index ac535b438e8..f1e9bff520e 100644
--- a/packages/memory-host-sdk/src/host/query-expansion.test.ts
+++ b/packages/memory-host-sdk/src/host/query-expansion.test.ts
@@ -174,6 +174,51 @@ describe("extractKeywords", () => {
     const testCount = keywords.filter((k) => k === "test").length;
     expect(testCount).toBe(1);
   });
+
+  describe("with trigram tokenizer", () => {
+    const trigramOpts = { ftsTokenizer: "trigram" as const };
+
+    it("emits whole CJK block instead of unigrams in trigram mode", () => {
+      const defaultKeywords = extractKeywords("之前讨论的那个方案");
+      const trigramKeywords = extractKeywords("之前讨论的那个方案", trigramOpts);
+      // Default mode produces bigrams
+      expect(defaultKeywords).toContain("讨论");
+      expect(defaultKeywords).toContain("方案");
+      // Trigram mode emits the whole contiguous CJK block (FTS5 trigram
+      // requires >= 3 chars per term; individual characters return no results)
+      expect(trigramKeywords).toContain("之前讨论的那个方案");
+      expect(trigramKeywords).not.toContain("讨论");
+      expect(trigramKeywords).not.toContain("方案");
+    });
+
+    it("skips Japanese kanji bigrams in trigram mode", () => {
+      const defaultKeywords = extractKeywords("経済政策について");
+      const trigramKeywords = extractKeywords("経済政策について", trigramOpts);
+      // Default mode adds kanji bigrams: 経済, 済政, 政策
+      expect(defaultKeywords).toContain("経済");
+      expect(defaultKeywords).toContain("済政");
+      expect(defaultKeywords).toContain("政策");
+      // Trigram mode keeps the full kanji block but skips bigram splitting
+      expect(trigramKeywords).toContain("経済政策");
+      expect(trigramKeywords).not.toContain("済政");
+    });
+
+    it("still filters stop words in trigram mode", () => {
+      const keywords = extractKeywords("これ それ そして どう", trigramOpts);
+      expect(keywords).not.toContain("これ");
+      expect(keywords).not.toContain("それ");
+      expect(keywords).not.toContain("そして");
+      expect(keywords).not.toContain("どう");
+    });
+
+    it("does not affect English keyword extraction", () => {
+      const keywords = extractKeywords("that thing we discussed about the API", trigramOpts);
+      expect(keywords).toContain("discussed");
+      expect(keywords).toContain("api");
+      expect(keywords).not.toContain("that");
+      expect(keywords).not.toContain("the");
+    });
+  });
 });
 
 describe("expandQueryForFts", () => {
diff --git a/packages/memory-host-sdk/src/host/query-expansion.ts b/packages/memory-host-sdk/src/host/query-expansion.ts
index 0bbff2674de..5ce120f1453 100644
--- a/packages/memory-host-sdk/src/host/query-expansion.ts
+++ b/packages/memory-host-sdk/src/host/query-expansion.ts
@@ -670,7 +670,8 @@ function isValidKeyword(token: string): boolean {
  * For Chinese, we do character-based splitting since we don't have a proper segmenter.
  * For English, we split on whitespace and punctuation.
  */
-function tokenize(text: string): string[] {
+function tokenize(text: string, opts?: { ftsTokenizer?: "unicode61" | "trigram" }): string[] {
+  const useTrigram = opts?.ftsTokenizer === "trigram";
   const tokens: string[] = [];
   const normalized = text.toLowerCase().trim();
 
@@ -686,8 +687,10 @@ function tokenize(text: string): string[] {
       for (const part of jpParts) {
         if (/^[\u4e00-\u9fff]+$/.test(part)) {
           tokens.push(part);
-          for (let i = 0; i < part.length - 1; i++) {
-            tokens.push(part[i] + part[i + 1]);
+          if (!useTrigram) {
+            for (let i = 0; i < part.length - 1; i++) {
+              tokens.push(part[i] + part[i + 1]);
+            }
           }
         } else {
           tokens.push(part);
@@ -695,13 +698,21 @@ function tokenize(text: string): string[] {
       }
     } else if (/[\u4e00-\u9fff]/.test(segment)) {
       // Check if segment contains CJK characters (Chinese)
-      // For Chinese, extract character n-grams (unigrams and bigrams)
       const chars = Array.from(segment).filter((c) => /[\u4e00-\u9fff]/.test(c));
-      // Add individual characters
-      tokens.push(...chars);
-      // Add bigrams for better phrase matching
-      for (let i = 0; i < chars.length - 1; i++) {
-        tokens.push(chars[i] + chars[i + 1]);
+      if (useTrigram) {
+        // In trigram mode, push the whole contiguous CJK block (mirroring the
+        // Japanese kanji path). SQLite's trigram FTS requires at least 3 characters
+        // per query term — individual characters silently return no results.
+        const block = chars.join("");
+        if (block.length > 0) {
+          tokens.push(block);
+        }
+      } else {
+        // Default mode: unigrams + bigrams for phrase matching
+        tokens.push(...chars);
+        for (let i = 0; i < chars.length - 1; i++) {
+          tokens.push(chars[i] + chars[i + 1]);
+        }
       }
     } else if (/[\uac00-\ud7af\u3131-\u3163]/.test(segment)) {
       // For Korean (Hangul syllables and jamo), keep the word as-is unless it is
@@ -732,8 +743,11 @@ function tokenize(text: string): string[] {
  * - "之前讨论的那个方案" → ["讨论", "方案"]
  * - "what was the solution for the bug" → ["solution", "bug"]
  */
-export function extractKeywords(query: string): string[] {
-  const tokens = tokenize(query);
+export function extractKeywords(
+  query: string,
+  opts?: { ftsTokenizer?: "unicode61" | "trigram" },
+): string[] {
+  const tokens = tokenize(query, opts);
   const keywords: string[] = [];
   const seen = new Set<string>();
 
@@ -764,13 +778,16 @@ export function extractKeywords(query: string): string[] {
  * @param query - User's original query
  * @returns Object with original query and extracted keywords
  */
-export function expandQueryForFts(query: string): {
+export function expandQueryForFts(
+  query: string,
+  opts?: { ftsTokenizer?: "unicode61" | "trigram" },
+): {
   original: string;
   keywords: string[];
   expanded: string;
 } {
   const original = query.trim();
-  const keywords = extractKeywords(original);
+  const keywords = extractKeywords(original, opts);
 
   // Build expanded query: original terms OR extracted keywords
   // This ensures both exact matches and keyword matches are found
@@ -792,6 +809,7 @@ export type LlmQueryExpander = (query: string) => Promise<string[]>;
 export async function expandQueryWithLlm(
   query: string,
   llmExpander?: LlmQueryExpander,
+  opts?: { ftsTokenizer?: "unicode61" | "trigram" },
 ): Promise<string[]> {
   // If LLM expander is provided, try it first
   if (llmExpander) {
@@ -806,5 +824,5 @@ export async function expandQueryWithLlm(
   }
 
   // Fall back to local keyword extraction
-  return extractKeywords(query);
+  return extractKeywords(query, opts);
 }
diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts
index 747945c5d36..bf32ad33a53 100644
--- a/src/agents/memory-search.ts
+++ b/src/agents/memory-search.ts
@@ -43,6 +43,9 @@ export type ResolvedMemorySearchConfig = {
   store: {
     driver: "sqlite";
     path: string;
+    fts: {
+      tokenizer: "unicode61" | "trigram";
+    };
     vector: {
       enabled: boolean;
       extensionPath?: string;
@@ -206,9 +209,13 @@ function mergeConfig(
     extensionPath:
       overrides?.store?.vector?.extensionPath ?? defaults?.store?.vector?.extensionPath,
   };
+  const fts = {
+    tokenizer: overrides?.store?.fts?.tokenizer ?? defaults?.store?.fts?.tokenizer ?? "unicode61",
+  };
   const store = {
     driver: overrides?.store?.driver ?? defaults?.store?.driver ?? "sqlite",
     path: resolveStorePath(agentId, overrides?.store?.path ?? defaults?.store?.path),
+    fts,
     vector,
   };
   const chunking = {
diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts
index 2ef625aa34e..c230f5ca683 100644
--- a/src/config/schema.base.generated.ts
+++ b/src/config/schema.base.generated.ts
@@ -2033,6 +2033,24 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
                       path: {
                         type: "string",
                       },
+                      fts: {
+                        type: "object",
+                        properties: {
+                          tokenizer: {
+                            anyOf: [
+                              {
+                                type: "string",
+                                const: "unicode61",
+                              },
+                              {
+                                type: "string",
+                                const: "trigram",
+                              },
+                            ],
+                          },
+                        },
+                        additionalProperties: false,
+                      },
                       vector: {
                         type: "object",
                         properties: {
@@ -3596,6 +3614,24 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
                         path: {
                           type: "string",
                         },
+                        fts: {
+                          type: "object",
+                          properties: {
+                            tokenizer: {
+                              anyOf: [
+                                {
+                                  type: "string",
+                                  const: "unicode61",
+                                },
+                                {
+                                  type: "string",
+                                  const: "trigram",
+                                },
+                              ],
+                            },
+                          },
+                          additionalProperties: false,
+                        },
                         vector: {
                           type: "object",
                           properties: {
diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts
index 3325ebd59af..2bb63000ec4 100644
--- a/src/config/types.tools.ts
+++ b/src/config/types.tools.ts
@@ -379,6 +379,10 @@ export type MemorySearchConfig = {
   store?: {
     driver?: "sqlite";
     path?: string;
+    fts?: {
+      /** FTS5 tokenizer (default: "unicode61"). Use "trigram" for CJK text support. */
+      tokenizer?: "unicode61" | "trigram";
+    };
     vector?: {
       /** Enable sqlite-vec extension for vector search (default: true). */
       enabled?: boolean;
diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts
index fa231127705..13a4e78cb7b 100644
--- a/src/config/zod-schema.agent-runtime.ts
+++ b/src/config/zod-schema.agent-runtime.ts
@@ -653,6 +653,12 @@ export const MemorySearchSchema = z
       .object({
         driver: z.literal("sqlite").optional(),
         path: z.string().optional(),
+        fts: z
+          .object({
+            tokenizer: z.union([z.literal("unicode61"), z.literal("trigram")]).optional(),
+          })
+          .strict()
+          .optional(),
         vector: z
           .object({
             enabled: z.boolean().optional(),