perf(memory): builtin sqlite hot-path follow-ups (#53939)

* chore(perf): start builtin sqlite hotpath workstream

* perf(memory): reuse sqlite statements during sync

* perf(memory): snapshot file state during sync

* perf(memory): consolidate status sqlite reads

* docs(changelog): note builtin sqlite perf work

* perf(memory): avoid session table scans on targeted sync
This commit is contained in:
Vincent Koc 2026-03-24 14:47:40 -07:00 committed by GitHub
parent 23a4ae4759
commit d648aebf4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 255 additions and 80 deletions

View File

@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Memory/builtin sqlite: cut redundant sync and status query churn by snapshotting file state once per source, reusing sync statements, and consolidating status aggregation reads, which reduces builtin memory overhead on sync/status/doctor-style paths. Thanks @vincentkoc.
- ACP/direct chats: always deliver a terminal ACP result when final TTS does not yield audio, even if block text already streamed earlier, and skip redundant empty-text final synthesis. (#53692) Thanks @w-sss.
- Doctor/image generation: seed migrated legacy Nano Banana Google provider config with the `/v1beta` API root and an empty model list so `openclaw doctor --fix` completes and the migrated native Google image path keeps hitting the correct endpoint. (#53757) Thanks @mahopan.
- Security/skills: validate skill installer metadata against strict regex allowlists per package manager, sanitize skill metadata for terminal output, add URL protocol allowlisting in markdown preview and skill homepage links, warn on non-bundled skill install sources, and remove unsafe `file://` workspace links. (#53471) Thanks @BunsDev.

View File

@ -554,6 +554,7 @@ describe("memory index", () => {
db: {
prepare: (sql: string) => {
get: (path: string, source: string) => { hash: string } | undefined;
all?: (...args: unknown[]) => unknown;
};
};
}
@ -587,13 +588,41 @@ describe("memory index", () => {
})}\n`,
);
const originalPrepare = db.prepare.bind(db);
let bulkSessionStateAllCalls = 0;
let perFileSessionHashPrepareCalls = 0;
db.prepare = ((sql: string) => {
const statement = originalPrepare(sql);
if (sql === `SELECT path, hash FROM files WHERE source = ?`) {
if (!statement.all) {
throw new Error("expected sqlite statement.all for bulk session state query");
}
const bulkAll = statement.all.bind(statement);
return {
...statement,
all: (...args: unknown[]) => {
bulkSessionStateAllCalls += 1;
return bulkAll(...args);
},
};
}
if (sql === `SELECT hash FROM files WHERE path = ? AND source = ?`) {
perFileSessionHashPrepareCalls += 1;
}
return statement;
}) as typeof db.prepare;
await manager.sync?.({
reason: "post-compaction",
sessionFiles: [firstSessionPath],
});
db.prepare = originalPrepare;
expect(getSessionHash("sessions/targeted-first.jsonl")).not.toBe(firstOriginalHash);
expect(getSessionHash("sessions/targeted-second.jsonl")).toBe(secondOriginalHash);
expect(bulkSessionStateAllCalls).toBe(0);
expect(perFileSessionHashPrepareCalls).toBeGreaterThan(0);
await manager.close?.();
} finally {
if (previousStateDir === undefined) {
@ -990,6 +1019,121 @@ describe("memory index", () => {
await manager.close?.();
});
// Verifies the perf optimization: a builtin-memory sync must read file state
// with ONE bulk `SELECT path, hash ... WHERE source = ?` query instead of one
// per-file `SELECT hash ... WHERE path = ? AND source = ?` query per entry.
it("snapshots builtin file hashes with a single sqlite query per sync", async () => {
// Seed one memory file so the sync has at least one entry to consider.
await fs.writeFile(path.join(memoryDir, "2026-01-13.md"), "beta line\n");
const cfg = createCfg({
storePath: path.join(workspaceDir, `index-prepare-reuse-${randomUUID()}.sqlite`),
onSearch: false,
});
const result = await getMemorySearchManager({ cfg, agentId: "main" });
const manager = requireManager(result);
managersForCleanup.add(manager);
// Reach into the manager's private sqlite handle; the cast only names the
// minimal `prepare` surface the spy below needs.
const db = (
manager as unknown as {
db: {
prepare: (sql: string) => { get: (...args: unknown[]) => unknown };
};
}
).db;
const originalPrepare = db.prepare.bind(db);
// Count prepares of the bulk snapshot query vs. the legacy per-file query.
let selectSourceFileStatePrepareCalls = 0;
let perFileHashPrepareCalls = 0;
// Spy wrapper: tally matching SQL strings, then delegate to the real prepare.
// NOTE(review): matching is on exact SQL text, so it is brittle against
// whitespace/format changes in the production queries.
db.prepare = ((sql: string) => {
if (sql === `SELECT path, hash FROM files WHERE source = ?`) {
selectSourceFileStatePrepareCalls += 1;
}
if (sql === `SELECT hash FROM files WHERE path = ? AND source = ?`) {
perFileHashPrepareCalls += 1;
}
return originalPrepare(sql);
}) as typeof db.prepare;
try {
await manager.sync({ reason: "test" });
} finally {
// Always restore the real prepare so later tests see an unpatched db.
db.prepare = originalPrepare;
}
// Exactly one bulk snapshot query, and zero legacy per-file hash lookups.
expect(selectSourceFileStatePrepareCalls).toBe(1);
expect(perFileHashPrepareCalls).toBe(0);
});
// Verifies that `status()` gathers file and chunk counts (total and per-source)
// with a single UNION ALL aggregation query rather than the four legacy
// COUNT(*) queries it previously prepared.
it("uses a single sqlite aggregation query for status counts", async () => {
const cfg = createCfg({
storePath: path.join(workspaceDir, `index-status-aggregate-${randomUUID()}.sqlite`),
sources: ["memory", "sessions"],
sessionMemory: true,
onSearch: false,
});
// Seed one "memory" source file...
await fs.writeFile(path.join(memoryDir, "2026-01-13.md"), "beta line\n");
// ...and one "sessions" source file under a stubbed state dir, so both
// sources produce non-zero counts. The env stub must happen before the
// manager is created so it resolves sessions from this directory.
const stateDir = path.join(fixtureRoot, `state-status-${randomUUID()}`);
vi.stubEnv("OPENCLAW_STATE_DIR", stateDir);
const sessionDir = path.join(stateDir, "agents", "main", "sessions");
await fs.mkdir(sessionDir, { recursive: true });
await fs.writeFile(
path.join(sessionDir, "status.jsonl"),
JSON.stringify({
type: "message",
message: { role: "user", content: [{ type: "text", text: "session status line" }] },
}) + "\n",
);
const result = await getMemorySearchManager({ cfg, agentId: "main" });
const manager = requireManager(result);
managersForCleanup.add(manager);
// Sync first so status() has indexed rows to count.
await manager.sync({ reason: "test" });
// Minimal typed view of the private sqlite handle for the prepare spy.
const db = (
manager as unknown as {
db: {
prepare: (sql: string) => { all: (...args: unknown[]) => unknown };
};
}
).db;
const originalPrepare = db.prepare.bind(db);
// Tally prepares of the new combined aggregate vs. the four legacy queries.
let aggregatePrepareCalls = 0;
let legacyCountPrepareCalls = 0;
db.prepare = ((sql: string) => {
// The aggregate query is matched loosely (substring + UNION ALL) because
// it embeds a dynamic source filter in its WHERE clause.
if (
sql.includes(`SELECT 'files' AS kind, source, COUNT(*) as c FROM files`) &&
sql.includes(`UNION ALL`)
) {
aggregatePrepareCalls += 1;
}
// Legacy queries are matched exactly; any of them appearing is a regression.
if (
sql === `SELECT COUNT(*) as c FROM files WHERE 1=1` ||
sql === `SELECT COUNT(*) as c FROM chunks WHERE 1=1` ||
sql === `SELECT source, COUNT(*) as c FROM files WHERE 1=1 GROUP BY source` ||
sql === `SELECT source, COUNT(*) as c FROM chunks WHERE 1=1 GROUP BY source`
) {
legacyCountPrepareCalls += 1;
}
return originalPrepare(sql);
}) as typeof db.prepare;
try {
const status = manager.status();
// Totals and both per-source entries must be populated from the one query.
expect(status.files).toBeGreaterThan(0);
expect(status.chunks).toBeGreaterThan(0);
expect(
status.sourceCounts?.find((entry) => entry.source === "memory")?.files,
).toBeGreaterThan(0);
expect(
status.sourceCounts?.find((entry) => entry.source === "sessions")?.files,
).toBeGreaterThan(0);
} finally {
// Restore the real prepare and the real environment for later tests.
db.prepare = originalPrepare;
vi.unstubAllEnvs();
}
expect(aggregatePrepareCalls).toBe(1);
expect(legacyCountPrepareCalls).toBe(0);
});
it("reindexes when Gemini outputDimensionality changes", async () => {
const base = createCfg({
storePath: indexModelPath,

View File

@ -705,6 +705,23 @@ export abstract class MemoryManagerSyncOps {
log.debug("Skipping memory file sync in FTS-only mode (no embedding provider)");
return;
}
const selectSourceFileState = this.db.prepare(`SELECT path, hash FROM files WHERE source = ?`);
const deleteFileByPathAndSource = this.db.prepare(
`DELETE FROM files WHERE path = ? AND source = ?`,
);
const deleteChunksByPathAndSource = this.db.prepare(
`DELETE FROM chunks WHERE path = ? AND source = ?`,
);
const deleteVectorRowsByPathAndSource =
this.vector.enabled && this.vector.available
? this.db.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
: null;
const deleteFtsRowsByPathSourceAndModel =
this.fts.enabled && this.fts.available
? this.db.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
: null;
const files = await listMemoryFiles(
this.workspaceDir,
@ -726,6 +743,11 @@ export abstract class MemoryManagerSyncOps {
batch: this.batch.enabled,
concurrency: this.getIndexConcurrency(),
});
const existingRows = selectSourceFileState.all("memory") as Array<{
path: string;
hash: string;
}>;
const existingHashes = new Map(existingRows.map((row) => [row.path, row.hash]));
const activePaths = new Set(fileEntries.map((entry) => entry.path));
if (params.progress) {
params.progress.total += fileEntries.length;
@ -737,10 +759,7 @@ export abstract class MemoryManagerSyncOps {
}
const tasks = fileEntries.map((entry) => async () => {
const record = this.db
.prepare(`SELECT hash FROM files WHERE path = ? AND source = ?`)
.get(entry.path, "memory") as { hash: string } | undefined;
if (!params.needsFullReindex && record?.hash === entry.hash) {
if (!params.needsFullReindex && existingHashes.get(entry.path) === entry.hash) {
if (params.progress) {
params.progress.completed += 1;
params.progress.report({
@ -761,27 +780,20 @@ export abstract class MemoryManagerSyncOps {
});
await runWithConcurrency(tasks, this.getIndexConcurrency());
const staleRows = this.db
.prepare(`SELECT path FROM files WHERE source = ?`)
.all("memory") as Array<{ path: string }>;
for (const stale of staleRows) {
for (const stale of existingRows) {
if (activePaths.has(stale.path)) {
continue;
}
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory");
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(stale.path, "memory");
} catch {}
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory");
if (this.fts.enabled && this.fts.available) {
deleteFileByPathAndSource.run(stale.path, "memory");
if (deleteVectorRowsByPathAndSource) {
try {
this.db
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
.run(stale.path, "memory", this.provider.model);
deleteVectorRowsByPathAndSource.run(stale.path, "memory");
} catch {}
}
deleteChunksByPathAndSource.run(stale.path, "memory");
if (deleteFtsRowsByPathSourceAndModel) {
try {
deleteFtsRowsByPathSourceAndModel.run(stale.path, "memory", this.provider.model);
} catch {}
}
}
@ -797,6 +809,24 @@ export abstract class MemoryManagerSyncOps {
log.debug("Skipping session file sync in FTS-only mode (no embedding provider)");
return;
}
const selectFileHash = this.db.prepare(`SELECT hash FROM files WHERE path = ? AND source = ?`);
const selectSourceFileState = this.db.prepare(`SELECT path, hash FROM files WHERE source = ?`);
const deleteFileByPathAndSource = this.db.prepare(
`DELETE FROM files WHERE path = ? AND source = ?`,
);
const deleteChunksByPathAndSource = this.db.prepare(
`DELETE FROM chunks WHERE path = ? AND source = ?`,
);
const deleteVectorRowsByPathAndSource =
this.vector.enabled && this.vector.available
? this.db.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
: null;
const deleteFtsRowsByPathSourceAndModel =
this.fts.enabled && this.fts.available
? this.db.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
: null;
const targetSessionFiles = params.needsFullReindex
? null
@ -807,6 +837,15 @@ export abstract class MemoryManagerSyncOps {
const activePaths = targetSessionFiles
? null
: new Set(files.map((file) => sessionPathForFile(file)));
const existingRows =
activePaths === null
? null
: (selectSourceFileState.all("sessions") as Array<{
path: string;
hash: string;
}>);
const existingHashes =
existingRows === null ? null : new Map(existingRows.map((row) => [row.path, row.hash]));
const indexAll =
params.needsFullReindex || Boolean(targetSessionFiles) || this.sessionsDirtyFiles.size === 0;
log.debug("memory sync: indexing session files", {
@ -848,10 +887,16 @@ export abstract class MemoryManagerSyncOps {
}
return;
}
const record = this.db
.prepare(`SELECT hash FROM files WHERE path = ? AND source = ?`)
.get(entry.path, "sessions") as { hash: string } | undefined;
if (!params.needsFullReindex && record?.hash === entry.hash) {
const existingHash =
existingHashes?.get(entry.path) ??
(
selectFileHash.get(entry.path, "sessions") as
| {
hash: string;
}
| undefined
)?.hash;
if (!params.needsFullReindex && existingHash === entry.hash) {
if (params.progress) {
params.progress.completed += 1;
params.progress.report({
@ -880,31 +925,20 @@ export abstract class MemoryManagerSyncOps {
return;
}
const staleRows = this.db
.prepare(`SELECT path FROM files WHERE source = ?`)
.all("sessions") as Array<{ path: string }>;
for (const stale of staleRows) {
for (const stale of existingRows ?? []) {
if (activePaths.has(stale.path)) {
continue;
}
this.db
.prepare(`DELETE FROM files WHERE path = ? AND source = ?`)
.run(stale.path, "sessions");
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(stale.path, "sessions");
} catch {}
this.db
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
.run(stale.path, "sessions");
if (this.fts.enabled && this.fts.available) {
deleteFileByPathAndSource.run(stale.path, "sessions");
if (deleteVectorRowsByPathAndSource) {
try {
this.db
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
.run(stale.path, "sessions", this.provider.model);
deleteVectorRowsByPathAndSource.run(stale.path, "sessions");
} catch {}
}
deleteChunksByPathAndSource.run(stale.path, "sessions");
if (deleteFtsRowsByPathSourceAndModel) {
try {
deleteFtsRowsByPathSourceAndModel.run(stale.path, "sessions", this.provider.model);
} catch {}
}
}

View File

@ -694,46 +694,42 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
status(): MemoryProviderStatus {
const sourceFilter = this.buildSourceFilter();
const files = this.db
.prepare(`SELECT COUNT(*) as c FROM files WHERE 1=1${sourceFilter.sql}`)
.get(...sourceFilter.params) as {
const aggregateRows = this.db
.prepare(
`SELECT 'files' AS kind, source, COUNT(*) as c FROM files WHERE 1=1${sourceFilter.sql} GROUP BY source\n` +
`UNION ALL\n` +
`SELECT 'chunks' AS kind, source, COUNT(*) as c FROM chunks WHERE 1=1${sourceFilter.sql} GROUP BY source`,
)
.all(...sourceFilter.params, ...sourceFilter.params) as Array<{
kind: "files" | "chunks";
source: MemorySource;
c: number;
};
const chunks = this.db
.prepare(`SELECT COUNT(*) as c FROM chunks WHERE 1=1${sourceFilter.sql}`)
.get(...sourceFilter.params) as {
c: number;
};
const sourceCounts = (() => {
}>;
const aggregateState = (() => {
const sources = Array.from(this.sources);
if (sources.length === 0) {
return [];
}
const bySource = new Map<MemorySource, { files: number; chunks: number }>();
for (const source of sources) {
bySource.set(source, { files: 0, chunks: 0 });
}
const fileRows = this.db
.prepare(
`SELECT source, COUNT(*) as c FROM files WHERE 1=1${sourceFilter.sql} GROUP BY source`,
)
.all(...sourceFilter.params) as Array<{ source: MemorySource; c: number }>;
for (const row of fileRows) {
let files = 0;
let chunks = 0;
for (const row of aggregateRows) {
const count = row.c ?? 0;
const entry = bySource.get(row.source) ?? { files: 0, chunks: 0 };
entry.files = row.c ?? 0;
if (row.kind === "files") {
entry.files = count;
files += count;
} else {
entry.chunks = count;
chunks += count;
}
bySource.set(row.source, entry);
}
const chunkRows = this.db
.prepare(
`SELECT source, COUNT(*) as c FROM chunks WHERE 1=1${sourceFilter.sql} GROUP BY source`,
)
.all(...sourceFilter.params) as Array<{ source: MemorySource; c: number }>;
for (const row of chunkRows) {
const entry = bySource.get(row.source) ?? { files: 0, chunks: 0 };
entry.chunks = row.c ?? 0;
bySource.set(row.source, entry);
}
return sources.map((source) => Object.assign({ source }, bySource.get(source)!));
return {
files,
chunks,
sourceCounts: sources.map((source) => Object.assign({ source }, bySource.get(source)!)),
};
})();
const searchMode = this.provider || !this.providerInitialized ? "hybrid" : "fts-only";
@ -745,8 +741,8 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
return {
backend: "builtin",
files: files?.c ?? 0,
chunks: chunks?.c ?? 0,
files: aggregateState.files,
chunks: aggregateState.chunks,
dirty: this.dirty || this.sessionsDirty,
workspaceDir: this.workspaceDir,
dbPath: this.settings.store.path,
@ -755,7 +751,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
requestedProvider: this.requestedProvider,
sources: Array.from(this.sources),
extraPaths: this.settings.extraPaths,
sourceCounts,
sourceCounts: aggregateState.sourceCounts,
cache: this.cache.enabled
? {
enabled: true,