// openclaw/src/agents/pi-embedded-block-chunker.e...
//
// 131 lines
// 3.7 KiB
// TypeScript

import { describe, expect, it } from "vitest";
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
/**
 * Spec for EmbeddedBlockChunker with `breakPreference: "paragraph"`.
 *
 * Every case follows the same shape: append one string, run a single
 * non-forced drain, then check both the emitted chunks and the text still
 * held in `bufferedText`.
 */
describe("EmbeddedBlockChunker", () => {
  /** Performs one non-forced drain and returns the chunks in emit order. */
  const drainOnce = (chunker: EmbeddedBlockChunker): string[] => {
    const emitted: string[] = [];
    chunker.drain({ force: false, emit: (piece) => emitted.push(piece) });
    return emitted;
  };

  /** Builds a paragraph-preferring chunker that has `flushOnParagraph` enabled. */
  const newParagraphFlushChunker = (minChars: number, maxChars: number): EmbeddedBlockChunker =>
    new EmbeddedBlockChunker({
      minChars,
      maxChars,
      breakPreference: "paragraph",
      flushOnParagraph: true,
    });

  it("breaks at paragraph boundary right after fence close", () => {
    const subject = new EmbeddedBlockChunker({
      minChars: 1,
      maxChars: 40,
      breakPreference: "paragraph",
    });
    const inputLines = [
      "Intro",
      "```js",
      "console.log('x')",
      "```",
      "",
      "After first line",
      "After second line",
    ];
    subject.append(inputLines.join("\n"));

    const emitted = drainOnce(subject);

    // Exactly one chunk: it ends on the closing fence and leaves the
    // trailing prose in the buffer.
    expect(emitted.length).toBe(1);
    const first = emitted[0];
    expect(first).toContain("console.log");
    expect(first).toMatch(/```\n?$/);
    expect(first).not.toContain("After");
    expect(subject.bufferedText).toMatch(/^After/);
  });

  it("flushes paragraph boundaries before minChars when flushOnParagraph is set", () => {
    // The whole input is shorter than minChars (100).
    const subject = newParagraphFlushChunker(100, 200);
    subject.append("First paragraph.\n\nSecond paragraph.");

    const emitted = drainOnce(subject);

    expect(emitted).toEqual(["First paragraph."]);
    expect(subject.bufferedText).toBe("Second paragraph.");
  });

  it("treats blank lines with whitespace as paragraph boundaries when flushOnParagraph is set", () => {
    const subject = newParagraphFlushChunker(100, 200);
    // The separating line holds a single space rather than being empty.
    subject.append("First paragraph.\n \nSecond paragraph.");

    const emitted = drainOnce(subject);

    expect(emitted).toEqual(["First paragraph."]);
    expect(subject.bufferedText).toBe("Second paragraph.");
  });

  it("falls back to maxChars when flushOnParagraph is set and no paragraph break exists", () => {
    const subject = newParagraphFlushChunker(1, 10);
    subject.append("abcdefghijKLMNOP");

    const emitted = drainOnce(subject);

    // The first ten characters are emitted; the remainder stays buffered.
    expect(emitted).toEqual(["abcdefghij"]);
    expect(subject.bufferedText).toBe("KLMNOP");
  });

  it("clamps long paragraphs to maxChars when flushOnParagraph is set", () => {
    const subject = newParagraphFlushChunker(1, 10);
    // The first paragraph is eleven characters, one more than maxChars.
    subject.append("abcdefghijk\n\nRest");

    const emitted = drainOnce(subject);

    const allWithinLimit = emitted.every((piece) => piece.length <= 10);
    expect(allWithinLimit).toBe(true);
    expect(emitted).toEqual(["abcdefghij", "k"]);
    expect(subject.bufferedText).toBe("Rest");
  });

  it("ignores paragraph breaks inside fences when flushOnParagraph is set", () => {
    const subject = newParagraphFlushChunker(100, 200);
    const inputLines = [
      "Intro",
      "```js",
      "const a = 1;",
      "", // blank line inside the fenced block
      "const b = 2;",
      "```",
      "", // blank line after the fence closes
      "After fence",
    ];
    subject.append(inputLines.join("\n"));

    const emitted = drainOnce(subject);

    expect(emitted).toEqual(["Intro\n```js\nconst a = 1;\n\nconst b = 2;\n```"]);
    expect(subject.bufferedText).toBe("After fence");
  });
});