mirror of https://github.com/openclaw/openclaw.git
131 lines
3.7 KiB
TypeScript
131 lines
3.7 KiB
TypeScript
import { describe, expect, it } from "vitest";
|
|
import { EmbeddedBlockChunker } from "./pi-embedded-block-chunker.js";
|
|
|
|
describe("EmbeddedBlockChunker", () => {
  /**
   * Appends `input` to the chunker, runs a single non-forced drain, and
   * returns every chunk handed to the `emit` callback, in emission order.
   */
  const appendAndDrain = (chunker: EmbeddedBlockChunker, input: string): string[] => {
    chunker.append(input);
    const emitted: string[] = [];
    chunker.drain({ force: false, emit: (piece) => emitted.push(piece) });
    return emitted;
  };

  it("breaks at paragraph boundary right after fence close", () => {
    const chunker = new EmbeddedBlockChunker({
      minChars: 1,
      maxChars: 40,
      breakPreference: "paragraph",
    });

    // A fenced code block followed by a blank line and two trailing lines.
    const lines = [
      "Intro",
      "```js",
      "console.log('x')",
      "```",
      "",
      "After first line",
      "After second line",
    ];

    const emitted = appendAndDrain(chunker, lines.join("\n"));

    // Exactly one chunk is emitted: it holds the fenced code and ends at the
    // closing fence (optionally followed by a single newline).
    expect(emitted.length).toBe(1);
    const [first] = emitted;
    expect(first).toContain("console.log");
    expect(first).toMatch(/```\n?$/);
    expect(first).not.toContain("After");

    // Everything after the blank line is still waiting in the buffer.
    expect(chunker.bufferedText).toMatch(/^After/);
  });

  it("flushes paragraph boundaries before minChars when flushOnParagraph is set", () => {
    const chunker = new EmbeddedBlockChunker({
      minChars: 100,
      maxChars: 200,
      breakPreference: "paragraph",
      flushOnParagraph: true,
    });

    // The whole input is far shorter than minChars, yet the first paragraph
    // is still expected to be emitted.
    const emitted = appendAndDrain(chunker, "First paragraph.\n\nSecond paragraph.");

    expect(emitted).toEqual(["First paragraph."]);
    expect(chunker.bufferedText).toBe("Second paragraph.");
  });

  it("treats blank lines with whitespace as paragraph boundaries when flushOnParagraph is set", () => {
    const chunker = new EmbeddedBlockChunker({
      minChars: 100,
      maxChars: 200,
      breakPreference: "paragraph",
      flushOnParagraph: true,
    });

    // The separator line contains a single space instead of being empty.
    const emitted = appendAndDrain(chunker, "First paragraph.\n \nSecond paragraph.");

    expect(emitted).toEqual(["First paragraph."]);
    expect(chunker.bufferedText).toBe("Second paragraph.");
  });

  it("falls back to maxChars when flushOnParagraph is set and no paragraph break exists", () => {
    const chunker = new EmbeddedBlockChunker({
      minChars: 1,
      maxChars: 10,
      breakPreference: "paragraph",
      flushOnParagraph: true,
    });

    // 16 characters with no break of any kind: the first 10 are emitted and
    // the remaining 6 stay buffered.
    const emitted = appendAndDrain(chunker, "abcdefghijKLMNOP");

    expect(emitted).toEqual(["abcdefghij"]);
    expect(chunker.bufferedText).toBe("KLMNOP");
  });

  it("clamps long paragraphs to maxChars when flushOnParagraph is set", () => {
    const chunker = new EmbeddedBlockChunker({
      minChars: 1,
      maxChars: 10,
      breakPreference: "paragraph",
      flushOnParagraph: true,
    });

    // The first paragraph is 11 characters long, one more than maxChars.
    const emitted = appendAndDrain(chunker, "abcdefghijk\n\nRest");

    // No emitted chunk may exceed maxChars; the oversized paragraph is split
    // into a 10-character chunk plus the 1-character remainder.
    expect(emitted.every((piece) => piece.length <= 10)).toBe(true);
    expect(emitted).toEqual(["abcdefghij", "k"]);
    expect(chunker.bufferedText).toBe("Rest");
  });

  it("ignores paragraph breaks inside fences when flushOnParagraph is set", () => {
    const chunker = new EmbeddedBlockChunker({
      minChars: 100,
      maxChars: 200,
      breakPreference: "paragraph",
      flushOnParagraph: true,
    });

    // One blank line sits inside the code fence, another directly after it.
    const lines = [
      "Intro",
      "```js",
      "const a = 1;",
      "",
      "const b = 2;",
      "```",
      "",
      "After fence",
    ];

    const emitted = appendAndDrain(chunker, lines.join("\n"));

    // The blank line inside the fence does not split the chunk; only the one
    // following the closing fence does.
    expect(emitted).toEqual(["Intro\n```js\nconst a = 1;\n\nconst b = 2;\n```"]);
    expect(chunker.bufferedText).toBe("After fence");
  });
});
|