fix(guardian): stricter ALLOW/BLOCK verdict parsing in guardian response

Require a delimiter (colon, space, or end of line) after ALLOW/BLOCK keywords.
Previously `startsWith("ALLOW")` would match words like "ALLOWING" or
"ALLOWANCE", potentially causing a false ALLOW verdict if the model's
response started with such a word.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
ShengtongZhu 2026-03-15 11:20:52 +08:00
parent 8972213aee
commit f4488a73ff
2 changed files with 143 additions and 4 deletions

View File

@ -1,7 +1,7 @@
import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, it, expect, vi, beforeEach } from "vitest";
import { callGuardian } from "./guardian-client.js";
import type { GuardianCallParams } from "./guardian-client.js";
import { callGuardian, callForText } from "./guardian-client.js";
import type { GuardianCallParams, TextCallParams } from "./guardian-client.js";
import type { ResolvedGuardianModel } from "./types.js";
// ---------------------------------------------------------------------------
@ -133,6 +133,39 @@ describe("guardian-client", () => {
expect(result.reason).toBe("dangerous");
});
it("does not match 'ALLOWING' as ALLOW verdict", async () => {
vi.mocked(completeSimple).mockResolvedValue(
mockResponse("ALLOWING this would be dangerous\nBLOCK: not requested"),
);
const result = await callGuardian(makeParams());
expect(result.action).toBe("block");
expect(result.reason).toBe("not requested");
});
it("does not match 'BLOCKED' as BLOCK verdict", async () => {
vi.mocked(completeSimple).mockResolvedValue(
mockResponse("BLOCKED by firewall is irrelevant\nALLOW: user asked for this"),
);
const result = await callGuardian(makeParams());
expect(result.action).toBe("allow");
});
it("matches bare 'ALLOW' without colon or space", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockResponse("ALLOW"));
const result = await callGuardian(makeParams());
expect(result.action).toBe("allow");
});
it("matches bare 'BLOCK' without colon or space", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockResponse("BLOCK"));
const result = await callGuardian(makeParams());
expect(result.action).toBe("block");
});
it("first verdict wins over later ones (forward scan for security)", async () => {
vi.mocked(completeSimple).mockResolvedValue(
mockResponse(
@ -325,6 +358,31 @@ describe("guardian-client", () => {
expect(result.reason).toContain("timed out");
});
it("returns fallback when abort signal fires during response processing (race condition)", async () => {
// Simulate the race: completeSimple resolves, but the abort signal
// has already been triggered (e.g., timeout fires at the exact moment
// the response arrives). The code checks controller.signal.aborted
// after receiving the response.
vi.mocked(completeSimple).mockImplementation((_model, _ctx, opts) => {
// Abort the signal before returning, simulating the race
const controller = (opts?.signal as AbortSignal & { _controller?: AbortController })
?._controller;
// We can't access the controller directly, so we simulate by
// returning a response and relying on the code's own abort check.
// Instead, use a short timeout that fires during await.
return new Promise((resolve) => {
// Let the abort timer fire first by introducing a slight delay
setTimeout(() => resolve(mockResponse("ALLOW: should be ignored")), 60);
});
});
const result = await callGuardian(makeParams({ timeoutMs: 10, fallbackOnError: "block" }));
// The abort fires before the response resolves, so it should be caught
// either by the abort race guard or by the catch block
expect(result.action).toBe("block");
expect(result.reason).toContain("timed out");
});
it("returns fallback on response with only whitespace text", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockResponse(" \n \n "));
@ -426,3 +484,82 @@ describe("guardian-client", () => {
});
});
});
// ---------------------------------------------------------------------------
// callForText tests
// ---------------------------------------------------------------------------
describe("guardian-client callForText", () => {
beforeEach(() => {
vi.clearAllMocks();
});
function makeTextParams(overrides: Partial<TextCallParams> = {}): TextCallParams {
return {
model: makeModel(),
systemPrompt: "summary system prompt",
userPrompt: "summarize this conversation",
timeoutMs: 20000,
...overrides,
};
}
it("returns raw text from LLM response", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockResponse("User is deploying a web app"));
const result = await callForText(makeTextParams());
expect(result).toBe("User is deploying a web app");
});
it("passes maxTokens=200 (not 150 like callGuardian)", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockResponse("summary text"));
await callForText(makeTextParams());
const [, , options] = vi.mocked(completeSimple).mock.calls[0];
expect(options?.maxTokens).toBe(200);
});
it("returns undefined on error", async () => {
vi.mocked(completeSimple).mockRejectedValue(new Error("ECONNREFUSED"));
const result = await callForText(makeTextParams());
expect(result).toBeUndefined();
});
it("returns undefined on timeout (abort race)", async () => {
vi.mocked(completeSimple).mockImplementation(
(_model, _ctx, opts) =>
new Promise((_resolve, reject) => {
opts?.signal?.addEventListener("abort", () => {
reject(new Error("The operation was aborted"));
});
}),
);
const result = await callForText(makeTextParams({ timeoutMs: 50 }));
expect(result).toBeUndefined();
});
it("returns undefined on empty response", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockEmptyResponse());
const result = await callForText(makeTextParams());
expect(result).toBeUndefined();
});
it("passes system and user prompts correctly", async () => {
vi.mocked(completeSimple).mockResolvedValue(mockResponse("result"));
await callForText(
makeTextParams({
systemPrompt: "custom system",
userPrompt: "custom user",
}),
);
const [, context] = vi.mocked(completeSimple).mock.calls[0];
expect(context.systemPrompt).toBe("custom system");
expect(context.messages[0].content).toBe("custom user");
});
});

View File

@ -252,13 +252,15 @@ function parseGuardianResponse(content: string, fallback: GuardianDecision): Gua
if (!line) continue;
const upper = line.toUpperCase();
if (upper.startsWith("ALLOW")) {
// Require a delimiter after ALLOW/BLOCK to avoid matching words like
// "ALLOWING" or "BLOCKED" which are not valid verdicts.
if (upper === "ALLOW" || upper.startsWith("ALLOW:") || upper.startsWith("ALLOW ")) {
const colonIndex = line.indexOf(":");
const reason = colonIndex >= 0 ? line.slice(colonIndex + 1).trim() : line.slice(5).trim();
return { action: "allow", reason: reason || undefined };
}
if (upper.startsWith("BLOCK")) {
if (upper === "BLOCK" || upper.startsWith("BLOCK:") || upper.startsWith("BLOCK ")) {
const colonIndex = line.indexOf(":");
const reason = colonIndex >= 0 ? line.slice(colonIndex + 1).trim() : line.slice(5).trim();
return { action: "block", reason: reason || "Blocked by guardian" };