From 1582bbbfc54db5ecf7f239d1aeb42a354419c3bf Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 23:21:04 +0100 Subject: [PATCH] fix(qa): stabilize hermetic suite runtime --- .../qa-lab/src/mock-openai-server.test.ts | 116 ++++++++++ extensions/qa-lab/src/mock-openai-server.ts | 201 ++++++++++++++---- extensions/qa-lab/src/qa-gateway-config.ts | 23 ++ 3 files changed, 294 insertions(+), 46 deletions(-) diff --git a/extensions/qa-lab/src/mock-openai-server.test.ts b/extensions/qa-lab/src/mock-openai-server.test.ts index 6c57c8a5746..af527adaa6e 100644 --- a/extensions/qa-lab/src/mock-openai-server.test.ts +++ b/extensions/qa-lab/src/mock-openai-server.test.ts @@ -84,4 +84,120 @@ describe("qa mock openai server", () => { prompt: 'Please inspect "message_id" metadata first, then read `./QA_KICKOFF_TASK.md`.', }); }); + + it("drives the Lobster Invaders write flow and memory recall responses", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const lobster = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: true, + model: "gpt-5.4", + input: [ + { + role: "user", + content: [ + { type: "input_text", text: "Please build Lobster Invaders after reading context." }, + ], + }, + { + type: "function_call_output", + output: "QA mission: read source and docs first.", + }, + ], + }), + }); + expect(lobster.status).toBe(200); + const lobsterBody = await lobster.text(); + expect(lobsterBody).toContain('"name":"write"'); + expect(lobsterBody).toContain("lobster-invaders.html"); + + const recall = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: false, + model: "gpt-5.4-alt", + input: [ + { + role: "user", + content: [ + { + type: "input_text", + text: "Please remember this fact for later: the QA canary code is ALPHA-7.", + }, + ], + }, + { + role: "user", + content: [ + { + type: "input_text", + text: "What was the QA canary code I asked you to remember earlier?", + }, + ], + }, + ], + }), + }); + expect(recall.status).toBe(200); + const payload = (await recall.json()) as { + output?: Array<{ content?: Array<{ text?: string }> }>; + }; + expect(payload.output?.[0]?.content?.[0]?.text).toContain("ALPHA-7"); + + const requests = await fetch(`${server.baseUrl}/debug/requests`); + expect(requests.status).toBe(200); + expect((await requests.json()) as Array<{ model?: string }>).toMatchObject([ + { model: "gpt-5.4" }, + { model: "gpt-5.4-alt" }, + ]); + }); + + it("requests non-threaded subagent handoff for QA channel runs", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const response = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: true, + input: [ + { + role: "user", + content: [ + { + type: "input_text", + text: "Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.", + }, + ], + }, + ], + }), + }); + expect(response.status).toBe(200); + const body = await response.text(); + expect(body).toContain('"name":"sessions_spawn"'); + expect(body).toContain('\\"label\\":\\"qa-sidecar\\"'); + expect(body).toContain('\\"thread\\":false'); + }); }); diff --git a/extensions/qa-lab/src/mock-openai-server.ts b/extensions/qa-lab/src/mock-openai-server.ts index ff7bb65e1ab..3c6591b78d6 100644 --- a/extensions/qa-lab/src/mock-openai-server.ts +++ b/extensions/qa-lab/src/mock-openai-server.ts @@ -25,6 +25,7 @@ type MockOpenAiRequestSnapshot = { body: Record; prompt: string; toolOutput: string; + model: string; }; function readBody(req: IncomingMessage): Promise { @@ -91,6 +92,30 @@ function extractToolOutput(input: ResponsesInputItem[]) { return ""; } +function extractAllUserTexts(input: ResponsesInputItem[]) { + const texts: string[] = []; + for (const item of input) { + if (item.role !== "user" || !Array.isArray(item.content)) { + continue; + } + const text = item.content + .filter( + (entry): entry is { type: "input_text"; text: string } => + !!entry && + typeof entry === "object" && + (entry as { type?: unknown }).type === "input_text" && + typeof (entry as { text?: unknown }).text === "string", + ) + .map((entry) => entry.text) + .join("\n") + .trim(); + if (text) { + texts.push(text); + } + } + return texts; +} + function normalizePromptPathCandidate(candidate: string) { const trimmed = candidate.trim().replace(/^`+|`+$/g, ""); if (!trimmed) { @@ -135,9 +160,95 @@ function readTargetFromPrompt(prompt: string) { return "repo/package.json"; } -function buildAssistantText(input: ResponsesInputItem[]) { +function buildToolCallEventsWithArgs(name: string, args: Record): StreamEvent[] { + const callId = `call_mock_${name}_1`; + const serialized = JSON.stringify(args); + return [ + { + type: "response.output_item.added", + item: { + type: "function_call", + id: `fc_mock_${name}_1`, + call_id: callId, + name, + arguments: "", + }, + }, + { type: "response.function_call_arguments.delta", delta: serialized }, + { + type: "response.output_item.done", + item: { + type: "function_call", + id: `fc_mock_${name}_1`, + call_id: callId, + name, + arguments: serialized, + }, + }, + { + type: "response.completed", + response: { + id: `resp_mock_${name}_1`, + status: "completed", + output: [ + { + type: "function_call", + id: `fc_mock_${name}_1`, + call_id: callId, + name, + arguments: serialized, + }, + ], + usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 }, + }, + }, + ]; +} + +function extractRememberedFact(userTexts: string[]) { + for (const text of userTexts) { + const qaCanaryMatch = /\bqa canary code is\s+([A-Za-z0-9-]+)/i.exec(text); + if (qaCanaryMatch?.[1]) { + return qaCanaryMatch[1]; + } + } + for (const text of userTexts) { + const match = /remember(?: this fact for later)?:\s*([A-Za-z0-9-]+)/i.exec(text); + if (match?.[1]) { + return match[1]; + } + } + return null; +} + +function buildAssistantText(input: ResponsesInputItem[], body: Record) { const prompt = extractLastUserText(input); const toolOutput = extractToolOutput(input); + const userTexts = extractAllUserTexts(input); + const rememberedFact = extractRememberedFact(userTexts); + const model = typeof body.model === "string" ? body.model : ""; + + if (/what was the qa canary code/i.test(prompt) && rememberedFact) { + return `Protocol note: the QA canary code was ${rememberedFact}.`; + } + if (/remember this fact/i.test(prompt) && rememberedFact) { + return `Protocol note: acknowledged. I will remember ${rememberedFact}.`; + } + if (/switch(?:ing)? models?/i.test(prompt)) { + return `Protocol note: model switch acknowledged. Continuing on ${model || "the requested model"}.`; + } + if (toolOutput && /delegate|subagent/i.test(prompt)) { + return `Protocol note: delegated result acknowledged. The bounded subagent task returned and is folded back into the main thread.`; + } + if (toolOutput && /worked, failed, blocked|worked\/failed\/blocked|follow-up/i.test(prompt)) { + return `Worked:\n- Read seeded QA material.\n- Expanded the report structure.\nFailed:\n- None observed in mock mode.\nBlocked:\n- No live provider evidence in this lane.\nFollow-up:\n- Re-run with a real model for qualitative coverage.`; + } + if (toolOutput && /lobster invaders/i.test(prompt)) { + if (toolOutput.includes("QA mission") || toolOutput.includes("Testing")) { + return ""; + } + return `Protocol note: Lobster Invaders built at lobster-invaders.html.`; + } if (toolOutput) { const snippet = toolOutput.replace(/\s+/g, " ").trim().slice(0, 220); return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`; @@ -150,48 +261,7 @@ function buildAssistantText(input: ResponsesInputItem[]) { function buildToolCallEvents(prompt: string): StreamEvent[] { const targetPath = readTargetFromPrompt(prompt); - const callId = "call_mock_read_1"; - const args = JSON.stringify({ path: targetPath }); - return [ - { - type: "response.output_item.added", - item: { - type: "function_call", - id: "fc_mock_read_1", - call_id: callId, - name: "read", - arguments: "", - }, - }, - { type: "response.function_call_arguments.delta", delta: args }, - { - type: "response.output_item.done", - item: { - type: "function_call", - id: "fc_mock_read_1", - call_id: callId, - name: "read", - arguments: args, - }, - }, - { - type: "response.completed", - response: { - id: "resp_mock_tool_1", - status: "completed", - output: [ - { - type: "function_call", - id: "fc_mock_read_1", - call_id: callId, - name: "read", - arguments: args, - }, - ], - usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 }, - }, - }, - ]; + return buildToolCallEventsWithArgs("read", { path: targetPath }); } function buildAssistantEvents(text: string): StreamEvent[] { @@ -229,18 +299,48 @@ function buildAssistantEvents(text: string): StreamEvent[] { ]; } -function buildResponsesPayload(input: ResponsesInputItem[]) { +function buildResponsesPayload(body: Record) { + const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : []; const prompt = extractLastUserText(input); const toolOutput = extractToolOutput(input); + if (/lobster invaders/i.test(prompt)) { + if (!toolOutput) { + return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" }); + } + if (toolOutput.includes("QA mission") || toolOutput.includes("Testing")) { + return buildToolCallEventsWithArgs("write", { + path: "lobster-invaders.html", + content: ` + + Lobster Invaders +

Lobster Invaders

Tiny playable stub.

+`, + }); + } + } + if (/delegate|subagent/i.test(prompt) && !toolOutput) { + return buildToolCallEventsWithArgs("sessions_spawn", { + task: "Inspect the QA workspace and return one concise protocol note.", + label: "qa-sidecar", + thread: false, + }); + } + if ( + /(worked, failed, blocked|worked\/failed\/blocked|source and docs)/i.test(prompt) && + !toolOutput + ) { + return buildToolCallEventsWithArgs("read", { path: "QA_SCENARIO_PLAN.md" }); + } if (!toolOutput && /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt)) { return buildToolCallEvents(prompt); } - return buildAssistantEvents(buildAssistantText(input)); + return buildAssistantEvents(buildAssistantText(input, body)); } export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) { const host = params?.host ?? "127.0.0.1"; let lastRequest: MockOpenAiRequestSnapshot | null = null; + const requests: MockOpenAiRequestSnapshot[] = []; const server = createServer(async (req, res) => { const url = new URL(req.url ?? "/", "http://127.0.0.1"); if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) { @@ -260,6 +360,10 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n writeJson(res, 200, lastRequest ?? { ok: false, error: "no request recorded" }); return; } + if (req.method === "GET" && url.pathname === "/debug/requests") { + writeJson(res, 200, requests); + return; + } if (req.method === "POST" && url.pathname === "/v1/responses") { const raw = await readBody(req); const body = raw ? (JSON.parse(raw) as Record) : {}; @@ -269,8 +373,13 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n body, prompt: extractLastUserText(input), toolOutput: extractToolOutput(input), + model: typeof body.model === "string" ? body.model : "", }; - const events = buildResponsesPayload(input); + requests.push(lastRequest); + if (requests.length > 50) { + requests.splice(0, requests.length - 50); + } + const events = buildResponsesPayload(body); if (body.stream === false) { const completion = events.at(-1); if (!completion || completion.type !== "response.completed") { diff --git a/extensions/qa-lab/src/qa-gateway-config.ts b/extensions/qa-lab/src/qa-gateway-config.ts index 7476d04d44b..83cf35cd18f 100644 --- a/extensions/qa-lab/src/qa-gateway-config.ts +++ b/extensions/qa-lab/src/qa-gateway-config.ts @@ -1,5 +1,27 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/core"; +const DISABLED_BUNDLED_CHANNELS = Object.freeze({ + bluebubbles: { enabled: false }, + discord: { enabled: false }, + feishu: { enabled: false }, + googlechat: { enabled: false }, + imessage: { enabled: false }, + irc: { enabled: false }, + line: { enabled: false }, + mattermost: { enabled: false }, + matrix: { enabled: false }, + msteams: { enabled: false }, + qqbot: { enabled: false }, + signal: { enabled: false }, + slack: { enabled: false }, + "synology-chat": { enabled: false }, + telegram: { enabled: false }, + tlon: { enabled: false }, + whatsapp: { enabled: false }, + zalo: { enabled: false }, + zalouser: { enabled: false }, +} satisfies Record); + export function buildQaGatewayConfig(params: { bind: "loopback" | "lan"; gatewayPort: number; @@ -135,6 +157,7 @@ export function buildQaGatewayConfig(params: { }, }, channels: { + ...DISABLED_BUNDLED_CHANNELS, "qa-channel": { enabled: true, baseUrl: params.qaBusBaseUrl,