fix(qa): stabilize hermetic suite runtime

This commit is contained in:
Peter Steinberger 2026-04-05 23:21:04 +01:00
parent 4780788bbb
commit 1582bbbfc5
No known key found for this signature in database
3 changed files with 294 additions and 46 deletions

View File

@ -84,4 +84,120 @@ describe("qa mock openai server", () => {
prompt: 'Please inspect "message_id" metadata first, then read `./QA_KICKOFF_TASK.md`.',
});
});
// End-to-end exercise of the mock server's two response modes:
// 1) a streamed run where a tool-output item triggers the scripted
//    "write lobster-invaders.html" function call, and
// 2) a non-streamed run where a fact stated in an earlier user turn
//    ("ALPHA-7") is recalled in the final assistant text.
// Also checks /debug/requests records both calls with their models.
it("drives the Lobster Invaders write flow and memory recall responses", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
});
// Registered teardown: the suite-level cleanup runner stops the server.
cleanups.push(async () => {
await server.stop();
});
// Streamed request carrying a function_call_output whose text contains
// "QA mission" — the trigger for the scripted write tool call.
const lobster = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: true,
model: "gpt-5.4",
input: [
{
role: "user",
content: [
{ type: "input_text", text: "Please build Lobster Invaders after reading context." },
],
},
{
type: "function_call_output",
output: "QA mission: read source and docs first.",
},
],
}),
});
expect(lobster.status).toBe(200);
// Raw SSE text; assert on the serialized tool-call payload directly.
const lobsterBody = await lobster.text();
expect(lobsterBody).toContain('"name":"write"');
expect(lobsterBody).toContain("lobster-invaders.html");
// Non-streamed request: two user turns, the first states the canary,
// the second asks for it back.
const recall = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: false,
model: "gpt-5.4-alt",
input: [
{
role: "user",
content: [
{
type: "input_text",
text: "Please remember this fact for later: the QA canary code is ALPHA-7.",
},
],
},
{
role: "user",
content: [
{
type: "input_text",
text: "What was the QA canary code I asked you to remember earlier?",
},
],
},
],
}),
});
expect(recall.status).toBe(200);
const payload = (await recall.json()) as {
output?: Array<{ content?: Array<{ text?: string }> }>;
};
expect(payload.output?.[0]?.content?.[0]?.text).toContain("ALPHA-7");
// The debug endpoint should list both requests, in order, with the
// model each one sent.
const requests = await fetch(`${server.baseUrl}/debug/requests`);
expect(requests.status).toBe(200);
expect((await requests.json()) as Array<{ model?: string }>).toMatchObject([
{ model: "gpt-5.4" },
{ model: "gpt-5.4-alt" },
]);
});
// A delegation-style prompt on a streamed run must produce a scripted
// `sessions_spawn` tool call labelled "qa-sidecar" with threading off.
it("requests non-threaded subagent handoff for QA channel runs", async () => {
  const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await server.stop();
  });
  const delegatePrompt =
    "Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.";
  const res = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      stream: true,
      input: [
        {
          role: "user",
          content: [{ type: "input_text", text: delegatePrompt }],
        },
      ],
    }),
  });
  expect(res.status).toBe(200);
  // The spawn arguments are JSON-within-JSON in the SSE stream, hence
  // the escaped quotes in the expectations below.
  const streamed = await res.text();
  expect(streamed).toContain('"name":"sessions_spawn"');
  expect(streamed).toContain('\\"label\\":\\"qa-sidecar\\"');
  expect(streamed).toContain('\\"thread\\":false');
});
});

View File

@ -25,6 +25,7 @@ type MockOpenAiRequestSnapshot = {
body: Record<string, unknown>;
prompt: string;
toolOutput: string;
model: string;
};
function readBody(req: IncomingMessage): Promise<string> {
@ -91,6 +92,30 @@ function extractToolOutput(input: ResponsesInputItem[]) {
return "";
}
/**
 * Collects the text of every user turn in the request input.
 *
 * For each item with `role: "user"` and an array `content`, the
 * `input_text` entries are joined with newlines and trimmed; empty
 * results are dropped. Order follows the input array.
 */
function extractAllUserTexts(input: ResponsesInputItem[]) {
  const collected: string[] = [];
  for (const item of input) {
    if (item.role !== "user" || !Array.isArray(item.content)) {
      continue;
    }
    const parts: string[] = [];
    for (const entry of item.content) {
      const ok =
        !!entry &&
        typeof entry === "object" &&
        (entry as { type?: unknown }).type === "input_text" &&
        typeof (entry as { text?: unknown }).text === "string";
      if (ok) {
        parts.push((entry as { text: string }).text);
      }
    }
    const joined = parts.join("\n").trim();
    if (joined) {
      collected.push(joined);
    }
  }
  return collected;
}
function normalizePromptPathCandidate(candidate: string) {
const trimmed = candidate.trim().replace(/^`+|`+$/g, "");
if (!trimmed) {
@ -135,9 +160,95 @@ function readTargetFromPrompt(prompt: string) {
return "repo/package.json";
}
function buildAssistantText(input: ResponsesInputItem[]) {
/**
 * Builds the four-event streamed sequence for one mock tool call:
 * item added (empty arguments), a single arguments delta carrying the
 * full JSON payload, item done, and the `response.completed` envelope
 * with fixed mock token usage.
 */
function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>): StreamEvent[] {
  const callId = `call_mock_${name}_1`;
  const itemId = `fc_mock_${name}_1`;
  const serialized = JSON.stringify(args);
  // Template for the finished function-call item; spread at each use so
  // every event carries its own copy rather than a shared reference.
  const completedItem = {
    type: "function_call",
    id: itemId,
    call_id: callId,
    name,
    arguments: serialized,
  };
  return [
    {
      type: "response.output_item.added",
      item: { ...completedItem, arguments: "" },
    },
    { type: "response.function_call_arguments.delta", delta: serialized },
    {
      type: "response.output_item.done",
      item: { ...completedItem },
    },
    {
      type: "response.completed",
      response: {
        id: `resp_mock_${name}_1`,
        status: "completed",
        output: [{ ...completedItem }],
        usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 },
      },
    },
  ];
}
/**
 * Scans the user turns for a fact to echo back later.
 *
 * The explicit "qa canary code is <TOKEN>" phrasing takes priority over
 * the generic "remember…: <TOKEN>" phrasing: every text is checked
 * against the first pattern before any is checked against the second.
 * Returns the captured token, or null when neither pattern matches.
 */
function extractRememberedFact(userTexts: string[]) {
  const patterns = [
    /\bqa canary code is\s+([A-Za-z0-9-]+)/i,
    /remember(?: this fact for later)?:\s*([A-Za-z0-9-]+)/i,
  ];
  for (const pattern of patterns) {
    for (const text of userTexts) {
      const hit = pattern.exec(text);
      if (hit?.[1]) {
        return hit[1];
      }
    }
  }
  return null;
}
function buildAssistantText(input: ResponsesInputItem[], body: Record<string, unknown>) {
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
const userTexts = extractAllUserTexts(input);
const rememberedFact = extractRememberedFact(userTexts);
const model = typeof body.model === "string" ? body.model : "";
if (/what was the qa canary code/i.test(prompt) && rememberedFact) {
return `Protocol note: the QA canary code was ${rememberedFact}.`;
}
if (/remember this fact/i.test(prompt) && rememberedFact) {
return `Protocol note: acknowledged. I will remember ${rememberedFact}.`;
}
if (/switch(?:ing)? models?/i.test(prompt)) {
return `Protocol note: model switch acknowledged. Continuing on ${model || "the requested model"}.`;
}
if (toolOutput && /delegate|subagent/i.test(prompt)) {
return `Protocol note: delegated result acknowledged. The bounded subagent task returned and is folded back into the main thread.`;
}
if (toolOutput && /worked, failed, blocked|worked\/failed\/blocked|follow-up/i.test(prompt)) {
return `Worked:\n- Read seeded QA material.\n- Expanded the report structure.\nFailed:\n- None observed in mock mode.\nBlocked:\n- No live provider evidence in this lane.\nFollow-up:\n- Re-run with a real model for qualitative coverage.`;
}
if (toolOutput && /lobster invaders/i.test(prompt)) {
if (toolOutput.includes("QA mission") || toolOutput.includes("Testing")) {
return "";
}
return `Protocol note: Lobster Invaders built at lobster-invaders.html.`;
}
if (toolOutput) {
const snippet = toolOutput.replace(/\s+/g, " ").trim().slice(0, 220);
return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`;
@ -150,48 +261,7 @@ function buildAssistantText(input: ResponsesInputItem[]) {
function buildToolCallEvents(prompt: string): StreamEvent[] {
const targetPath = readTargetFromPrompt(prompt);
const callId = "call_mock_read_1";
const args = JSON.stringify({ path: targetPath });
return [
{
type: "response.output_item.added",
item: {
type: "function_call",
id: "fc_mock_read_1",
call_id: callId,
name: "read",
arguments: "",
},
},
{ type: "response.function_call_arguments.delta", delta: args },
{
type: "response.output_item.done",
item: {
type: "function_call",
id: "fc_mock_read_1",
call_id: callId,
name: "read",
arguments: args,
},
},
{
type: "response.completed",
response: {
id: "resp_mock_tool_1",
status: "completed",
output: [
{
type: "function_call",
id: "fc_mock_read_1",
call_id: callId,
name: "read",
arguments: args,
},
],
usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 },
},
},
];
return buildToolCallEventsWithArgs("read", { path: targetPath });
}
function buildAssistantEvents(text: string): StreamEvent[] {
@ -229,18 +299,48 @@ function buildAssistantEvents(text: string): StreamEvent[] {
];
}
function buildResponsesPayload(input: ResponsesInputItem[]) {
function buildResponsesPayload(body: Record<string, unknown>) {
const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
if (/lobster invaders/i.test(prompt)) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
}
if (toolOutput.includes("QA mission") || toolOutput.includes("Testing")) {
return buildToolCallEventsWithArgs("write", {
path: "lobster-invaders.html",
content: `<!doctype html>
<html lang="en">
<head><meta charset="utf-8" /><title>Lobster Invaders</title></head>
<body><h1>Lobster Invaders</h1><p>Tiny playable stub.</p></body>
</html>`,
});
}
}
if (/delegate|subagent/i.test(prompt) && !toolOutput) {
return buildToolCallEventsWithArgs("sessions_spawn", {
task: "Inspect the QA workspace and return one concise protocol note.",
label: "qa-sidecar",
thread: false,
});
}
if (
/(worked, failed, blocked|worked\/failed\/blocked|source and docs)/i.test(prompt) &&
!toolOutput
) {
return buildToolCallEventsWithArgs("read", { path: "QA_SCENARIO_PLAN.md" });
}
if (!toolOutput && /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt)) {
return buildToolCallEvents(prompt);
}
return buildAssistantEvents(buildAssistantText(input));
return buildAssistantEvents(buildAssistantText(input, body));
}
export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) {
const host = params?.host ?? "127.0.0.1";
let lastRequest: MockOpenAiRequestSnapshot | null = null;
const requests: MockOpenAiRequestSnapshot[] = [];
const server = createServer(async (req, res) => {
const url = new URL(req.url ?? "/", "http://127.0.0.1");
if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) {
@ -260,6 +360,10 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
writeJson(res, 200, lastRequest ?? { ok: false, error: "no request recorded" });
return;
}
if (req.method === "GET" && url.pathname === "/debug/requests") {
writeJson(res, 200, requests);
return;
}
if (req.method === "POST" && url.pathname === "/v1/responses") {
const raw = await readBody(req);
const body = raw ? (JSON.parse(raw) as Record<string, unknown>) : {};
@ -269,8 +373,13 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
body,
prompt: extractLastUserText(input),
toolOutput: extractToolOutput(input),
model: typeof body.model === "string" ? body.model : "",
};
const events = buildResponsesPayload(input);
requests.push(lastRequest);
if (requests.length > 50) {
requests.splice(0, requests.length - 50);
}
const events = buildResponsesPayload(body);
if (body.stream === false) {
const completion = events.at(-1);
if (!completion || completion.type !== "response.completed") {

View File

@ -1,5 +1,27 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
// Every bundled chat channel explicitly disabled, so hermetic QA runs
// exercise only the dedicated "qa-channel" entry that is merged in after
// this map is spread into the config's `channels` section.
// Object.freeze guards the outer map against accidental mutation (inner
// objects are not frozen); `satisfies` validates the value shape while
// keeping the literal key types.
// NOTE(review): list is otherwise alphabetical but "mattermost" appears
// before "matrix" — confirm the ordering is intentional.
const DISABLED_BUNDLED_CHANNELS = Object.freeze({
bluebubbles: { enabled: false },
discord: { enabled: false },
feishu: { enabled: false },
googlechat: { enabled: false },
imessage: { enabled: false },
irc: { enabled: false },
line: { enabled: false },
mattermost: { enabled: false },
matrix: { enabled: false },
msteams: { enabled: false },
qqbot: { enabled: false },
signal: { enabled: false },
slack: { enabled: false },
"synology-chat": { enabled: false },
telegram: { enabled: false },
tlon: { enabled: false },
whatsapp: { enabled: false },
zalo: { enabled: false },
zalouser: { enabled: false },
} satisfies Record<string, { enabled: false }>);
export function buildQaGatewayConfig(params: {
bind: "loopback" | "lan";
gatewayPort: number;
@ -135,6 +157,7 @@ export function buildQaGatewayConfig(params: {
},
},
channels: {
...DISABLED_BUNDLED_CHANNELS,
"qa-channel": {
enabled: true,
baseUrl: params.qaBusBaseUrl,