fix(qa): harden new scenario suite

This commit is contained in:
Peter Steinberger 2026-04-06 02:38:19 +01:00
parent 80c5df6bdc
commit 979409eab5
No known key found for this signature in database
7 changed files with 1596 additions and 26 deletions

View File

@ -24,6 +24,53 @@ async function getFreePort() {
});
}
/**
 * Assemble the environment for a spawned QA gateway child process.
 *
 * Starts from the current process env, overlays the OpenClaw/XDG paths and
 * test-mode flags for the sandboxed run, and — in mock-openai mode — strips
 * any real provider credentials so the mocked run can never reach live APIs.
 */
function buildQaRuntimeEnv(params: {
  configPath: string;
  gatewayToken: string;
  homeDir: string;
  stateDir: string;
  xdgConfigHome: string;
  xdgDataHome: string;
  xdgCacheHome: string;
  providerMode?: "mock-openai" | "live-openai";
}) {
  // Variables that could leak live provider credentials into a mocked run.
  const providerCredentialKeys = [
    "OPENAI_API_KEY",
    "OPENAI_BASE_URL",
    "GEMINI_API_KEY",
    "GOOGLE_API_KEY",
    "VOYAGE_API_KEY",
    "MISTRAL_API_KEY",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "AWS_SESSION_TOKEN",
    "AWS_REGION",
    "AWS_BEARER_TOKEN_BEDROCK",
  ];
  const env: NodeJS.ProcessEnv = {
    ...process.env,
    HOME: params.homeDir,
    OPENCLAW_HOME: params.homeDir,
    OPENCLAW_CONFIG_PATH: params.configPath,
    OPENCLAW_STATE_DIR: params.stateDir,
    OPENCLAW_OAUTH_DIR: path.join(params.stateDir, "credentials"),
    OPENCLAW_GATEWAY_TOKEN: params.gatewayToken,
    OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1",
    OPENCLAW_SKIP_GMAIL_WATCHER: "1",
    OPENCLAW_SKIP_CANVAS_HOST: "1",
    OPENCLAW_NO_RESPAWN: "1",
    OPENCLAW_TEST_FAST: "1",
    XDG_CONFIG_HOME: params.xdgConfigHome,
    XDG_DATA_HOME: params.xdgDataHome,
    XDG_CACHE_HOME: params.xdgCacheHome,
  };
  if (params.providerMode === "mock-openai") {
    for (const key of providerCredentialKeys) {
      delete env[key];
    }
  }
  return env;
}
async function waitForGatewayReady(baseUrl: string, logs: () => string, timeoutMs = 30_000) {
const startedAt = Date.now();
while (Date.now() - startedAt < timeoutMs) {
@ -116,23 +163,16 @@ export async function startQaGatewayChild(params: {
const stdout: Buffer[] = [];
const stderr: Buffer[] = [];
const env = {
...process.env,
HOME: homeDir,
OPENCLAW_HOME: homeDir,
OPENCLAW_CONFIG_PATH: configPath,
OPENCLAW_STATE_DIR: stateDir,
OPENCLAW_OAUTH_DIR: path.join(stateDir, "credentials"),
OPENCLAW_GATEWAY_TOKEN: gatewayToken,
OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1",
OPENCLAW_SKIP_GMAIL_WATCHER: "1",
OPENCLAW_SKIP_CANVAS_HOST: "1",
OPENCLAW_NO_RESPAWN: "1",
OPENCLAW_TEST_FAST: "1",
XDG_CONFIG_HOME: xdgConfigHome,
XDG_DATA_HOME: xdgDataHome,
XDG_CACHE_HOME: xdgCacheHome,
};
const env = buildQaRuntimeEnv({
configPath,
gatewayToken,
homeDir,
stateDir,
xdgConfigHome,
xdgDataHome,
xdgCacheHome,
providerMode: params.providerMode,
});
const child = spawn(
process.execPath,
@ -176,6 +216,7 @@ export async function startQaGatewayChild(params: {
workspaceDir,
tempRoot,
configPath,
runtimeEnv: env,
logs,
async call(
method: string,

View File

@ -82,6 +82,8 @@ describe("qa mock openai server", () => {
expect(debugResponse.status).toBe(200);
expect(await debugResponse.json()).toMatchObject({
prompt: 'Please inspect "message_id" metadata first, then read `./QA_KICKOFF_TASK.md`.',
allInputText: 'Please inspect "message_id" metadata first, then read `./QA_KICKOFF_TASK.md`.',
plannedToolName: "read",
});
});
@ -200,4 +202,202 @@ describe("qa mock openai server", () => {
expect(body).toContain('\\"label\\":\\"qa-sidecar\\"');
expect(body).toContain('\\"thread\\":false');
});
// Exercises two mock endpoints in one test: a "Memory tools check" prompt to
// /v1/responses must yield a planned memory_search tool call in the stream,
// and /v1/images/generations must answer with a base64 image payload.
it("plans memory tools and serves mock image generations", async () => {
  const server = await startQaMockOpenAiServer({
    host: "127.0.0.1",
    port: 0, // port 0 = OS-assigned ephemeral port; server.baseUrl reflects it
  });
  cleanups.push(async () => {
    await server.stop();
  });
  const memorySearch = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      stream: true,
      input: [
        {
          role: "user",
          content: [
            {
              type: "input_text",
              text: "Memory tools check: what is the hidden project codename stored only in memory? Use memory tools first.",
            },
          ],
        },
      ],
    }),
  });
  expect(memorySearch.status).toBe(200);
  // The streamed body must contain a function_call planning memory_search.
  expect(await memorySearch.text()).toContain('"name":"memory_search"');
  const image = await fetch(`${server.baseUrl}/v1/images/generations`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      model: "gpt-image-1",
      prompt: "Draw a QA lighthouse",
      n: 1,
      size: "1024x1024",
    }),
  });
  expect(image.status).toBe(200);
  // Only asserts the shape (some base64 string), not the exact image bytes.
  expect(await image.json()).toMatchObject({
    data: [{ b64_json: expect.any(String) }],
  });
});
// Both skill prompts must be answered with their exact marker strings
// (non-streaming), so the QA suite can assert skill visibility and
// hot-install behavior end to end against deterministic replies.
it("returns exact markers for visible and hot-installed skills", async () => {
  const server = await startQaMockOpenAiServer({
    host: "127.0.0.1",
    port: 0,
  });
  cleanups.push(async () => {
    await server.stop();
  });
  const visible = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      stream: false,
      input: [
        {
          role: "user",
          content: [
            {
              type: "input_text",
              text: "Visible skill marker: give me the visible skill marker exactly.",
            },
          ],
        },
      ],
    }),
  });
  expect(visible.status).toBe(200);
  expect(await visible.json()).toMatchObject({
    output: [
      {
        content: [{ text: "VISIBLE-SKILL-OK" }],
      },
    ],
  });
  const hot = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      stream: false,
      input: [
        {
          role: "user",
          content: [
            {
              type: "input_text",
              text: "Hot install marker: give me the hot install marker exactly.",
            },
          ],
        },
      ],
    }),
  });
  expect(hot.status).toBe(200);
  expect(await hot.json()).toMatchObject({
    output: [
      {
        content: [{ text: "HOT-INSTALL-OK" }],
      },
    ],
  });
});
// A function_call_output from an earlier turn sits BETWEEN two user messages;
// the mock must still plan a fresh `read` tool call for the latest user turn
// instead of treating the stale output as satisfying the new request.
it("ignores stale tool output from prior turns when planning the current turn", async () => {
  const server = await startQaMockOpenAiServer({
    host: "127.0.0.1",
    port: 0,
  });
  cleanups.push(async () => {
    await server.stop();
  });
  const response = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      stream: true,
      input: [
        {
          role: "user",
          content: [{ type: "input_text", text: "Read QA_KICKOFF_TASK.md first." }],
        },
        {
          // Stale tool result from the first turn — precedes the final user message.
          type: "function_call_output",
          output: "QA mission: read source and docs first.",
        },
        {
          role: "user",
          content: [
            {
              type: "input_text",
              text: "Switch models now. Tool continuity check: reread QA_KICKOFF_TASK.md and mention the handoff in one short sentence.",
            },
          ],
        },
      ],
    }),
  });
  expect(response.status).toBe(200);
  // Stream must contain a planned `read` call, proving the stale output was ignored.
  expect(await response.text()).toContain('"name":"read"');
});
// Group-chat chatter that does not mention the bot: the prompt carries
// `"is_group_chat": true` metadata plus the "no bot ping here" phrase, and the
// mock must answer the literal sentinel NO_REPLY (meaning: stay silent).
it("returns NO_REPLY for unmentioned group chatter", async () => {
  const server = await startQaMockOpenAiServer({
    host: "127.0.0.1",
    port: 0,
  });
  cleanups.push(async () => {
    await server.stop();
  });
  const response = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      stream: false,
      input: [
        {
          role: "user",
          content: [
            {
              type: "input_text",
              text: 'Conversation info (untrusted metadata): {"is_group_chat": true}\n\nhello team, no bot ping here',
            },
          ],
        },
      ],
    }),
  });
  expect(response.status).toBe(200);
  expect(await response.json()).toMatchObject({
    output: [
      {
        content: [{ text: "NO_REPLY" }],
      },
    ],
  });
});
});

View File

@ -24,10 +24,15 @@ type MockOpenAiRequestSnapshot = {
raw: string;
body: Record<string, unknown>;
prompt: string;
allInputText: string;
toolOutput: string;
model: string;
plannedToolName?: string;
};
// Minimal valid 1x1 PNG, base64-encoded; served as the b64_json payload of the
// mock /v1/images/generations endpoint.
const TINY_PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO7Z0nQAAAAASUVORK5CYII=";
function readBody(req: IncomingMessage): Promise<string> {
return new Promise((resolve, reject) => {
const chunks: Buffer[] = [];
@ -82,8 +87,19 @@ function extractLastUserText(input: ResponsesInputItem[]) {
return "";
}
function extractToolOutput(input: ResponsesInputItem[]) {
/**
 * Index of the most recent user message whose content is an array
 * (i.e. a structured Responses message); -1 when no such item exists.
 */
function findLastUserIndex(input: ResponsesInputItem[]) {
  let lastIndex = -1;
  input.forEach((item, index) => {
    if (item.role === "user" && Array.isArray(item.content)) {
      lastIndex = index;
    }
  });
  return lastIndex;
}
function extractToolOutput(input: ResponsesInputItem[]) {
const lastUserIndex = findLastUserIndex(input);
for (let index = input.length - 1; index > lastUserIndex; index -= 1) {
const item = input[index];
if (item.type === "function_call_output" && typeof item.output === "string" && item.output) {
return item.output;
@ -116,6 +132,44 @@ function extractAllUserTexts(input: ResponsesInputItem[]) {
return texts;
}
/**
 * Flatten every textual fragment of a Responses `input` array into a single
 * newline-joined string: trimmed tool outputs (`item.output`) plus all
 * `input_text` entries found in message content arrays, in input order.
 */
function extractAllInputTexts(input: ResponsesInputItem[]) {
  const collected: string[] = [];
  for (const item of input) {
    // Tool results carry their text on `output` rather than `content`.
    if (typeof item.output === "string" && item.output.trim()) {
      collected.push(item.output.trim());
    }
    if (!Array.isArray(item.content)) {
      continue;
    }
    const fragments: string[] = [];
    for (const entry of item.content) {
      if (
        !!entry &&
        typeof entry === "object" &&
        (entry as { type?: unknown }).type === "input_text" &&
        typeof (entry as { text?: unknown }).text === "string"
      ) {
        fragments.push((entry as { text: string }).text);
      }
    }
    const joined = fragments.join("\n").trim();
    if (joined) {
      collected.push(joined);
    }
  }
  return collected.join("\n");
}
/**
 * Parse a tool output string as JSON, returning it only when it is a plain
 * JSON object. Empty/invalid input — and JSON that parses to an array,
 * primitive, or null — yields null so callers fall back to the raw text.
 */
function parseToolOutputJson(toolOutput: string): Record<string, unknown> | null {
  if (!toolOutput.trim()) {
    return null;
  }
  try {
    const parsed: unknown = JSON.parse(toolOutput);
    // JSON.parse can produce arrays, numbers, strings, booleans, or null;
    // only a plain object actually satisfies the declared return type.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
    return null;
  } catch {
    return null;
  }
}
function normalizePromptPathCandidate(candidate: string) {
const trimmed = candidate.trim().replace(/^`+|`+$/g, "");
if (!trimmed) {
@ -221,12 +275,26 @@ function extractRememberedFact(userTexts: string[]) {
return null;
}
/** Find the QA canary codename "ORBIT-9" (upper or lower case, as a whole word) in a text; null when absent. */
function extractOrbitCode(text: string) {
  const match = text.match(/\b(?:ORBIT-9|orbit-9)\b/);
  return match ? match[0].toUpperCase() : null;
}
function buildAssistantText(input: ResponsesInputItem[], body: Record<string, unknown>) {
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
const toolJson = parseToolOutputJson(toolOutput);
const userTexts = extractAllUserTexts(input);
const allInputText = extractAllInputTexts(input);
const rememberedFact = extractRememberedFact(userTexts);
const model = typeof body.model === "string" ? body.model : "";
const memorySnippet =
typeof toolJson?.text === "string"
? toolJson.text
: Array.isArray(toolJson?.results)
? JSON.stringify(toolJson.results)
: toolOutput;
const orbitCode = extractOrbitCode(memorySnippet);
const mediaPath = /MEDIA:([^\n]+)/.exec(toolOutput)?.[1]?.trim();
if (/what was the qa canary code/i.test(prompt) && rememberedFact) {
return `Protocol note: the QA canary code was ${rememberedFact}.`;
@ -234,9 +302,27 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
if (/remember this fact/i.test(prompt) && rememberedFact) {
return `Protocol note: acknowledged. I will remember ${rememberedFact}.`;
}
if (/memory unavailable check/i.test(prompt)) {
return "Protocol note: I checked the available runtime context but could not confirm the hidden memory-only fact, so I will not guess.";
}
if (/visible skill marker/i.test(prompt)) {
return "VISIBLE-SKILL-OK";
}
if (/hot install marker/i.test(prompt)) {
return "HOT-INSTALL-OK";
}
if (/memory tools check/i.test(prompt) && orbitCode) {
return `Protocol note: I checked memory and the project codename is ${orbitCode}.`;
}
if (/switch(?:ing)? models?/i.test(prompt)) {
return `Protocol note: model switch acknowledged. Continuing on ${model || "the requested model"}.`;
}
if (/tool continuity check/i.test(prompt) && toolOutput) {
return `Protocol note: model switch acknowledged. Tool continuity held on ${model || "the requested model"}.`;
}
if (/image generation check/i.test(prompt) && mediaPath) {
return `Protocol note: generated the QA lighthouse image successfully.\nMEDIA:${mediaPath}`;
}
if (toolOutput && /delegate|subagent/i.test(prompt)) {
return `Protocol note: delegated result acknowledged. The bounded subagent task returned and is folded back into the main thread.`;
}
@ -264,6 +350,19 @@ function buildToolCallEvents(prompt: string): StreamEvent[] {
return buildToolCallEventsWithArgs("read", { path: targetPath });
}
/**
 * Pull the first planned tool name out of a stream-event list: the name of
 * the earliest `response.output_item.done` event whose item is a
 * `function_call` with a string name; undefined when none exists.
 */
function extractPlannedToolName(events: StreamEvent[]) {
  const planned = events.find((event) => {
    if (event.type !== "response.output_item.done") {
      return false;
    }
    const item = event.item as { type?: unknown; name?: unknown };
    return item.type === "function_call" && typeof item.name === "string";
  });
  if (!planned) {
    return undefined;
  }
  return (planned.item as { name: string }).name;
}
function buildAssistantEvents(text: string): StreamEvent[] {
const outputItem = {
type: "message",
@ -303,6 +402,10 @@ function buildResponsesPayload(body: Record<string, unknown>) {
const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
const toolJson = parseToolOutputJson(toolOutput);
const allInputText = extractAllInputTexts(input);
const isGroupChat = allInputText.includes('"is_group_chat": true');
const isBaselineUnmentionedChannelChatter = /\bno bot ping here\b/i.test(prompt);
if (/lobster invaders/i.test(prompt)) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
@ -318,6 +421,44 @@ function buildResponsesPayload(body: Record<string, unknown>) {
});
}
}
if (/memory tools check/i.test(prompt)) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("memory_search", {
query: "project codename ORBIT-9",
maxResults: 3,
});
}
const results = Array.isArray(toolJson?.results)
? (toolJson.results as Array<Record<string, unknown>>)
: [];
const first = results[0];
if (
typeof first?.path === "string" &&
(typeof first.startLine === "number" || typeof first.endLine === "number")
) {
const from =
typeof first.startLine === "number"
? Math.max(1, first.startLine)
: typeof first.endLine === "number"
? Math.max(1, first.endLine)
: 1;
return buildToolCallEventsWithArgs("memory_get", {
path: first.path,
from,
lines: 4,
});
}
}
if (/image generation check/i.test(prompt) && !toolOutput) {
return buildToolCallEventsWithArgs("image_generate", {
prompt: "A QA lighthouse on a dark sea with a tiny protocol droid silhouette.",
filename: "qa-lighthouse.png",
size: "1024x1024",
});
}
if (/tool continuity check/i.test(prompt) && !toolOutput) {
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
}
if (/delegate|subagent/i.test(prompt) && !toolOutput) {
return buildToolCallEventsWithArgs("sessions_spawn", {
task: "Inspect the QA workspace and return one concise protocol note.",
@ -334,6 +475,15 @@ function buildResponsesPayload(body: Record<string, unknown>) {
if (!toolOutput && /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt)) {
return buildToolCallEvents(prompt);
}
if (/visible skill marker/i.test(prompt) && !toolOutput) {
return buildAssistantEvents("VISIBLE-SKILL-OK");
}
if (/hot install marker/i.test(prompt) && !toolOutput) {
return buildAssistantEvents("HOT-INSTALL-OK");
}
if (isGroupChat && isBaselineUnmentionedChannelChatter && !toolOutput) {
return buildAssistantEvents("NO_REPLY");
}
return buildAssistantEvents(buildAssistantText(input, body));
}
@ -352,6 +502,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
data: [
{ id: "gpt-5.4", object: "model" },
{ id: "gpt-5.4-alt", object: "model" },
{ id: "gpt-image-1", object: "model" },
],
});
return;
@ -364,22 +515,35 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
writeJson(res, 200, requests);
return;
}
if (req.method === "POST" && url.pathname === "/v1/images/generations") {
writeJson(res, 200, {
data: [
{
b64_json: TINY_PNG_BASE64,
revised_prompt: "A QA lighthouse with protocol droid silhouette.",
},
],
});
return;
}
if (req.method === "POST" && url.pathname === "/v1/responses") {
const raw = await readBody(req);
const body = raw ? (JSON.parse(raw) as Record<string, unknown>) : {};
const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
const events = buildResponsesPayload(body);
lastRequest = {
raw,
body,
prompt: extractLastUserText(input),
allInputText: extractAllInputTexts(input),
toolOutput: extractToolOutput(input),
model: typeof body.model === "string" ? body.model : "",
plannedToolName: extractPlannedToolName(events),
};
requests.push(lastRequest);
if (requests.length > 50) {
requests.splice(0, requests.length - 50);
}
const events = buildResponsesPayload(body);
if (body.stream === false) {
const completion = events.at(-1);
if (!completion || completion.type !== "response.completed") {

View File

@ -74,6 +74,21 @@ export function buildQaGatewayConfig(params: {
contextWindow: 128_000,
maxTokens: 4096,
},
{
id: "gpt-image-1",
name: "gpt-image-1",
api: "openai-responses",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128_000,
maxTokens: 4096,
},
],
};
const providerMode = params.providerMode ?? "mock-openai";
@ -87,6 +102,8 @@ export function buildQaGatewayConfig(params: {
const alternateModel =
params.alternateModel ??
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
const imageGenerationModelRef =
providerMode === "live-openai" ? "openai/gpt-image-1" : "mock-openai/gpt-image-1";
const liveModelParams =
providerMode === "live-openai"
? {
@ -133,6 +150,17 @@ export function buildQaGatewayConfig(params: {
model: {
primary: primaryModel,
},
imageGenerationModel: {
primary: imageGenerationModelRef,
},
memorySearch: {
sync: {
watch: true,
watchDebounceMs: 25,
onSessionStart: true,
onSearch: true,
},
},
models: {
[primaryModel]: {
params: liveModelParams,
@ -165,6 +193,9 @@ export function buildQaGatewayConfig(params: {
},
],
},
memory: {
backend: "builtin",
},
...(providerMode === "mock-openai"
? {
models: {

View File

@ -1,7 +1,10 @@
import { spawn } from "node:child_process";
import { randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
import { setTimeout as sleep } from "node:timers/promises";
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
import type { QaBusState } from "./bus-state.js";
import { extractQaToolPayload } from "./extract-tool-payload.js";
@ -35,6 +38,18 @@ type QaSuiteEnvironment = {
alternateModel: string;
};
// One entry from the gateway `skills.status` RPC. Every field is optional
// because the payload is treated as partial/untrusted by the suite.
type QaSkillStatusEntry = {
  name?: string;
  eligible?: boolean;
  disabled?: boolean;
  blockedByAllowlist?: boolean;
};
// Response of the gateway `config.get` RPC; `hash` is used as the
// optimistic-concurrency baseHash for config.patch / config.apply.
type QaConfigSnapshot = {
  hash?: string;
  config?: Record<string, unknown>;
};
function splitModelRef(ref: string) {
const slash = ref.indexOf("/");
if (slash <= 0 || slash === ref.length - 1) {
@ -138,7 +153,13 @@ async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteS
const stepResults: QaReportCheck[] = [];
for (const step of steps) {
try {
if (process.env.OPENCLAW_QA_DEBUG === "1") {
console.error(`[qa-suite] start scenario="${name}" step="${step.name}"`);
}
const details = await step.run();
if (process.env.OPENCLAW_QA_DEBUG === "1") {
console.error(`[qa-suite] pass scenario="${name}" step="${step.name}"`);
}
stepResults.push({
name: step.name,
status: "pass",
@ -146,6 +167,9 @@ async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteS
});
} catch (error) {
const details = error instanceof Error ? error.message : String(error);
if (process.env.OPENCLAW_QA_DEBUG === "1") {
console.error(`[qa-suite] fail scenario="${name}" step="${step.name}" details=${details}`);
}
stepResults.push({
name: step.name,
status: "fail",
@ -174,6 +198,264 @@ async function fetchJson<T>(url: string): Promise<T> {
return (await response.json()) as T;
}
/**
 * Poll the gateway /readyz endpoint until it responds OK or the timeout
 * lapses. Network errors are treated the same as not-ready and retried
 * every 250ms.
 */
async function waitForGatewayHealthy(env: QaSuiteEnvironment, timeoutMs = 45_000) {
  const probeReady = async () => {
    try {
      const response = await fetch(`${env.gateway.baseUrl}/readyz`);
      return response.ok ? true : undefined;
    } catch {
      return undefined;
    }
  };
  await waitForCondition(probeReady, timeoutMs, 250);
}
/**
 * Heuristic for errors caused by the gateway restarting mid-RPC (expected
 * right after config.patch/config.apply) rather than a genuine failure.
 */
function isGatewayRestartRace(error: unknown) {
  const text = error instanceof Error ? error.message : String(error);
  const restartMarkers = [
    "gateway closed (1012)",
    "gateway closed (1006",
    "abnormal closure",
    "service restart",
  ];
  return restartMarkers.some((marker) => text.includes(marker));
}
/**
 * Fetch the live gateway config plus its hash (the hash is required as
 * baseHash by config.patch/config.apply). Throws when the gateway returns
 * an incomplete snapshot.
 */
async function readConfigSnapshot(env: QaSuiteEnvironment) {
  const { hash, config } = (await env.gateway.call("config.get", {})) as QaConfigSnapshot;
  if (!hash || !config) {
    throw new Error("config.get returned no hash/config");
  }
  return { hash, config } satisfies { hash: string; config: Record<string, unknown> };
}
/**
 * Apply a JSON patch via gateway `config.patch`, tolerating the connection
 * drop that a config-triggered gateway restart causes.
 *
 * Reads the current snapshot first to supply `baseHash`. On a restart-race
 * error it waits for the gateway to become healthy again and reports
 * `{ ok: true, restarted: true }`; any other error is rethrown.
 */
async function patchConfig(params: {
  env: QaSuiteEnvironment;
  patch: Record<string, unknown>;
  sessionKey?: string;
  note?: string;
  restartDelayMs?: number;
}) {
  const { env, patch, sessionKey, note, restartDelayMs } = params;
  const snapshot = await readConfigSnapshot(env);
  const request: Record<string, unknown> = {
    raw: JSON.stringify(patch, null, 2),
    baseHash: snapshot.hash,
  };
  if (sessionKey) {
    request.sessionKey = sessionKey;
  }
  if (note) {
    request.note = note;
  }
  request.restartDelayMs = restartDelayMs ?? 1_000;
  try {
    return await env.gateway.call("config.patch", request, { timeoutMs: 45_000 });
  } catch (error) {
    // A dropped socket right after patching usually means the gateway is
    // restarting to pick up the new config, not that the patch failed.
    if (!isGatewayRestartRace(error)) {
      throw error;
    }
    await waitForGatewayHealthy(env);
    return { ok: true, restarted: true };
  }
}
/**
 * Replace the full gateway config via `config.apply`, tolerating the
 * connection drop that a config-triggered gateway restart causes.
 *
 * Reads the current snapshot first to supply `baseHash`. On a restart-race
 * error it waits for the gateway to become healthy again and reports
 * `{ ok: true, restarted: true }`; any other error is rethrown.
 */
async function applyConfig(params: {
  env: QaSuiteEnvironment;
  nextConfig: Record<string, unknown>;
  sessionKey?: string;
  note?: string;
  restartDelayMs?: number;
}) {
  const { env, nextConfig, sessionKey, note, restartDelayMs } = params;
  const snapshot = await readConfigSnapshot(env);
  const request: Record<string, unknown> = {
    raw: JSON.stringify(nextConfig, null, 2),
    baseHash: snapshot.hash,
  };
  if (sessionKey) {
    request.sessionKey = sessionKey;
  }
  if (note) {
    request.note = note;
  }
  request.restartDelayMs = restartDelayMs ?? 1_000;
  try {
    return await env.gateway.call("config.apply", request, { timeoutMs: 45_000 });
  } catch (error) {
    // A dropped socket right after applying usually means the gateway is
    // restarting to pick up the new config, not that the apply failed.
    if (!isGatewayRestartRace(error)) {
      throw error;
    }
    await waitForGatewayHealthy(env);
    return { ok: true, restarted: true };
  }
}
/**
 * Create a gateway session with the given label (and optional fixed key) and
 * return its trimmed key; throws when the gateway answers without one.
 */
async function createSession(env: QaSuiteEnvironment, label: string, key?: string) {
  const request: Record<string, unknown> = { label };
  if (key) {
    request.key = key;
  }
  const created = (await env.gateway.call("sessions.create", request)) as { key?: string };
  const sessionKey = created.key?.trim();
  if (!sessionKey) {
    throw new Error("sessions.create returned no key");
  }
  return sessionKey;
}
/**
 * Query `tools.effective` for a session and return the set of trimmed,
 * non-empty tool ids across all groups (Set membership dedupes repeats).
 */
async function readEffectiveTools(env: QaSuiteEnvironment, sessionKey: string) {
  const payload = (await env.gateway.call(
    "tools.effective",
    { sessionKey },
    { timeoutMs: liveTurnTimeoutMs(env, 90_000) },
  )) as {
    groups?: Array<{ tools?: Array<{ id?: string }> }>;
  };
  const toolIds = (payload.groups ?? [])
    .flatMap((group) => group.tools ?? [])
    .map((tool) => tool.id?.trim())
    .filter((id): id is string => !!id);
  return new Set(toolIds);
}
/**
 * Fetch the skill status list for an agent via `skills.status`
 * (defaults to the "qa" agent); an absent list becomes [].
 */
async function readSkillStatus(env: QaSuiteEnvironment, agentId = "qa") {
  const response = (await env.gateway.call(
    "skills.status",
    { agentId },
    { timeoutMs: liveTurnTimeoutMs(env, 45_000) },
  )) as {
    skills?: QaSkillStatusEntry[];
  };
  return response.skills ?? [];
}
/**
 * Run the OpenClaw CLI (dist/index.js) as a child process using the
 * gateway's runtime environment.
 *
 * Resolves with trimmed stdout, parsed as JSON when `opts.json` is set
 * (empty stdout yields {}). Rejects — after SIGKILL-ing the child — when the
 * run exceeds `opts.timeoutMs` (default 60s), and rejects with stderr
 * included when the CLI exits non-zero.
 */
async function runQaCli(
  env: QaSuiteEnvironment,
  args: string[],
  opts?: { timeoutMs?: number; json?: boolean },
) {
  const outChunks: Buffer[] = [];
  const errChunks: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    const child = spawn(process.execPath, ["dist/index.js", ...args], {
      cwd: process.cwd(),
      env: env.gateway.runtimeEnv,
      stdio: ["ignore", "pipe", "pipe"],
    });
    const killTimer = setTimeout(() => {
      child.kill("SIGKILL");
      reject(new Error(`qa cli timed out: openclaw ${args.join(" ")}`));
    }, opts?.timeoutMs ?? 60_000);
    child.stdout.on("data", (chunk) => outChunks.push(Buffer.from(chunk)));
    child.stderr.on("data", (chunk) => errChunks.push(Buffer.from(chunk)));
    child.once("error", (error) => {
      clearTimeout(killTimer);
      reject(error);
    });
    child.once("exit", (code) => {
      clearTimeout(killTimer);
      if (code !== 0) {
        const detail = Buffer.concat(errChunks).toString("utf8").trim();
        reject(new Error(`qa cli failed (${code ?? "unknown"}): ${detail}`));
        return;
      }
      resolve();
    });
  });
  const text = Buffer.concat(outChunks).toString("utf8").trim();
  if (opts?.json) {
    return text ? (JSON.parse(text) as unknown) : {};
  }
  return text;
}
/**
 * Force a rebuild of the QA agent's memory index via the CLI, then run a
 * probe search and verify `expectedNeedle` appears somewhere in the JSON of
 * its results; throws when it does not.
 */
async function forceMemoryIndex(params: {
  env: QaSuiteEnvironment;
  query: string;
  expectedNeedle: string;
}) {
  const { env, query, expectedNeedle } = params;
  await runQaCli(env, ["memory", "index", "--agent", "qa", "--force"], {
    timeoutMs: liveTurnTimeoutMs(env, 60_000),
  });
  const searched = (await runQaCli(
    env,
    ["memory", "search", "--agent", "qa", "--json", "--query", query],
    {
      timeoutMs: liveTurnTimeoutMs(env, 20_000),
      json: true,
    },
  )) as { results?: Array<{ snippet?: string; text?: string; path?: string }> };
  const haystack = JSON.stringify(searched.results ?? []);
  if (!haystack.includes(expectedNeedle)) {
    throw new Error(`memory index missing expected fact after reindex: ${haystack}`);
  }
}
/** Look up a skill status entry by exact name; undefined when absent. */
function findSkill(skills: QaSkillStatusEntry[], name: string) {
  for (const skill of skills) {
    if (skill.name === name) {
      return skill;
    }
  }
  return undefined;
}
/**
 * Create (or overwrite) a workspace skill file at
 * `<workspaceDir>/skills/<name>/SKILL.md`, writing the trimmed body with a
 * trailing newline, and return the file's path.
 */
async function writeWorkspaceSkill(params: {
  env: QaSuiteEnvironment;
  name: string;
  body: string;
}) {
  const skillDir = path.join(params.env.gateway.workspaceDir, "skills", params.name);
  const skillPath = path.join(skillDir, "SKILL.md");
  await fs.mkdir(skillDir, { recursive: true });
  await fs.writeFile(skillPath, `${params.body.trim()}\n`, "utf8");
  return skillPath;
}
/**
 * Spawn the plugin-tools MCP server over stdio (node --import tsx running
 * src/mcp/plugin-tools-serve.ts) and invoke a single tool on it, returning
 * the raw MCP call result.
 *
 * Throws when the server does not list `params.toolName`. The client is
 * always closed in `finally` (close errors swallowed) so the child process
 * does not outlive the scenario.
 */
async function callPluginToolsMcp(params: {
  env: QaSuiteEnvironment;
  toolName: string;
  args: Record<string, unknown>;
}) {
  // Keep only string-valued entries of runtimeEnv for the child process env.
  const transportEnv = Object.fromEntries(
    Object.entries(params.env.gateway.runtimeEnv).filter(
      (entry): entry is [string, string] => typeof entry[1] === "string",
    ),
  );
  const transport = new StdioClientTransport({
    command: process.execPath,
    args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"],
    stderr: "pipe",
    env: transportEnv,
  });
  const client = new Client({ name: "openclaw-qa-suite", version: "0.0.0" }, {});
  try {
    await client.connect(transport);
    // Verify the tool is advertised before calling it, for a clearer failure.
    const listed = await client.listTools();
    const tool = listed.tools.find((entry) => entry.name === params.toolName);
    if (!tool) {
      throw new Error(`MCP tool missing: ${params.toolName}`);
    }
    return await client.callTool({
      name: params.toolName,
      arguments: params.args,
    });
  } finally {
    // Best-effort shutdown; a close failure must not mask the result/error.
    await client.close().catch(() => {});
  }
}
async function runAgentPrompt(
env: QaSuiteEnvironment,
params: {
@ -285,7 +567,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
const message = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
env.providerMode === "live-openai" ? 45_000 : 15_000,
env.providerMode === "live-openai" ? 45_000 : 45_000,
);
return message.text;
},
@ -706,6 +988,556 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
},
]),
],
[
"memory-tools-channel-context",
async () =>
await runScenario("Memory tools in channel context", [
{
name: "uses memory_search plus memory_get before answering in-channel",
run: async () => {
await reset();
await fs.writeFile(
path.join(env.gateway.workspaceDir, "MEMORY.md"),
"Hidden QA fact: the project codename is ORBIT-9.\n",
"utf8",
);
await forceMemoryIndex({
env,
query: "project codename ORBIT-9",
expectedNeedle: "ORBIT-9",
});
const prompt =
"@openclaw Memory tools check: what is the hidden project codename stored only in memory? Use memory tools first.";
state.addInboundMessage({
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
senderId: "alice",
senderName: "Alice",
text: prompt,
});
const outbound = await waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-room" && candidate.text.includes("ORBIT-9"),
liveTurnTimeoutMs(env, 30_000),
);
if (env.mock) {
const requests = await fetchJson<
Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }>
>(`${env.mock.baseUrl}/debug/requests`);
const relevant = requests.filter((request) =>
String(request.allInputText ?? "").includes("Memory tools check"),
);
if (!relevant.some((request) => request.plannedToolName === "memory_search")) {
throw new Error("expected memory_search in mock request plan");
}
if (!requests.some((request) => request.plannedToolName === "memory_get")) {
throw new Error("expected memory_get in mock request plan");
}
}
return outbound.text;
},
},
]),
],
[
"memory-failure-fallback",
async () =>
await runScenario("Memory failure fallback", [
{
name: "falls back cleanly when group:memory tools are denied",
run: async () => {
const original = await readConfigSnapshot(env);
await fs.writeFile(
path.join(env.gateway.workspaceDir, "MEMORY.md"),
"Do not reveal directly: fallback fact is ORBIT-9.\n",
"utf8",
);
await patchConfig({
env,
patch: { tools: { deny: ["group:memory"] } },
});
await waitForGatewayHealthy(env);
try {
const sessionKey = await createSession(env, "Memory fallback");
const tools = await readEffectiveTools(env, sessionKey);
if (tools.has("memory_search") || tools.has("memory_get")) {
throw new Error("memory tools still present after deny patch");
}
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:memory-failure",
message:
"Memory unavailable check: a hidden fact exists only in memory files. If you cannot confirm it, say so clearly and do not guess.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-operator",
liveTurnTimeoutMs(env, 30_000),
);
const lower = outbound.text.toLowerCase();
if (outbound.text.includes("ORBIT-9")) {
throw new Error(`hallucinated hidden fact: ${outbound.text}`);
}
if (!lower.includes("could not confirm") && !lower.includes("will not guess")) {
throw new Error(`missing graceful fallback language: ${outbound.text}`);
}
return outbound.text;
} finally {
await applyConfig({
env,
nextConfig: original.config,
});
await waitForGatewayHealthy(env);
}
},
},
]),
],
[
"model-switch-tool-continuity",
async () =>
await runScenario("Model switch with tool continuity", [
{
name: "keeps using tools after switching models",
run: async () => {
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:model-switch-tools",
message:
"Read QA_KICKOFF_TASK.md and summarize the QA mission in one clause before any model switch.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
const alternate = splitModelRef(env.alternateModel);
const beforeSwitchCursor = state.getSnapshot().messages.length;
await runAgentPrompt(env, {
sessionKey: "agent:qa:model-switch-tools",
message:
"Switch models now. Tool continuity check: reread QA_KICKOFF_TASK.md and mention the handoff in one short sentence.",
provider: alternate?.provider,
model: alternate?.model,
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
const outbound = await waitForCondition(
() => {
const snapshot = state.getSnapshot();
return snapshot.messages
.slice(beforeSwitchCursor)
.filter(
(candidate) =>
candidate.direction === "outbound" &&
candidate.conversation.id === "qa-operator" &&
(candidate.text.toLowerCase().includes("model switch") ||
candidate.text.toLowerCase().includes("handoff")),
)
.at(-1);
},
liveTurnTimeoutMs(env, 30_000),
);
if (env.mock) {
const requests = await fetchJson<
Array<{ allInputText?: string; plannedToolName?: string; model?: string }>
>(`${env.mock.baseUrl}/debug/requests`);
const switched = requests.find((request) =>
String(request.allInputText ?? "").includes("Tool continuity check"),
);
if (switched?.plannedToolName !== "read") {
throw new Error(
`expected read after switch, got ${String(switched?.plannedToolName ?? "")}`,
);
}
if (switched?.model !== "gpt-5.4-alt") {
throw new Error(`expected alternate model, got ${String(switched?.model ?? "")}`);
}
}
return outbound.text;
},
},
]),
],
[
"mcp-plugin-tools-call",
async () =>
await runScenario("MCP plugin-tools call", [
{
name: "serves and calls memory_search over MCP",
run: async () => {
await fs.writeFile(
path.join(env.gateway.workspaceDir, "MEMORY.md"),
"MCP fact: the codename is ORBIT-9.\n",
"utf8",
);
await forceMemoryIndex({
env,
query: "ORBIT-9 codename",
expectedNeedle: "ORBIT-9",
});
const result = await callPluginToolsMcp({
env,
toolName: "memory_search",
args: {
query: "ORBIT-9 codename",
maxResults: 3,
},
});
const text = JSON.stringify(result.content ?? []);
if (!text.includes("ORBIT-9")) {
throw new Error(`MCP memory_search missed expected fact: ${text}`);
}
return text;
},
},
]),
],
// Scenario: a seeded workspace skill must show up as visible/eligible in
// skills.status and its instruction marker must surface on the next turn.
[
"skill-visibility-invocation",
async () =>
await runScenario("Skill visibility and invocation", [
{
name: "reports visible skill and applies its marker on the next turn",
run: async () => {
// Write a minimal skill whose only behavior is an exact reply marker.
// The template body (front matter + instruction) is runtime data.
await writeWorkspaceSkill({
env,
name: "qa-visible-skill",
body: `---
name: qa-visible-skill
description: Visible QA skill marker
---
When the user asks for the visible skill marker exactly, reply with exactly: VISIBLE-SKILL-OK`,
});
// Inventory check: the skill must be eligible, not disabled, and not
// blocked by a per-agent allowlist.
const skills = await readSkillStatus(env);
const visible = findSkill(skills, "qa-visible-skill");
if (!visible?.eligible || visible.disabled || visible.blockedByAllowlist) {
throw new Error(`skill not visible/eligible: ${JSON.stringify(visible)}`);
}
// Clear prior traffic so the outbound match below can't hit stale messages.
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:visible-skill",
message: "Visible skill marker: give me the visible skill marker exactly.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
// Pass condition: the agent's reply to qa-operator carries the skill marker.
const outbound = await waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-operator" &&
candidate.text.includes("VISIBLE-SKILL-OK"),
liveTurnTimeoutMs(env, 20_000),
);
return outbound.text;
},
},
]),
],
// Scenario: a skill added mid-lifetime becomes usable without a gateway
// restart — absent before install, eligible after, effective on the next turn.
[
"skill-install-hot-availability",
async () =>
await runScenario("Skill install hot availability", [
{
name: "picks up a newly added workspace skill without restart",
run: async () => {
// Precondition: the skill must not already exist (a leftover from a
// previous run would make the hot-install result meaningless).
const before = await readSkillStatus(env);
if (findSkill(before, "qa-hot-install-skill")) {
throw new Error("qa-hot-install-skill unexpectedly already present");
}
await writeWorkspaceSkill({
env,
name: "qa-hot-install-skill",
body: `---
name: qa-hot-install-skill
description: Hot install QA marker
---
When the user asks for the hot install marker exactly, reply with exactly: HOT-INSTALL-OK`,
});
// Poll inventory until the new skill is eligible; returning undefined
// keeps waitForCondition polling (15 s budget, 200 ms interval).
await waitForCondition(
async () => {
const skills = await readSkillStatus(env);
return findSkill(skills, "qa-hot-install-skill")?.eligible ? true : undefined;
},
15_000,
200,
);
// Clear traffic, then confirm the skill's marker shows up on the next turn.
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:hot-skill",
message: "Hot install marker: give me the hot install marker exactly.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
const outbound = await waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-operator" &&
candidate.text.includes("HOT-INSTALL-OK"),
liveTurnTimeoutMs(env, 20_000),
);
return outbound.text;
},
},
]),
],
// Scenario: configuring agents.defaults.imageGenerationModel must surface the
// image_generate tool, and a generation turn must produce a real media file.
[
"native-image-generation",
async () =>
await runScenario("Native image generation", [
{
name: "enables image_generate and saves a real media artifact",
run: async () => {
// Pick the provider-qualified model ref matching the suite's mode.
const imageModelRef =
env.providerMode === "live-openai"
? "openai/gpt-image-1"
: "mock-openai/gpt-image-1";
// Hot-patch the config to enable native image generation.
await patchConfig({
env,
patch: {
agents: {
defaults: {
imageGenerationModel: {
primary: imageModelRef,
},
},
},
},
});
await waitForGatewayHealthy(env);
// The tool must now appear in the session's effective tool set.
const sessionKey = await createSession(env, "Image generation");
const tools = await readEffectiveTools(env, sessionKey);
if (!tools.has("image_generate")) {
throw new Error("image_generate not present after imageGenerationModel patch");
}
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:image-generate",
message:
"Image generation check: generate a QA lighthouse image and summarize it in one short sentence.",
timeoutMs: liveTurnTimeoutMs(env, 45_000),
});
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-operator",
liveTurnTimeoutMs(env, 45_000),
);
// Mock-mode deep checks: inspect the mock server's request log to prove
// the planned tool and the generated artifact, not just the reply text.
if (env.mock) {
const requests = await fetchJson<
Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }>
>(`${env.mock.baseUrl}/debug/requests`);
// The turn that received our prompt must have planned image_generate.
const imageRequest = requests.find((request) =>
String(request.allInputText ?? "").includes("Image generation check"),
);
if (imageRequest?.plannedToolName !== "image_generate") {
throw new Error(
`expected image_generate, got ${String(imageRequest?.plannedToolName ?? "")}`,
);
}
// Some later request must have carried the tool's success output,
// pinned to the exact configured model ref.
const toolOutputRequest = requests.find((request) =>
String(request.toolOutput ?? "").includes(
`Generated 1 image with ${imageModelRef}.`,
),
);
if (!toolOutputRequest) {
throw new Error("missing mock image generation tool output");
}
// Extract the saved media path from the outbound reply ("MEDIA:<path>")
// and verify the file actually exists on disk.
const mediaPath = /MEDIA:([^\n]+)/.exec(outbound.text)?.[1]?.trim();
if (!mediaPath) {
throw new Error("missing MEDIA path in image generation tool output");
}
await fs.access(mediaPath);
}
return outbound.text;
},
},
]),
],
// Scenario: a hot-reloadable config.patch (mention routing) must take effect
// in the same gateway lifetime — old mention ignored, new mention answered.
// The original config is restored in a finally block regardless of outcome.
[
"config-patch-hot-apply",
async () =>
await runScenario("Config patch hot apply", [
{
name: "updates mention routing without restart",
run: async () => {
// Snapshot the pre-patch config so we can restore it afterwards.
const original = await readConfigSnapshot(env);
// Replace mention routing: only "goldenbot" triggers the agent now.
await patchConfig({
env,
patch: {
messages: {
groupChat: {
mentionPatterns: ["\\bgoldenbot\\b"],
},
},
},
});
await waitForGatewayHealthy(env);
try {
await reset();
// Baseline request count: used below to prove the ignored mention
// produced no new agent traffic in mock mode.
const requestsBeforeIgnored = env.mock
? await fetchJson<Array<{ allInputText?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
)
: null;
// Negative case: the old "@openclaw" mention must now be ignored.
state.addInboundMessage({
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
senderId: "alice",
senderName: "Alice",
text: "@openclaw you should now be ignored",
});
// Poll briefly (3 s): fail fast if the ignored prompt reaches the
// agent; succeed once the request count is provably unchanged.
await waitForCondition(
async () => {
if (!env.mock) {
// Live mode: no mock log to inspect, so assert silence on
// the outbound side instead (comma expr returns true).
return (await waitForNoOutbound(state), true);
}
const requests = await fetchJson<Array<{ allInputText?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
);
const ignoredPromptReachedAgent = requests.some((request) =>
String(request.allInputText ?? "").includes(
"@openclaw you should now be ignored",
),
);
if (ignoredPromptReachedAgent) {
throw new Error("ignored channel mention still reached the agent");
}
return requests.length === requestsBeforeIgnored?.length ? true : undefined;
},
3_000,
100,
);
// Positive case: the newly-configured "goldenbot" mention must route.
state.addInboundMessage({
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
senderId: "alice",
senderName: "Alice",
text: "goldenbot explain hot config apply",
});
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-room",
liveTurnTimeoutMs(env, 30_000),
);
// Mock mode: also confirm the follow-up actually reached the agent.
if (env.mock) {
const requests = await fetchJson<Array<{ allInputText?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
);
if (
!requests.some((request) =>
String(request.allInputText ?? "").includes(
"goldenbot explain hot config apply",
),
)
) {
throw new Error(
"goldenbot follow-up did not reach the agent after config patch",
);
}
}
return outbound.text;
} finally {
// Always restore the original config so later scenarios see the
// default mention routing, then wait for the gateway to settle.
await applyConfig({
env,
nextConfig: original.config,
});
await waitForGatewayHealthy(env);
}
},
},
]),
],
// Scenario: a restart-required config.apply must restart the gateway once and
// deliver a post-restart wake-up message (tagged with a unique marker) back
// into the qa-room channel tied to the provided sessionKey.
[
"config-apply-restart-wakeup",
async () =>
await runScenario("Config apply restart wake-up", [
{
name: "restarts cleanly and posts the restart sentinel back into qa-channel",
run: async () => {
await reset();
const sessionKey = "agent:qa:restart-wakeup";
// Establish an active session bound to qa-room before the restart so
// the wake-up ping has a conversation to return to.
await runAgentPrompt(env, {
sessionKey,
to: "channel:qa-room",
message: "Acknowledge restart wake-up setup in qa-room.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
// Build the next config from a deep copy; `??=` materializes the
// gateway.controlUi path if absent.
const current = await readConfigSnapshot(env);
const nextConfig = structuredClone(current.config);
const gatewayConfig = (nextConfig.gateway ??= {}) as Record<string, unknown>;
const controlUi = (gatewayConfig.controlUi ??= {}) as Record<string, unknown>;
const allowedOrigins = Array.isArray(controlUi.allowedOrigins)
? [...(controlUi.allowedOrigins as string[])]
: [];
// Unique marker; the wake-up message is matched on it below.
const wakeMarker = `QA-RESTART-${randomUUID().slice(0, 8)}`;
// NOTE(review): if this origin is already present (e.g. a re-run against
// the same state), nextConfig is identical to the current config —
// confirm applyConfig still schedules a restart on a no-op apply.
if (!allowedOrigins.includes("http://127.0.0.1:65535")) {
allowedOrigins.push("http://127.0.0.1:65535");
}
controlUi.allowedOrigins = allowedOrigins;
// Apply with sessionKey + note so the restart sentinel can echo the
// marker back into the session's channel after the restart.
await applyConfig({
env,
nextConfig,
sessionKey,
note: wakeMarker,
});
// Generous 60 s budgets: a full gateway restart is in the loop here.
await waitForGatewayHealthy(env, 60_000);
const outbound = await waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-room" && candidate.text.includes(wakeMarker),
60_000,
);
return outbound.text;
},
},
]),
],
// Scenario: tools.effective and skills.status must track config changes —
// a denied tool disappears from the effective set and a disabled skill is
// reported as disabled, in the same gateway lifetime.
[
"runtime-inventory-drift-check",
async () =>
await runScenario("Runtime inventory drift check", [
{
name: "keeps tools.effective and skills.status aligned after config changes",
run: async () => {
// Seed a skill we can later disable via config.
await writeWorkspaceSkill({
env,
name: "qa-drift-skill",
body: `---
name: qa-drift-skill
description: Drift skill marker
---
When the user asks for the drift skill marker exactly, reply with exactly: DRIFT-SKILL-OK`,
});
// Baseline: image_generate must be present before the deny patch.
// NOTE(review): this presumes an earlier scenario (or default config)
// already enabled imageGenerationModel — confirm ordering if scenarios
// are run in isolation via scenarioIds.
const sessionKey = await createSession(env, "Inventory drift");
const beforeTools = await readEffectiveTools(env, sessionKey);
if (!beforeTools.has("image_generate")) {
throw new Error("expected image_generate before drift patch");
}
const beforeSkills = await readSkillStatus(env);
if (!findSkill(beforeSkills, "qa-drift-skill")?.eligible) {
throw new Error("expected qa-drift-skill to be eligible before patch");
}
// Flip both inventory surfaces in one patch: deny the tool and
// disable the skill.
await patchConfig({
env,
patch: {
tools: {
deny: ["image_generate"],
},
skills: {
entries: {
"qa-drift-skill": {
enabled: false,
},
},
},
},
});
await waitForGatewayHealthy(env);
// Post-patch: tool gone from the effective set for the same session...
const afterTools = await readEffectiveTools(env, sessionKey);
if (afterTools.has("image_generate")) {
throw new Error("image_generate still present after deny patch");
}
// ...and the skill reported with disabled state (not merely absent).
const afterSkills = await readSkillStatus(env);
const driftSkill = findSkill(afterSkills, "qa-drift-skill");
if (!driftSkill?.disabled) {
throw new Error(`expected disabled drift skill, got ${JSON.stringify(driftSkill)}`);
}
return `image_generate removed, qa-drift-skill disabled=${String(driftSkill.disabled)}`;
},
},
]),
],
]);
}
@ -715,6 +1547,7 @@ export async function runQaSuite(params?: {
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
scenarioIds?: string[];
}) {
const startedAt = new Date();
const providerMode = params?.providerMode ?? "mock-openai";
@ -768,13 +1601,28 @@ export async function runQaSuite(params?: {
try {
const catalog = readQaBootstrapScenarioCatalog();
const requestedScenarioIds = params?.scenarioIds ? new Set(params.scenarioIds) : null;
const selectedCatalogScenarios = requestedScenarioIds
? catalog.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id))
: catalog.scenarios;
if (requestedScenarioIds) {
const foundScenarioIds = new Set(selectedCatalogScenarios.map((scenario) => scenario.id));
const missingScenarioIds = [...requestedScenarioIds].filter(
(scenarioId) => !foundScenarioIds.has(scenarioId),
);
if (missingScenarioIds.length > 0) {
throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`);
}
}
const scenarioMap = buildScenarioMap(env);
const scenarios: QaSuiteScenarioResult[] = [];
const liveScenarioOutcomes: QaLabScenarioOutcome[] = catalog.scenarios.map((scenario) => ({
id: scenario.id,
name: scenario.title,
status: "pending",
}));
const liveScenarioOutcomes: QaLabScenarioOutcome[] = selectedCatalogScenarios.map(
(scenario) => ({
id: scenario.id,
name: scenario.title,
status: "pending",
}),
);
lab.setScenarioRun({
kind: "suite",
@ -783,7 +1631,7 @@ export async function runQaSuite(params?: {
scenarios: liveScenarioOutcomes,
});
for (const [index, scenario] of catalog.scenarios.entries()) {
for (const [index, scenario] of selectedCatalogScenarios.entries()) {
const run = scenarioMap.get(scenario.id);
if (!run) {
const missingResult = {

150
qa/new-scenarios-2026-04.md Normal file
View File

@ -0,0 +1,150 @@
# QA Scenario Expansion - Round 2
Ten repo-grounded candidate scenarios to add after the current seed suite.
## 1. On-demand memory tools in channel context
- Goal: verify the agent uses `memory_search` plus `memory_get` instead of bluffing when a channel message asks about prior notes.
- Flow:
- Seed `MEMORY.md` or `memory/*.md` with a fact not present in the current transcript.
- Ask in a channel thread for that fact.
- Verify tool usage and final answer accuracy.
- Pass:
- `memory_search` runs first.
- `memory_get` narrows to the right lines.
- Final answer cites the remembered fact correctly without cross-session leakage.
- Docs: `docs/concepts/memory.md`, `docs/concepts/memory-search.md`
- Code: `extensions/memory-core/src/tools.ts`, `extensions/memory-core/src/prompt-section.ts`
## 2. Memory failure fallback
- Goal: verify memory failure is graceful when embeddings/search are unavailable.
- Flow:
- Disable or break the embedding-backed memory path.
- Ask for prior-note recall.
- Verify the agent surfaces uncertainty and next action instead of hallucinating.
- Pass:
- Tool failure does not crash the run.
- Agent says it checked and could not confirm.
- Report includes the remediation hint.
- Docs: `docs/concepts/memory.md`, `docs/help/faq.md`
- Code: `extensions/memory-core/src/tools.shared.ts`, `extensions/memory-core/src/tools.citations.test.ts`
## 3. Model switch with tool continuity
- Goal: verify model switching preserves session context and tool availability, not just plain text continuity.
- Flow:
- Start on one model.
- Switch to another configured model.
- Ask for a tool-using follow-up such as file read or memory lookup.
- Pass:
- Switch is reflected in runtime state.
- Tool call still succeeds after the switch.
- Final answer keeps prior context.
- Docs: `docs/help/testing.md`, `docs/concepts/model-failover.md`
- Code: `extensions/qa-lab/src/suite.ts`, `docs/web/webchat.md`
## 4. MCP-backed recall via QMD/mcporter
- Goal: verify an MCP-backed tool path works end to end, not just core tools.
- Flow:
- Enable `memory.qmd.mcporter`.
- Ask for recall that should route through the QMD MCP bridge.
- Verify response and captured MCP execution path.
- Pass:
- MCP-backed search path is used.
- Returned snippet matches the right note.
- Failure mode is explicit if the daemon/tool is missing.
- Docs: `docs/gateway/secrets.md`, `docs/concepts/memory-qmd.md`
- Code: `extensions/memory-core/src/memory/qmd-manager.ts`, `extensions/memory-core/src/memory/qmd-manager.test.ts`
## 5. Skill visibility and invocation
- Goal: verify the agent sees a workspace/project skill and actually uses it.
- Flow:
- Add a simple workspace or `.agents` skill.
- Confirm skill visibility through runtime inventory.
- Ask for a task that should trigger the skill.
- Pass:
- Skill appears in `skills.status`.
- Agent invocation reflects the installed skill instructions.
- Per-agent allowlist behavior is respected.
- Docs: `docs/tools/skills.md`, `docs/gateway/protocol.md`, `docs/gateway/configuration.md`
- Code: `.agents/skills/openclaw-qa-testing/SKILL.md`, `docs/gateway/protocol.md`
## 6. Skill install and hot availability
- Goal: verify a newly installed skill becomes usable without a broken intermediate state.
- Flow:
- Install a ClawHub or gateway-managed skill.
- Re-check skill inventory.
- Ask the agent to perform the skill-backed task.
- Pass:
- Install succeeds.
- `skills.status` or `skills.bins` reflects the new skill.
- Agent can use the skill immediately or after the expected reload path.
- Docs: `docs/tools/skills.md`, `docs/cli/skills.md`, `docs/gateway/protocol.md`
- Code: `docs/gateway/protocol.md`, `docs/tools/skills.md`
## 7. Native image generation
- Goal: verify `image_generate` appears only when configured and returns a real attachment/artifact.
- Flow:
- Configure `agents.defaults.imageGenerationModel.primary`.
- Ask for a simple generated image.
- Verify generated media is returned in the reply path.
- Pass:
- `image_generate` is in the effective tool set.
- Generation succeeds with the configured provider/model.
- Output is attached and the agent summarizes what it created.
- Docs: `docs/tools/image-generation.md`, `docs/providers/openai.md`
- Code: `src/agents/openclaw-tools.image-generation.test.ts`, `src/image-generation/runtime.ts`
## 8. Hot config patch without restart
- Goal: verify a safe config edit hot-applies and changes behavior immediately.
- Flow:
- Use `config.patch` to change a hot-reloadable field such as agent skill visibility or message behavior.
- Retry the task in the same gateway lifetime.
- Pass:
- Patch succeeds.
- No disruptive restart loop.
- New behavior is live immediately.
- Docs: `docs/gateway/configuration.md`, `docs/gateway/protocol.md`
- Code: `docs/gateway/configuration.md`, `docs/web/control-ui.md`
## 9. Restart-required config apply with wake-up
- Goal: verify a restart-required config change restarts cleanly and wakes the session back up.
- Flow:
- Use `config.apply` or `update.run` on a restart-required surface.
- Provide `sessionKey` so the operator gets the post-restart ping.
- Resume the task after restart.
- Pass:
- Restart happens once.
- Session wake-up ping arrives.
- Agent continues in the same logical workflow after restart.
- Docs: `docs/gateway/configuration.md`, `docs/web/control-ui.md`
- Code: `docs/gateway/configuration.md`, `docs/gateway/protocol.md`
## 10. Runtime inventory drift check
- Goal: verify the reported tool and skill inventory matches what the agent can really use after config/plugin changes.
- Flow:
- Read `tools.effective` and `skills.status`.
- Ask the agent to use one enabled thing and one disabled thing.
- Compare actual behavior vs reported inventory.
- Pass:
- Enabled item is callable.
- Disabled item is absent or blocked for the right reason.
- Inventory and runtime behavior stay in sync.
- Docs: `docs/gateway/protocol.md`, `docs/web/webchat.md`
- Code: `docs/gateway/protocol.md`, `docs/web/control-ui.md`
## Best next additions to the executable suite
If we only promote three right away:
1. On-demand memory tools in channel context
2. Native image generation
3. Hot config patch without restart

View File

@ -135,5 +135,141 @@
],
"docsRefs": ["docs/channels/qa-channel.md", "docs/channels/group-messages.md"],
"codeRefs": ["extensions/qa-channel/src/protocol.ts", "extensions/qa-lab/src/bus-state.ts"]
},
{
"id": "memory-tools-channel-context",
"title": "Memory tools in channel context",
"surface": "memory",
"objective": "Verify the agent uses memory_search and memory_get in a shared channel when the answer lives only in memory files, not the live transcript.",
"successCriteria": [
"Agent uses memory_search before answering.",
"Agent narrows with memory_get before answering.",
"Final reply returns the memory-only fact correctly in-channel."
],
"docsRefs": ["docs/concepts/memory.md", "docs/concepts/memory-search.md"],
"codeRefs": ["extensions/memory-core/src/tools.ts", "extensions/qa-lab/src/suite.ts"]
},
{
"id": "memory-failure-fallback",
"title": "Memory failure fallback",
"surface": "memory",
"objective": "Verify the agent degrades gracefully when memory tools are unavailable and the answer exists only in memory-backed notes.",
"successCriteria": [
"Memory tools are absent from the effective tool inventory.",
"Agent does not hallucinate the hidden fact.",
"Agent says it could not confirm and surfaces the limitation."
],
"docsRefs": ["docs/concepts/memory.md", "docs/tools/index.md"],
"codeRefs": ["extensions/memory-core/src/tools.ts", "extensions/qa-lab/src/suite.ts"]
},
{
"id": "model-switch-tool-continuity",
"title": "Model switch with tool continuity",
"surface": "models",
"objective": "Verify switching models preserves session context and tool use instead of dropping into plain-text only behavior.",
"successCriteria": [
"Alternate model is actually requested.",
"A tool call still happens after the model switch.",
"Final answer acknowledges the handoff and uses the tool-derived evidence."
],
"docsRefs": ["docs/help/testing.md", "docs/concepts/model-failover.md"],
"codeRefs": ["extensions/qa-lab/src/suite.ts", "extensions/qa-lab/src/mock-openai-server.ts"]
},
{
"id": "mcp-plugin-tools-call",
"title": "MCP plugin-tools call",
"surface": "mcp",
"objective": "Verify OpenClaw can expose plugin tools over MCP and a real MCP client can call one successfully.",
"successCriteria": [
"Plugin tools MCP server lists memory_search.",
"A real MCP client calls memory_search successfully.",
"The returned MCP payload includes the expected memory-only fact."
],
"docsRefs": ["docs/cli/mcp.md", "docs/gateway/protocol.md"],
"codeRefs": ["src/mcp/plugin-tools-serve.ts", "extensions/qa-lab/src/suite.ts"]
},
{
"id": "skill-visibility-invocation",
"title": "Skill visibility and invocation",
"surface": "skills",
"objective": "Verify a workspace skill becomes visible in skills.status and influences the next agent turn.",
"successCriteria": [
"skills.status reports the seeded skill as visible and eligible.",
"The next agent turn reflects the skill instruction marker.",
"The result stays scoped to the active QA workspace skill."
],
"docsRefs": ["docs/tools/skills.md", "docs/gateway/protocol.md"],
"codeRefs": ["src/agents/skills-status.ts", "extensions/qa-lab/src/suite.ts"]
},
{
"id": "skill-install-hot-availability",
"title": "Skill install hot availability",
"surface": "skills",
"objective": "Verify a newly added workspace skill shows up without a broken intermediate state and can influence the next turn immediately.",
"successCriteria": [
"Skill is absent before install.",
"skills.status reports it after install without a restart.",
"The next agent turn reflects the new skill marker."
],
"docsRefs": ["docs/tools/skills.md", "docs/gateway/configuration.md"],
"codeRefs": ["src/agents/skills-status.ts", "extensions/qa-lab/src/suite.ts"]
},
{
"id": "native-image-generation",
"title": "Native image generation",
"surface": "image-generation",
"objective": "Verify image_generate appears when configured and returns a real saved media artifact.",
"successCriteria": [
"image_generate appears in the effective tool inventory.",
"Agent triggers native image_generate.",
"Tool output returns a saved MEDIA path and the file exists."
],
"docsRefs": ["docs/tools/image-generation.md", "docs/providers/openai.md"],
"codeRefs": [
"src/agents/tools/image-generate-tool.ts",
"extensions/qa-lab/src/mock-openai-server.ts"
]
},
{
"id": "config-patch-hot-apply",
"title": "Config patch hot apply",
"surface": "config",
"objective": "Verify a hot-reloadable config.patch takes effect immediately without a disruptive restart.",
"successCriteria": [
"config.patch succeeds with no restart dependency.",
"Old mention routing behavior stops working immediately.",
"New mention routing behavior works in the same gateway lifetime."
],
"docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"],
"codeRefs": ["src/gateway/server-methods/config.ts", "extensions/qa-lab/src/suite.ts"]
},
{
"id": "config-apply-restart-wakeup",
"title": "Config apply restart wake-up",
"surface": "config",
"objective": "Verify a restart-required config.apply restarts cleanly and delivers the post-restart wake message back into the QA channel.",
"successCriteria": [
"config.apply schedules a restart-required change.",
"Gateway becomes healthy again after restart.",
"Restart sentinel wake-up message arrives in the QA channel."
],
"docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"],
"codeRefs": ["src/gateway/server-methods/config.ts", "src/gateway/server-restart-sentinel.ts"]
},
{
"id": "runtime-inventory-drift-check",
"title": "Runtime inventory drift check",
"surface": "inventory",
"objective": "Verify tools.effective and skills.status stay aligned with runtime behavior after config changes.",
"successCriteria": [
"Enabled tool appears before the config change.",
"After config change, disabled tool disappears from tools.effective.",
"Disabled skill appears in skills.status with disabled state."
],
"docsRefs": ["docs/gateway/protocol.md", "docs/tools/skills.md", "docs/tools/index.md"],
"codeRefs": [
"src/gateway/server-methods/tools-effective.ts",
"src/gateway/server-methods/skills.ts"
]
}
]