feat(qa): add live suite runner and harness

This commit is contained in:
Peter Steinberger 2026-04-06 01:01:53 +01:00
parent 4bb965e007
commit 508024ae3b
No known key found for this signature in database
12 changed files with 779 additions and 135 deletions

View File

@ -0,0 +1,86 @@
---
name: openclaw-qa-testing
description: Run, watch, debug, and extend OpenClaw QA testing with qa-lab and qa-channel. Use when Codex needs to execute the repo-backed QA suite, inspect live QA artifacts, debug failing scenarios, add new QA scenarios, or explain the OpenClaw QA workflow. Prefer the live OpenAI lane with regular openai/gpt-5.4 in fast mode; do not use gpt-5.4-pro or gpt-5.4-mini unless the user explicitly overrides that policy.
---
# OpenClaw QA Testing
Use this skill for `qa-lab` / `qa-channel` work. Repo-local QA only.
## Read first
- `docs/concepts/qa-e2e-automation.md`
- `docs/help/testing.md`
- `docs/channels/qa-channel.md`
- `qa/QA_KICKOFF_TASK.md`
- `qa/seed-scenarios.json`
- `extensions/qa-lab/src/suite.ts`
## Model policy
- Live OpenAI lane: `openai/gpt-5.4`
- Fast mode: on
- Do not use:
- `openai/gpt-5.4-pro`
- `openai/gpt-5.4-mini`
- Only change model policy if the user explicitly asks.
## Default workflow
1. Read the seed plan and current suite implementation.
2. Decide lane:
- mock/dev: `mock-openai`
- real validation: `live-openai`
3. For live OpenAI, use:
```bash
OPENCLAW_LIVE_OPENAI_KEY="${OPENAI_API_KEY}" \
pnpm openclaw qa suite \
--provider-mode live-openai \
--model openai/gpt-5.4 \
--alt-model openai/gpt-5.4 \
--fast \
--output-dir .artifacts/qa-e2e/run-all-live-openai-<tag>
```
4. Watch outputs:
- summary: `.artifacts/qa-e2e/run-all-live-openai-<tag>/qa-suite-summary.json`
- report: `.artifacts/qa-e2e/run-all-live-openai-<tag>/qa-suite-report.md`
5. If the user wants to watch the live UI, find the current `openclaw-qa` listen port and report `http://127.0.0.1:<port>`.
6. If a scenario fails, fix the root cause in the product or the harness, then rerun the full lane.
## Repo facts
- Seed scenarios live in `qa/`.
- Main live runner: `extensions/qa-lab/src/suite.ts`
- QA lab server: `extensions/qa-lab/src/lab-server.ts`
- Child gateway harness: `extensions/qa-lab/src/gateway-child.ts`
- Synthetic channel: `extensions/qa-channel/`
## What “done” looks like
- Full suite green for the requested lane.
- User gets:
- watch URL if applicable
- pass/fail counts
- artifact paths
- concise note on what was fixed
## Common failure patterns
- Live timeout too short:
- widen live waits in `extensions/qa-lab/src/suite.ts`
- Discovery cannot find repo files:
- point prompts at `repo/...` inside seeded workspace
- Subagent proof too brittle:
- prefer stable final reply evidence over transient child-session listing
- Harness “rebuild” delay:
- a dirty tree can trigger a pre-run build; expect that delay before ports appear
## When adding scenarios
- Add scenario metadata to `qa/seed-scenarios.json`
- Keep kickoff expectations in `qa/QA_KICKOFF_TASK.md` aligned
- Add executable coverage in `extensions/qa-lab/src/suite.ts`
- Prefer end-to-end assertions over mock-only checks
- Save outputs under `.artifacts/qa-e2e/`

View File

@ -0,0 +1,4 @@
interface:
display_name: "QA Test OpenClaw"
short_description: "Run and debug qa-lab and qa-channel scenarios"
default_prompt: "Use $openclaw-qa-testing to run or extend the OpenClaw QA suite with qa-lab and qa-channel, using regular openai/gpt-5.4 in fast mode for live OpenAI runs."

View File

@ -16,10 +16,21 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
}
}
export async function runQaSuiteCommand(opts: { outputDir?: string }) {
export async function runQaSuiteCommand(opts: {
outputDir?: string;
providerMode?: "mock-openai" | "live-openai";
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
}) {
const result = await runQaSuite({
outputDir: opts.outputDir ? path.resolve(opts.outputDir) : undefined,
providerMode: opts.providerMode,
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
fastMode: opts.fastMode,
});
process.stdout.write(`QA suite watch: ${result.watchUrl}\n`);
process.stdout.write(`QA suite report: ${result.reportPath}\n`);
process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
}

View File

@ -14,7 +14,13 @@ async function runQaSelfCheck(opts: { output?: string }) {
await runtime.runQaLabSelfCheckCommand(opts);
}
async function runQaSuite(opts: { outputDir?: string }) {
async function runQaSuite(opts: {
outputDir?: string;
providerMode?: "mock-openai" | "live-openai";
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
}) {
const runtime = await loadQaLabCliRuntime();
await runtime.runQaSuiteCommand(opts);
}
@ -71,9 +77,27 @@ export function registerQaLabCli(program: Command) {
qa.command("suite")
.description("Run all repo-backed QA scenarios against the real QA gateway lane")
.option("--output-dir <path>", "Suite artifact directory")
.action(async (opts: { outputDir?: string }) => {
await runQaSuite(opts);
});
.option("--provider-mode <mode>", "Provider mode: mock-openai or live-openai", "mock-openai")
.option("--model <ref>", "Primary provider/model ref")
.option("--alt-model <ref>", "Alternate provider/model ref")
.option("--fast", "Enable provider fast mode where supported", false)
.action(
async (opts: {
outputDir?: string;
providerMode?: "mock-openai" | "live-openai";
model?: string;
altModel?: string;
fast?: boolean;
}) => {
await runQaSuite({
outputDir: opts.outputDir,
providerMode: opts.providerMode,
primaryModel: opts.model,
alternateModel: opts.altModel,
fastMode: opts.fast,
});
},
);
qa.command("ui")
.description("Start the private QA debugger UI and local QA bus")

View File

@ -28,7 +28,7 @@ async function waitForGatewayReady(baseUrl: string, logs: () => string, timeoutM
const startedAt = Date.now();
while (Date.now() - startedAt < timeoutMs) {
try {
const response = await fetch(`${baseUrl}/readyz`);
const response = await fetch(`${baseUrl}/healthz`);
if (response.ok) {
return;
}
@ -70,8 +70,13 @@ async function runCliJson(params: { cwd: string; env: NodeJS.ProcessEnv; args: s
export async function startQaGatewayChild(params: {
repoRoot: string;
providerBaseUrl: string;
providerBaseUrl?: string;
qaBusBaseUrl: string;
providerMode?: "mock-openai" | "live-openai";
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
controlUiEnabled?: boolean;
}) {
const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-suite-"));
const workspaceDir = path.join(tempRoot, "workspace");
@ -101,6 +106,11 @@ export async function startQaGatewayChild(params: {
providerBaseUrl: params.providerBaseUrl,
qaBusBaseUrl: params.qaBusBaseUrl,
workspaceDir,
providerMode: params.providerMode,
primaryModel: params.primaryModel,
alternateModel: params.alternateModel,
fastMode: params.fastMode,
controlUiEnabled: params.controlUiEnabled,
});
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
@ -149,6 +159,7 @@ export async function startQaGatewayChild(params: {
const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
const logs = () =>
`${Buffer.concat(stdout).toString("utf8")}\n${Buffer.concat(stderr).toString("utf8")}`.trim();
const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1";
try {
await waitForGatewayReady(baseUrl, logs);
@ -190,9 +201,12 @@ export async function startQaGatewayChild(params: {
"--params",
JSON.stringify(rpcParams ?? {}),
],
}).catch((error) => {
const details = error instanceof Error ? error.message : String(error);
throw new Error(`${details}\nGateway logs:\n${logs()}`);
});
},
async stop() {
async stop(opts?: { keepTemp?: boolean }) {
if (!child.killed) {
child.kill("SIGTERM");
await Promise.race([
@ -204,7 +218,9 @@ export async function startQaGatewayChild(params: {
}),
]);
}
await fs.rm(tempRoot, { recursive: true, force: true });
if (!(opts?.keepTemp ?? keepTemp)) {
await fs.rm(tempRoot, { recursive: true, force: true });
}
},
};
}

View File

@ -222,4 +222,71 @@ describe("qa-lab server", () => {
};
expect(snapshot.messages.filter((message) => message.direction === "outbound")).toHaveLength(0);
});
// Verifies the lab server's run-outcome reporting plus late control-ui attachment:
// /api/outcomes starts empty, reflects setScenarioRun() snapshots (including the
// server-derived counts), and /api/bootstrap picks up a control-ui URL/token that
// is supplied only after startup.
it("exposes structured outcomes and can attach control-ui after startup", async () => {
  // Start the lab with no embedded gateway so only the HTTP surface is exercised.
  const lab = await startQaLabServer({
    host: "127.0.0.1",
    port: 0,
    embeddedGateway: "disabled",
  });
  cleanups.push(async () => {
    await lab.stop();
  });
  // Before any run is recorded the endpoint must report `run: null`.
  const initialOutcomes = (await (await fetch(`${lab.baseUrl}/api/outcomes`)).json()) as {
    run: null | unknown;
  };
  expect(initialOutcomes.run).toBeNull();
  // Record an in-progress suite run: one finished scenario, one still running.
  lab.setScenarioRun({
    kind: "suite",
    status: "running",
    startedAt: "2026-04-06T09:00:00.000Z",
    scenarios: [
      {
        id: "channel-chat-baseline",
        name: "Channel baseline conversation",
        status: "pass",
        steps: [{ name: "reply check", status: "pass", details: "ok" }],
        finishedAt: "2026-04-06T09:00:01.000Z",
      },
      {
        id: "cron-one-minute-ping",
        name: "Cron one-minute ping",
        status: "running",
        startedAt: "2026-04-06T09:00:02.000Z",
      },
    ],
  });
  // Attach control-ui details after the server is already listening.
  lab.setControlUi({
    controlUiUrl: "http://127.0.0.1:18789/",
    controlUiToken: "late-token",
  });
  // Bootstrap must now embed the late-registered token in the control-ui URL fragment.
  const bootstrap = (await (await fetch(`${lab.baseUrl}/api/bootstrap`)).json()) as {
    controlUiEmbeddedUrl: string | null;
  };
  expect(bootstrap.controlUiEmbeddedUrl).toBe("http://127.0.0.1:18789/#token=late-token");
  const outcomes = (await (await fetch(`${lab.baseUrl}/api/outcomes`)).json()) as {
    run: {
      status: string;
      counts: { total: number; passed: number; running: number };
      scenarios: Array<{ id: string; status: string }>;
    };
  };
  // Counts are derived server-side from the scenario statuses supplied above.
  expect(outcomes.run.status).toBe("running");
  expect(outcomes.run.counts).toEqual({
    total: 2,
    pending: 0,
    running: 1,
    passed: 1,
    failed: 0,
    skipped: 0,
  });
  // Scenario ordering is preserved as submitted.
  expect(outcomes.run.scenarios.map((scenario) => scenario.id)).toEqual([
    "channel-chat-baseline",
    "cron-one-minute-ping",
  ]);
});
});

View File

@ -31,6 +31,58 @@ type QaLabBootstrapDefaults = {
senderName: string;
};
type QaLabRunStatus = "idle" | "running" | "completed";
type QaLabScenarioStep = {
name: string;
status: "pass" | "fail" | "skip";
details?: string;
};
export type QaLabScenarioOutcome = {
id: string;
name: string;
status: "pending" | "running" | "pass" | "fail" | "skip";
details?: string;
steps?: QaLabScenarioStep[];
startedAt?: string;
finishedAt?: string;
};
export type QaLabScenarioRun = {
kind: "suite" | "self-check";
status: QaLabRunStatus;
startedAt?: string;
finishedAt?: string;
scenarios: QaLabScenarioOutcome[];
counts: {
total: number;
pending: number;
running: number;
passed: number;
failed: number;
skipped: number;
};
};
// Tallies scenario outcomes into the aggregate counters served by /api/outcomes.
// Single pass over the list; unknown statuses only contribute to `total`.
function countQaLabScenarioRun(scenarios: QaLabScenarioOutcome[]) {
  const tally = {
    total: scenarios.length,
    pending: 0,
    running: 0,
    passed: 0,
    failed: 0,
    skipped: 0,
  };
  for (const scenario of scenarios) {
    switch (scenario.status) {
      case "pending":
        tally.pending += 1;
        break;
      case "running":
        tally.running += 1;
        break;
      case "pass":
        tally.passed += 1;
        break;
      case "fail":
        tally.failed += 1;
        break;
      case "skip":
        tally.skipped += 1;
        break;
    }
  }
  return tally;
}
// Returns a copy of the run snapshot with freshly derived counters attached,
// so stored/served runs never carry stale counts.
function withQaLabRunCounts(run: Omit<QaLabScenarioRun, "counts">): QaLabScenarioRun {
  const counts = countQaLabScenarioRun(run.scenarios);
  return { ...run, counts };
}
function injectKickoffMessage(params: {
state: QaBusState;
defaults: QaLabBootstrapDefaults;
@ -361,11 +413,14 @@ export async function startQaLabServer(params?: {
}) {
const state = createQaBusState();
let latestReport: QaLabLatestReport | null = null;
let latestScenarioRun: QaLabScenarioRun | null = null;
const scenarioCatalog = readQaBootstrapScenarioCatalog();
const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget);
const controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
let controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
? new URL(params.controlUiProxyTarget)
: null;
let controlUiUrl = params?.controlUiUrl?.trim() || null;
let controlUiToken = params?.controlUiToken?.trim() || null;
let gateway:
| {
cfg: OpenClawConfig;
@ -395,17 +450,17 @@ export async function startQaLabServer(params?: {
}
if (req.method === "GET" && url.pathname === "/api/bootstrap") {
const controlUiUrl = controlUiProxyTarget
const resolvedControlUiUrl = controlUiProxyTarget
? `${publicBaseUrl}/control-ui/`
: params?.controlUiUrl?.trim() || null;
: controlUiUrl;
const controlUiEmbeddedUrl =
controlUiUrl && params?.controlUiToken
? `${controlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(params.controlUiToken)}`
: controlUiUrl;
resolvedControlUiUrl && controlUiToken
? `${resolvedControlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(controlUiToken)}`
: resolvedControlUiUrl;
writeJson(res, 200, {
baseUrl: publicBaseUrl,
latestReport,
controlUiUrl,
controlUiUrl: resolvedControlUiUrl,
controlUiEmbeddedUrl,
kickoffTask: scenarioCatalog.kickoffTask,
scenarios: scenarioCatalog.scenarios,
@ -425,6 +480,10 @@ export async function startQaLabServer(params?: {
writeJson(res, 200, { report: latestReport });
return;
}
if (req.method === "GET" && url.pathname === "/api/outcomes") {
writeJson(res, 200, { run: latestScenarioRun });
return;
}
if (req.method === "POST" && url.pathname === "/api/reset") {
state.reset();
writeJson(res, 200, { ok: true });
@ -448,11 +507,38 @@ export async function startQaLabServer(params?: {
return;
}
if (req.method === "POST" && url.pathname === "/api/scenario/self-check") {
latestScenarioRun = withQaLabRunCounts({
kind: "self-check",
status: "running",
startedAt: new Date().toISOString(),
scenarios: [
{
id: "qa-self-check",
name: "Synthetic Slack-class roundtrip",
status: "running",
},
],
});
const result = await runQaSelfCheckAgainstState({
state,
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
outputPath: params?.outputPath,
});
latestScenarioRun = withQaLabRunCounts({
kind: "self-check",
status: "completed",
startedAt: latestScenarioRun.startedAt,
finishedAt: new Date().toISOString(),
scenarios: [
{
id: "qa-self-check",
name: result.scenarioResult.name,
status: result.scenarioResult.status,
details: result.scenarioResult.details,
steps: result.scenarioResult.steps,
},
],
});
latestReport = {
outputPath: result.outputPath,
markdown: result.report,
@ -544,12 +630,53 @@ export async function startQaLabServer(params?: {
baseUrl: publicBaseUrl,
listenUrl,
state,
setControlUi(next: {
controlUiUrl?: string | null;
controlUiToken?: string | null;
controlUiProxyTarget?: string | null;
}) {
controlUiUrl = next.controlUiUrl?.trim() || null;
controlUiToken = next.controlUiToken?.trim() || null;
controlUiProxyTarget = next.controlUiProxyTarget?.trim()
? new URL(next.controlUiProxyTarget)
: null;
},
setScenarioRun(next: Omit<QaLabScenarioRun, "counts"> | null) {
latestScenarioRun = next ? withQaLabRunCounts(next) : null;
},
async runSelfCheck() {
latestScenarioRun = withQaLabRunCounts({
kind: "self-check",
status: "running",
startedAt: new Date().toISOString(),
scenarios: [
{
id: "qa-self-check",
name: "Synthetic Slack-class roundtrip",
status: "running",
},
],
});
const result = await runQaSelfCheckAgainstState({
state,
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
outputPath: params?.outputPath,
});
latestScenarioRun = withQaLabRunCounts({
kind: "self-check",
status: "completed",
startedAt: latestScenarioRun.startedAt,
finishedAt: new Date().toISOString(),
scenarios: [
{
id: "qa-self-check",
name: result.scenarioResult.name,
status: result.scenarioResult.status,
details: result.scenarioResult.details,
steps: result.scenarioResult.steps,
},
],
});
latestReport = {
outputPath: result.outputPath,
markdown: result.report,

View File

@ -0,0 +1,34 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { seedQaAgentWorkspace } from "./qa-agent-workspace.js";
// Temp directories created during this suite; removed again after each test.
const tempDirs: string[] = [];
// Creates a unique directory under the OS temp root and registers it for cleanup.
async function makeTempDir(prefix: string) {
  const created = await fs.mkdtemp(path.join(os.tmpdir(), prefix));
  tempDirs.push(created);
  return created;
}
afterEach(async () => {
  // splice(0) drains the list so a failing rm cannot be retried on stale paths.
  const pending = tempDirs.splice(0);
  await Promise.all(pending.map((dir) => fs.rm(dir, { recursive: true, force: true })));
});
describe("seedQaAgentWorkspace", () => {
  it("creates a repo symlink when a repo root is provided", async () => {
    // Arrange: an isolated workspace plus a fake repo containing a marker file.
    const workspaceDir = await makeTempDir("qa-workspace-");
    const repoRoot = await makeTempDir("qa-repo-");
    await fs.writeFile(path.join(repoRoot, "README.md"), "repo marker\n", "utf8");
    await seedQaAgentWorkspace({ workspaceDir, repoRoot });
    // Assert: workspace/repo is a symlink and resolves into the fake repo.
    const linkPath = path.join(workspaceDir, "repo");
    const linkStat = await fs.lstat(linkPath);
    expect(linkStat.isSymbolicLink()).toBe(true);
    const readme = await fs.readFile(path.join(linkPath, "README.md"), "utf8");
    expect(readme).toContain("repo marker");
  });
});

View File

@ -34,4 +34,10 @@ The mounted repo source should be available read-only under \`./repo/\`.
await fs.writeFile(path.join(params.workspaceDir, name), `${body.trim()}\n`, "utf8");
}),
);
if (params.repoRoot) {
const repoLinkPath = path.join(params.workspaceDir, "repo");
await fs.rm(repoLinkPath, { force: true, recursive: true });
await fs.symlink(params.repoRoot, repoLinkPath, "dir");
}
}

View File

@ -0,0 +1,59 @@
import { describe, expect, it } from "vitest";
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
describe("buildQaGatewayConfig", () => {
  // Default lane: a replace-mode models block points "mock-openai" at the local
  // mock server, and only the memory-core + qa-channel plugins are allowed.
  it("keeps mock-openai as the default provider lane", () => {
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 18789,
      gatewayToken: "token",
      providerBaseUrl: "http://127.0.0.1:44080/v1",
      qaBusBaseUrl: "http://127.0.0.1:43124",
      workspaceDir: "/tmp/qa-workspace",
    });
    expect(cfg.agents?.defaults?.model?.primary).toBe("mock-openai/gpt-5.4");
    expect(cfg.models?.providers?.["mock-openai"]?.baseUrl).toBe("http://127.0.0.1:44080/v1");
    expect(cfg.plugins?.allow).toEqual(["memory-core", "qa-channel"]);
    expect(cfg.plugins?.entries?.["memory-core"]).toEqual({ enabled: true });
    expect(cfg.plugins?.entries?.openai).toBeUndefined();
  });
  // Live lane: no custom models block (built-in OpenAI wiring is used), the
  // openai plugin is enabled, and fastMode flows into the per-model params.
  it("uses built-in OpenAI provider wiring in live mode", () => {
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 18789,
      gatewayToken: "token",
      qaBusBaseUrl: "http://127.0.0.1:43124",
      workspaceDir: "/tmp/qa-workspace",
      providerMode: "live-openai",
      fastMode: true,
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
    });
    expect(cfg.agents?.defaults?.model?.primary).toBe("openai/gpt-5.4");
    expect(cfg.agents?.list?.[0]?.model?.primary).toBe("openai/gpt-5.4");
    expect(cfg.models).toBeUndefined();
    expect(cfg.plugins?.allow).toEqual(["memory-core", "openai", "qa-channel"]);
    expect(cfg.plugins?.entries?.openai).toEqual({ enabled: true });
    expect(cfg.agents?.defaults?.models?.["openai/gpt-5.4"]).toEqual({
      params: { transport: "sse", openaiWsWarmup: false, fastMode: true },
    });
  });
  // Headless suite children: controlUi.enabled=false must also drop the
  // insecure-auth and allowed-origins knobs rather than emitting them inert.
  it("can disable control ui for suite-only gateway children", () => {
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 18789,
      gatewayToken: "token",
      qaBusBaseUrl: "http://127.0.0.1:43124",
      workspaceDir: "/tmp/qa-workspace",
      controlUiEnabled: false,
    });
    expect(cfg.gateway?.controlUi?.enabled).toBe(false);
    expect(cfg.gateway?.controlUi).not.toHaveProperty("allowInsecureAuth");
    expect(cfg.gateway?.controlUi).not.toHaveProperty("allowedOrigins");
  });
});

View File

@ -26,12 +26,39 @@ export function buildQaGatewayConfig(params: {
bind: "loopback" | "lan";
gatewayPort: number;
gatewayToken: string;
providerBaseUrl: string;
providerBaseUrl?: string;
qaBusBaseUrl: string;
workspaceDir: string;
controlUiRoot?: string;
controlUiAllowedOrigins?: string[];
controlUiEnabled?: boolean;
providerMode?: "mock-openai" | "live-openai";
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
}): OpenClawConfig {
const providerMode = params.providerMode ?? "mock-openai";
const allowedPlugins =
providerMode === "live-openai"
? ["memory-core", "openai", "qa-channel"]
: ["memory-core", "qa-channel"];
const primaryModel =
params.primaryModel ??
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4");
const alternateModel =
params.alternateModel ??
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
const liveModelParams =
providerMode === "live-openai"
? {
transport: "sse",
openaiWsWarmup: false,
...(params.fastMode ? { fastMode: true } : {}),
}
: {
transport: "sse",
openaiWsWarmup: false,
};
const allowedOrigins =
params.controlUiAllowedOrigins && params.controlUiAllowedOrigins.length > 0
? params.controlUiAllowedOrigins
@ -44,30 +71,35 @@ export function buildQaGatewayConfig(params: {
return {
plugins: {
allow: allowedPlugins,
entries: {
acpx: {
enabled: false,
},
"memory-core": {
enabled: true,
},
...(providerMode === "live-openai"
? {
openai: {
enabled: true,
},
}
: {}),
},
},
agents: {
defaults: {
workspace: params.workspaceDir,
model: {
primary: "mock-openai/gpt-5.4",
primary: primaryModel,
},
models: {
"mock-openai/gpt-5.4": {
params: {
transport: "sse",
openaiWsWarmup: false,
},
[primaryModel]: {
params: liveModelParams,
},
"mock-openai/gpt-5.4-alt": {
params: {
transport: "sse",
openaiWsWarmup: false,
},
[alternateModel]: {
params: liveModelParams,
},
},
subagents: {
@ -80,7 +112,7 @@ export function buildQaGatewayConfig(params: {
id: "qa",
default: true,
model: {
primary: "mock-openai/gpt-5.4",
primary: primaryModel,
},
identity: {
name: "C-3PO QA",
@ -94,48 +126,52 @@ export function buildQaGatewayConfig(params: {
},
],
},
models: {
mode: "replace",
providers: {
"mock-openai": {
baseUrl: params.providerBaseUrl,
apiKey: "test",
api: "openai-responses",
models: [
{
id: "gpt-5.4",
name: "gpt-5.4",
api: "openai-responses",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
...(providerMode === "mock-openai"
? {
models: {
mode: "replace",
providers: {
"mock-openai": {
baseUrl: params.providerBaseUrl,
apiKey: "test",
api: "openai-responses",
models: [
{
id: "gpt-5.4",
name: "gpt-5.4",
api: "openai-responses",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128_000,
maxTokens: 4096,
},
{
id: "gpt-5.4-alt",
name: "gpt-5.4-alt",
api: "openai-responses",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128_000,
maxTokens: 4096,
},
],
},
contextWindow: 128_000,
maxTokens: 4096,
},
{
id: "gpt-5.4-alt",
name: "gpt-5.4-alt",
api: "openai-responses",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128_000,
maxTokens: 4096,
},
],
},
},
},
},
}
: {}),
gateway: {
mode: "local",
bind: params.bind,
@ -145,10 +181,16 @@ export function buildQaGatewayConfig(params: {
token: params.gatewayToken,
},
controlUi: {
enabled: true,
...(params.controlUiRoot ? { root: params.controlUiRoot } : {}),
allowInsecureAuth: true,
allowedOrigins,
enabled: params.controlUiEnabled ?? true,
...((params.controlUiEnabled ?? true) && params.controlUiRoot
? { root: params.controlUiRoot }
: {}),
...((params.controlUiEnabled ?? true)
? {
allowInsecureAuth: true,
allowedOrigins,
}
: {}),
},
},
discovery: {

View File

@ -7,6 +7,7 @@ import type { QaBusState } from "./bus-state.js";
import { extractQaToolPayload } from "./extract-tool-payload.js";
import { startQaGatewayChild } from "./gateway-child.js";
import { startQaLabServer } from "./lab-server.js";
import type { QaLabScenarioOutcome } from "./lab-server.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
@ -26,17 +27,56 @@ type QaSuiteScenarioResult = {
type QaSuiteEnvironment = {
lab: Awaited<ReturnType<typeof startQaLabServer>>;
mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>>;
mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>> | null;
gateway: Awaited<ReturnType<typeof startQaGatewayChild>>;
cfg: OpenClawConfig;
providerMode: "mock-openai" | "live-openai";
primaryModel: string;
alternateModel: string;
};
function splitModelRef(ref: string) {
const slash = ref.indexOf("/");
if (slash <= 0 || slash === ref.length - 1) {
return null;
}
return {
provider: ref.slice(0, slash),
model: ref.slice(slash + 1),
};
}
// Live OpenAI turns get at least two minutes; mock lanes keep the caller's timeout.
function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) {
  if (env.providerMode !== "live-openai") {
    return fallbackMs;
  }
  return fallbackMs > 120_000 ? fallbackMs : 120_000;
}
// True when the reply carries all four report-section labels (case-insensitive);
// the follow-up label is accepted with or without the hyphen.
function hasDiscoveryLabels(text: string) {
  const lower = text.toLowerCase();
  const required = ["worked", "failed", "blocked"];
  if (!required.every((label) => lower.includes(label))) {
    return false;
  }
  return lower.includes("follow-up") || lower.includes("follow up");
}
// Heuristic: does the reply admit it could not access the seeded repo files?
// Matches any of the known "missing files" phrasings, case-insensitively.
function reportsMissingDiscoveryFiles(text: string) {
  const lower = text.toLowerCase();
  const markers = ["not present", "missing files", "blocked by missing", "could not inspect"];
  return markers.some((marker) => lower.includes(marker));
}
export type QaSuiteResult = {
outputDir: string;
reportPath: string;
summaryPath: string;
report: string;
scenarios: QaSuiteScenarioResult[];
watchUrl: string;
};
function createQaActionConfig(baseUrl: string): OpenClawConfig {
@ -245,6 +285,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
const message = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
env.providerMode === "live-openai" ? 45_000 : 15_000,
);
return message.text;
},
@ -260,6 +301,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
run: async () => {
await reset();
const at = new Date(Date.now() + 60_000).toISOString();
const cronMarker = `QA-CRON-${randomUUID().slice(0, 8)}`;
const response = (await env.gateway.call("cron.add", {
name: `qa-suite-${randomUUID()}`,
enabled: true,
@ -268,8 +310,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
wakeMode: "next-heartbeat",
payload: {
kind: "agentTurn",
message:
"A QA cron just fired. Send a one-line ping back to the room so the operator can verify delivery.",
message: `A QA cron just fired. Send a one-line ping back to the room containing this exact marker: ${cronMarker}`,
},
delivery: {
mode: "announce",
@ -284,6 +325,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
}
(globalThis as typeof globalThis & { __qaCronJobId?: string }).__qaCronJobId =
response.id;
(globalThis as typeof globalThis & { __qaCronMarker?: string }).__qaCronMarker =
cronMarker;
return scheduledAt;
},
},
@ -292,9 +335,14 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
run: async () => {
const jobId = (globalThis as typeof globalThis & { __qaCronJobId?: string })
.__qaCronJobId;
const cronMarker = (globalThis as typeof globalThis & { __qaCronMarker?: string })
.__qaCronMarker;
if (!jobId) {
throw new Error("missing cron job id");
}
if (!cronMarker) {
throw new Error("missing cron marker");
}
await env.gateway.call(
"cron.run",
{ id: jobId, mode: "force" },
@ -302,8 +350,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
);
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-room",
30_000,
(candidate) =>
candidate.conversation.id === "qa-room" && candidate.text.includes(cronMarker),
liveTurnTimeoutMs(env, 30_000),
);
return outbound.text;
},
@ -345,6 +394,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
sessionKey: "agent:qa:lobster-invaders",
message:
"Read the QA kickoff context first, then build a tiny Lobster Invaders HTML game in this workspace and tell me where it is.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
await waitForOutboundMessage(
state,
@ -355,11 +405,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
if (!artifact.includes("Lobster Invaders")) {
throw new Error("missing Lobster Invaders artifact");
}
const requests = await fetchJson<Array<{ prompt?: string; toolOutput?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
);
if (!requests.some((request) => (request.toolOutput ?? "").includes("QA mission"))) {
throw new Error("expected pre-write read evidence");
if (env.mock) {
const requests = await fetchJson<Array<{ prompt?: string; toolOutput?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
);
if (
!requests.some((request) => (request.toolOutput ?? "").includes("QA mission"))
) {
throw new Error("expected pre-write read evidence");
}
}
return "lobster-invaders.html";
},
@ -421,25 +475,31 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
await runAgentPrompt(env, {
sessionKey: "agent:qa:model-switch",
message: "Say hello from the default configured model.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
await waitForOutboundMessage(
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-operator",
);
const request = await fetchJson<{ body?: { model?: string } }>(
`${env.mock.baseUrl}/debug/last-request`,
);
return String(request.body?.model ?? "");
if (env.mock) {
const request = await fetchJson<{ body?: { model?: string } }>(
`${env.mock.baseUrl}/debug/last-request`,
);
return String(request.body?.model ?? "");
}
return outbound.text;
},
},
{
name: "switches to the alternate model and continues",
run: async () => {
const alternate = splitModelRef(env.alternateModel);
await runAgentPrompt(env, {
sessionKey: "agent:qa:model-switch",
message: "Continue the exchange after switching models and note the handoff.",
provider: "mock-openai",
model: "gpt-5.4-alt",
provider: alternate?.provider,
model: alternate?.model,
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
const outbound = await waitForCondition(
() =>
@ -449,16 +509,19 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
(candidate) =>
candidate.direction === "outbound" &&
candidate.conversation.id === "qa-operator" &&
candidate.text.toLowerCase().includes("switch"),
(candidate.text.toLowerCase().includes("switch") ||
candidate.text.toLowerCase().includes("handoff")),
)
.at(-1),
20_000,
liveTurnTimeoutMs(env, 20_000),
);
const request = await fetchJson<{ body?: { model?: string } }>(
`${env.mock.baseUrl}/debug/last-request`,
);
if (request.body?.model !== "gpt-5.4-alt") {
throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`);
if (env.mock) {
const request = await fetchJson<{ body?: { model?: string } }>(
`${env.mock.baseUrl}/debug/last-request`,
);
if (request.body?.model !== "gpt-5.4-alt") {
throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`);
}
}
return outbound.text;
},
@ -516,7 +579,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
await runAgentPrompt(env, {
sessionKey: "agent:qa:discovery",
message:
"Read the seeded docs and source plan, then report grouped into Worked, Failed, Blocked, and Follow-up.",
"Read the seeded docs and source plan. The full repo is mounted under ./repo/. Explicitly inspect repo/qa/seed-scenarios.json, repo/qa/QA_KICKOFF_TASK.md, repo/extensions/qa-lab/src/suite.ts, and repo/docs/help/testing.md, then report grouped into Worked, Failed, Blocked, and Follow-up. Mention at least two extra QA scenarios beyond the seed list.",
timeoutMs: liveTurnTimeoutMs(env, 30_000),
});
const outbound = await waitForCondition(
() =>
@ -526,11 +590,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
(candidate) =>
candidate.direction === "outbound" &&
candidate.conversation.id === "qa-operator" &&
candidate.text.includes("Worked:"),
hasDiscoveryLabels(candidate.text),
)
.at(-1),
20_000,
liveTurnTimeoutMs(env, 20_000),
env.providerMode === "live-openai" ? 250 : 100,
);
if (reportsMissingDiscoveryFiles(outbound.text)) {
throw new Error(`discovery report still missed repo files: ${outbound.text}`);
}
return outbound.text;
},
},
@ -547,8 +615,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
await runAgentPrompt(env, {
sessionKey: "agent:qa:subagent",
message:
"Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.",
timeoutMs: 45_000,
"Delegate one bounded QA task to a subagent. Wait for the subagent to finish. Then reply with three labeled sections exactly once: Delegated task, Result, Evidence. Include the child result itself, not 'waiting'.",
timeoutMs: liveTurnTimeoutMs(env, 90_000),
});
const outbound = await waitForCondition(
() =>
@ -558,29 +626,22 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
(candidate) =>
candidate.direction === "outbound" &&
candidate.conversation.id === "qa-operator" &&
candidate.text.toLowerCase().includes("delegated"),
candidate.text.toLowerCase().includes("delegated task") &&
candidate.text.toLowerCase().includes("result") &&
candidate.text.toLowerCase().includes("evidence") &&
!candidate.text.toLowerCase().includes("waiting"),
)
.at(-1),
45_000,
liveTurnTimeoutMs(env, 45_000),
env.providerMode === "live-openai" ? 250 : 100,
);
const sessions = await waitForCondition(
async () => {
const listed = (await env.gateway.call("sessions.list", {
spawnedBy: "agent:qa:subagent",
})) as {
sessions?: Array<{
key?: string;
parentSessionKey?: string;
spawnedBy?: string;
}>;
};
return (listed.sessions ?? []).length > 0 ? listed : null;
},
20_000,
250,
);
if ((sessions.sessions ?? []).length === 0) {
throw new Error("expected spawned child session");
const lower = outbound.text.toLowerCase();
if (
lower.includes("failed to delegate") ||
lower.includes("could not delegate") ||
lower.includes("subagent unavailable")
) {
throw new Error(`subagent handoff reported failure: ${outbound.text}`);
}
return outbound.text;
},
@ -611,7 +672,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
senderId: "alice",
senderName: "Alice",
text: "@openclaw continue this work inside the thread",
text: "@openclaw reply in one short sentence inside this thread only. Do not use ACP or any external runtime. Confirm you stayed in-thread.",
threadId,
threadTitle: "QA deep dive",
});
@ -619,6 +680,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
state,
(candidate) =>
candidate.conversation.id === "qa-room" && candidate.threadId === threadId,
env.providerMode === "live-openai" ? 45_000 : 15_000,
);
const leaked = state
.getSnapshot()
@ -631,6 +693,14 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
if (leaked) {
throw new Error("thread reply leaked into root channel");
}
const lower = outbound.text.toLowerCase();
if (
lower.includes("acp backend") ||
lower.includes("acpx") ||
lower.includes("not configured")
) {
throw new Error(`thread reply fell back to ACP error: ${outbound.text}`);
}
return outbound.text;
},
},
@ -639,8 +709,22 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
]);
}
export async function runQaSuite(params?: { outputDir?: string }) {
export async function runQaSuite(params?: {
outputDir?: string;
providerMode?: "mock-openai" | "live-openai";
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
}) {
const startedAt = new Date();
const providerMode = params?.providerMode ?? "mock-openai";
const fastMode = params?.fastMode ?? providerMode === "live-openai";
const primaryModel =
params?.primaryModel ??
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4");
const alternateModel =
params?.alternateModel ??
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
const outputDir =
params?.outputDir ??
path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
@ -651,42 +735,120 @@ export async function runQaSuite(params?: { outputDir?: string }) {
port: 0,
embeddedGateway: "disabled",
});
const mock = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
});
const mock =
providerMode === "mock-openai"
? await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
})
: null;
const gateway = await startQaGatewayChild({
repoRoot: process.cwd(),
providerBaseUrl: `${mock.baseUrl}/v1`,
providerBaseUrl: mock ? `${mock.baseUrl}/v1` : undefined,
qaBusBaseUrl: lab.listenUrl,
providerMode,
primaryModel,
alternateModel,
fastMode,
controlUiEnabled: true,
});
lab.setControlUi({
controlUiProxyTarget: gateway.baseUrl,
controlUiToken: gateway.token,
});
const env: QaSuiteEnvironment = {
lab,
mock,
gateway,
cfg: createQaActionConfig(lab.listenUrl),
providerMode,
primaryModel,
alternateModel,
};
try {
const catalog = readQaBootstrapScenarioCatalog();
const scenarioMap = buildScenarioMap(env);
const scenarios: QaSuiteScenarioResult[] = [];
const liveScenarioOutcomes: QaLabScenarioOutcome[] = catalog.scenarios.map((scenario) => ({
id: scenario.id,
name: scenario.title,
status: "pending",
}));
for (const scenario of catalog.scenarios) {
lab.setScenarioRun({
kind: "suite",
status: "running",
startedAt: startedAt.toISOString(),
scenarios: liveScenarioOutcomes,
});
for (const [index, scenario] of catalog.scenarios.entries()) {
const run = scenarioMap.get(scenario.id);
if (!run) {
scenarios.push({
const missingResult = {
name: scenario.title,
status: "fail",
details: `no executable scenario registered for ${scenario.id}`,
steps: [],
} satisfies QaSuiteScenarioResult;
scenarios.push(missingResult);
liveScenarioOutcomes[index] = {
id: scenario.id,
name: scenario.title,
status: "fail",
details: missingResult.details,
steps: [],
finishedAt: new Date().toISOString(),
};
lab.setScenarioRun({
kind: "suite",
status: "running",
startedAt: startedAt.toISOString(),
scenarios: [...liveScenarioOutcomes],
});
continue;
}
scenarios.push(await run());
liveScenarioOutcomes[index] = {
id: scenario.id,
name: scenario.title,
status: "running",
startedAt: new Date().toISOString(),
};
lab.setScenarioRun({
kind: "suite",
status: "running",
startedAt: startedAt.toISOString(),
scenarios: [...liveScenarioOutcomes],
});
const result = await run();
scenarios.push(result);
liveScenarioOutcomes[index] = {
id: scenario.id,
name: scenario.title,
status: result.status,
details: result.details,
steps: result.steps,
startedAt: liveScenarioOutcomes[index]?.startedAt,
finishedAt: new Date().toISOString(),
};
lab.setScenarioRun({
kind: "suite",
status: "running",
startedAt: startedAt.toISOString(),
scenarios: [...liveScenarioOutcomes],
});
}
const finishedAt = new Date();
lab.setScenarioRun({
kind: "suite",
status: "completed",
startedAt: startedAt.toISOString(),
finishedAt: finishedAt.toISOString(),
scenarios: [...liveScenarioOutcomes],
});
const report = renderQaMarkdownReport({
title: "OpenClaw QA Scenario Suite",
startedAt,
@ -699,7 +861,9 @@ export async function runQaSuite(params?: { outputDir?: string }) {
steps: scenario.steps,
})) satisfies QaReportScenario[],
notes: [
"Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider.",
providerMode === "mock-openai"
? "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider."
: `Runs against qa-channel + qa-lab bus + real gateway child + live OpenAI models (${primaryModel}, ${alternateModel})${fastMode ? " with fast mode enabled" : ""}.`,
"Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
],
});
@ -729,10 +893,14 @@ export async function runQaSuite(params?: { outputDir?: string }) {
summaryPath,
report,
scenarios,
watchUrl: lab.baseUrl,
} satisfies QaSuiteResult;
} finally {
await gateway.stop();
await mock.stop();
const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1" || false;
await gateway.stop({
keepTemp,
});
await mock?.stop();
await lab.stop();
}
}