diff --git a/.agents/skills/openclaw-qa-testing/SKILL.md b/.agents/skills/openclaw-qa-testing/SKILL.md new file mode 100644 index 00000000000..837c15b1923 --- /dev/null +++ b/.agents/skills/openclaw-qa-testing/SKILL.md @@ -0,0 +1,86 @@ +--- +name: openclaw-qa-testing +description: Run, watch, debug, and extend OpenClaw QA testing with qa-lab and qa-channel. Use when Codex needs to execute the repo-backed QA suite, inspect live QA artifacts, debug failing scenarios, add new QA scenarios, or explain the OpenClaw QA workflow. Prefer the live OpenAI lane with regular openai/gpt-5.4 in fast mode; do not use gpt-5.4-pro or gpt-5.4-mini unless the user explicitly overrides that policy. +--- + +# OpenClaw QA Testing + +Use this skill for `qa-lab` / `qa-channel` work. Repo-local QA only. + +## Read first + +- `docs/concepts/qa-e2e-automation.md` +- `docs/help/testing.md` +- `docs/channels/qa-channel.md` +- `qa/QA_KICKOFF_TASK.md` +- `qa/seed-scenarios.json` +- `extensions/qa-lab/src/suite.ts` + +## Model policy + +- Live OpenAI lane: `openai/gpt-5.4` +- Fast mode: on +- Do not use: + - `openai/gpt-5.4-pro` + - `openai/gpt-5.4-mini` +- Only change model policy if the user explicitly asks. + +## Default workflow + +1. Read the seed plan and current suite implementation. +2. Decide lane: + - mock/dev: `mock-openai` + - real validation: `live-openai` +3. For live OpenAI, use: + +```bash +OPENCLAW_LIVE_OPENAI_KEY="${OPENAI_API_KEY}" \ +pnpm openclaw qa suite \ + --provider-mode live-openai \ + --model openai/gpt-5.4 \ + --alt-model openai/gpt-5.4 \ + --fast \ + --output-dir .artifacts/qa-e2e/run-all-live-openai-<timestamp> +``` + +4. Watch outputs: + - summary: `.artifacts/qa-e2e/run-all-live-openai-<timestamp>/qa-suite-summary.json` + - report: `.artifacts/qa-e2e/run-all-live-openai-<timestamp>/qa-suite-report.md` +5. If the user wants to watch the live UI, find the current `openclaw-qa` listen port and report `http://127.0.0.1:<port>`. +6. 
If a scenario fails, fix the product or harness root cause, then rerun the full lane. + +## Repo facts + +- Seed scenarios live in `qa/`. +- Main live runner: `extensions/qa-lab/src/suite.ts` +- QA lab server: `extensions/qa-lab/src/lab-server.ts` +- Child gateway harness: `extensions/qa-lab/src/gateway-child.ts` +- Synthetic channel: `extensions/qa-channel/` + +## What “done” looks like + +- Full suite green for the requested lane. +- User gets: + - watch URL if applicable + - pass/fail counts + - artifact paths + - concise note on what was fixed + +## Common failure patterns + +- Live timeout too short: + - widen live waits in `extensions/qa-lab/src/suite.ts` +- Discovery cannot find repo files: + - point prompts at `repo/...` inside seeded workspace +- Subagent proof too brittle: + - prefer stable final reply evidence over transient child-session listing +- Harness “rebuild” delay: + - dirty tree can trigger a pre-run build; expect that before ports appear + +## When adding scenarios + +- Add scenario metadata to `qa/seed-scenarios.json` +- Keep kickoff expectations in `qa/QA_KICKOFF_TASK.md` aligned +- Add executable coverage in `extensions/qa-lab/src/suite.ts` +- Prefer end-to-end assertions over mock-only checks +- Save outputs under `.artifacts/qa-e2e/` diff --git a/.agents/skills/openclaw-qa-testing/agents/openai.yaml b/.agents/skills/openclaw-qa-testing/agents/openai.yaml new file mode 100644 index 00000000000..11323cce276 --- /dev/null +++ b/.agents/skills/openclaw-qa-testing/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "QA Test OpenClaw" + short_description: "Run and debug qa-lab and qa-channel scenarios" + default_prompt: "Use $openclaw-qa-testing to run or extend the OpenClaw QA suite with qa-lab and qa-channel, using regular openai/gpt-5.4 in fast mode for live OpenAI runs." 
diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts index 8ef23858e03..a8c0eec6354 100644 --- a/extensions/qa-lab/src/cli.runtime.ts +++ b/extensions/qa-lab/src/cli.runtime.ts @@ -16,10 +16,21 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) { } } -export async function runQaSuiteCommand(opts: { outputDir?: string }) { +export async function runQaSuiteCommand(opts: { + outputDir?: string; + providerMode?: "mock-openai" | "live-openai"; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; +}) { const result = await runQaSuite({ outputDir: opts.outputDir ? path.resolve(opts.outputDir) : undefined, + providerMode: opts.providerMode, + primaryModel: opts.primaryModel, + alternateModel: opts.alternateModel, + fastMode: opts.fastMode, }); + process.stdout.write(`QA suite watch: ${result.watchUrl}\n`); process.stdout.write(`QA suite report: ${result.reportPath}\n`); process.stdout.write(`QA suite summary: ${result.summaryPath}\n`); } diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts index 493e20e147c..d83dd9cfc7f 100644 --- a/extensions/qa-lab/src/cli.ts +++ b/extensions/qa-lab/src/cli.ts @@ -14,7 +14,13 @@ async function runQaSelfCheck(opts: { output?: string }) { await runtime.runQaLabSelfCheckCommand(opts); } -async function runQaSuite(opts: { outputDir?: string }) { +async function runQaSuite(opts: { + outputDir?: string; + providerMode?: "mock-openai" | "live-openai"; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; +}) { const runtime = await loadQaLabCliRuntime(); await runtime.runQaSuiteCommand(opts); } @@ -71,9 +77,27 @@ export function registerQaLabCli(program: Command) { qa.command("suite") .description("Run all repo-backed QA scenarios against the real QA gateway lane") .option("--output-dir ", "Suite artifact directory") - .action(async (opts: { outputDir?: string }) => { - await runQaSuite(opts); - }); + 
.option("--provider-mode ", "Provider mode: mock-openai or live-openai", "mock-openai") + .option("--model ", "Primary provider/model ref") + .option("--alt-model ", "Alternate provider/model ref") + .option("--fast", "Enable provider fast mode where supported", false) + .action( + async (opts: { + outputDir?: string; + providerMode?: "mock-openai" | "live-openai"; + model?: string; + altModel?: string; + fast?: boolean; + }) => { + await runQaSuite({ + outputDir: opts.outputDir, + providerMode: opts.providerMode, + primaryModel: opts.model, + alternateModel: opts.altModel, + fastMode: opts.fast, + }); + }, + ); qa.command("ui") .description("Start the private QA debugger UI and local QA bus") diff --git a/extensions/qa-lab/src/gateway-child.ts b/extensions/qa-lab/src/gateway-child.ts index ef133ce60bc..6789cf7ae17 100644 --- a/extensions/qa-lab/src/gateway-child.ts +++ b/extensions/qa-lab/src/gateway-child.ts @@ -28,7 +28,7 @@ async function waitForGatewayReady(baseUrl: string, logs: () => string, timeoutM const startedAt = Date.now(); while (Date.now() - startedAt < timeoutMs) { try { - const response = await fetch(`${baseUrl}/readyz`); + const response = await fetch(`${baseUrl}/healthz`); if (response.ok) { return; } @@ -70,8 +70,13 @@ async function runCliJson(params: { cwd: string; env: NodeJS.ProcessEnv; args: s export async function startQaGatewayChild(params: { repoRoot: string; - providerBaseUrl: string; + providerBaseUrl?: string; qaBusBaseUrl: string; + providerMode?: "mock-openai" | "live-openai"; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; + controlUiEnabled?: boolean; }) { const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-suite-")); const workspaceDir = path.join(tempRoot, "workspace"); @@ -101,6 +106,11 @@ export async function startQaGatewayChild(params: { providerBaseUrl: params.providerBaseUrl, qaBusBaseUrl: params.qaBusBaseUrl, workspaceDir, + providerMode: params.providerMode, + primaryModel: 
params.primaryModel, + alternateModel: params.alternateModel, + fastMode: params.fastMode, + controlUiEnabled: params.controlUiEnabled, }); await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8"); @@ -149,6 +159,7 @@ export async function startQaGatewayChild(params: { const wsUrl = `ws://127.0.0.1:${gatewayPort}`; const logs = () => `${Buffer.concat(stdout).toString("utf8")}\n${Buffer.concat(stderr).toString("utf8")}`.trim(); + const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1"; try { await waitForGatewayReady(baseUrl, logs); @@ -190,9 +201,12 @@ export async function startQaGatewayChild(params: { "--params", JSON.stringify(rpcParams ?? {}), ], + }).catch((error) => { + const details = error instanceof Error ? error.message : String(error); + throw new Error(`${details}\nGateway logs:\n${logs()}`); }); }, - async stop() { + async stop(opts?: { keepTemp?: boolean }) { if (!child.killed) { child.kill("SIGTERM"); await Promise.race([ @@ -204,7 +218,9 @@ export async function startQaGatewayChild(params: { }), ]); } - await fs.rm(tempRoot, { recursive: true, force: true }); + if (!(opts?.keepTemp ?? 
keepTemp)) { + await fs.rm(tempRoot, { recursive: true, force: true }); + } }, }; } diff --git a/extensions/qa-lab/src/lab-server.test.ts b/extensions/qa-lab/src/lab-server.test.ts index 35454bf4bc9..d4261075864 100644 --- a/extensions/qa-lab/src/lab-server.test.ts +++ b/extensions/qa-lab/src/lab-server.test.ts @@ -222,4 +222,71 @@ describe("qa-lab server", () => { }; expect(snapshot.messages.filter((message) => message.direction === "outbound")).toHaveLength(0); }); + + it("exposes structured outcomes and can attach control-ui after startup", async () => { + const lab = await startQaLabServer({ + host: "127.0.0.1", + port: 0, + embeddedGateway: "disabled", + }); + cleanups.push(async () => { + await lab.stop(); + }); + + const initialOutcomes = (await (await fetch(`${lab.baseUrl}/api/outcomes`)).json()) as { + run: null | unknown; + }; + expect(initialOutcomes.run).toBeNull(); + + lab.setScenarioRun({ + kind: "suite", + status: "running", + startedAt: "2026-04-06T09:00:00.000Z", + scenarios: [ + { + id: "channel-chat-baseline", + name: "Channel baseline conversation", + status: "pass", + steps: [{ name: "reply check", status: "pass", details: "ok" }], + finishedAt: "2026-04-06T09:00:01.000Z", + }, + { + id: "cron-one-minute-ping", + name: "Cron one-minute ping", + status: "running", + startedAt: "2026-04-06T09:00:02.000Z", + }, + ], + }); + lab.setControlUi({ + controlUiUrl: "http://127.0.0.1:18789/", + controlUiToken: "late-token", + }); + + const bootstrap = (await (await fetch(`${lab.baseUrl}/api/bootstrap`)).json()) as { + controlUiEmbeddedUrl: string | null; + }; + expect(bootstrap.controlUiEmbeddedUrl).toBe("http://127.0.0.1:18789/#token=late-token"); + + const outcomes = (await (await fetch(`${lab.baseUrl}/api/outcomes`)).json()) as { + run: { + status: string; + counts: { total: number; passed: number; running: number }; + scenarios: Array<{ id: string; status: string }>; + }; + }; + expect(outcomes.run.status).toBe("running"); + 
expect(outcomes.run.counts).toEqual({ + total: 2, + pending: 0, + running: 1, + passed: 1, + failed: 0, + skipped: 0, + }); + expect(outcomes.run.scenarios.map((scenario) => scenario.id)).toEqual([ + "channel-chat-baseline", + "cron-one-minute-ping", + ]); + }); }); diff --git a/extensions/qa-lab/src/lab-server.ts b/extensions/qa-lab/src/lab-server.ts index 1ec8fe602ee..ddd79b1e735 100644 --- a/extensions/qa-lab/src/lab-server.ts +++ b/extensions/qa-lab/src/lab-server.ts @@ -31,6 +31,58 @@ type QaLabBootstrapDefaults = { senderName: string; }; +type QaLabRunStatus = "idle" | "running" | "completed"; + +type QaLabScenarioStep = { + name: string; + status: "pass" | "fail" | "skip"; + details?: string; +}; + +export type QaLabScenarioOutcome = { + id: string; + name: string; + status: "pending" | "running" | "pass" | "fail" | "skip"; + details?: string; + steps?: QaLabScenarioStep[]; + startedAt?: string; + finishedAt?: string; +}; + +export type QaLabScenarioRun = { + kind: "suite" | "self-check"; + status: QaLabRunStatus; + startedAt?: string; + finishedAt?: string; + scenarios: QaLabScenarioOutcome[]; + counts: { + total: number; + pending: number; + running: number; + passed: number; + failed: number; + skipped: number; + }; +}; + +function countQaLabScenarioRun(scenarios: QaLabScenarioOutcome[]) { + return { + total: scenarios.length, + pending: scenarios.filter((scenario) => scenario.status === "pending").length, + running: scenarios.filter((scenario) => scenario.status === "running").length, + passed: scenarios.filter((scenario) => scenario.status === "pass").length, + failed: scenarios.filter((scenario) => scenario.status === "fail").length, + skipped: scenarios.filter((scenario) => scenario.status === "skip").length, + }; +} + +function withQaLabRunCounts(run: Omit): QaLabScenarioRun { + return { + ...run, + counts: countQaLabScenarioRun(run.scenarios), + }; +} + function injectKickoffMessage(params: { state: QaBusState; defaults: QaLabBootstrapDefaults; @@ 
-361,11 +413,14 @@ export async function startQaLabServer(params?: { }) { const state = createQaBusState(); let latestReport: QaLabLatestReport | null = null; + let latestScenarioRun: QaLabScenarioRun | null = null; const scenarioCatalog = readQaBootstrapScenarioCatalog(); const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget); - const controlUiProxyTarget = params?.controlUiProxyTarget?.trim() + let controlUiProxyTarget = params?.controlUiProxyTarget?.trim() ? new URL(params.controlUiProxyTarget) : null; + let controlUiUrl = params?.controlUiUrl?.trim() || null; + let controlUiToken = params?.controlUiToken?.trim() || null; let gateway: | { cfg: OpenClawConfig; @@ -395,17 +450,17 @@ export async function startQaLabServer(params?: { } if (req.method === "GET" && url.pathname === "/api/bootstrap") { - const controlUiUrl = controlUiProxyTarget + const resolvedControlUiUrl = controlUiProxyTarget ? `${publicBaseUrl}/control-ui/` - : params?.controlUiUrl?.trim() || null; + : controlUiUrl; const controlUiEmbeddedUrl = - controlUiUrl && params?.controlUiToken - ? `${controlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(params.controlUiToken)}` - : controlUiUrl; + resolvedControlUiUrl && controlUiToken + ? 
`${resolvedControlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(controlUiToken)}` + : resolvedControlUiUrl; writeJson(res, 200, { baseUrl: publicBaseUrl, latestReport, - controlUiUrl, + controlUiUrl: resolvedControlUiUrl, controlUiEmbeddedUrl, kickoffTask: scenarioCatalog.kickoffTask, scenarios: scenarioCatalog.scenarios, @@ -425,6 +480,10 @@ export async function startQaLabServer(params?: { writeJson(res, 200, { report: latestReport }); return; } + if (req.method === "GET" && url.pathname === "/api/outcomes") { + writeJson(res, 200, { run: latestScenarioRun }); + return; + } if (req.method === "POST" && url.pathname === "/api/reset") { state.reset(); writeJson(res, 200, { ok: true }); @@ -448,11 +507,38 @@ export async function startQaLabServer(params?: { return; } if (req.method === "POST" && url.pathname === "/api/scenario/self-check") { + latestScenarioRun = withQaLabRunCounts({ + kind: "self-check", + status: "running", + startedAt: new Date().toISOString(), + scenarios: [ + { + id: "qa-self-check", + name: "Synthetic Slack-class roundtrip", + status: "running", + }, + ], + }); const result = await runQaSelfCheckAgainstState({ state, cfg: gateway?.cfg ?? 
createQaLabConfig(listenUrl), outputPath: params?.outputPath, }); + latestScenarioRun = withQaLabRunCounts({ + kind: "self-check", + status: "completed", + startedAt: latestScenarioRun.startedAt, + finishedAt: new Date().toISOString(), + scenarios: [ + { + id: "qa-self-check", + name: result.scenarioResult.name, + status: result.scenarioResult.status, + details: result.scenarioResult.details, + steps: result.scenarioResult.steps, + }, + ], + }); latestReport = { outputPath: result.outputPath, markdown: result.report, @@ -544,12 +630,53 @@ export async function startQaLabServer(params?: { baseUrl: publicBaseUrl, listenUrl, state, + setControlUi(next: { + controlUiUrl?: string | null; + controlUiToken?: string | null; + controlUiProxyTarget?: string | null; + }) { + controlUiUrl = next.controlUiUrl?.trim() || null; + controlUiToken = next.controlUiToken?.trim() || null; + controlUiProxyTarget = next.controlUiProxyTarget?.trim() + ? new URL(next.controlUiProxyTarget) + : null; + }, + setScenarioRun(next: Omit | null) { + latestScenarioRun = next ? withQaLabRunCounts(next) : null; + }, async runSelfCheck() { + latestScenarioRun = withQaLabRunCounts({ + kind: "self-check", + status: "running", + startedAt: new Date().toISOString(), + scenarios: [ + { + id: "qa-self-check", + name: "Synthetic Slack-class roundtrip", + status: "running", + }, + ], + }); const result = await runQaSelfCheckAgainstState({ state, cfg: gateway?.cfg ?? 
createQaLabConfig(listenUrl), outputPath: params?.outputPath, }); + latestScenarioRun = withQaLabRunCounts({ + kind: "self-check", + status: "completed", + startedAt: latestScenarioRun.startedAt, + finishedAt: new Date().toISOString(), + scenarios: [ + { + id: "qa-self-check", + name: result.scenarioResult.name, + status: result.scenarioResult.status, + details: result.scenarioResult.details, + steps: result.scenarioResult.steps, + }, + ], + }); latestReport = { outputPath: result.outputPath, markdown: result.report, diff --git a/extensions/qa-lab/src/qa-agent-workspace.test.ts b/extensions/qa-lab/src/qa-agent-workspace.test.ts new file mode 100644 index 00000000000..6e78c02f265 --- /dev/null +++ b/extensions/qa-lab/src/qa-agent-workspace.test.ts @@ -0,0 +1,34 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { seedQaAgentWorkspace } from "./qa-agent-workspace.js"; + +const tempDirs: string[] = []; + +async function makeTempDir(prefix: string) { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(dir); + return dir; +} + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); +}); + +describe("seedQaAgentWorkspace", () => { + it("creates a repo symlink when a repo root is provided", async () => { + const workspaceDir = await makeTempDir("qa-workspace-"); + const repoRoot = await makeTempDir("qa-repo-"); + await fs.writeFile(path.join(repoRoot, "README.md"), "repo marker\n", "utf8"); + + await seedQaAgentWorkspace({ workspaceDir, repoRoot }); + + const repoLinkPath = path.join(workspaceDir, "repo"); + const stat = await fs.lstat(repoLinkPath); + expect(stat.isSymbolicLink()).toBe(true); + expect(await fs.readFile(path.join(repoLinkPath, "README.md"), "utf8")).toContain( + "repo marker", + ); + }); +}); diff --git a/extensions/qa-lab/src/qa-agent-workspace.ts 
b/extensions/qa-lab/src/qa-agent-workspace.ts index 67b81194126..f5c6dcbd5b6 100644 --- a/extensions/qa-lab/src/qa-agent-workspace.ts +++ b/extensions/qa-lab/src/qa-agent-workspace.ts @@ -34,4 +34,10 @@ The mounted repo source should be available read-only under \`./repo/\`. await fs.writeFile(path.join(params.workspaceDir, name), `${body.trim()}\n`, "utf8"); }), ); + + if (params.repoRoot) { + const repoLinkPath = path.join(params.workspaceDir, "repo"); + await fs.rm(repoLinkPath, { force: true, recursive: true }); + await fs.symlink(params.repoRoot, repoLinkPath, "dir"); + } } diff --git a/extensions/qa-lab/src/qa-gateway-config.test.ts b/extensions/qa-lab/src/qa-gateway-config.test.ts new file mode 100644 index 00000000000..1893f220d5f --- /dev/null +++ b/extensions/qa-lab/src/qa-gateway-config.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it } from "vitest"; +import { buildQaGatewayConfig } from "./qa-gateway-config.js"; + +describe("buildQaGatewayConfig", () => { + it("keeps mock-openai as the default provider lane", () => { + const cfg = buildQaGatewayConfig({ + bind: "loopback", + gatewayPort: 18789, + gatewayToken: "token", + providerBaseUrl: "http://127.0.0.1:44080/v1", + qaBusBaseUrl: "http://127.0.0.1:43124", + workspaceDir: "/tmp/qa-workspace", + }); + + expect(cfg.agents?.defaults?.model?.primary).toBe("mock-openai/gpt-5.4"); + expect(cfg.models?.providers?.["mock-openai"]?.baseUrl).toBe("http://127.0.0.1:44080/v1"); + expect(cfg.plugins?.allow).toEqual(["memory-core", "qa-channel"]); + expect(cfg.plugins?.entries?.["memory-core"]).toEqual({ enabled: true }); + expect(cfg.plugins?.entries?.openai).toBeUndefined(); + }); + + it("uses built-in OpenAI provider wiring in live mode", () => { + const cfg = buildQaGatewayConfig({ + bind: "loopback", + gatewayPort: 18789, + gatewayToken: "token", + qaBusBaseUrl: "http://127.0.0.1:43124", + workspaceDir: "/tmp/qa-workspace", + providerMode: "live-openai", + fastMode: true, + primaryModel: 
"openai/gpt-5.4", + alternateModel: "openai/gpt-5.4", + }); + + expect(cfg.agents?.defaults?.model?.primary).toBe("openai/gpt-5.4"); + expect(cfg.agents?.list?.[0]?.model?.primary).toBe("openai/gpt-5.4"); + expect(cfg.models).toBeUndefined(); + expect(cfg.plugins?.allow).toEqual(["memory-core", "openai", "qa-channel"]); + expect(cfg.plugins?.entries?.openai).toEqual({ enabled: true }); + expect(cfg.agents?.defaults?.models?.["openai/gpt-5.4"]).toEqual({ + params: { transport: "sse", openaiWsWarmup: false, fastMode: true }, + }); + }); + + it("can disable control ui for suite-only gateway children", () => { + const cfg = buildQaGatewayConfig({ + bind: "loopback", + gatewayPort: 18789, + gatewayToken: "token", + qaBusBaseUrl: "http://127.0.0.1:43124", + workspaceDir: "/tmp/qa-workspace", + controlUiEnabled: false, + }); + + expect(cfg.gateway?.controlUi?.enabled).toBe(false); + expect(cfg.gateway?.controlUi).not.toHaveProperty("allowInsecureAuth"); + expect(cfg.gateway?.controlUi).not.toHaveProperty("allowedOrigins"); + }); +}); diff --git a/extensions/qa-lab/src/qa-gateway-config.ts b/extensions/qa-lab/src/qa-gateway-config.ts index 83cf35cd18f..c7717f57b50 100644 --- a/extensions/qa-lab/src/qa-gateway-config.ts +++ b/extensions/qa-lab/src/qa-gateway-config.ts @@ -26,12 +26,39 @@ export function buildQaGatewayConfig(params: { bind: "loopback" | "lan"; gatewayPort: number; gatewayToken: string; - providerBaseUrl: string; + providerBaseUrl?: string; qaBusBaseUrl: string; workspaceDir: string; controlUiRoot?: string; controlUiAllowedOrigins?: string[]; + controlUiEnabled?: boolean; + providerMode?: "mock-openai" | "live-openai"; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; }): OpenClawConfig { + const providerMode = params.providerMode ?? "mock-openai"; + const allowedPlugins = + providerMode === "live-openai" + ? ["memory-core", "openai", "qa-channel"] + : ["memory-core", "qa-channel"]; + const primaryModel = + params.primaryModel ?? 
+ (providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4"); + const alternateModel = + params.alternateModel ?? + (providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt"); + const liveModelParams = + providerMode === "live-openai" + ? { + transport: "sse", + openaiWsWarmup: false, + ...(params.fastMode ? { fastMode: true } : {}), + } + : { + transport: "sse", + openaiWsWarmup: false, + }; const allowedOrigins = params.controlUiAllowedOrigins && params.controlUiAllowedOrigins.length > 0 ? params.controlUiAllowedOrigins @@ -44,30 +71,35 @@ export function buildQaGatewayConfig(params: { return { plugins: { + allow: allowedPlugins, entries: { acpx: { enabled: false, }, + "memory-core": { + enabled: true, + }, + ...(providerMode === "live-openai" + ? { + openai: { + enabled: true, + }, + } + : {}), }, }, agents: { defaults: { workspace: params.workspaceDir, model: { - primary: "mock-openai/gpt-5.4", + primary: primaryModel, }, models: { - "mock-openai/gpt-5.4": { - params: { - transport: "sse", - openaiWsWarmup: false, - }, + [primaryModel]: { + params: liveModelParams, }, - "mock-openai/gpt-5.4-alt": { - params: { - transport: "sse", - openaiWsWarmup: false, - }, + [alternateModel]: { + params: liveModelParams, }, }, subagents: { @@ -80,7 +112,7 @@ export function buildQaGatewayConfig(params: { id: "qa", default: true, model: { - primary: "mock-openai/gpt-5.4", + primary: primaryModel, }, identity: { name: "C-3PO QA", @@ -94,48 +126,52 @@ export function buildQaGatewayConfig(params: { }, ], }, - models: { - mode: "replace", - providers: { - "mock-openai": { - baseUrl: params.providerBaseUrl, - apiKey: "test", - api: "openai-responses", - models: [ - { - id: "gpt-5.4", - name: "gpt-5.4", - api: "openai-responses", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, + ...(providerMode === "mock-openai" + ? 
{ + models: { + mode: "replace", + providers: { + "mock-openai": { + baseUrl: params.providerBaseUrl, + apiKey: "test", + api: "openai-responses", + models: [ + { + id: "gpt-5.4", + name: "gpt-5.4", + api: "openai-responses", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128_000, + maxTokens: 4096, + }, + { + id: "gpt-5.4-alt", + name: "gpt-5.4-alt", + api: "openai-responses", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128_000, + maxTokens: 4096, + }, + ], }, - contextWindow: 128_000, - maxTokens: 4096, }, - { - id: "gpt-5.4-alt", - name: "gpt-5.4-alt", - api: "openai-responses", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128_000, - maxTokens: 4096, - }, - ], - }, - }, - }, + }, + } + : {}), gateway: { mode: "local", bind: params.bind, @@ -145,10 +181,16 @@ export function buildQaGatewayConfig(params: { token: params.gatewayToken, }, controlUi: { - enabled: true, - ...(params.controlUiRoot ? { root: params.controlUiRoot } : {}), - allowInsecureAuth: true, - allowedOrigins, + enabled: params.controlUiEnabled ?? true, + ...((params.controlUiEnabled ?? true) && params.controlUiRoot + ? { root: params.controlUiRoot } + : {}), + ...((params.controlUiEnabled ?? true) + ? 
{ + allowInsecureAuth: true, + allowedOrigins, + } + : {}), }, }, discovery: { diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index 698f317d9d1..31323df6449 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -7,6 +7,7 @@ import type { QaBusState } from "./bus-state.js"; import { extractQaToolPayload } from "./extract-tool-payload.js"; import { startQaGatewayChild } from "./gateway-child.js"; import { startQaLabServer } from "./lab-server.js"; +import type { QaLabScenarioOutcome } from "./lab-server.js"; import { startQaMockOpenAiServer } from "./mock-openai-server.js"; import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js"; import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js"; @@ -26,17 +27,56 @@ type QaSuiteScenarioResult = { type QaSuiteEnvironment = { lab: Awaited>; - mock: Awaited>; + mock: Awaited> | null; gateway: Awaited>; cfg: OpenClawConfig; + providerMode: "mock-openai" | "live-openai"; + primaryModel: string; + alternateModel: string; }; +function splitModelRef(ref: string) { + const slash = ref.indexOf("/"); + if (slash <= 0 || slash === ref.length - 1) { + return null; + } + return { + provider: ref.slice(0, slash), + model: ref.slice(slash + 1), + }; +} + +function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) { + return env.providerMode === "live-openai" ? 
Math.max(fallbackMs, 120_000) : fallbackMs; +} + +function hasDiscoveryLabels(text: string) { + const lower = text.toLowerCase(); + return ( + lower.includes("worked") && + lower.includes("failed") && + lower.includes("blocked") && + (lower.includes("follow-up") || lower.includes("follow up")) + ); +} + +function reportsMissingDiscoveryFiles(text: string) { + const lower = text.toLowerCase(); + return ( + lower.includes("not present") || + lower.includes("missing files") || + lower.includes("blocked by missing") || + lower.includes("could not inspect") + ); +} + export type QaSuiteResult = { outputDir: string; reportPath: string; summaryPath: string; report: string; scenarios: QaSuiteScenarioResult[]; + watchUrl: string; }; function createQaActionConfig(baseUrl: string): OpenClawConfig { @@ -245,6 +285,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { const message = await waitForOutboundMessage( state, (candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId, + env.providerMode === "live-openai" ? 45_000 : 15_000, ); return message.text; }, @@ -260,6 +301,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { run: async () => { await reset(); const at = new Date(Date.now() + 60_000).toISOString(); + const cronMarker = `QA-CRON-${randomUUID().slice(0, 8)}`; const response = (await env.gateway.call("cron.add", { name: `qa-suite-${randomUUID()}`, enabled: true, @@ -268,8 +310,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { wakeMode: "next-heartbeat", payload: { kind: "agentTurn", - message: - "A QA cron just fired. Send a one-line ping back to the room so the operator can verify delivery.", + message: `A QA cron just fired. 
Send a one-line ping back to the room containing this exact marker: ${cronMarker}`, }, delivery: { mode: "announce", @@ -284,6 +325,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) { } (globalThis as typeof globalThis & { __qaCronJobId?: string }).__qaCronJobId = response.id; + (globalThis as typeof globalThis & { __qaCronMarker?: string }).__qaCronMarker = + cronMarker; return scheduledAt; }, }, @@ -292,9 +335,14 @@ function buildScenarioMap(env: QaSuiteEnvironment) { run: async () => { const jobId = (globalThis as typeof globalThis & { __qaCronJobId?: string }) .__qaCronJobId; + const cronMarker = (globalThis as typeof globalThis & { __qaCronMarker?: string }) + .__qaCronMarker; if (!jobId) { throw new Error("missing cron job id"); } + if (!cronMarker) { + throw new Error("missing cron marker"); + } await env.gateway.call( "cron.run", { id: jobId, mode: "force" }, @@ -302,8 +350,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) { ); const outbound = await waitForOutboundMessage( state, - (candidate) => candidate.conversation.id === "qa-room", - 30_000, + (candidate) => + candidate.conversation.id === "qa-room" && candidate.text.includes(cronMarker), + liveTurnTimeoutMs(env, 30_000), ); return outbound.text; }, @@ -345,6 +394,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { sessionKey: "agent:qa:lobster-invaders", message: "Read the QA kickoff context first, then build a tiny Lobster Invaders HTML game in this workspace and tell me where it is.", + timeoutMs: liveTurnTimeoutMs(env, 30_000), }); await waitForOutboundMessage( state, @@ -355,11 +405,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) { if (!artifact.includes("Lobster Invaders")) { throw new Error("missing Lobster Invaders artifact"); } - const requests = await fetchJson>( - `${env.mock.baseUrl}/debug/requests`, - ); - if (!requests.some((request) => (request.toolOutput ?? 
"").includes("QA mission"))) { - throw new Error("expected pre-write read evidence"); + if (env.mock) { + const requests = await fetchJson>( + `${env.mock.baseUrl}/debug/requests`, + ); + if ( + !requests.some((request) => (request.toolOutput ?? "").includes("QA mission")) + ) { + throw new Error("expected pre-write read evidence"); + } } return "lobster-invaders.html"; }, @@ -421,25 +475,31 @@ function buildScenarioMap(env: QaSuiteEnvironment) { await runAgentPrompt(env, { sessionKey: "agent:qa:model-switch", message: "Say hello from the default configured model.", + timeoutMs: liveTurnTimeoutMs(env, 30_000), }); - await waitForOutboundMessage( + const outbound = await waitForOutboundMessage( state, (candidate) => candidate.conversation.id === "qa-operator", ); - const request = await fetchJson<{ body?: { model?: string } }>( - `${env.mock.baseUrl}/debug/last-request`, - ); - return String(request.body?.model ?? ""); + if (env.mock) { + const request = await fetchJson<{ body?: { model?: string } }>( + `${env.mock.baseUrl}/debug/last-request`, + ); + return String(request.body?.model ?? 
""); + } + return outbound.text; }, }, { name: "switches to the alternate model and continues", run: async () => { + const alternate = splitModelRef(env.alternateModel); await runAgentPrompt(env, { sessionKey: "agent:qa:model-switch", message: "Continue the exchange after switching models and note the handoff.", - provider: "mock-openai", - model: "gpt-5.4-alt", + provider: alternate?.provider, + model: alternate?.model, + timeoutMs: liveTurnTimeoutMs(env, 30_000), }); const outbound = await waitForCondition( () => @@ -449,16 +509,19 @@ function buildScenarioMap(env: QaSuiteEnvironment) { (candidate) => candidate.direction === "outbound" && candidate.conversation.id === "qa-operator" && - candidate.text.toLowerCase().includes("switch"), + (candidate.text.toLowerCase().includes("switch") || + candidate.text.toLowerCase().includes("handoff")), ) .at(-1), - 20_000, + liveTurnTimeoutMs(env, 20_000), ); - const request = await fetchJson<{ body?: { model?: string } }>( - `${env.mock.baseUrl}/debug/last-request`, - ); - if (request.body?.model !== "gpt-5.4-alt") { - throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`); + if (env.mock) { + const request = await fetchJson<{ body?: { model?: string } }>( + `${env.mock.baseUrl}/debug/last-request`, + ); + if (request.body?.model !== "gpt-5.4-alt") { + throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`); + } } return outbound.text; }, @@ -516,7 +579,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) { await runAgentPrompt(env, { sessionKey: "agent:qa:discovery", message: - "Read the seeded docs and source plan, then report grouped into Worked, Failed, Blocked, and Follow-up.", + "Read the seeded docs and source plan. The full repo is mounted under ./repo/. Explicitly inspect repo/qa/seed-scenarios.json, repo/qa/QA_KICKOFF_TASK.md, repo/extensions/qa-lab/src/suite.ts, and repo/docs/help/testing.md, then report grouped into Worked, Failed, Blocked, and Follow-up. 
Mention at least two extra QA scenarios beyond the seed list.", + timeoutMs: liveTurnTimeoutMs(env, 30_000), }); const outbound = await waitForCondition( () => @@ -526,11 +590,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) { (candidate) => candidate.direction === "outbound" && candidate.conversation.id === "qa-operator" && - candidate.text.includes("Worked:"), + hasDiscoveryLabels(candidate.text), ) .at(-1), - 20_000, + liveTurnTimeoutMs(env, 20_000), + env.providerMode === "live-openai" ? 250 : 100, ); + if (reportsMissingDiscoveryFiles(outbound.text)) { + throw new Error(`discovery report still missed repo files: ${outbound.text}`); + } return outbound.text; }, }, @@ -547,8 +615,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) { await runAgentPrompt(env, { sessionKey: "agent:qa:subagent", message: - "Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.", - timeoutMs: 45_000, + "Delegate one bounded QA task to a subagent. Wait for the subagent to finish. Then reply with three labeled sections exactly once: Delegated task, Result, Evidence. Include the child result itself, not 'waiting'.", + timeoutMs: liveTurnTimeoutMs(env, 90_000), }); const outbound = await waitForCondition( () => @@ -558,29 +626,22 @@ function buildScenarioMap(env: QaSuiteEnvironment) { (candidate) => candidate.direction === "outbound" && candidate.conversation.id === "qa-operator" && - candidate.text.toLowerCase().includes("delegated"), + candidate.text.toLowerCase().includes("delegated task") && + candidate.text.toLowerCase().includes("result") && + candidate.text.toLowerCase().includes("evidence") && + !candidate.text.toLowerCase().includes("waiting"), ) .at(-1), - 45_000, + liveTurnTimeoutMs(env, 45_000), + env.providerMode === "live-openai" ? 
250 : 100, ); - const sessions = await waitForCondition( - async () => { - const listed = (await env.gateway.call("sessions.list", { - spawnedBy: "agent:qa:subagent", - })) as { - sessions?: Array<{ - key?: string; - parentSessionKey?: string; - spawnedBy?: string; - }>; - }; - return (listed.sessions ?? []).length > 0 ? listed : null; - }, - 20_000, - 250, - ); - if ((sessions.sessions ?? []).length === 0) { - throw new Error("expected spawned child session"); + const lower = outbound.text.toLowerCase(); + if ( + lower.includes("failed to delegate") || + lower.includes("could not delegate") || + lower.includes("subagent unavailable") + ) { + throw new Error(`subagent handoff reported failure: ${outbound.text}`); } return outbound.text; }, @@ -611,7 +672,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { conversation: { id: "qa-room", kind: "channel", title: "QA Room" }, senderId: "alice", senderName: "Alice", - text: "@openclaw continue this work inside the thread", + text: "@openclaw reply in one short sentence inside this thread only. Do not use ACP or any external runtime. Confirm you stayed in-thread.", threadId, threadTitle: "QA deep dive", }); @@ -619,6 +680,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { state, (candidate) => candidate.conversation.id === "qa-room" && candidate.threadId === threadId, + env.providerMode === "live-openai" ? 
45_000 : 15_000, ); const leaked = state .getSnapshot() @@ -631,6 +693,14 @@ function buildScenarioMap(env: QaSuiteEnvironment) { if (leaked) { throw new Error("thread reply leaked into root channel"); } + const lower = outbound.text.toLowerCase(); + if ( + lower.includes("acp backend") || + lower.includes("acpx") || + lower.includes("not configured") + ) { + throw new Error(`thread reply fell back to ACP error: ${outbound.text}`); + } return outbound.text; }, }, @@ -639,8 +709,22 @@ function buildScenarioMap(env: QaSuiteEnvironment) { ]); } -export async function runQaSuite(params?: { outputDir?: string }) { +export async function runQaSuite(params?: { + outputDir?: string; + providerMode?: "mock-openai" | "live-openai"; + primaryModel?: string; + alternateModel?: string; + fastMode?: boolean; +}) { const startedAt = new Date(); + const providerMode = params?.providerMode ?? "mock-openai"; + const fastMode = params?.fastMode ?? providerMode === "live-openai"; + const primaryModel = + params?.primaryModel ?? + (providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4"); + const alternateModel = + params?.alternateModel ?? + (providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt"); const outputDir = params?.outputDir ?? path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`); @@ -651,42 +735,120 @@ export async function runQaSuite(params?: { outputDir?: string }) { port: 0, embeddedGateway: "disabled", }); - const mock = await startQaMockOpenAiServer({ - host: "127.0.0.1", - port: 0, - }); + const mock = + providerMode === "mock-openai" + ? await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }) + : null; const gateway = await startQaGatewayChild({ repoRoot: process.cwd(), - providerBaseUrl: `${mock.baseUrl}/v1`, + providerBaseUrl: mock ? 
`${mock.baseUrl}/v1` : undefined, qaBusBaseUrl: lab.listenUrl, + providerMode, + primaryModel, + alternateModel, + fastMode, + controlUiEnabled: true, + }); + lab.setControlUi({ + controlUiProxyTarget: gateway.baseUrl, + controlUiToken: gateway.token, }); const env: QaSuiteEnvironment = { lab, mock, gateway, cfg: createQaActionConfig(lab.listenUrl), + providerMode, + primaryModel, + alternateModel, }; try { const catalog = readQaBootstrapScenarioCatalog(); const scenarioMap = buildScenarioMap(env); const scenarios: QaSuiteScenarioResult[] = []; + const liveScenarioOutcomes: QaLabScenarioOutcome[] = catalog.scenarios.map((scenario) => ({ + id: scenario.id, + name: scenario.title, + status: "pending", + })); - for (const scenario of catalog.scenarios) { + lab.setScenarioRun({ + kind: "suite", + status: "running", + startedAt: startedAt.toISOString(), + scenarios: liveScenarioOutcomes, + }); + + for (const [index, scenario] of catalog.scenarios.entries()) { const run = scenarioMap.get(scenario.id); if (!run) { - scenarios.push({ + const missingResult = { name: scenario.title, status: "fail", details: `no executable scenario registered for ${scenario.id}`, steps: [], + } satisfies QaSuiteScenarioResult; + scenarios.push(missingResult); + liveScenarioOutcomes[index] = { + id: scenario.id, + name: scenario.title, + status: "fail", + details: missingResult.details, + steps: [], + finishedAt: new Date().toISOString(), + }; + lab.setScenarioRun({ + kind: "suite", + status: "running", + startedAt: startedAt.toISOString(), + scenarios: [...liveScenarioOutcomes], }); continue; } - scenarios.push(await run()); + liveScenarioOutcomes[index] = { + id: scenario.id, + name: scenario.title, + status: "running", + startedAt: new Date().toISOString(), + }; + lab.setScenarioRun({ + kind: "suite", + status: "running", + startedAt: startedAt.toISOString(), + scenarios: [...liveScenarioOutcomes], + }); + + const result = await run(); + scenarios.push(result); + liveScenarioOutcomes[index] 
= { + id: scenario.id, + name: scenario.title, + status: result.status, + details: result.details, + steps: result.steps, + startedAt: liveScenarioOutcomes[index]?.startedAt, + finishedAt: new Date().toISOString(), + }; + lab.setScenarioRun({ + kind: "suite", + status: "running", + startedAt: startedAt.toISOString(), + scenarios: [...liveScenarioOutcomes], + }); } const finishedAt = new Date(); + lab.setScenarioRun({ + kind: "suite", + status: "completed", + startedAt: startedAt.toISOString(), + finishedAt: finishedAt.toISOString(), + scenarios: [...liveScenarioOutcomes], + }); const report = renderQaMarkdownReport({ title: "OpenClaw QA Scenario Suite", startedAt, @@ -699,7 +861,9 @@ export async function runQaSuite(params?: { outputDir?: string }) { steps: scenario.steps, })) satisfies QaReportScenario[], notes: [ - "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider.", + providerMode === "mock-openai" + ? "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider." + : `Runs against qa-channel + qa-lab bus + real gateway child + live OpenAI models (${primaryModel}, ${alternateModel})${fastMode ? " with fast mode enabled" : ""}.`, "Cron uses a one-minute schedule assertion plus forced execution for fast verification.", ], }); @@ -729,10 +893,14 @@ export async function runQaSuite(params?: { outputDir?: string }) { summaryPath, report, scenarios, + watchUrl: lab.baseUrl, } satisfies QaSuiteResult; } finally { - await gateway.stop(); - await mock.stop(); + const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1" || false; + await gateway.stop({ + keepTemp, + }); + await mock?.stop(); await lab.stop(); } }