mirror of https://github.com/openclaw/openclaw.git
feat(qa): add live suite runner and harness
This commit is contained in:
parent
4bb965e007
commit
508024ae3b
|
|
@ -0,0 +1,86 @@
|
|||
---
name: openclaw-qa-testing
description: Run, watch, debug, and extend OpenClaw QA testing with qa-lab and qa-channel. Use when Codex needs to execute the repo-backed QA suite, inspect live QA artifacts, debug failing scenarios, add new QA scenarios, or explain the OpenClaw QA workflow. Prefer the live OpenAI lane with regular openai/gpt-5.4 in fast mode; do not use gpt-5.4-pro or gpt-5.4-mini unless the user explicitly overrides that policy.
---
|
||||
|
||||
# OpenClaw QA Testing
|
||||
|
||||
Use this skill for `qa-lab` / `qa-channel` work. Repo-local QA only.
|
||||
|
||||
## Read first
|
||||
|
||||
- `docs/concepts/qa-e2e-automation.md`
|
||||
- `docs/help/testing.md`
|
||||
- `docs/channels/qa-channel.md`
|
||||
- `qa/QA_KICKOFF_TASK.md`
|
||||
- `qa/seed-scenarios.json`
|
||||
- `extensions/qa-lab/src/suite.ts`
|
||||
|
||||
## Model policy
|
||||
|
||||
- Live OpenAI lane: `openai/gpt-5.4`
|
||||
- Fast mode: on
|
||||
- Do not use:
  - `openai/gpt-5.4-pro`
  - `openai/gpt-5.4-mini`
|
||||
- Only change model policy if the user explicitly asks.
|
||||
|
||||
## Default workflow
|
||||
|
||||
1. Read the seed plan and current suite implementation.
|
||||
2. Decide lane:
   - mock/dev: `mock-openai`
   - real validation: `live-openai`
|
||||
3. For live OpenAI, use:
|
||||
|
||||
   ```bash
   OPENCLAW_LIVE_OPENAI_KEY="${OPENAI_API_KEY}" \
   pnpm openclaw qa suite \
     --provider-mode live-openai \
     --model openai/gpt-5.4 \
     --alt-model openai/gpt-5.4 \
     --fast \
     --output-dir .artifacts/qa-e2e/run-all-live-openai-<tag>
   ```
|
||||
|
||||
4. Watch outputs:
   - summary: `.artifacts/qa-e2e/run-all-live-openai-<tag>/qa-suite-summary.json`
   - report: `.artifacts/qa-e2e/run-all-live-openai-<tag>/qa-suite-report.md`
|
||||
5. If the user wants to watch the live UI, find the current `openclaw-qa` listen port and report `http://127.0.0.1:<port>`.
|
||||
6. If a scenario fails, fix the product or harness root cause, then rerun the full lane.
|
||||
|
||||
## Repo facts
|
||||
|
||||
- Seed scenarios live in `qa/`.
|
||||
- Main live runner: `extensions/qa-lab/src/suite.ts`
|
||||
- QA lab server: `extensions/qa-lab/src/lab-server.ts`
|
||||
- Child gateway harness: `extensions/qa-lab/src/gateway-child.ts`
|
||||
- Synthetic channel: `extensions/qa-channel/`
|
||||
|
||||
## What “done” looks like
|
||||
|
||||
- Full suite green for the requested lane.
|
||||
- User gets:
  - watch URL if applicable
  - pass/fail counts
  - artifact paths
  - concise note on what was fixed
|
||||
|
||||
## Common failure patterns
|
||||
|
||||
- Live timeout too short:
  - widen live waits in `extensions/qa-lab/src/suite.ts`
- Discovery cannot find repo files:
  - point prompts at `repo/...` inside seeded workspace
- Subagent proof too brittle:
  - prefer stable final reply evidence over transient child-session listing
- Harness “rebuild” delay:
  - dirty tree can trigger a pre-run build; expect that before ports appear
|
||||
|
||||
## When adding scenarios
|
||||
|
||||
- Add scenario metadata to `qa/seed-scenarios.json`
|
||||
- Keep kickoff expectations in `qa/QA_KICKOFF_TASK.md` aligned
|
||||
- Add executable coverage in `extensions/qa-lab/src/suite.ts`
|
||||
- Prefer end-to-end assertions over mock-only checks
|
||||
- Save outputs under `.artifacts/qa-e2e/`
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
interface:
  display_name: "QA Test OpenClaw"
  short_description: "Run and debug qa-lab and qa-channel scenarios"
  default_prompt: "Use $openclaw-qa-testing to run or extend the OpenClaw QA suite with qa-lab and qa-channel, using regular openai/gpt-5.4 in fast mode for live OpenAI runs."
|
||||
|
|
@ -16,10 +16,21 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the QA suite and prints the locations of its outputs.
 *
 * `outputDir`, when set to a non-empty string, is resolved to an absolute
 * path with `path.resolve`; otherwise `undefined` is passed through. All other
 * options are forwarded to `runQaSuite` unchanged.
 *
 * After the run completes, the watch URL, report path, and summary path taken
 * from the suite result are written to stdout, one per line.
 */
export async function runQaSuiteCommand(opts: {
  outputDir?: string;
  providerMode?: "mock-openai" | "live-openai";
  primaryModel?: string;
  alternateModel?: string;
  fastMode?: boolean;
}): Promise<void> {
  const result = await runQaSuite({
    outputDir: opts.outputDir ? path.resolve(opts.outputDir) : undefined,
    providerMode: opts.providerMode,
    primaryModel: opts.primaryModel,
    alternateModel: opts.alternateModel,
    fastMode: opts.fastMode,
  });
  process.stdout.write(`QA suite watch: ${result.watchUrl}\n`);
  process.stdout.write(`QA suite report: ${result.reportPath}\n`);
  process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,13 @@ async function runQaSelfCheck(opts: { output?: string }) {
|
|||
await runtime.runQaLabSelfCheckCommand(opts);
|
||||
}
|
||||
|
||||
/**
 * CLI-side wrapper for the `qa suite` command.
 *
 * Obtains the QA lab CLI runtime via `loadQaLabCliRuntime()` and hands the
 * options to its `runQaSuiteCommand` without modification.
 */
async function runQaSuite(opts: {
  outputDir?: string;
  providerMode?: "mock-openai" | "live-openai";
  primaryModel?: string;
  alternateModel?: string;
  fastMode?: boolean;
}): Promise<void> {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaSuiteCommand(opts);
}
|
||||
|
|
@ -71,9 +77,27 @@ export function registerQaLabCli(program: Command) {
|
|||
qa.command("suite")
|
||||
.description("Run all repo-backed QA scenarios against the real QA gateway lane")
|
||||
.option("--output-dir <path>", "Suite artifact directory")
|
||||
.action(async (opts: { outputDir?: string }) => {
|
||||
await runQaSuite(opts);
|
||||
});
|
||||
.option("--provider-mode <mode>", "Provider mode: mock-openai or live-openai", "mock-openai")
|
||||
.option("--model <ref>", "Primary provider/model ref")
|
||||
.option("--alt-model <ref>", "Alternate provider/model ref")
|
||||
.option("--fast", "Enable provider fast mode where supported", false)
|
||||
.action(
|
||||
async (opts: {
|
||||
outputDir?: string;
|
||||
providerMode?: "mock-openai" | "live-openai";
|
||||
model?: string;
|
||||
altModel?: string;
|
||||
fast?: boolean;
|
||||
}) => {
|
||||
await runQaSuite({
|
||||
outputDir: opts.outputDir,
|
||||
providerMode: opts.providerMode,
|
||||
primaryModel: opts.model,
|
||||
alternateModel: opts.altModel,
|
||||
fastMode: opts.fast,
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
qa.command("ui")
|
||||
.description("Start the private QA debugger UI and local QA bus")
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ async function waitForGatewayReady(baseUrl: string, logs: () => string, timeoutM
|
|||
const startedAt = Date.now();
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
try {
|
||||
const response = await fetch(`${baseUrl}/readyz`);
|
||||
const response = await fetch(`${baseUrl}/healthz`);
|
||||
if (response.ok) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -70,8 +70,13 @@ async function runCliJson(params: { cwd: string; env: NodeJS.ProcessEnv; args: s
|
|||
|
||||
export async function startQaGatewayChild(params: {
|
||||
repoRoot: string;
|
||||
providerBaseUrl: string;
|
||||
providerBaseUrl?: string;
|
||||
qaBusBaseUrl: string;
|
||||
providerMode?: "mock-openai" | "live-openai";
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
controlUiEnabled?: boolean;
|
||||
}) {
|
||||
const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-suite-"));
|
||||
const workspaceDir = path.join(tempRoot, "workspace");
|
||||
|
|
@ -101,6 +106,11 @@ export async function startQaGatewayChild(params: {
|
|||
providerBaseUrl: params.providerBaseUrl,
|
||||
qaBusBaseUrl: params.qaBusBaseUrl,
|
||||
workspaceDir,
|
||||
providerMode: params.providerMode,
|
||||
primaryModel: params.primaryModel,
|
||||
alternateModel: params.alternateModel,
|
||||
fastMode: params.fastMode,
|
||||
controlUiEnabled: params.controlUiEnabled,
|
||||
});
|
||||
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
|
||||
|
||||
|
|
@ -149,6 +159,7 @@ export async function startQaGatewayChild(params: {
|
|||
const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
|
||||
const logs = () =>
|
||||
`${Buffer.concat(stdout).toString("utf8")}\n${Buffer.concat(stderr).toString("utf8")}`.trim();
|
||||
const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1";
|
||||
|
||||
try {
|
||||
await waitForGatewayReady(baseUrl, logs);
|
||||
|
|
@ -190,9 +201,12 @@ export async function startQaGatewayChild(params: {
|
|||
"--params",
|
||||
JSON.stringify(rpcParams ?? {}),
|
||||
],
|
||||
}).catch((error) => {
|
||||
const details = error instanceof Error ? error.message : String(error);
|
||||
throw new Error(`${details}\nGateway logs:\n${logs()}`);
|
||||
});
|
||||
},
|
||||
async stop() {
|
||||
async stop(opts?: { keepTemp?: boolean }) {
|
||||
if (!child.killed) {
|
||||
child.kill("SIGTERM");
|
||||
await Promise.race([
|
||||
|
|
@ -204,7 +218,9 @@ export async function startQaGatewayChild(params: {
|
|||
}),
|
||||
]);
|
||||
}
|
||||
await fs.rm(tempRoot, { recursive: true, force: true });
|
||||
if (!(opts?.keepTemp ?? keepTemp)) {
|
||||
await fs.rm(tempRoot, { recursive: true, force: true });
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -222,4 +222,71 @@ describe("qa-lab server", () => {
|
|||
};
|
||||
expect(snapshot.messages.filter((message) => message.direction === "outbound")).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("exposes structured outcomes and can attach control-ui after startup", async () => {
|
||||
const lab = await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
embeddedGateway: "disabled",
|
||||
});
|
||||
cleanups.push(async () => {
|
||||
await lab.stop();
|
||||
});
|
||||
|
||||
const initialOutcomes = (await (await fetch(`${lab.baseUrl}/api/outcomes`)).json()) as {
|
||||
run: null | unknown;
|
||||
};
|
||||
expect(initialOutcomes.run).toBeNull();
|
||||
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "running",
|
||||
startedAt: "2026-04-06T09:00:00.000Z",
|
||||
scenarios: [
|
||||
{
|
||||
id: "channel-chat-baseline",
|
||||
name: "Channel baseline conversation",
|
||||
status: "pass",
|
||||
steps: [{ name: "reply check", status: "pass", details: "ok" }],
|
||||
finishedAt: "2026-04-06T09:00:01.000Z",
|
||||
},
|
||||
{
|
||||
id: "cron-one-minute-ping",
|
||||
name: "Cron one-minute ping",
|
||||
status: "running",
|
||||
startedAt: "2026-04-06T09:00:02.000Z",
|
||||
},
|
||||
],
|
||||
});
|
||||
lab.setControlUi({
|
||||
controlUiUrl: "http://127.0.0.1:18789/",
|
||||
controlUiToken: "late-token",
|
||||
});
|
||||
|
||||
const bootstrap = (await (await fetch(`${lab.baseUrl}/api/bootstrap`)).json()) as {
|
||||
controlUiEmbeddedUrl: string | null;
|
||||
};
|
||||
expect(bootstrap.controlUiEmbeddedUrl).toBe("http://127.0.0.1:18789/#token=late-token");
|
||||
|
||||
const outcomes = (await (await fetch(`${lab.baseUrl}/api/outcomes`)).json()) as {
|
||||
run: {
|
||||
status: string;
|
||||
counts: { total: number; passed: number; running: number };
|
||||
scenarios: Array<{ id: string; status: string }>;
|
||||
};
|
||||
};
|
||||
expect(outcomes.run.status).toBe("running");
|
||||
expect(outcomes.run.counts).toEqual({
|
||||
total: 2,
|
||||
pending: 0,
|
||||
running: 1,
|
||||
passed: 1,
|
||||
failed: 0,
|
||||
skipped: 0,
|
||||
});
|
||||
expect(outcomes.run.scenarios.map((scenario) => scenario.id)).toEqual([
|
||||
"channel-chat-baseline",
|
||||
"cron-one-minute-ping",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -31,6 +31,58 @@ type QaLabBootstrapDefaults = {
|
|||
senderName: string;
|
||||
};
|
||||
|
||||
/** Lifecycle state of a whole scenario run. */
type QaLabRunStatus = "idle" | "running" | "completed";

/** Result of a single step inside a scenario. */
type QaLabScenarioStep = {
  name: string;
  status: "pass" | "fail" | "skip";
  details?: string;
};

/** Outcome of one scenario, including optional per-step results and timestamps. */
export type QaLabScenarioOutcome = {
  id: string;
  name: string;
  status: "pending" | "running" | "pass" | "fail" | "skip";
  details?: string;
  steps?: QaLabScenarioStep[];
  startedAt?: string;
  finishedAt?: string;
};

/** A scenario run together with per-status tallies derived from `scenarios`. */
export type QaLabScenarioRun = {
  kind: "suite" | "self-check";
  status: QaLabRunStatus;
  startedAt?: string;
  finishedAt?: string;
  scenarios: QaLabScenarioOutcome[];
  counts: {
    total: number;
    pending: number;
    running: number;
    passed: number;
    failed: number;
    skipped: number;
  };
};

/**
 * Tallies scenarios by status in a single pass.
 *
 * @param scenarios Scenario outcomes to count.
 * @returns `total` (the array length) plus one counter per status. A scenario
 *   whose status matches none of the known values contributes to `total` only.
 */
function countQaLabScenarioRun(scenarios: QaLabScenarioOutcome[]): QaLabScenarioRun["counts"] {
  const counts = {
    total: scenarios.length,
    pending: 0,
    running: 0,
    passed: 0,
    failed: 0,
    skipped: 0,
  };
  for (const { status } of scenarios) {
    switch (status) {
      case "pending":
        counts.pending += 1;
        break;
      case "running":
        counts.running += 1;
        break;
      case "pass":
        counts.passed += 1;
        break;
      case "fail":
        counts.failed += 1;
        break;
      case "skip":
        counts.skipped += 1;
        break;
    }
  }
  return counts;
}

/**
 * Returns a copy of `run` with `counts` computed from `run.scenarios`.
 *
 * @param run A run description without the derived `counts` field.
 */
function withQaLabRunCounts(run: Omit<QaLabScenarioRun, "counts">): QaLabScenarioRun {
  return {
    ...run,
    counts: countQaLabScenarioRun(run.scenarios),
  };
}
|
||||
|
||||
function injectKickoffMessage(params: {
|
||||
state: QaBusState;
|
||||
defaults: QaLabBootstrapDefaults;
|
||||
|
|
@ -361,11 +413,14 @@ export async function startQaLabServer(params?: {
|
|||
}) {
|
||||
const state = createQaBusState();
|
||||
let latestReport: QaLabLatestReport | null = null;
|
||||
let latestScenarioRun: QaLabScenarioRun | null = null;
|
||||
const scenarioCatalog = readQaBootstrapScenarioCatalog();
|
||||
const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget);
|
||||
const controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
|
||||
let controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
|
||||
? new URL(params.controlUiProxyTarget)
|
||||
: null;
|
||||
let controlUiUrl = params?.controlUiUrl?.trim() || null;
|
||||
let controlUiToken = params?.controlUiToken?.trim() || null;
|
||||
let gateway:
|
||||
| {
|
||||
cfg: OpenClawConfig;
|
||||
|
|
@ -395,17 +450,17 @@ export async function startQaLabServer(params?: {
|
|||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/api/bootstrap") {
|
||||
const controlUiUrl = controlUiProxyTarget
|
||||
const resolvedControlUiUrl = controlUiProxyTarget
|
||||
? `${publicBaseUrl}/control-ui/`
|
||||
: params?.controlUiUrl?.trim() || null;
|
||||
: controlUiUrl;
|
||||
const controlUiEmbeddedUrl =
|
||||
controlUiUrl && params?.controlUiToken
|
||||
? `${controlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(params.controlUiToken)}`
|
||||
: controlUiUrl;
|
||||
resolvedControlUiUrl && controlUiToken
|
||||
? `${resolvedControlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(controlUiToken)}`
|
||||
: resolvedControlUiUrl;
|
||||
writeJson(res, 200, {
|
||||
baseUrl: publicBaseUrl,
|
||||
latestReport,
|
||||
controlUiUrl,
|
||||
controlUiUrl: resolvedControlUiUrl,
|
||||
controlUiEmbeddedUrl,
|
||||
kickoffTask: scenarioCatalog.kickoffTask,
|
||||
scenarios: scenarioCatalog.scenarios,
|
||||
|
|
@ -425,6 +480,10 @@ export async function startQaLabServer(params?: {
|
|||
writeJson(res, 200, { report: latestReport });
|
||||
return;
|
||||
}
|
||||
if (req.method === "GET" && url.pathname === "/api/outcomes") {
|
||||
writeJson(res, 200, { run: latestScenarioRun });
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/reset") {
|
||||
state.reset();
|
||||
writeJson(res, 200, { ok: true });
|
||||
|
|
@ -448,11 +507,38 @@ export async function startQaLabServer(params?: {
|
|||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/scenario/self-check") {
|
||||
latestScenarioRun = withQaLabRunCounts({
|
||||
kind: "self-check",
|
||||
status: "running",
|
||||
startedAt: new Date().toISOString(),
|
||||
scenarios: [
|
||||
{
|
||||
id: "qa-self-check",
|
||||
name: "Synthetic Slack-class roundtrip",
|
||||
status: "running",
|
||||
},
|
||||
],
|
||||
});
|
||||
const result = await runQaSelfCheckAgainstState({
|
||||
state,
|
||||
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
|
||||
outputPath: params?.outputPath,
|
||||
});
|
||||
latestScenarioRun = withQaLabRunCounts({
|
||||
kind: "self-check",
|
||||
status: "completed",
|
||||
startedAt: latestScenarioRun.startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
scenarios: [
|
||||
{
|
||||
id: "qa-self-check",
|
||||
name: result.scenarioResult.name,
|
||||
status: result.scenarioResult.status,
|
||||
details: result.scenarioResult.details,
|
||||
steps: result.scenarioResult.steps,
|
||||
},
|
||||
],
|
||||
});
|
||||
latestReport = {
|
||||
outputPath: result.outputPath,
|
||||
markdown: result.report,
|
||||
|
|
@ -544,12 +630,53 @@ export async function startQaLabServer(params?: {
|
|||
baseUrl: publicBaseUrl,
|
||||
listenUrl,
|
||||
state,
|
||||
setControlUi(next: {
|
||||
controlUiUrl?: string | null;
|
||||
controlUiToken?: string | null;
|
||||
controlUiProxyTarget?: string | null;
|
||||
}) {
|
||||
controlUiUrl = next.controlUiUrl?.trim() || null;
|
||||
controlUiToken = next.controlUiToken?.trim() || null;
|
||||
controlUiProxyTarget = next.controlUiProxyTarget?.trim()
|
||||
? new URL(next.controlUiProxyTarget)
|
||||
: null;
|
||||
},
|
||||
setScenarioRun(next: Omit<QaLabScenarioRun, "counts"> | null) {
|
||||
latestScenarioRun = next ? withQaLabRunCounts(next) : null;
|
||||
},
|
||||
async runSelfCheck() {
|
||||
latestScenarioRun = withQaLabRunCounts({
|
||||
kind: "self-check",
|
||||
status: "running",
|
||||
startedAt: new Date().toISOString(),
|
||||
scenarios: [
|
||||
{
|
||||
id: "qa-self-check",
|
||||
name: "Synthetic Slack-class roundtrip",
|
||||
status: "running",
|
||||
},
|
||||
],
|
||||
});
|
||||
const result = await runQaSelfCheckAgainstState({
|
||||
state,
|
||||
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
|
||||
outputPath: params?.outputPath,
|
||||
});
|
||||
latestScenarioRun = withQaLabRunCounts({
|
||||
kind: "self-check",
|
||||
status: "completed",
|
||||
startedAt: latestScenarioRun.startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
scenarios: [
|
||||
{
|
||||
id: "qa-self-check",
|
||||
name: result.scenarioResult.name,
|
||||
status: result.scenarioResult.status,
|
||||
details: result.scenarioResult.details,
|
||||
steps: result.scenarioResult.steps,
|
||||
},
|
||||
],
|
||||
});
|
||||
latestReport = {
|
||||
outputPath: result.outputPath,
|
||||
markdown: result.report,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { seedQaAgentWorkspace } from "./qa-agent-workspace.js";
|
||||
|
||||
// Directories created by makeTempDir, remembered so they can be removed later.
const tempDirs: string[] = [];

/**
 * Creates a uniquely named directory under the OS temp dir and records it in
 * `tempDirs`.
 *
 * @param prefix Name prefix passed to `fs.mkdtemp`.
 * @returns The path of the directory that was created.
 */
async function makeTempDir(prefix: string): Promise<string> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix));
  tempDirs.push(dir);
  return dir;
}
|
||||
|
||||
// After every test, empty the tracking list and delete each recorded directory.
// splice(0) clears `tempDirs` in place, so a directory is only removed once.
afterEach(async () => {
  await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true })));
});
|
||||
|
||||
describe("seedQaAgentWorkspace", () => {
  // Seeds a workspace with a repo root and checks that `<workspace>/repo` is a
  // symlink through which the repo's files can be read.
  it("creates a repo symlink when a repo root is provided", async () => {
    const workspaceDir = await makeTempDir("qa-workspace-");
    const repoRoot = await makeTempDir("qa-repo-");
    await fs.writeFile(path.join(repoRoot, "README.md"), "repo marker\n", "utf8");

    await seedQaAgentWorkspace({ workspaceDir, repoRoot });

    const repoLinkPath = path.join(workspaceDir, "repo");
    // lstat (not stat) so the link itself is inspected rather than its target.
    const stat = await fs.lstat(repoLinkPath);
    expect(stat.isSymbolicLink()).toBe(true);
    expect(await fs.readFile(path.join(repoLinkPath, "README.md"), "utf8")).toContain(
      "repo marker",
    );
  });
});
|
||||
|
|
@ -34,4 +34,10 @@ The mounted repo source should be available read-only under \`./repo/\`.
|
|||
await fs.writeFile(path.join(params.workspaceDir, name), `${body.trim()}\n`, "utf8");
|
||||
}),
|
||||
);
|
||||
|
||||
if (params.repoRoot) {
|
||||
const repoLinkPath = path.join(params.workspaceDir, "repo");
|
||||
await fs.rm(repoLinkPath, { force: true, recursive: true });
|
||||
await fs.symlink(params.repoRoot, repoLinkPath, "dir");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
|
||||
|
||||
describe("buildQaGatewayConfig", () => {
  // With no providerMode given, the config should target the mock provider
  // and leave the OpenAI plugin out.
  it("keeps mock-openai as the default provider lane", () => {
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 18789,
      gatewayToken: "token",
      providerBaseUrl: "http://127.0.0.1:44080/v1",
      qaBusBaseUrl: "http://127.0.0.1:43124",
      workspaceDir: "/tmp/qa-workspace",
    });

    expect(cfg.agents?.defaults?.model?.primary).toBe("mock-openai/gpt-5.4");
    expect(cfg.models?.providers?.["mock-openai"]?.baseUrl).toBe("http://127.0.0.1:44080/v1");
    expect(cfg.plugins?.allow).toEqual(["memory-core", "qa-channel"]);
    expect(cfg.plugins?.entries?.["memory-core"]).toEqual({ enabled: true });
    expect(cfg.plugins?.entries?.openai).toBeUndefined();
  });

  // In live mode no custom `models` section is emitted; the OpenAI plugin is
  // enabled and fast mode shows up in the per-model params.
  it("uses built-in OpenAI provider wiring in live mode", () => {
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 18789,
      gatewayToken: "token",
      qaBusBaseUrl: "http://127.0.0.1:43124",
      workspaceDir: "/tmp/qa-workspace",
      providerMode: "live-openai",
      fastMode: true,
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
    });

    expect(cfg.agents?.defaults?.model?.primary).toBe("openai/gpt-5.4");
    expect(cfg.agents?.list?.[0]?.model?.primary).toBe("openai/gpt-5.4");
    expect(cfg.models).toBeUndefined();
    expect(cfg.plugins?.allow).toEqual(["memory-core", "openai", "qa-channel"]);
    expect(cfg.plugins?.entries?.openai).toEqual({ enabled: true });
    expect(cfg.agents?.defaults?.models?.["openai/gpt-5.4"]).toEqual({
      params: { transport: "sse", openaiWsWarmup: false, fastMode: true },
    });
  });

  // Disabling the control UI should also drop its auth/origin settings.
  it("can disable control ui for suite-only gateway children", () => {
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 18789,
      gatewayToken: "token",
      qaBusBaseUrl: "http://127.0.0.1:43124",
      workspaceDir: "/tmp/qa-workspace",
      controlUiEnabled: false,
    });

    expect(cfg.gateway?.controlUi?.enabled).toBe(false);
    expect(cfg.gateway?.controlUi).not.toHaveProperty("allowInsecureAuth");
    expect(cfg.gateway?.controlUi).not.toHaveProperty("allowedOrigins");
  });
});
|
||||
|
|
@ -26,12 +26,39 @@ export function buildQaGatewayConfig(params: {
|
|||
bind: "loopback" | "lan";
|
||||
gatewayPort: number;
|
||||
gatewayToken: string;
|
||||
providerBaseUrl: string;
|
||||
providerBaseUrl?: string;
|
||||
qaBusBaseUrl: string;
|
||||
workspaceDir: string;
|
||||
controlUiRoot?: string;
|
||||
controlUiAllowedOrigins?: string[];
|
||||
controlUiEnabled?: boolean;
|
||||
providerMode?: "mock-openai" | "live-openai";
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
}): OpenClawConfig {
|
||||
const providerMode = params.providerMode ?? "mock-openai";
|
||||
const allowedPlugins =
|
||||
providerMode === "live-openai"
|
||||
? ["memory-core", "openai", "qa-channel"]
|
||||
: ["memory-core", "qa-channel"];
|
||||
const primaryModel =
|
||||
params.primaryModel ??
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4");
|
||||
const alternateModel =
|
||||
params.alternateModel ??
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
|
||||
const liveModelParams =
|
||||
providerMode === "live-openai"
|
||||
? {
|
||||
transport: "sse",
|
||||
openaiWsWarmup: false,
|
||||
...(params.fastMode ? { fastMode: true } : {}),
|
||||
}
|
||||
: {
|
||||
transport: "sse",
|
||||
openaiWsWarmup: false,
|
||||
};
|
||||
const allowedOrigins =
|
||||
params.controlUiAllowedOrigins && params.controlUiAllowedOrigins.length > 0
|
||||
? params.controlUiAllowedOrigins
|
||||
|
|
@ -44,30 +71,35 @@ export function buildQaGatewayConfig(params: {
|
|||
|
||||
return {
|
||||
plugins: {
|
||||
allow: allowedPlugins,
|
||||
entries: {
|
||||
acpx: {
|
||||
enabled: false,
|
||||
},
|
||||
"memory-core": {
|
||||
enabled: true,
|
||||
},
|
||||
...(providerMode === "live-openai"
|
||||
? {
|
||||
openai: {
|
||||
enabled: true,
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
},
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: params.workspaceDir,
|
||||
model: {
|
||||
primary: "mock-openai/gpt-5.4",
|
||||
primary: primaryModel,
|
||||
},
|
||||
models: {
|
||||
"mock-openai/gpt-5.4": {
|
||||
params: {
|
||||
transport: "sse",
|
||||
openaiWsWarmup: false,
|
||||
},
|
||||
[primaryModel]: {
|
||||
params: liveModelParams,
|
||||
},
|
||||
"mock-openai/gpt-5.4-alt": {
|
||||
params: {
|
||||
transport: "sse",
|
||||
openaiWsWarmup: false,
|
||||
},
|
||||
[alternateModel]: {
|
||||
params: liveModelParams,
|
||||
},
|
||||
},
|
||||
subagents: {
|
||||
|
|
@ -80,7 +112,7 @@ export function buildQaGatewayConfig(params: {
|
|||
id: "qa",
|
||||
default: true,
|
||||
model: {
|
||||
primary: "mock-openai/gpt-5.4",
|
||||
primary: primaryModel,
|
||||
},
|
||||
identity: {
|
||||
name: "C-3PO QA",
|
||||
|
|
@ -94,48 +126,52 @@ export function buildQaGatewayConfig(params: {
|
|||
},
|
||||
],
|
||||
},
|
||||
models: {
|
||||
mode: "replace",
|
||||
providers: {
|
||||
"mock-openai": {
|
||||
baseUrl: params.providerBaseUrl,
|
||||
apiKey: "test",
|
||||
api: "openai-responses",
|
||||
models: [
|
||||
{
|
||||
id: "gpt-5.4",
|
||||
name: "gpt-5.4",
|
||||
api: "openai-responses",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
...(providerMode === "mock-openai"
|
||||
? {
|
||||
models: {
|
||||
mode: "replace",
|
||||
providers: {
|
||||
"mock-openai": {
|
||||
baseUrl: params.providerBaseUrl,
|
||||
apiKey: "test",
|
||||
api: "openai-responses",
|
||||
models: [
|
||||
{
|
||||
id: "gpt-5.4",
|
||||
name: "gpt-5.4",
|
||||
api: "openai-responses",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
{
|
||||
id: "gpt-5.4-alt",
|
||||
name: "gpt-5.4-alt",
|
||||
api: "openai-responses",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
],
|
||||
},
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
{
|
||||
id: "gpt-5.4-alt",
|
||||
name: "gpt-5.4-alt",
|
||||
api: "openai-responses",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
gateway: {
|
||||
mode: "local",
|
||||
bind: params.bind,
|
||||
|
|
@ -145,10 +181,16 @@ export function buildQaGatewayConfig(params: {
|
|||
token: params.gatewayToken,
|
||||
},
|
||||
controlUi: {
|
||||
enabled: true,
|
||||
...(params.controlUiRoot ? { root: params.controlUiRoot } : {}),
|
||||
allowInsecureAuth: true,
|
||||
allowedOrigins,
|
||||
enabled: params.controlUiEnabled ?? true,
|
||||
...((params.controlUiEnabled ?? true) && params.controlUiRoot
|
||||
? { root: params.controlUiRoot }
|
||||
: {}),
|
||||
...((params.controlUiEnabled ?? true)
|
||||
? {
|
||||
allowInsecureAuth: true,
|
||||
allowedOrigins,
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
},
|
||||
discovery: {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import type { QaBusState } from "./bus-state.js";
|
|||
import { extractQaToolPayload } from "./extract-tool-payload.js";
|
||||
import { startQaGatewayChild } from "./gateway-child.js";
|
||||
import { startQaLabServer } from "./lab-server.js";
|
||||
import type { QaLabScenarioOutcome } from "./lab-server.js";
|
||||
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
|
||||
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
|
||||
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
|
||||
|
|
@ -26,17 +27,56 @@ type QaSuiteScenarioResult = {
|
|||
|
||||
/** Handles and settings shared by the suite scenarios for one run. */
type QaSuiteEnvironment = {
  lab: Awaited<ReturnType<typeof startQaLabServer>>;
  // NOTE(review): presumably null when no mock provider server is started
  // (e.g. the live-openai lane) — confirm against the suite setup code.
  mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>> | null;
  gateway: Awaited<ReturnType<typeof startQaGatewayChild>>;
  cfg: OpenClawConfig;
  providerMode: "mock-openai" | "live-openai";
  primaryModel: string;
  alternateModel: string;
};
|
||||
|
||||
/**
 * Splits a `provider/model` reference at its first slash.
 *
 * @param ref Reference such as `openai/gpt-5.4`.
 * @returns The provider (text before the first `/`) and the model (everything
 *   after it, which may itself contain slashes), or `null` when there is no
 *   slash, the provider part is empty, or the first slash is the last character.
 */
function splitModelRef(ref: string): { provider: string; model: string } | null {
  const slash = ref.indexOf("/");
  // Reject a missing slash (-1), a leading slash (0), and a trailing slash.
  if (slash <= 0 || slash === ref.length - 1) {
    return null;
  }
  return {
    provider: ref.slice(0, slash),
    model: ref.slice(slash + 1),
  };
}
|
||||
|
||||
/**
 * Picks the wait timeout for a turn.
 *
 * @param env Suite environment; only `providerMode` is read.
 * @param fallbackMs Timeout to use for the non-live lane.
 * @returns `fallbackMs` unchanged outside `live-openai`; in `live-openai`, the
 *   larger of `fallbackMs` and 120 000 ms.
 */
function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) {
  if (env.providerMode !== "live-openai") {
    return fallbackMs;
  }
  return Math.max(fallbackMs, 120_000);
}
|
||||
|
||||
/**
 * Reports whether `text` mentions every expected discovery label.
 *
 * The match is a case-insensitive substring check: the text must contain
 * "worked", "failed", and "blocked", plus either "follow-up" or "follow up".
 */
function hasDiscoveryLabels(text: string) {
  const lower = text.toLowerCase();
  const requiredLabels = ["worked", "failed", "blocked"];
  const followUpSpellings = ["follow-up", "follow up"];
  return (
    requiredLabels.every((label) => lower.includes(label)) &&
    followUpSpellings.some((label) => lower.includes(label))
  );
}
|
||||
|
||||
/**
 * Reports whether `text` contains any phrase indicating files could not be
 * found or inspected.
 *
 * The match is a case-insensitive substring check against: "not present",
 * "missing files", "blocked by missing", and "could not inspect".
 */
function reportsMissingDiscoveryFiles(text: string) {
  const lower = text.toLowerCase();
  const missingFilePhrases = [
    "not present",
    "missing files",
    "blocked by missing",
    "could not inspect",
  ];
  return missingFilePhrases.some((phrase) => lower.includes(phrase));
}
|
||||
|
||||
/** Result returned from a QA suite run. */
export type QaSuiteResult = {
  outputDir: string;
  reportPath: string;
  summaryPath: string;
  // NOTE(review): presumably the rendered Markdown report text — confirm.
  report: string;
  scenarios: QaSuiteScenarioResult[];
  watchUrl: string;
};
|
||||
|
||||
function createQaActionConfig(baseUrl: string): OpenClawConfig {
|
||||
|
|
@ -245,6 +285,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
const message = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
|
||||
env.providerMode === "live-openai" ? 45_000 : 15_000,
|
||||
);
|
||||
return message.text;
|
||||
},
|
||||
|
|
@ -260,6 +301,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
run: async () => {
|
||||
await reset();
|
||||
const at = new Date(Date.now() + 60_000).toISOString();
|
||||
const cronMarker = `QA-CRON-${randomUUID().slice(0, 8)}`;
|
||||
const response = (await env.gateway.call("cron.add", {
|
||||
name: `qa-suite-${randomUUID()}`,
|
||||
enabled: true,
|
||||
|
|
@ -268,8 +310,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
wakeMode: "next-heartbeat",
|
||||
payload: {
|
||||
kind: "agentTurn",
|
||||
message:
|
||||
"A QA cron just fired. Send a one-line ping back to the room so the operator can verify delivery.",
|
||||
message: `A QA cron just fired. Send a one-line ping back to the room containing this exact marker: ${cronMarker}`,
|
||||
},
|
||||
delivery: {
|
||||
mode: "announce",
|
||||
|
|
@ -284,6 +325,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
}
|
||||
(globalThis as typeof globalThis & { __qaCronJobId?: string }).__qaCronJobId =
|
||||
response.id;
|
||||
(globalThis as typeof globalThis & { __qaCronMarker?: string }).__qaCronMarker =
|
||||
cronMarker;
|
||||
return scheduledAt;
|
||||
},
|
||||
},
|
||||
|
|
@ -292,9 +335,14 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
run: async () => {
|
||||
const jobId = (globalThis as typeof globalThis & { __qaCronJobId?: string })
|
||||
.__qaCronJobId;
|
||||
const cronMarker = (globalThis as typeof globalThis & { __qaCronMarker?: string })
|
||||
.__qaCronMarker;
|
||||
if (!jobId) {
|
||||
throw new Error("missing cron job id");
|
||||
}
|
||||
if (!cronMarker) {
|
||||
throw new Error("missing cron marker");
|
||||
}
|
||||
await env.gateway.call(
|
||||
"cron.run",
|
||||
{ id: jobId, mode: "force" },
|
||||
|
|
@ -302,8 +350,9 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
);
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-room",
|
||||
30_000,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-room" && candidate.text.includes(cronMarker),
|
||||
liveTurnTimeoutMs(env, 30_000),
|
||||
);
|
||||
return outbound.text;
|
||||
},
|
||||
|
|
@ -345,6 +394,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
sessionKey: "agent:qa:lobster-invaders",
|
||||
message:
|
||||
"Read the QA kickoff context first, then build a tiny Lobster Invaders HTML game in this workspace and tell me where it is.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
await waitForOutboundMessage(
|
||||
state,
|
||||
|
|
@ -355,11 +405,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
if (!artifact.includes("Lobster Invaders")) {
|
||||
throw new Error("missing Lobster Invaders artifact");
|
||||
}
|
||||
const requests = await fetchJson<Array<{ prompt?: string; toolOutput?: string }>>(
|
||||
`${env.mock.baseUrl}/debug/requests`,
|
||||
);
|
||||
if (!requests.some((request) => (request.toolOutput ?? "").includes("QA mission"))) {
|
||||
throw new Error("expected pre-write read evidence");
|
||||
if (env.mock) {
|
||||
const requests = await fetchJson<Array<{ prompt?: string; toolOutput?: string }>>(
|
||||
`${env.mock.baseUrl}/debug/requests`,
|
||||
);
|
||||
if (
|
||||
!requests.some((request) => (request.toolOutput ?? "").includes("QA mission"))
|
||||
) {
|
||||
throw new Error("expected pre-write read evidence");
|
||||
}
|
||||
}
|
||||
return "lobster-invaders.html";
|
||||
},
|
||||
|
|
@ -421,25 +475,31 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:model-switch",
|
||||
message: "Say hello from the default configured model.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
await waitForOutboundMessage(
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-operator",
|
||||
);
|
||||
const request = await fetchJson<{ body?: { model?: string } }>(
|
||||
`${env.mock.baseUrl}/debug/last-request`,
|
||||
);
|
||||
return String(request.body?.model ?? "");
|
||||
if (env.mock) {
|
||||
const request = await fetchJson<{ body?: { model?: string } }>(
|
||||
`${env.mock.baseUrl}/debug/last-request`,
|
||||
);
|
||||
return String(request.body?.model ?? "");
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "switches to the alternate model and continues",
|
||||
run: async () => {
|
||||
const alternate = splitModelRef(env.alternateModel);
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:model-switch",
|
||||
message: "Continue the exchange after switching models and note the handoff.",
|
||||
provider: "mock-openai",
|
||||
model: "gpt-5.4-alt",
|
||||
provider: alternate?.provider,
|
||||
model: alternate?.model,
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const outbound = await waitForCondition(
|
||||
() =>
|
||||
|
|
@ -449,16 +509,19 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
(candidate) =>
|
||||
candidate.direction === "outbound" &&
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.toLowerCase().includes("switch"),
|
||||
(candidate.text.toLowerCase().includes("switch") ||
|
||||
candidate.text.toLowerCase().includes("handoff")),
|
||||
)
|
||||
.at(-1),
|
||||
20_000,
|
||||
liveTurnTimeoutMs(env, 20_000),
|
||||
);
|
||||
const request = await fetchJson<{ body?: { model?: string } }>(
|
||||
`${env.mock.baseUrl}/debug/last-request`,
|
||||
);
|
||||
if (request.body?.model !== "gpt-5.4-alt") {
|
||||
throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`);
|
||||
if (env.mock) {
|
||||
const request = await fetchJson<{ body?: { model?: string } }>(
|
||||
`${env.mock.baseUrl}/debug/last-request`,
|
||||
);
|
||||
if (request.body?.model !== "gpt-5.4-alt") {
|
||||
throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`);
|
||||
}
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
|
|
@ -516,7 +579,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:discovery",
|
||||
message:
|
||||
"Read the seeded docs and source plan, then report grouped into Worked, Failed, Blocked, and Follow-up.",
|
||||
"Read the seeded docs and source plan. The full repo is mounted under ./repo/. Explicitly inspect repo/qa/seed-scenarios.json, repo/qa/QA_KICKOFF_TASK.md, repo/extensions/qa-lab/src/suite.ts, and repo/docs/help/testing.md, then report grouped into Worked, Failed, Blocked, and Follow-up. Mention at least two extra QA scenarios beyond the seed list.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const outbound = await waitForCondition(
|
||||
() =>
|
||||
|
|
@ -526,11 +590,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
(candidate) =>
|
||||
candidate.direction === "outbound" &&
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.includes("Worked:"),
|
||||
hasDiscoveryLabels(candidate.text),
|
||||
)
|
||||
.at(-1),
|
||||
20_000,
|
||||
liveTurnTimeoutMs(env, 20_000),
|
||||
env.providerMode === "live-openai" ? 250 : 100,
|
||||
);
|
||||
if (reportsMissingDiscoveryFiles(outbound.text)) {
|
||||
throw new Error(`discovery report still missed repo files: ${outbound.text}`);
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
|
|
@ -547,8 +615,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:subagent",
|
||||
message:
|
||||
"Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.",
|
||||
timeoutMs: 45_000,
|
||||
"Delegate one bounded QA task to a subagent. Wait for the subagent to finish. Then reply with three labeled sections exactly once: Delegated task, Result, Evidence. Include the child result itself, not 'waiting'.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 90_000),
|
||||
});
|
||||
const outbound = await waitForCondition(
|
||||
() =>
|
||||
|
|
@ -558,29 +626,22 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
(candidate) =>
|
||||
candidate.direction === "outbound" &&
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.toLowerCase().includes("delegated"),
|
||||
candidate.text.toLowerCase().includes("delegated task") &&
|
||||
candidate.text.toLowerCase().includes("result") &&
|
||||
candidate.text.toLowerCase().includes("evidence") &&
|
||||
!candidate.text.toLowerCase().includes("waiting"),
|
||||
)
|
||||
.at(-1),
|
||||
45_000,
|
||||
liveTurnTimeoutMs(env, 45_000),
|
||||
env.providerMode === "live-openai" ? 250 : 100,
|
||||
);
|
||||
const sessions = await waitForCondition(
|
||||
async () => {
|
||||
const listed = (await env.gateway.call("sessions.list", {
|
||||
spawnedBy: "agent:qa:subagent",
|
||||
})) as {
|
||||
sessions?: Array<{
|
||||
key?: string;
|
||||
parentSessionKey?: string;
|
||||
spawnedBy?: string;
|
||||
}>;
|
||||
};
|
||||
return (listed.sessions ?? []).length > 0 ? listed : null;
|
||||
},
|
||||
20_000,
|
||||
250,
|
||||
);
|
||||
if ((sessions.sessions ?? []).length === 0) {
|
||||
throw new Error("expected spawned child session");
|
||||
const lower = outbound.text.toLowerCase();
|
||||
if (
|
||||
lower.includes("failed to delegate") ||
|
||||
lower.includes("could not delegate") ||
|
||||
lower.includes("subagent unavailable")
|
||||
) {
|
||||
throw new Error(`subagent handoff reported failure: ${outbound.text}`);
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
|
|
@ -611,7 +672,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
|
||||
senderId: "alice",
|
||||
senderName: "Alice",
|
||||
text: "@openclaw continue this work inside the thread",
|
||||
text: "@openclaw reply in one short sentence inside this thread only. Do not use ACP or any external runtime. Confirm you stayed in-thread.",
|
||||
threadId,
|
||||
threadTitle: "QA deep dive",
|
||||
});
|
||||
|
|
@ -619,6 +680,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
state,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-room" && candidate.threadId === threadId,
|
||||
env.providerMode === "live-openai" ? 45_000 : 15_000,
|
||||
);
|
||||
const leaked = state
|
||||
.getSnapshot()
|
||||
|
|
@ -631,6 +693,14 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
if (leaked) {
|
||||
throw new Error("thread reply leaked into root channel");
|
||||
}
|
||||
const lower = outbound.text.toLowerCase();
|
||||
if (
|
||||
lower.includes("acp backend") ||
|
||||
lower.includes("acpx") ||
|
||||
lower.includes("not configured")
|
||||
) {
|
||||
throw new Error(`thread reply fell back to ACP error: ${outbound.text}`);
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
|
|
@ -639,8 +709,22 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
]);
|
||||
}
|
||||
|
||||
export async function runQaSuite(params?: { outputDir?: string }) {
|
||||
export async function runQaSuite(params?: {
|
||||
outputDir?: string;
|
||||
providerMode?: "mock-openai" | "live-openai";
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
}) {
|
||||
const startedAt = new Date();
|
||||
const providerMode = params?.providerMode ?? "mock-openai";
|
||||
const fastMode = params?.fastMode ?? providerMode === "live-openai";
|
||||
const primaryModel =
|
||||
params?.primaryModel ??
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4");
|
||||
const alternateModel =
|
||||
params?.alternateModel ??
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
|
||||
const outputDir =
|
||||
params?.outputDir ??
|
||||
path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
|
||||
|
|
@ -651,42 +735,120 @@ export async function runQaSuite(params?: { outputDir?: string }) {
|
|||
port: 0,
|
||||
embeddedGateway: "disabled",
|
||||
});
|
||||
const mock = await startQaMockOpenAiServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
});
|
||||
const mock =
|
||||
providerMode === "mock-openai"
|
||||
? await startQaMockOpenAiServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
})
|
||||
: null;
|
||||
const gateway = await startQaGatewayChild({
|
||||
repoRoot: process.cwd(),
|
||||
providerBaseUrl: `${mock.baseUrl}/v1`,
|
||||
providerBaseUrl: mock ? `${mock.baseUrl}/v1` : undefined,
|
||||
qaBusBaseUrl: lab.listenUrl,
|
||||
providerMode,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
fastMode,
|
||||
controlUiEnabled: true,
|
||||
});
|
||||
lab.setControlUi({
|
||||
controlUiProxyTarget: gateway.baseUrl,
|
||||
controlUiToken: gateway.token,
|
||||
});
|
||||
const env: QaSuiteEnvironment = {
|
||||
lab,
|
||||
mock,
|
||||
gateway,
|
||||
cfg: createQaActionConfig(lab.listenUrl),
|
||||
providerMode,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
};
|
||||
|
||||
try {
|
||||
const catalog = readQaBootstrapScenarioCatalog();
|
||||
const scenarioMap = buildScenarioMap(env);
|
||||
const scenarios: QaSuiteScenarioResult[] = [];
|
||||
const liveScenarioOutcomes: QaLabScenarioOutcome[] = catalog.scenarios.map((scenario) => ({
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "pending",
|
||||
}));
|
||||
|
||||
for (const scenario of catalog.scenarios) {
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "running",
|
||||
startedAt: startedAt.toISOString(),
|
||||
scenarios: liveScenarioOutcomes,
|
||||
});
|
||||
|
||||
for (const [index, scenario] of catalog.scenarios.entries()) {
|
||||
const run = scenarioMap.get(scenario.id);
|
||||
if (!run) {
|
||||
scenarios.push({
|
||||
const missingResult = {
|
||||
name: scenario.title,
|
||||
status: "fail",
|
||||
details: `no executable scenario registered for ${scenario.id}`,
|
||||
steps: [],
|
||||
} satisfies QaSuiteScenarioResult;
|
||||
scenarios.push(missingResult);
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "fail",
|
||||
details: missingResult.details,
|
||||
steps: [],
|
||||
finishedAt: new Date().toISOString(),
|
||||
};
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "running",
|
||||
startedAt: startedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
continue;
|
||||
}
|
||||
scenarios.push(await run());
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "running",
|
||||
startedAt: new Date().toISOString(),
|
||||
};
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "running",
|
||||
startedAt: startedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
|
||||
const result = await run();
|
||||
scenarios.push(result);
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: result.status,
|
||||
details: result.details,
|
||||
steps: result.steps,
|
||||
startedAt: liveScenarioOutcomes[index]?.startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
};
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "running",
|
||||
startedAt: startedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
}
|
||||
|
||||
const finishedAt = new Date();
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "completed",
|
||||
startedAt: startedAt.toISOString(),
|
||||
finishedAt: finishedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
const report = renderQaMarkdownReport({
|
||||
title: "OpenClaw QA Scenario Suite",
|
||||
startedAt,
|
||||
|
|
@ -699,7 +861,9 @@ export async function runQaSuite(params?: { outputDir?: string }) {
|
|||
steps: scenario.steps,
|
||||
})) satisfies QaReportScenario[],
|
||||
notes: [
|
||||
"Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider.",
|
||||
providerMode === "mock-openai"
|
||||
? "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider."
|
||||
: `Runs against qa-channel + qa-lab bus + real gateway child + live OpenAI models (${primaryModel}, ${alternateModel})${fastMode ? " with fast mode enabled" : ""}.`,
|
||||
"Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
|
||||
],
|
||||
});
|
||||
|
|
@ -729,10 +893,14 @@ export async function runQaSuite(params?: { outputDir?: string }) {
|
|||
summaryPath,
|
||||
report,
|
||||
scenarios,
|
||||
watchUrl: lab.baseUrl,
|
||||
} satisfies QaSuiteResult;
|
||||
} finally {
|
||||
await gateway.stop();
|
||||
await mock.stop();
|
||||
const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1" || false;
|
||||
await gateway.stop({
|
||||
keepTemp,
|
||||
});
|
||||
await mock?.stop();
|
||||
await lab.stop();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue