mirror of https://github.com/openclaw/openclaw.git
fix(qa): harden new scenario suite
This commit is contained in:
parent
80c5df6bdc
commit
979409eab5
|
|
@ -24,6 +24,53 @@ async function getFreePort() {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Builds the process environment used for QA child processes.
 *
 * The result starts from the current `process.env` and overrides the home,
 * config, state, OAuth, gateway-token and XDG locations with the supplied
 * values. It also sets the `OPENCLAW_SKIP_*`, `OPENCLAW_NO_RESPAWN` and
 * `OPENCLAW_TEST_FAST` flags to "1".
 *
 * When the provider mode is the mock provider, provider credentials inherited
 * from the host environment are removed from the result. An omitted
 * `providerMode` is treated as "mock-openai", matching the default applied by
 * the QA gateway config builder, so an implicit mock run does not inherit
 * host API keys.
 *
 * @param params Paths, gateway token and optional provider mode for the run.
 * @returns A fresh environment object; `process.env` itself is not mutated.
 */
function buildQaRuntimeEnv(params: {
  configPath: string;
  gatewayToken: string;
  homeDir: string;
  stateDir: string;
  xdgConfigHome: string;
  xdgDataHome: string;
  xdgCacheHome: string;
  providerMode?: "mock-openai" | "live-openai";
}) {
  const env: NodeJS.ProcessEnv = {
    ...process.env,
    HOME: params.homeDir,
    OPENCLAW_HOME: params.homeDir,
    OPENCLAW_CONFIG_PATH: params.configPath,
    OPENCLAW_STATE_DIR: params.stateDir,
    OPENCLAW_OAUTH_DIR: path.join(params.stateDir, "credentials"),
    OPENCLAW_GATEWAY_TOKEN: params.gatewayToken,
    OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1",
    OPENCLAW_SKIP_GMAIL_WATCHER: "1",
    OPENCLAW_SKIP_CANVAS_HOST: "1",
    OPENCLAW_NO_RESPAWN: "1",
    OPENCLAW_TEST_FAST: "1",
    XDG_CONFIG_HOME: params.xdgConfigHome,
    XDG_DATA_HOME: params.xdgDataHome,
    XDG_CACHE_HOME: params.xdgCacheHome,
  };
  // Omitted mode means the mock provider, so scrub credentials in that case too.
  const providerMode = params.providerMode ?? "mock-openai";
  if (providerMode === "mock-openai") {
    const inheritedCredentialKeys = [
      "OPENAI_API_KEY",
      "OPENAI_BASE_URL",
      "GEMINI_API_KEY",
      "GOOGLE_API_KEY",
      "VOYAGE_API_KEY",
      "MISTRAL_API_KEY",
      "AWS_ACCESS_KEY_ID",
      "AWS_SECRET_ACCESS_KEY",
      "AWS_SESSION_TOKEN",
      "AWS_REGION",
      "AWS_BEARER_TOKEN_BEDROCK",
    ];
    for (const key of inheritedCredentialKeys) {
      delete env[key];
    }
  }
  return env;
}
|
||||
|
||||
async function waitForGatewayReady(baseUrl: string, logs: () => string, timeoutMs = 30_000) {
|
||||
const startedAt = Date.now();
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
|
|
@ -116,23 +163,16 @@ export async function startQaGatewayChild(params: {
|
|||
|
||||
const stdout: Buffer[] = [];
|
||||
const stderr: Buffer[] = [];
|
||||
const env = {
|
||||
...process.env,
|
||||
HOME: homeDir,
|
||||
OPENCLAW_HOME: homeDir,
|
||||
OPENCLAW_CONFIG_PATH: configPath,
|
||||
OPENCLAW_STATE_DIR: stateDir,
|
||||
OPENCLAW_OAUTH_DIR: path.join(stateDir, "credentials"),
|
||||
OPENCLAW_GATEWAY_TOKEN: gatewayToken,
|
||||
OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1",
|
||||
OPENCLAW_SKIP_GMAIL_WATCHER: "1",
|
||||
OPENCLAW_SKIP_CANVAS_HOST: "1",
|
||||
OPENCLAW_NO_RESPAWN: "1",
|
||||
OPENCLAW_TEST_FAST: "1",
|
||||
XDG_CONFIG_HOME: xdgConfigHome,
|
||||
XDG_DATA_HOME: xdgDataHome,
|
||||
XDG_CACHE_HOME: xdgCacheHome,
|
||||
};
|
||||
const env = buildQaRuntimeEnv({
|
||||
configPath,
|
||||
gatewayToken,
|
||||
homeDir,
|
||||
stateDir,
|
||||
xdgConfigHome,
|
||||
xdgDataHome,
|
||||
xdgCacheHome,
|
||||
providerMode: params.providerMode,
|
||||
});
|
||||
|
||||
const child = spawn(
|
||||
process.execPath,
|
||||
|
|
@ -176,6 +216,7 @@ export async function startQaGatewayChild(params: {
|
|||
workspaceDir,
|
||||
tempRoot,
|
||||
configPath,
|
||||
runtimeEnv: env,
|
||||
logs,
|
||||
async call(
|
||||
method: string,
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@ describe("qa mock openai server", () => {
|
|||
expect(debugResponse.status).toBe(200);
|
||||
expect(await debugResponse.json()).toMatchObject({
|
||||
prompt: 'Please inspect "message_id" metadata first, then read `./QA_KICKOFF_TASK.md`.',
|
||||
allInputText: 'Please inspect "message_id" metadata first, then read `./QA_KICKOFF_TASK.md`.',
|
||||
plannedToolName: "read",
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -200,4 +202,202 @@ describe("qa mock openai server", () => {
|
|||
expect(body).toContain('\\"label\\":\\"qa-sidecar\\"');
|
||||
expect(body).toContain('\\"thread\\":false');
|
||||
});
|
||||
|
||||
it("plans memory tools and serves mock image generations", async () => {
  const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await server.stop();
  });

  // A memory-tools prompt with no tool output yet should plan a memory_search call.
  const memoryRequestBody = {
    stream: true,
    input: [
      {
        role: "user",
        content: [
          {
            type: "input_text",
            text: "Memory tools check: what is the hidden project codename stored only in memory? Use memory tools first.",
          },
        ],
      },
    ],
  };
  const memorySearch = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(memoryRequestBody),
  });
  expect(memorySearch.status).toBe(200);
  const memoryStream = await memorySearch.text();
  expect(memoryStream).toContain('"name":"memory_search"');

  // The image endpoint should answer with a base64 payload.
  const imageRequestBody = {
    model: "gpt-image-1",
    prompt: "Draw a QA lighthouse",
    n: 1,
    size: "1024x1024",
  };
  const image = await fetch(`${server.baseUrl}/v1/images/generations`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(imageRequestBody),
  });
  expect(image.status).toBe(200);
  const imagePayload = await image.json();
  expect(imagePayload).toMatchObject({
    data: [{ b64_json: expect.any(String) }],
  });
});
|
||||
|
||||
it("returns exact markers for visible and hot-installed skills", async () => {
  const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await server.stop();
  });

  // Sends one non-streaming user turn to the mock responses endpoint.
  const askOnce = (text: string) =>
    fetch(`${server.baseUrl}/v1/responses`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
      },
      body: JSON.stringify({
        stream: false,
        input: [
          {
            role: "user",
            content: [{ type: "input_text", text }],
          },
        ],
      }),
    });

  const visible = await askOnce("Visible skill marker: give me the visible skill marker exactly.");
  expect(visible.status).toBe(200);
  expect(await visible.json()).toMatchObject({
    output: [{ content: [{ text: "VISIBLE-SKILL-OK" }] }],
  });

  const hot = await askOnce("Hot install marker: give me the hot install marker exactly.");
  expect(hot.status).toBe(200);
  expect(await hot.json()).toMatchObject({
    output: [{ content: [{ text: "HOT-INSTALL-OK" }] }],
  });
});
|
||||
|
||||
it("ignores stale tool output from prior turns when planning the current turn", async () => {
  const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await server.stop();
  });

  // Conversation: first user turn, its tool output, then a new user turn.
  // The tool output precedes the latest user turn, so a read must be planned again.
  const firstTurn = {
    role: "user",
    content: [{ type: "input_text", text: "Read QA_KICKOFF_TASK.md first." }],
  };
  const staleToolOutput = {
    type: "function_call_output",
    output: "QA mission: read source and docs first.",
  };
  const currentTurn = {
    role: "user",
    content: [
      {
        type: "input_text",
        text: "Switch models now. Tool continuity check: reread QA_KICKOFF_TASK.md and mention the handoff in one short sentence.",
      },
    ],
  };

  const response = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      stream: true,
      input: [firstTurn, staleToolOutput, currentTurn],
    }),
  });

  expect(response.status).toBe(200);
  const streamed = await response.text();
  expect(streamed).toContain('"name":"read"');
});
|
||||
|
||||
it("returns NO_REPLY for unmentioned group chatter", async () => {
  const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await server.stop();
  });

  // Group-chat metadata plus chatter that does not address the bot.
  const chatter =
    'Conversation info (untrusted metadata): {"is_group_chat": true}\n\nhello team, no bot ping here';
  const requestBody = {
    stream: false,
    input: [
      {
        role: "user",
        content: [{ type: "input_text", text: chatter }],
      },
    ],
  };

  const response = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(requestBody),
  });

  expect(response.status).toBe(200);
  const payload = await response.json();
  expect(payload).toMatchObject({
    output: [{ content: [{ text: "NO_REPLY" }] }],
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -24,10 +24,15 @@ type MockOpenAiRequestSnapshot = {
|
|||
raw: string;
|
||||
body: Record<string, unknown>;
|
||||
prompt: string;
|
||||
allInputText: string;
|
||||
toolOutput: string;
|
||||
model: string;
|
||||
plannedToolName?: string;
|
||||
};
|
||||
|
||||
const TINY_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO7Z0nQAAAAASUVORK5CYII=";
|
||||
|
||||
function readBody(req: IncomingMessage): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const chunks: Buffer[] = [];
|
||||
|
|
@ -82,8 +87,19 @@ function extractLastUserText(input: ResponsesInputItem[]) {
|
|||
return "";
|
||||
}
|
||||
|
||||
function extractToolOutput(input: ResponsesInputItem[]) {
|
||||
/**
 * Locates the most recent user turn in a responses input list.
 *
 * Scans from the end and returns the position of the last item whose `role`
 * is "user" and whose `content` is an array, or -1 when there is none.
 */
function findLastUserIndex(input: ResponsesInputItem[]) {
  let cursor = input.length;
  while (cursor-- > 0) {
    const candidate = input[cursor];
    if (candidate.role === "user" && Array.isArray(candidate.content)) {
      return cursor;
    }
  }
  return -1;
}
|
||||
|
||||
function extractToolOutput(input: ResponsesInputItem[]) {
|
||||
const lastUserIndex = findLastUserIndex(input);
|
||||
for (let index = input.length - 1; index > lastUserIndex; index -= 1) {
|
||||
const item = input[index];
|
||||
if (item.type === "function_call_output" && typeof item.output === "string" && item.output) {
|
||||
return item.output;
|
||||
|
|
@ -116,6 +132,44 @@ function extractAllUserTexts(input: ResponsesInputItem[]) {
|
|||
return texts;
|
||||
}
|
||||
|
||||
/**
 * Flattens every textual piece of a responses input list into one string.
 *
 * For each item, in order, this collects the trimmed string `output` (when
 * non-empty) followed by the item's `input_text` content entries joined with
 * newlines and trimmed (when non-empty). All collected pieces are joined with
 * newlines.
 */
function extractAllInputTexts(input: ResponsesInputItem[]) {
  const collected: string[] = [];
  for (const item of input) {
    const outputText = typeof item.output === "string" ? item.output.trim() : "";
    if (outputText) {
      collected.push(outputText);
    }
    if (Array.isArray(item.content)) {
      const parts: string[] = [];
      for (const entry of item.content) {
        if (!entry || typeof entry !== "object") {
          continue;
        }
        const { type, text } = entry as { type?: unknown; text?: unknown };
        if (type === "input_text" && typeof text === "string") {
          parts.push(text);
        }
      }
      const joined = parts.join("\n").trim();
      if (joined) {
        collected.push(joined);
      }
    }
  }
  return collected.join("\n");
}
|
||||
|
||||
/**
 * Parses a tool output string as a JSON object.
 *
 * @param toolOutput Raw tool output text; may be empty or not JSON at all.
 * @returns The parsed value when it is a plain JSON object. Returns `null`
 *   for blank input, for invalid JSON, and for JSON that is not an object
 *   (arrays, numbers, strings, booleans, `null`), so the declared
 *   `Record<string, unknown>` type always holds for non-null results.
 */
function parseToolOutputJson(toolOutput: string): Record<string, unknown> | null {
  if (!toolOutput.trim()) {
    return null;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(toolOutput);
  } catch {
    // Tool output is frequently plain text; that is not an error here.
    return null;
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  return parsed as Record<string, unknown>;
}
|
||||
|
||||
function normalizePromptPathCandidate(candidate: string) {
|
||||
const trimmed = candidate.trim().replace(/^`+|`+$/g, "");
|
||||
if (!trimmed) {
|
||||
|
|
@ -221,12 +275,26 @@ function extractRememberedFact(userTexts: string[]) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Finds the QA codename `ORBIT-9` in free text.
 *
 * The match is case-insensitive and bounded by word boundaries; the result is
 * always returned upper-cased.
 *
 * @param text Text to scan.
 * @returns "ORBIT-9" when present in any letter casing, otherwise `null`.
 */
function extractOrbitCode(text: string) {
  return /\borbit-9\b/i.exec(text)?.[0]?.toUpperCase() ?? null;
}
|
||||
|
||||
function buildAssistantText(input: ResponsesInputItem[], body: Record<string, unknown>) {
|
||||
const prompt = extractLastUserText(input);
|
||||
const toolOutput = extractToolOutput(input);
|
||||
const toolJson = parseToolOutputJson(toolOutput);
|
||||
const userTexts = extractAllUserTexts(input);
|
||||
const allInputText = extractAllInputTexts(input);
|
||||
const rememberedFact = extractRememberedFact(userTexts);
|
||||
const model = typeof body.model === "string" ? body.model : "";
|
||||
const memorySnippet =
|
||||
typeof toolJson?.text === "string"
|
||||
? toolJson.text
|
||||
: Array.isArray(toolJson?.results)
|
||||
? JSON.stringify(toolJson.results)
|
||||
: toolOutput;
|
||||
const orbitCode = extractOrbitCode(memorySnippet);
|
||||
const mediaPath = /MEDIA:([^\n]+)/.exec(toolOutput)?.[1]?.trim();
|
||||
|
||||
if (/what was the qa canary code/i.test(prompt) && rememberedFact) {
|
||||
return `Protocol note: the QA canary code was ${rememberedFact}.`;
|
||||
|
|
@ -234,9 +302,27 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record<string, un
|
|||
if (/remember this fact/i.test(prompt) && rememberedFact) {
|
||||
return `Protocol note: acknowledged. I will remember ${rememberedFact}.`;
|
||||
}
|
||||
if (/memory unavailable check/i.test(prompt)) {
|
||||
return "Protocol note: I checked the available runtime context but could not confirm the hidden memory-only fact, so I will not guess.";
|
||||
}
|
||||
if (/visible skill marker/i.test(prompt)) {
|
||||
return "VISIBLE-SKILL-OK";
|
||||
}
|
||||
if (/hot install marker/i.test(prompt)) {
|
||||
return "HOT-INSTALL-OK";
|
||||
}
|
||||
if (/memory tools check/i.test(prompt) && orbitCode) {
|
||||
return `Protocol note: I checked memory and the project codename is ${orbitCode}.`;
|
||||
}
|
||||
if (/switch(?:ing)? models?/i.test(prompt)) {
|
||||
return `Protocol note: model switch acknowledged. Continuing on ${model || "the requested model"}.`;
|
||||
}
|
||||
if (/tool continuity check/i.test(prompt) && toolOutput) {
|
||||
return `Protocol note: model switch acknowledged. Tool continuity held on ${model || "the requested model"}.`;
|
||||
}
|
||||
if (/image generation check/i.test(prompt) && mediaPath) {
|
||||
return `Protocol note: generated the QA lighthouse image successfully.\nMEDIA:${mediaPath}`;
|
||||
}
|
||||
if (toolOutput && /delegate|subagent/i.test(prompt)) {
|
||||
return `Protocol note: delegated result acknowledged. The bounded subagent task returned and is folded back into the main thread.`;
|
||||
}
|
||||
|
|
@ -264,6 +350,19 @@ function buildToolCallEvents(prompt: string): StreamEvent[] {
|
|||
return buildToolCallEventsWithArgs("read", { path: targetPath });
|
||||
}
|
||||
|
||||
/**
 * Returns the name of the first function call found in a list of stream events.
 *
 * Only `response.output_item.done` events are inspected; the first one whose
 * item has type "function_call" and a string `name` wins. Returns `undefined`
 * when no such event exists.
 */
function extractPlannedToolName(events: StreamEvent[]) {
  for (const event of events) {
    if (event.type === "response.output_item.done") {
      const item = event.item as { type?: unknown; name?: unknown };
      const isFunctionCall = item.type === "function_call";
      if (isFunctionCall && typeof item.name === "string") {
        return item.name;
      }
    }
  }
  return undefined;
}
|
||||
|
||||
function buildAssistantEvents(text: string): StreamEvent[] {
|
||||
const outputItem = {
|
||||
type: "message",
|
||||
|
|
@ -303,6 +402,10 @@ function buildResponsesPayload(body: Record<string, unknown>) {
|
|||
const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
|
||||
const prompt = extractLastUserText(input);
|
||||
const toolOutput = extractToolOutput(input);
|
||||
const toolJson = parseToolOutputJson(toolOutput);
|
||||
const allInputText = extractAllInputTexts(input);
|
||||
const isGroupChat = allInputText.includes('"is_group_chat": true');
|
||||
const isBaselineUnmentionedChannelChatter = /\bno bot ping here\b/i.test(prompt);
|
||||
if (/lobster invaders/i.test(prompt)) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
|
||||
|
|
@ -318,6 +421,44 @@ function buildResponsesPayload(body: Record<string, unknown>) {
|
|||
});
|
||||
}
|
||||
}
|
||||
if (/memory tools check/i.test(prompt)) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("memory_search", {
|
||||
query: "project codename ORBIT-9",
|
||||
maxResults: 3,
|
||||
});
|
||||
}
|
||||
const results = Array.isArray(toolJson?.results)
|
||||
? (toolJson.results as Array<Record<string, unknown>>)
|
||||
: [];
|
||||
const first = results[0];
|
||||
if (
|
||||
typeof first?.path === "string" &&
|
||||
(typeof first.startLine === "number" || typeof first.endLine === "number")
|
||||
) {
|
||||
const from =
|
||||
typeof first.startLine === "number"
|
||||
? Math.max(1, first.startLine)
|
||||
: typeof first.endLine === "number"
|
||||
? Math.max(1, first.endLine)
|
||||
: 1;
|
||||
return buildToolCallEventsWithArgs("memory_get", {
|
||||
path: first.path,
|
||||
from,
|
||||
lines: 4,
|
||||
});
|
||||
}
|
||||
}
|
||||
if (/image generation check/i.test(prompt) && !toolOutput) {
|
||||
return buildToolCallEventsWithArgs("image_generate", {
|
||||
prompt: "A QA lighthouse on a dark sea with a tiny protocol droid silhouette.",
|
||||
filename: "qa-lighthouse.png",
|
||||
size: "1024x1024",
|
||||
});
|
||||
}
|
||||
if (/tool continuity check/i.test(prompt) && !toolOutput) {
|
||||
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
|
||||
}
|
||||
if (/delegate|subagent/i.test(prompt) && !toolOutput) {
|
||||
return buildToolCallEventsWithArgs("sessions_spawn", {
|
||||
task: "Inspect the QA workspace and return one concise protocol note.",
|
||||
|
|
@ -334,6 +475,15 @@ function buildResponsesPayload(body: Record<string, unknown>) {
|
|||
if (!toolOutput && /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt)) {
|
||||
return buildToolCallEvents(prompt);
|
||||
}
|
||||
if (/visible skill marker/i.test(prompt) && !toolOutput) {
|
||||
return buildAssistantEvents("VISIBLE-SKILL-OK");
|
||||
}
|
||||
if (/hot install marker/i.test(prompt) && !toolOutput) {
|
||||
return buildAssistantEvents("HOT-INSTALL-OK");
|
||||
}
|
||||
if (isGroupChat && isBaselineUnmentionedChannelChatter && !toolOutput) {
|
||||
return buildAssistantEvents("NO_REPLY");
|
||||
}
|
||||
return buildAssistantEvents(buildAssistantText(input, body));
|
||||
}
|
||||
|
||||
|
|
@ -352,6 +502,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
|
|||
data: [
|
||||
{ id: "gpt-5.4", object: "model" },
|
||||
{ id: "gpt-5.4-alt", object: "model" },
|
||||
{ id: "gpt-image-1", object: "model" },
|
||||
],
|
||||
});
|
||||
return;
|
||||
|
|
@ -364,22 +515,35 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
|
|||
writeJson(res, 200, requests);
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/v1/images/generations") {
|
||||
writeJson(res, 200, {
|
||||
data: [
|
||||
{
|
||||
b64_json: TINY_PNG_BASE64,
|
||||
revised_prompt: "A QA lighthouse with protocol droid silhouette.",
|
||||
},
|
||||
],
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/v1/responses") {
|
||||
const raw = await readBody(req);
|
||||
const body = raw ? (JSON.parse(raw) as Record<string, unknown>) : {};
|
||||
const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
|
||||
const events = buildResponsesPayload(body);
|
||||
lastRequest = {
|
||||
raw,
|
||||
body,
|
||||
prompt: extractLastUserText(input),
|
||||
allInputText: extractAllInputTexts(input),
|
||||
toolOutput: extractToolOutput(input),
|
||||
model: typeof body.model === "string" ? body.model : "",
|
||||
plannedToolName: extractPlannedToolName(events),
|
||||
};
|
||||
requests.push(lastRequest);
|
||||
if (requests.length > 50) {
|
||||
requests.splice(0, requests.length - 50);
|
||||
}
|
||||
const events = buildResponsesPayload(body);
|
||||
if (body.stream === false) {
|
||||
const completion = events.at(-1);
|
||||
if (!completion || completion.type !== "response.completed") {
|
||||
|
|
|
|||
|
|
@ -74,6 +74,21 @@ export function buildQaGatewayConfig(params: {
|
|||
contextWindow: 128_000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
{
|
||||
id: "gpt-image-1",
|
||||
name: "gpt-image-1",
|
||||
api: "openai-responses",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128_000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
],
|
||||
};
|
||||
const providerMode = params.providerMode ?? "mock-openai";
|
||||
|
|
@ -87,6 +102,8 @@ export function buildQaGatewayConfig(params: {
|
|||
const alternateModel =
|
||||
params.alternateModel ??
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
|
||||
const imageGenerationModelRef =
|
||||
providerMode === "live-openai" ? "openai/gpt-image-1" : "mock-openai/gpt-image-1";
|
||||
const liveModelParams =
|
||||
providerMode === "live-openai"
|
||||
? {
|
||||
|
|
@ -133,6 +150,17 @@ export function buildQaGatewayConfig(params: {
|
|||
model: {
|
||||
primary: primaryModel,
|
||||
},
|
||||
imageGenerationModel: {
|
||||
primary: imageGenerationModelRef,
|
||||
},
|
||||
memorySearch: {
|
||||
sync: {
|
||||
watch: true,
|
||||
watchDebounceMs: 25,
|
||||
onSessionStart: true,
|
||||
onSearch: true,
|
||||
},
|
||||
},
|
||||
models: {
|
||||
[primaryModel]: {
|
||||
params: liveModelParams,
|
||||
|
|
@ -165,6 +193,9 @@ export function buildQaGatewayConfig(params: {
|
|||
},
|
||||
],
|
||||
},
|
||||
memory: {
|
||||
backend: "builtin",
|
||||
},
|
||||
...(providerMode === "mock-openai"
|
||||
? {
|
||||
models: {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
import { spawn } from "node:child_process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
||||
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
|
||||
import type { QaBusState } from "./bus-state.js";
|
||||
import { extractQaToolPayload } from "./extract-tool-payload.js";
|
||||
|
|
@ -35,6 +38,18 @@ type QaSuiteEnvironment = {
|
|||
alternateModel: string;
|
||||
};
|
||||
|
||||
/**
 * One entry of the `skills` array returned by the gateway `skills.status`
 * call. All fields are optional because the payload is not validated here.
 */
type QaSkillStatusEntry = {
  // Skill name; used as the lookup key when searching the list.
  name?: string;
  // NOTE(review): presumably whether the skill can currently be used — confirm against the gateway.
  eligible?: boolean;
  // NOTE(review): presumably set when the skill is turned off by config — confirm.
  disabled?: boolean;
  // NOTE(review): presumably set when an allowlist excludes the skill — confirm.
  blockedByAllowlist?: boolean;
};
|
||||
|
||||
/**
 * Loosely typed shape of the gateway `config.get` response. Both fields are
 * optional here; callers must check them before use.
 */
type QaConfigSnapshot = {
  // Hash of the config; sent back as `baseHash` with config.patch / config.apply.
  hash?: string;
  // The config object itself.
  config?: Record<string, unknown>;
};
|
||||
|
||||
function splitModelRef(ref: string) {
|
||||
const slash = ref.indexOf("/");
|
||||
if (slash <= 0 || slash === ref.length - 1) {
|
||||
|
|
@ -138,7 +153,13 @@ async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteS
|
|||
const stepResults: QaReportCheck[] = [];
|
||||
for (const step of steps) {
|
||||
try {
|
||||
if (process.env.OPENCLAW_QA_DEBUG === "1") {
|
||||
console.error(`[qa-suite] start scenario="${name}" step="${step.name}"`);
|
||||
}
|
||||
const details = await step.run();
|
||||
if (process.env.OPENCLAW_QA_DEBUG === "1") {
|
||||
console.error(`[qa-suite] pass scenario="${name}" step="${step.name}"`);
|
||||
}
|
||||
stepResults.push({
|
||||
name: step.name,
|
||||
status: "pass",
|
||||
|
|
@ -146,6 +167,9 @@ async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteS
|
|||
});
|
||||
} catch (error) {
|
||||
const details = error instanceof Error ? error.message : String(error);
|
||||
if (process.env.OPENCLAW_QA_DEBUG === "1") {
|
||||
console.error(`[qa-suite] fail scenario="${name}" step="${step.name}" details=${details}`);
|
||||
}
|
||||
stepResults.push({
|
||||
name: step.name,
|
||||
status: "fail",
|
||||
|
|
@ -174,6 +198,264 @@ async function fetchJson<T>(url: string): Promise<T> {
|
|||
return (await response.json()) as T;
|
||||
}
|
||||
|
||||
/**
 * Polls the gateway's `/readyz` endpoint until it answers with an OK status.
 *
 * Network errors and non-OK responses both count as "not ready yet". Polling
 * is delegated to `waitForCondition` with a 250 interval argument and the
 * given timeout.
 *
 * @param env Suite environment providing the gateway base URL.
 * @param timeoutMs Maximum wait, defaulting to 45 seconds.
 */
async function waitForGatewayHealthy(env: QaSuiteEnvironment, timeoutMs = 45_000) {
  const probeReady = async () => {
    try {
      const response = await fetch(`${env.gateway.baseUrl}/readyz`);
      if (response.ok) {
        return true;
      }
    } catch {
      // Gateway not reachable yet; report "not ready" below.
    }
    return undefined;
  };
  await waitForCondition(probeReady, timeoutMs, 250);
}
|
||||
|
||||
/**
 * Tells whether an error looks like the gateway connection dropping because
 * the gateway restarted.
 *
 * The error's message (or its string form for non-Error values) is checked
 * for any of a fixed set of substrings.
 */
function isGatewayRestartRace(error: unknown) {
  const message = error instanceof Error ? error.message : String(error);
  const restartMarkers = [
    "gateway closed (1012)",
    "gateway closed (1006",
    "abnormal closure",
    "service restart",
  ];
  return restartMarkers.some((marker) => message.includes(marker));
}
|
||||
|
||||
/**
 * Fetches the current gateway config via `config.get`.
 *
 * @returns The config together with its hash.
 * @throws Error when the response lacks either the hash or the config.
 */
async function readConfigSnapshot(env: QaSuiteEnvironment) {
  const snapshot = (await env.gateway.call("config.get", {})) as QaConfigSnapshot;
  const hash = snapshot.hash;
  const config = snapshot.config;
  if (hash && config) {
    return { hash, config } satisfies { hash: string; config: Record<string, unknown> };
  }
  throw new Error("config.get returned no hash/config");
}
|
||||
|
||||
/**
 * Sends a partial config update to the gateway via `config.patch`.
 *
 * The current config hash is read first and sent as `baseHash`. If the call
 * fails with an error that looks like a gateway restart, this waits for the
 * gateway to become healthy again and reports success with `restarted: true`.
 * Any other error is rethrown.
 *
 * @param params.patch Patch object; serialized as pretty-printed JSON.
 * @param params.sessionKey Optional session key, forwarded only when non-empty.
 * @param params.note Optional note, forwarded only when non-empty.
 * @param params.restartDelayMs Restart delay, defaulting to 1000.
 */
async function patchConfig(params: {
  env: QaSuiteEnvironment;
  patch: Record<string, unknown>;
  sessionKey?: string;
  note?: string;
  restartDelayMs?: number;
}) {
  const { hash: baseHash } = await readConfigSnapshot(params.env);
  const sessionFields = params.sessionKey ? { sessionKey: params.sessionKey } : {};
  const noteFields = params.note ? { note: params.note } : {};
  try {
    const result = await params.env.gateway.call(
      "config.patch",
      {
        raw: JSON.stringify(params.patch, null, 2),
        baseHash,
        ...sessionFields,
        ...noteFields,
        restartDelayMs: params.restartDelayMs ?? 1_000,
      },
      { timeoutMs: 45_000 },
    );
    return result;
  } catch (error) {
    if (isGatewayRestartRace(error)) {
      // The gateway restarted underneath the call; wait for it to come back.
      await waitForGatewayHealthy(params.env);
      return { ok: true, restarted: true };
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Replaces the gateway config via `config.apply`.
 *
 * The current config hash is read first and sent as `baseHash`. If the call
 * fails with an error that looks like a gateway restart, this waits for the
 * gateway to become healthy again and reports success with `restarted: true`.
 * Any other error is rethrown.
 *
 * @param params.nextConfig Full config object; serialized as pretty-printed JSON.
 * @param params.sessionKey Optional session key, forwarded only when non-empty.
 * @param params.note Optional note, forwarded only when non-empty.
 * @param params.restartDelayMs Restart delay, defaulting to 1000.
 */
async function applyConfig(params: {
  env: QaSuiteEnvironment;
  nextConfig: Record<string, unknown>;
  sessionKey?: string;
  note?: string;
  restartDelayMs?: number;
}) {
  const { hash: baseHash } = await readConfigSnapshot(params.env);
  const sessionFields = params.sessionKey ? { sessionKey: params.sessionKey } : {};
  const noteFields = params.note ? { note: params.note } : {};
  try {
    const result = await params.env.gateway.call(
      "config.apply",
      {
        raw: JSON.stringify(params.nextConfig, null, 2),
        baseHash,
        ...sessionFields,
        ...noteFields,
        restartDelayMs: params.restartDelayMs ?? 1_000,
      },
      { timeoutMs: 45_000 },
    );
    return result;
  } catch (error) {
    if (isGatewayRestartRace(error)) {
      // The gateway restarted underneath the call; wait for it to come back.
      await waitForGatewayHealthy(params.env);
      return { ok: true, restarted: true };
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Creates a gateway session via `sessions.create`.
 *
 * @param label Label for the new session.
 * @param key Optional explicit session key; sent only when non-empty.
 * @returns The trimmed session key reported by the gateway.
 * @throws Error when the gateway response carries no usable key.
 */
async function createSession(env: QaSuiteEnvironment, label: string, key?: string) {
  const keyFields = key ? { key } : {};
  const response = await env.gateway.call("sessions.create", { label, ...keyFields });
  const created = response as { key?: string };
  const sessionKey = created.key?.trim();
  if (sessionKey) {
    return sessionKey;
  }
  throw new Error("sessions.create returned no key");
}
|
||||
|
||||
/**
 * Reads the effective tool ids for a session via `tools.effective`.
 *
 * @returns The set of trimmed, non-empty tool ids across all returned groups.
 */
async function readEffectiveTools(env: QaSuiteEnvironment, sessionKey: string) {
  const callOptions = { timeoutMs: liveTurnTimeoutMs(env, 90_000) };
  const payload = (await env.gateway.call("tools.effective", { sessionKey }, callOptions)) as {
    groups?: Array<{ tools?: Array<{ id?: string }> }>;
  };
  const ids = new Set<string>();
  const groups = payload.groups ?? [];
  for (const group of groups) {
    const tools = group.tools ?? [];
    for (const tool of tools) {
      const id = tool.id?.trim();
      if (id) {
        ids.add(id);
      }
    }
  }
  return ids;
}
|
||||
|
||||
/**
 * Reads the skill status list for an agent via `skills.status`.
 *
 * @param agentId Agent to query; defaults to "qa".
 * @returns The reported skills, or an empty array when the field is absent.
 */
async function readSkillStatus(env: QaSuiteEnvironment, agentId = "qa") {
  const callOptions = { timeoutMs: liveTurnTimeoutMs(env, 45_000) };
  const response = await env.gateway.call("skills.status", { agentId }, callOptions);
  const { skills } = response as { skills?: QaSkillStatusEntry[] };
  return skills ?? [];
}
|
||||
|
||||
/**
 * Runs the built CLI (`dist/index.js`) as a child process with the gateway's
 * runtime environment and returns its standard output.
 *
 * The promise settles on the child's `close` event rather than `exit`:
 * `exit` can fire while the stdio pipes still hold unread data, which would
 * truncate the captured output. `close` fires only after the streams ended.
 *
 * @param env Suite environment; its gateway runtime env is passed to the child.
 * @param args CLI arguments appended after `dist/index.js`.
 * @param opts.timeoutMs Kill the child with SIGKILL after this long (default 60s).
 * @param opts.json When true, parse stdout as JSON (empty stdout yields `{}`).
 * @returns Trimmed stdout text, or the parsed JSON value when `opts.json` is set.
 * @throws Error on spawn failure, timeout, or a non-zero / signal exit; the
 *   failure message includes the child's trimmed stderr.
 */
async function runQaCli(
  env: QaSuiteEnvironment,
  args: string[],
  opts?: { timeoutMs?: number; json?: boolean },
) {
  const stdout: Buffer[] = [];
  const stderr: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    const child = spawn(process.execPath, ["dist/index.js", ...args], {
      cwd: process.cwd(),
      env: env.gateway.runtimeEnv,
      stdio: ["ignore", "pipe", "pipe"],
    });
    const timeout = setTimeout(() => {
      child.kill("SIGKILL");
      reject(new Error(`qa cli timed out: openclaw ${args.join(" ")}`));
    }, opts?.timeoutMs ?? 60_000);
    child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
    child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
    child.once("error", (error) => {
      clearTimeout(timeout);
      reject(error);
    });
    // "close" (not "exit") guarantees stdout/stderr have been fully drained.
    child.once("close", (code, signal) => {
      clearTimeout(timeout);
      if (code === 0) {
        resolve();
        return;
      }
      reject(
        new Error(
          `qa cli failed (${code ?? signal ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8").trim()}`,
        ),
      );
    });
  });
  const text = Buffer.concat(stdout).toString("utf8").trim();
  if (!opts?.json) {
    return text;
  }
  return text ? (JSON.parse(text) as unknown) : {};
}
|
||||
|
||||
/**
 * Forces a memory reindex for the "qa" agent through the CLI, then verifies
 * with a CLI memory search that the expected text is present.
 *
 * @param params.query Search query passed to `memory search`.
 * @param params.expectedNeedle Substring that must appear in the serialized results.
 * @throws Error when the search results do not contain the expected substring.
 */
async function forceMemoryIndex(params: {
  env: QaSuiteEnvironment;
  query: string;
  expectedNeedle: string;
}) {
  const { env, query, expectedNeedle } = params;

  const indexArgs = ["memory", "index", "--agent", "qa", "--force"];
  await runQaCli(env, indexArgs, { timeoutMs: liveTurnTimeoutMs(env, 60_000) });

  const searchArgs = ["memory", "search", "--agent", "qa", "--json", "--query", query];
  const searchResult = await runQaCli(env, searchArgs, {
    timeoutMs: liveTurnTimeoutMs(env, 20_000),
    json: true,
  });
  const payload = searchResult as {
    results?: Array<{ snippet?: string; text?: string; path?: string }>;
  };

  // Match against the whole serialized result list, whichever field holds the text.
  const haystack = JSON.stringify(payload.results ?? []);
  if (haystack.includes(expectedNeedle)) {
    return;
  }
  throw new Error(`memory index missing expected fact after reindex: ${haystack}`);
}
|
||||
|
||||
/**
 * Returns the first skill entry whose `name` equals the given name exactly,
 * or `undefined` when there is no match.
 */
function findSkill(skills: QaSkillStatusEntry[], name: string) {
  for (const candidate of skills) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Writes a skill definition into the gateway workspace at
 * `skills/<name>/SKILL.md`, creating the directory when needed.
 *
 * The body is trimmed and written with a single trailing newline.
 *
 * @returns The path of the written `SKILL.md` file.
 */
async function writeWorkspaceSkill(params: {
  env: QaSuiteEnvironment;
  name: string;
  body: string;
}) {
  const { env, name, body } = params;
  const targetDir = path.join(env.gateway.workspaceDir, "skills", name);
  const targetFile = path.join(targetDir, "SKILL.md");
  const contents = `${body.trim()}\n`;

  await fs.mkdir(targetDir, { recursive: true });
  await fs.writeFile(targetFile, contents, "utf8");
  return targetFile;
}
|
||||
|
||||
/**
 * Calls one tool on the plugin-tools MCP server over stdio.
 *
 * The server is started from `src/mcp/plugin-tools-serve.ts` with the
 * gateway's runtime environment (string-valued entries only). The tool must
 * appear in the server's tool list before it is invoked. The client is always
 * closed afterwards; errors from closing are ignored.
 *
 * @throws Error when the requested tool is not listed by the server.
 */
async function callPluginToolsMcp(params: {
  env: QaSuiteEnvironment;
  toolName: string;
  args: Record<string, unknown>;
}) {
  // Keep only entries with a defined string value.
  const stringEntries = Object.entries(params.env.gateway.runtimeEnv).filter(
    (entry): entry is [string, string] => typeof entry[1] === "string",
  );
  const transportEnv = Object.fromEntries(stringEntries);

  const transport = new StdioClientTransport({
    command: process.execPath,
    args: ["--import", "tsx", "src/mcp/plugin-tools-serve.ts"],
    stderr: "pipe",
    env: transportEnv,
  });
  const client = new Client({ name: "openclaw-qa-suite", version: "0.0.0" }, {});

  try {
    await client.connect(transport);
    const { tools } = await client.listTools();
    const isListed = tools.some((entry) => entry.name === params.toolName);
    if (!isListed) {
      throw new Error(`MCP tool missing: ${params.toolName}`);
    }
    const result = await client.callTool({
      name: params.toolName,
      arguments: params.args,
    });
    return result;
  } finally {
    await client.close().catch(() => {});
  }
}
|
||||
|
||||
async function runAgentPrompt(
|
||||
env: QaSuiteEnvironment,
|
||||
params: {
|
||||
|
|
@ -285,7 +567,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
const message = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
|
||||
env.providerMode === "live-openai" ? 45_000 : 15_000,
|
||||
env.providerMode === "live-openai" ? 45_000 : 45_000,
|
||||
);
|
||||
return message.text;
|
||||
},
|
||||
|
|
@ -706,6 +988,556 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
|
|||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"memory-tools-channel-context",
|
||||
async () =>
|
||||
await runScenario("Memory tools in channel context", [
|
||||
{
|
||||
name: "uses memory_search plus memory_get before answering in-channel",
|
||||
run: async () => {
|
||||
await reset();
|
||||
await fs.writeFile(
|
||||
path.join(env.gateway.workspaceDir, "MEMORY.md"),
|
||||
"Hidden QA fact: the project codename is ORBIT-9.\n",
|
||||
"utf8",
|
||||
);
|
||||
await forceMemoryIndex({
|
||||
env,
|
||||
query: "project codename ORBIT-9",
|
||||
expectedNeedle: "ORBIT-9",
|
||||
});
|
||||
const prompt =
|
||||
"@openclaw Memory tools check: what is the hidden project codename stored only in memory? Use memory tools first.";
|
||||
state.addInboundMessage({
|
||||
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
|
||||
senderId: "alice",
|
||||
senderName: "Alice",
|
||||
text: prompt,
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-room" && candidate.text.includes("ORBIT-9"),
|
||||
liveTurnTimeoutMs(env, 30_000),
|
||||
);
|
||||
if (env.mock) {
|
||||
const requests = await fetchJson<
|
||||
Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }>
|
||||
>(`${env.mock.baseUrl}/debug/requests`);
|
||||
const relevant = requests.filter((request) =>
|
||||
String(request.allInputText ?? "").includes("Memory tools check"),
|
||||
);
|
||||
if (!relevant.some((request) => request.plannedToolName === "memory_search")) {
|
||||
throw new Error("expected memory_search in mock request plan");
|
||||
}
|
||||
if (!requests.some((request) => request.plannedToolName === "memory_get")) {
|
||||
throw new Error("expected memory_get in mock request plan");
|
||||
}
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"memory-failure-fallback",
|
||||
async () =>
|
||||
await runScenario("Memory failure fallback", [
|
||||
{
|
||||
name: "falls back cleanly when group:memory tools are denied",
|
||||
run: async () => {
|
||||
const original = await readConfigSnapshot(env);
|
||||
await fs.writeFile(
|
||||
path.join(env.gateway.workspaceDir, "MEMORY.md"),
|
||||
"Do not reveal directly: fallback fact is ORBIT-9.\n",
|
||||
"utf8",
|
||||
);
|
||||
await patchConfig({
|
||||
env,
|
||||
patch: { tools: { deny: ["group:memory"] } },
|
||||
});
|
||||
await waitForGatewayHealthy(env);
|
||||
try {
|
||||
const sessionKey = await createSession(env, "Memory fallback");
|
||||
const tools = await readEffectiveTools(env, sessionKey);
|
||||
if (tools.has("memory_search") || tools.has("memory_get")) {
|
||||
throw new Error("memory tools still present after deny patch");
|
||||
}
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:memory-failure",
|
||||
message:
|
||||
"Memory unavailable check: a hidden fact exists only in memory files. If you cannot confirm it, say so clearly and do not guess.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-operator",
|
||||
liveTurnTimeoutMs(env, 30_000),
|
||||
);
|
||||
const lower = outbound.text.toLowerCase();
|
||||
if (outbound.text.includes("ORBIT-9")) {
|
||||
throw new Error(`hallucinated hidden fact: ${outbound.text}`);
|
||||
}
|
||||
if (!lower.includes("could not confirm") && !lower.includes("will not guess")) {
|
||||
throw new Error(`missing graceful fallback language: ${outbound.text}`);
|
||||
}
|
||||
return outbound.text;
|
||||
} finally {
|
||||
await applyConfig({
|
||||
env,
|
||||
nextConfig: original.config,
|
||||
});
|
||||
await waitForGatewayHealthy(env);
|
||||
}
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"model-switch-tool-continuity",
|
||||
async () =>
|
||||
await runScenario("Model switch with tool continuity", [
|
||||
{
|
||||
name: "keeps using tools after switching models",
|
||||
run: async () => {
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:model-switch-tools",
|
||||
message:
|
||||
"Read QA_KICKOFF_TASK.md and summarize the QA mission in one clause before any model switch.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const alternate = splitModelRef(env.alternateModel);
|
||||
const beforeSwitchCursor = state.getSnapshot().messages.length;
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:model-switch-tools",
|
||||
message:
|
||||
"Switch models now. Tool continuity check: reread QA_KICKOFF_TASK.md and mention the handoff in one short sentence.",
|
||||
provider: alternate?.provider,
|
||||
model: alternate?.model,
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const outbound = await waitForCondition(
|
||||
() => {
|
||||
const snapshot = state.getSnapshot();
|
||||
return snapshot.messages
|
||||
.slice(beforeSwitchCursor)
|
||||
.filter(
|
||||
(candidate) =>
|
||||
candidate.direction === "outbound" &&
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
(candidate.text.toLowerCase().includes("model switch") ||
|
||||
candidate.text.toLowerCase().includes("handoff")),
|
||||
)
|
||||
.at(-1);
|
||||
},
|
||||
liveTurnTimeoutMs(env, 30_000),
|
||||
);
|
||||
if (env.mock) {
|
||||
const requests = await fetchJson<
|
||||
Array<{ allInputText?: string; plannedToolName?: string; model?: string }>
|
||||
>(`${env.mock.baseUrl}/debug/requests`);
|
||||
const switched = requests.find((request) =>
|
||||
String(request.allInputText ?? "").includes("Tool continuity check"),
|
||||
);
|
||||
if (switched?.plannedToolName !== "read") {
|
||||
throw new Error(
|
||||
`expected read after switch, got ${String(switched?.plannedToolName ?? "")}`,
|
||||
);
|
||||
}
|
||||
if (switched?.model !== "gpt-5.4-alt") {
|
||||
throw new Error(`expected alternate model, got ${String(switched?.model ?? "")}`);
|
||||
}
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"mcp-plugin-tools-call",
|
||||
async () =>
|
||||
await runScenario("MCP plugin-tools call", [
|
||||
{
|
||||
name: "serves and calls memory_search over MCP",
|
||||
run: async () => {
|
||||
await fs.writeFile(
|
||||
path.join(env.gateway.workspaceDir, "MEMORY.md"),
|
||||
"MCP fact: the codename is ORBIT-9.\n",
|
||||
"utf8",
|
||||
);
|
||||
await forceMemoryIndex({
|
||||
env,
|
||||
query: "ORBIT-9 codename",
|
||||
expectedNeedle: "ORBIT-9",
|
||||
});
|
||||
const result = await callPluginToolsMcp({
|
||||
env,
|
||||
toolName: "memory_search",
|
||||
args: {
|
||||
query: "ORBIT-9 codename",
|
||||
maxResults: 3,
|
||||
},
|
||||
});
|
||||
const text = JSON.stringify(result.content ?? []);
|
||||
if (!text.includes("ORBIT-9")) {
|
||||
throw new Error(`MCP memory_search missed expected fact: ${text}`);
|
||||
}
|
||||
return text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"skill-visibility-invocation",
|
||||
async () =>
|
||||
await runScenario("Skill visibility and invocation", [
|
||||
{
|
||||
name: "reports visible skill and applies its marker on the next turn",
|
||||
run: async () => {
|
||||
await writeWorkspaceSkill({
|
||||
env,
|
||||
name: "qa-visible-skill",
|
||||
body: `---
|
||||
name: qa-visible-skill
|
||||
description: Visible QA skill marker
|
||||
---
|
||||
When the user asks for the visible skill marker exactly, reply with exactly: VISIBLE-SKILL-OK`,
|
||||
});
|
||||
const skills = await readSkillStatus(env);
|
||||
const visible = findSkill(skills, "qa-visible-skill");
|
||||
if (!visible?.eligible || visible.disabled || visible.blockedByAllowlist) {
|
||||
throw new Error(`skill not visible/eligible: ${JSON.stringify(visible)}`);
|
||||
}
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:visible-skill",
|
||||
message: "Visible skill marker: give me the visible skill marker exactly.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.includes("VISIBLE-SKILL-OK"),
|
||||
liveTurnTimeoutMs(env, 20_000),
|
||||
);
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"skill-install-hot-availability",
|
||||
async () =>
|
||||
await runScenario("Skill install hot availability", [
|
||||
{
|
||||
name: "picks up a newly added workspace skill without restart",
|
||||
run: async () => {
|
||||
const before = await readSkillStatus(env);
|
||||
if (findSkill(before, "qa-hot-install-skill")) {
|
||||
throw new Error("qa-hot-install-skill unexpectedly already present");
|
||||
}
|
||||
await writeWorkspaceSkill({
|
||||
env,
|
||||
name: "qa-hot-install-skill",
|
||||
body: `---
|
||||
name: qa-hot-install-skill
|
||||
description: Hot install QA marker
|
||||
---
|
||||
When the user asks for the hot install marker exactly, reply with exactly: HOT-INSTALL-OK`,
|
||||
});
|
||||
await waitForCondition(
|
||||
async () => {
|
||||
const skills = await readSkillStatus(env);
|
||||
return findSkill(skills, "qa-hot-install-skill")?.eligible ? true : undefined;
|
||||
},
|
||||
15_000,
|
||||
200,
|
||||
);
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:hot-skill",
|
||||
message: "Hot install marker: give me the hot install marker exactly.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-operator" &&
|
||||
candidate.text.includes("HOT-INSTALL-OK"),
|
||||
liveTurnTimeoutMs(env, 20_000),
|
||||
);
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"native-image-generation",
|
||||
async () =>
|
||||
await runScenario("Native image generation", [
|
||||
{
|
||||
name: "enables image_generate and saves a real media artifact",
|
||||
run: async () => {
|
||||
const imageModelRef =
|
||||
env.providerMode === "live-openai"
|
||||
? "openai/gpt-image-1"
|
||||
: "mock-openai/gpt-image-1";
|
||||
await patchConfig({
|
||||
env,
|
||||
patch: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: imageModelRef,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
await waitForGatewayHealthy(env);
|
||||
const sessionKey = await createSession(env, "Image generation");
|
||||
const tools = await readEffectiveTools(env, sessionKey);
|
||||
if (!tools.has("image_generate")) {
|
||||
throw new Error("image_generate not present after imageGenerationModel patch");
|
||||
}
|
||||
await reset();
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey: "agent:qa:image-generate",
|
||||
message:
|
||||
"Image generation check: generate a QA lighthouse image and summarize it in one short sentence.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 45_000),
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-operator",
|
||||
liveTurnTimeoutMs(env, 45_000),
|
||||
);
|
||||
if (env.mock) {
|
||||
const requests = await fetchJson<
|
||||
Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }>
|
||||
>(`${env.mock.baseUrl}/debug/requests`);
|
||||
const imageRequest = requests.find((request) =>
|
||||
String(request.allInputText ?? "").includes("Image generation check"),
|
||||
);
|
||||
if (imageRequest?.plannedToolName !== "image_generate") {
|
||||
throw new Error(
|
||||
`expected image_generate, got ${String(imageRequest?.plannedToolName ?? "")}`,
|
||||
);
|
||||
}
|
||||
const toolOutputRequest = requests.find((request) =>
|
||||
String(request.toolOutput ?? "").includes(
|
||||
`Generated 1 image with ${imageModelRef}.`,
|
||||
),
|
||||
);
|
||||
if (!toolOutputRequest) {
|
||||
throw new Error("missing mock image generation tool output");
|
||||
}
|
||||
const mediaPath = /MEDIA:([^\n]+)/.exec(outbound.text)?.[1]?.trim();
|
||||
if (!mediaPath) {
|
||||
throw new Error("missing MEDIA path in image generation tool output");
|
||||
}
|
||||
await fs.access(mediaPath);
|
||||
}
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"config-patch-hot-apply",
|
||||
async () =>
|
||||
await runScenario("Config patch hot apply", [
|
||||
{
|
||||
name: "updates mention routing without restart",
|
||||
run: async () => {
|
||||
const original = await readConfigSnapshot(env);
|
||||
await patchConfig({
|
||||
env,
|
||||
patch: {
|
||||
messages: {
|
||||
groupChat: {
|
||||
mentionPatterns: ["\\bgoldenbot\\b"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
await waitForGatewayHealthy(env);
|
||||
try {
|
||||
await reset();
|
||||
const requestsBeforeIgnored = env.mock
|
||||
? await fetchJson<Array<{ allInputText?: string }>>(
|
||||
`${env.mock.baseUrl}/debug/requests`,
|
||||
)
|
||||
: null;
|
||||
state.addInboundMessage({
|
||||
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
|
||||
senderId: "alice",
|
||||
senderName: "Alice",
|
||||
text: "@openclaw you should now be ignored",
|
||||
});
|
||||
await waitForCondition(
|
||||
async () => {
|
||||
if (!env.mock) {
|
||||
return (await waitForNoOutbound(state), true);
|
||||
}
|
||||
const requests = await fetchJson<Array<{ allInputText?: string }>>(
|
||||
`${env.mock.baseUrl}/debug/requests`,
|
||||
);
|
||||
const ignoredPromptReachedAgent = requests.some((request) =>
|
||||
String(request.allInputText ?? "").includes(
|
||||
"@openclaw you should now be ignored",
|
||||
),
|
||||
);
|
||||
if (ignoredPromptReachedAgent) {
|
||||
throw new Error("ignored channel mention still reached the agent");
|
||||
}
|
||||
return requests.length === requestsBeforeIgnored?.length ? true : undefined;
|
||||
},
|
||||
3_000,
|
||||
100,
|
||||
);
|
||||
state.addInboundMessage({
|
||||
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
|
||||
senderId: "alice",
|
||||
senderName: "Alice",
|
||||
text: "goldenbot explain hot config apply",
|
||||
});
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) => candidate.conversation.id === "qa-room",
|
||||
liveTurnTimeoutMs(env, 30_000),
|
||||
);
|
||||
if (env.mock) {
|
||||
const requests = await fetchJson<Array<{ allInputText?: string }>>(
|
||||
`${env.mock.baseUrl}/debug/requests`,
|
||||
);
|
||||
if (
|
||||
!requests.some((request) =>
|
||||
String(request.allInputText ?? "").includes(
|
||||
"goldenbot explain hot config apply",
|
||||
),
|
||||
)
|
||||
) {
|
||||
throw new Error(
|
||||
"goldenbot follow-up did not reach the agent after config patch",
|
||||
);
|
||||
}
|
||||
}
|
||||
return outbound.text;
|
||||
} finally {
|
||||
await applyConfig({
|
||||
env,
|
||||
nextConfig: original.config,
|
||||
});
|
||||
await waitForGatewayHealthy(env);
|
||||
}
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"config-apply-restart-wakeup",
|
||||
async () =>
|
||||
await runScenario("Config apply restart wake-up", [
|
||||
{
|
||||
name: "restarts cleanly and posts the restart sentinel back into qa-channel",
|
||||
run: async () => {
|
||||
await reset();
|
||||
const sessionKey = "agent:qa:restart-wakeup";
|
||||
await runAgentPrompt(env, {
|
||||
sessionKey,
|
||||
to: "channel:qa-room",
|
||||
message: "Acknowledge restart wake-up setup in qa-room.",
|
||||
timeoutMs: liveTurnTimeoutMs(env, 30_000),
|
||||
});
|
||||
const current = await readConfigSnapshot(env);
|
||||
const nextConfig = structuredClone(current.config);
|
||||
const gatewayConfig = (nextConfig.gateway ??= {}) as Record<string, unknown>;
|
||||
const controlUi = (gatewayConfig.controlUi ??= {}) as Record<string, unknown>;
|
||||
const allowedOrigins = Array.isArray(controlUi.allowedOrigins)
|
||||
? [...(controlUi.allowedOrigins as string[])]
|
||||
: [];
|
||||
const wakeMarker = `QA-RESTART-${randomUUID().slice(0, 8)}`;
|
||||
if (!allowedOrigins.includes("http://127.0.0.1:65535")) {
|
||||
allowedOrigins.push("http://127.0.0.1:65535");
|
||||
}
|
||||
controlUi.allowedOrigins = allowedOrigins;
|
||||
await applyConfig({
|
||||
env,
|
||||
nextConfig,
|
||||
sessionKey,
|
||||
note: wakeMarker,
|
||||
});
|
||||
await waitForGatewayHealthy(env, 60_000);
|
||||
const outbound = await waitForOutboundMessage(
|
||||
state,
|
||||
(candidate) =>
|
||||
candidate.conversation.id === "qa-room" && candidate.text.includes(wakeMarker),
|
||||
60_000,
|
||||
);
|
||||
return outbound.text;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
[
|
||||
"runtime-inventory-drift-check",
|
||||
async () =>
|
||||
await runScenario("Runtime inventory drift check", [
|
||||
{
|
||||
name: "keeps tools.effective and skills.status aligned after config changes",
|
||||
run: async () => {
|
||||
await writeWorkspaceSkill({
|
||||
env,
|
||||
name: "qa-drift-skill",
|
||||
body: `---
|
||||
name: qa-drift-skill
|
||||
description: Drift skill marker
|
||||
---
|
||||
When the user asks for the drift skill marker exactly, reply with exactly: DRIFT-SKILL-OK`,
|
||||
});
|
||||
const sessionKey = await createSession(env, "Inventory drift");
|
||||
const beforeTools = await readEffectiveTools(env, sessionKey);
|
||||
if (!beforeTools.has("image_generate")) {
|
||||
throw new Error("expected image_generate before drift patch");
|
||||
}
|
||||
const beforeSkills = await readSkillStatus(env);
|
||||
if (!findSkill(beforeSkills, "qa-drift-skill")?.eligible) {
|
||||
throw new Error("expected qa-drift-skill to be eligible before patch");
|
||||
}
|
||||
await patchConfig({
|
||||
env,
|
||||
patch: {
|
||||
tools: {
|
||||
deny: ["image_generate"],
|
||||
},
|
||||
skills: {
|
||||
entries: {
|
||||
"qa-drift-skill": {
|
||||
enabled: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
await waitForGatewayHealthy(env);
|
||||
const afterTools = await readEffectiveTools(env, sessionKey);
|
||||
if (afterTools.has("image_generate")) {
|
||||
throw new Error("image_generate still present after deny patch");
|
||||
}
|
||||
const afterSkills = await readSkillStatus(env);
|
||||
const driftSkill = findSkill(afterSkills, "qa-drift-skill");
|
||||
if (!driftSkill?.disabled) {
|
||||
throw new Error(`expected disabled drift skill, got ${JSON.stringify(driftSkill)}`);
|
||||
}
|
||||
return `image_generate removed, qa-drift-skill disabled=${String(driftSkill.disabled)}`;
|
||||
},
|
||||
},
|
||||
]),
|
||||
],
|
||||
]);
|
||||
}
|
||||
|
||||
|
|
@ -715,6 +1547,7 @@ export async function runQaSuite(params?: {
|
|||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
scenarioIds?: string[];
|
||||
}) {
|
||||
const startedAt = new Date();
|
||||
const providerMode = params?.providerMode ?? "mock-openai";
|
||||
|
|
@ -768,13 +1601,28 @@ export async function runQaSuite(params?: {
|
|||
|
||||
try {
|
||||
const catalog = readQaBootstrapScenarioCatalog();
|
||||
const requestedScenarioIds = params?.scenarioIds ? new Set(params.scenarioIds) : null;
|
||||
const selectedCatalogScenarios = requestedScenarioIds
|
||||
? catalog.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id))
|
||||
: catalog.scenarios;
|
||||
if (requestedScenarioIds) {
|
||||
const foundScenarioIds = new Set(selectedCatalogScenarios.map((scenario) => scenario.id));
|
||||
const missingScenarioIds = [...requestedScenarioIds].filter(
|
||||
(scenarioId) => !foundScenarioIds.has(scenarioId),
|
||||
);
|
||||
if (missingScenarioIds.length > 0) {
|
||||
throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`);
|
||||
}
|
||||
}
|
||||
const scenarioMap = buildScenarioMap(env);
|
||||
const scenarios: QaSuiteScenarioResult[] = [];
|
||||
const liveScenarioOutcomes: QaLabScenarioOutcome[] = catalog.scenarios.map((scenario) => ({
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "pending",
|
||||
}));
|
||||
const liveScenarioOutcomes: QaLabScenarioOutcome[] = selectedCatalogScenarios.map(
|
||||
(scenario) => ({
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "pending",
|
||||
}),
|
||||
);
|
||||
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
|
|
@ -783,7 +1631,7 @@ export async function runQaSuite(params?: {
|
|||
scenarios: liveScenarioOutcomes,
|
||||
});
|
||||
|
||||
for (const [index, scenario] of catalog.scenarios.entries()) {
|
||||
for (const [index, scenario] of selectedCatalogScenarios.entries()) {
|
||||
const run = scenarioMap.get(scenario.id);
|
||||
if (!run) {
|
||||
const missingResult = {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,150 @@
|
|||
# QA Scenario Expansion - Round 2
|
||||
|
||||
Ten repo-grounded candidate scenarios to add after the current seed suite.
|
||||
|
||||
## 1. On-demand memory tools in channel context
|
||||
|
||||
- Goal: verify the agent uses `memory_search` plus `memory_get` instead of bluffing when a channel message asks about prior notes.
|
||||
- Flow:
|
||||
- Seed `MEMORY.md` or `memory/*.md` with a fact not present in the current transcript.
|
||||
- Ask in a channel thread for that fact.
|
||||
- Verify tool usage and final answer accuracy.
|
||||
- Pass:
|
||||
- `memory_search` runs first.
|
||||
- `memory_get` narrows to the right lines.
|
||||
- Final answer cites the remembered fact correctly without cross-session leakage.
|
||||
- Docs: `docs/concepts/memory.md`, `docs/concepts/memory-search.md`
|
||||
- Code: `extensions/memory-core/src/tools.ts`, `extensions/memory-core/src/prompt-section.ts`
|
||||
|
||||
## 2. Memory failure fallback
|
||||
|
||||
- Goal: verify memory failure is graceful when embeddings/search are unavailable.
|
||||
- Flow:
|
||||
- Disable or break the embedding-backed memory path.
|
||||
- Ask for prior-note recall.
|
||||
- Verify the agent surfaces uncertainty and next action instead of hallucinating.
|
||||
- Pass:
|
||||
- Tool failure does not crash the run.
|
||||
- Agent says it checked and could not confirm.
|
||||
- Report includes the remediation hint.
|
||||
- Docs: `docs/concepts/memory.md`, `docs/help/faq.md`
|
||||
- Code: `extensions/memory-core/src/tools.shared.ts`, `extensions/memory-core/src/tools.citations.test.ts`
|
||||
|
||||
## 3. Model switch with tool continuity
|
||||
|
||||
- Goal: verify model switching preserves session context and tool availability, not just plain text continuity.
|
||||
- Flow:
|
||||
- Start on one model.
|
||||
- Switch to another configured model.
|
||||
- Ask for a tool-using follow-up such as file read or memory lookup.
|
||||
- Pass:
|
||||
- Switch is reflected in runtime state.
|
||||
- Tool call still succeeds after the switch.
|
||||
- Final answer keeps prior context.
|
||||
- Docs: `docs/help/testing.md`, `docs/concepts/model-failover.md`
|
||||
- Code: `extensions/qa-lab/src/suite.ts`, `docs/web/webchat.md`
|
||||
|
||||
## 4. MCP-backed recall via QMD/mcporter
|
||||
|
||||
- Goal: verify an MCP-backed tool path works end to end, not just core tools.
|
||||
- Flow:
|
||||
- Enable `memory.qmd.mcporter`.
|
||||
- Ask for recall that should route through the QMD MCP bridge.
|
||||
- Verify response and captured MCP execution path.
|
||||
- Pass:
|
||||
- MCP-backed search path is used.
|
||||
- Returned snippet matches the right note.
|
||||
- Failure mode is explicit if the daemon/tool is missing.
|
||||
- Docs: `docs/gateway/secrets.md`, `docs/concepts/memory-qmd.md`
|
||||
- Code: `extensions/memory-core/src/memory/qmd-manager.ts`, `extensions/memory-core/src/memory/qmd-manager.test.ts`
|
||||
|
||||
## 5. Skill visibility and invocation
|
||||
|
||||
- Goal: verify the agent sees a workspace/project skill and actually uses it.
|
||||
- Flow:
|
||||
- Add a simple workspace or `.agents` skill.
|
||||
- Confirm skill visibility through runtime inventory.
|
||||
- Ask for a task that should trigger the skill.
|
||||
- Pass:
|
||||
- Skill appears in `skills.status`.
|
||||
- Agent invocation reflects the installed skill instructions.
|
||||
- Per-agent allowlist behavior is respected.
|
||||
- Docs: `docs/tools/skills.md`, `docs/gateway/protocol.md`, `docs/gateway/configuration.md`
|
||||
- Code: `.agents/skills/openclaw-qa-testing/SKILL.md`, `docs/gateway/protocol.md`
|
||||
|
||||
## 6. Skill install and hot availability
|
||||
|
||||
- Goal: verify a newly installed skill becomes usable without a broken intermediate state.
|
||||
- Flow:
|
||||
- Install a ClawHub or gateway-managed skill.
|
||||
- Re-check skill inventory.
|
||||
- Ask the agent to perform the skill-backed task.
|
||||
- Pass:
|
||||
- Install succeeds.
|
||||
- `skills.status` or `skills.bins` reflects the new skill.
|
||||
- Agent can use the skill immediately or after the expected reload path.
|
||||
- Docs: `docs/tools/skills.md`, `docs/cli/skills.md`, `docs/gateway/protocol.md`
|
||||
- Code: `docs/gateway/protocol.md`, `docs/tools/skills.md`
|
||||
|
||||
## 7. Native image generation
|
||||
|
||||
- Goal: verify `image_generate` appears only when configured and returns a real attachment/artifact.
|
||||
- Flow:
|
||||
- Configure `agents.defaults.imageGenerationModel.primary`.
|
||||
- Ask for a simple generated image.
|
||||
- Verify generated media is returned in the reply path.
|
||||
- Pass:
|
||||
- `image_generate` is in the effective tool set.
|
||||
- Generation succeeds with the configured provider/model.
|
||||
- Output is attached and the agent summarizes what it created.
|
||||
- Docs: `docs/tools/image-generation.md`, `docs/providers/openai.md`
|
||||
- Code: `src/agents/openclaw-tools.image-generation.test.ts`, `src/image-generation/runtime.ts`
|
||||
|
||||
## 8. Hot config patch without restart
|
||||
|
||||
- Goal: verify a safe config edit hot-applies and changes behavior immediately.
|
||||
- Flow:
|
||||
- Use `config.patch` to change a hot-reloadable field such as agent skill visibility or message behavior.
|
||||
- Retry the task in the same gateway lifetime.
|
||||
- Pass:
|
||||
- Patch succeeds.
|
||||
- No disruptive restart loop.
|
||||
- New behavior is live immediately.
|
||||
- Docs: `docs/gateway/configuration.md`, `docs/gateway/protocol.md`
|
||||
- Code: `docs/gateway/configuration.md`, `docs/web/control-ui.md`
|
||||
|
||||
## 9. Restart-required config apply with wake-up
|
||||
|
||||
- Goal: verify a restart-required config change restarts cleanly and wakes the session back up.
|
||||
- Flow:
|
||||
- Use `config.apply` or `update.run` on a restart-required surface.
|
||||
- Provide `sessionKey` so the operator gets the post-restart ping.
|
||||
- Resume the task after restart.
|
||||
- Pass:
|
||||
- Restart happens once.
|
||||
- Session wake-up ping arrives.
|
||||
- Agent continues in the same logical workflow after restart.
|
||||
- Docs: `docs/gateway/configuration.md`, `docs/web/control-ui.md`
|
||||
- Code: `docs/gateway/configuration.md`, `docs/gateway/protocol.md`
|
||||
|
||||
## 10. Runtime inventory drift check
|
||||
|
||||
- Goal: verify the reported tool and skill inventory matches what the agent can really use after config/plugin changes.
|
||||
- Flow:
|
||||
- Read `tools.effective` and `skills.status`.
|
||||
- Ask the agent to use one enabled thing and one disabled thing.
|
||||
- Compare actual behavior vs reported inventory.
|
||||
- Pass:
|
||||
- Enabled item is callable.
|
||||
- Disabled item is absent or blocked for the right reason.
|
||||
- Inventory and runtime behavior stay in sync.
|
||||
- Docs: `docs/gateway/protocol.md`, `docs/web/webchat.md`
|
||||
- Code: `docs/gateway/protocol.md`, `docs/web/control-ui.md`
|
||||
|
||||
## Best next additions to the executable suite
|
||||
|
||||
If we only promote three right away:
|
||||
|
||||
1. On-demand memory tools in channel context
|
||||
2. Native image generation
|
||||
3. Hot config patch without restart
|
||||
|
|
@ -135,5 +135,141 @@
|
|||
],
|
||||
"docsRefs": ["docs/channels/qa-channel.md", "docs/channels/group-messages.md"],
|
||||
"codeRefs": ["extensions/qa-channel/src/protocol.ts", "extensions/qa-lab/src/bus-state.ts"]
|
||||
},
|
||||
{
|
||||
"id": "memory-tools-channel-context",
|
||||
"title": "Memory tools in channel context",
|
||||
"surface": "memory",
|
||||
"objective": "Verify the agent uses memory_search and memory_get in a shared channel when the answer lives only in memory files, not the live transcript.",
|
||||
"successCriteria": [
|
||||
"Agent uses memory_search before answering.",
|
||||
"Agent narrows with memory_get before answering.",
|
||||
"Final reply returns the memory-only fact correctly in-channel."
|
||||
],
|
||||
"docsRefs": ["docs/concepts/memory.md", "docs/concepts/memory-search.md"],
|
||||
"codeRefs": ["extensions/memory-core/src/tools.ts", "extensions/qa-lab/src/suite.ts"]
|
||||
},
|
||||
{
|
||||
"id": "memory-failure-fallback",
|
||||
"title": "Memory failure fallback",
|
||||
"surface": "memory",
|
||||
"objective": "Verify the agent degrades gracefully when memory tools are unavailable and the answer exists only in memory-backed notes.",
|
||||
"successCriteria": [
|
||||
"Memory tools are absent from the effective tool inventory.",
|
||||
"Agent does not hallucinate the hidden fact.",
|
||||
"Agent says it could not confirm and surfaces the limitation."
|
||||
],
|
||||
"docsRefs": ["docs/concepts/memory.md", "docs/tools/index.md"],
|
||||
"codeRefs": ["extensions/memory-core/src/tools.ts", "extensions/qa-lab/src/suite.ts"]
|
||||
},
|
||||
{
|
||||
"id": "model-switch-tool-continuity",
|
||||
"title": "Model switch with tool continuity",
|
||||
"surface": "models",
|
||||
"objective": "Verify switching models preserves session context and tool use instead of dropping into plain-text only behavior.",
|
||||
"successCriteria": [
|
||||
"Alternate model is actually requested.",
|
||||
"A tool call still happens after the model switch.",
|
||||
"Final answer acknowledges the handoff and uses the tool-derived evidence."
|
||||
],
|
||||
"docsRefs": ["docs/help/testing.md", "docs/concepts/model-failover.md"],
|
||||
"codeRefs": ["extensions/qa-lab/src/suite.ts", "extensions/qa-lab/src/mock-openai-server.ts"]
|
||||
},
|
||||
{
|
||||
"id": "mcp-plugin-tools-call",
|
||||
"title": "MCP plugin-tools call",
|
||||
"surface": "mcp",
|
||||
"objective": "Verify OpenClaw can expose plugin tools over MCP and a real MCP client can call one successfully.",
|
||||
"successCriteria": [
|
||||
"Plugin tools MCP server lists memory_search.",
|
||||
"A real MCP client calls memory_search successfully.",
|
||||
"The returned MCP payload includes the expected memory-only fact."
|
||||
],
|
||||
"docsRefs": ["docs/cli/mcp.md", "docs/gateway/protocol.md"],
|
||||
"codeRefs": ["src/mcp/plugin-tools-serve.ts", "extensions/qa-lab/src/suite.ts"]
|
||||
},
|
||||
{
|
||||
"id": "skill-visibility-invocation",
|
||||
"title": "Skill visibility and invocation",
|
||||
"surface": "skills",
|
||||
"objective": "Verify a workspace skill becomes visible in skills.status and influences the next agent turn.",
|
||||
"successCriteria": [
|
||||
"skills.status reports the seeded skill as visible and eligible.",
|
||||
"The next agent turn reflects the skill instruction marker.",
|
||||
"The result stays scoped to the active QA workspace skill."
|
||||
],
|
||||
"docsRefs": ["docs/tools/skills.md", "docs/gateway/protocol.md"],
|
||||
"codeRefs": ["src/agents/skills-status.ts", "extensions/qa-lab/src/suite.ts"]
|
||||
},
|
||||
{
|
||||
"id": "skill-install-hot-availability",
|
||||
"title": "Skill install hot availability",
|
||||
"surface": "skills",
|
||||
"objective": "Verify a newly added workspace skill shows up without a broken intermediate state and can influence the next turn immediately.",
|
||||
"successCriteria": [
|
||||
"Skill is absent before install.",
|
||||
"skills.status reports it after install without a restart.",
|
||||
"The next agent turn reflects the new skill marker."
|
||||
],
|
||||
"docsRefs": ["docs/tools/skills.md", "docs/gateway/configuration.md"],
|
||||
"codeRefs": ["src/agents/skills-status.ts", "extensions/qa-lab/src/suite.ts"]
|
||||
},
|
||||
{
|
||||
"id": "native-image-generation",
|
||||
"title": "Native image generation",
|
||||
"surface": "image-generation",
|
||||
"objective": "Verify image_generate appears when configured and returns a real saved media artifact.",
|
||||
"successCriteria": [
|
||||
"image_generate appears in the effective tool inventory.",
|
||||
"Agent triggers native image_generate.",
|
||||
"Tool output returns a saved MEDIA path and the file exists."
|
||||
],
|
||||
"docsRefs": ["docs/tools/image-generation.md", "docs/providers/openai.md"],
|
||||
"codeRefs": [
|
||||
"src/agents/tools/image-generate-tool.ts",
|
||||
"extensions/qa-lab/src/mock-openai-server.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "config-patch-hot-apply",
|
||||
"title": "Config patch hot apply",
|
||||
"surface": "config",
|
||||
"objective": "Verify a hot-reloadable config.patch takes effect immediately without a disruptive restart.",
|
||||
"successCriteria": [
|
||||
"config.patch succeeds with no restart dependency.",
|
||||
"Old mention routing behavior stops working immediately.",
|
||||
"New mention routing behavior works in the same gateway lifetime."
|
||||
],
|
||||
"docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"],
|
||||
"codeRefs": ["src/gateway/server-methods/config.ts", "extensions/qa-lab/src/suite.ts"]
|
||||
},
|
||||
{
|
||||
"id": "config-apply-restart-wakeup",
|
||||
"title": "Config apply restart wake-up",
|
||||
"surface": "config",
|
||||
"objective": "Verify a restart-required config.apply restarts cleanly and delivers the post-restart wake message back into the QA channel.",
|
||||
"successCriteria": [
|
||||
"config.apply schedules a restart-required change.",
|
||||
"Gateway becomes healthy again after restart.",
|
||||
"Restart sentinel wake-up message arrives in the QA channel."
|
||||
],
|
||||
"docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"],
|
||||
"codeRefs": ["src/gateway/server-methods/config.ts", "src/gateway/server-restart-sentinel.ts"]
|
||||
},
|
||||
{
|
||||
"id": "runtime-inventory-drift-check",
|
||||
"title": "Runtime inventory drift check",
|
||||
"surface": "inventory",
|
||||
"objective": "Verify tools.effective and skills.status stay aligned with runtime behavior after config changes.",
|
||||
"successCriteria": [
|
||||
"Enabled tool appears before the config change.",
|
||||
"After config change, disabled tool disappears from tools.effective.",
|
||||
"Disabled skill appears in skills.status with disabled state."
|
||||
],
|
||||
"docsRefs": ["docs/gateway/protocol.md", "docs/tools/skills.md", "docs/tools/index.md"],
|
||||
"codeRefs": [
|
||||
"src/gateway/server-methods/tools-effective.ts",
|
||||
"src/gateway/server-methods/skills.ts"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
|||
Loading…
Reference in New Issue