test: improve live test progress feedback

This commit is contained in:
Peter Steinberger 2026-03-22 20:57:04 +00:00
parent 2a1acec6a7
commit 1efda3d041
3 changed files with 78 additions and 9 deletions

View File

@ -130,6 +130,10 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
- Prefer running narrowed subsets instead of “everything”
- Live runs will source `~/.profile` to pick up missing API keys
- API key rotation (provider-specific): set `*_API_KEYS` with comma/semicolon format or `*_API_KEY_1`, `*_API_KEY_2` (for example `OPENAI_API_KEYS`, `ANTHROPIC_API_KEYS`, `GEMINI_API_KEYS`) or per-live override via `OPENCLAW_LIVE_*_KEY`; tests retry on rate limit responses.
- Progress/heartbeat output:
  - Live suites now emit progress lines to stderr so long provider calls are visibly active even when Vitest console capture is quiet.
  - Tune direct-model heartbeats with `OPENCLAW_LIVE_HEARTBEAT_MS`.
  - Tune gateway/probe heartbeats with `OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS`.
## Which suite should I run?

View File

@ -19,6 +19,7 @@ import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
const LIVE = isTruthyEnvValue(process.env.LIVE) || isTruthyEnvValue(process.env.OPENCLAW_LIVE_TEST);
const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
const REQUIRE_PROFILE_KEYS = isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS);
const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000));
const describeLive = LIVE ? describe : describe.skip;
@ -43,7 +44,29 @@ function parseModelFilter(raw?: string): Set<string> | null {
}
// Emit a "[live]"-tagged progress line for long-running live-test runs.
// NOTE(review): this diff view shows both the old console.log line and its
// stderr replacement without +/- markers; per the commit message the
// post-commit function writes to stderr so progress stays visible even when
// Vitest captures/suppresses console output — confirm only one line survives.
function logProgress(message: string): void {
console.log(`[live] ${message}`);
process.stderr.write(`[live] ${message}\n`);
}
/**
 * Renders a millisecond duration as whole seconds for progress lines,
 * e.g. 30_000 -> "30s". Durations that round below one second are
 * reported as "1s" so elapsed output never shows "0s".
 */
function formatElapsedSeconds(ms: number): string {
  const seconds = Math.round(ms / 1_000);
  return `${seconds < 1 ? 1 : seconds}s`;
}
/**
 * Awaits `operation` while logging a periodic "still running" heartbeat
 * every LIVE_HEARTBEAT_MS, so long provider calls are visibly active.
 * When at least one heartbeat fired, a final "completed after" line is
 * logged too; fast operations stay silent.
 */
async function withLiveHeartbeat<T>(operation: Promise<T>, context: string): Promise<T> {
  const start = Date.now();
  let ticks = 0;
  const interval = setInterval(() => {
    ticks += 1;
    logProgress(`${context}: still running (${formatElapsedSeconds(Date.now() - start)})`);
  }, LIVE_HEARTBEAT_MS);
  // Don't let the heartbeat timer keep the process alive on its own.
  interval.unref?.();
  try {
    return await operation;
  } finally {
    clearInterval(interval);
    if (ticks > 0) {
      logProgress(`${context}: completed after ${formatElapsedSeconds(Date.now() - start)}`);
    }
  }
}
function formatFailurePreview(
@ -237,6 +260,7 @@ async function completeSimpleWithTimeout<TApi extends Api>(
context: Parameters<typeof completeSimple<TApi>>[1],
options: Parameters<typeof completeSimple<TApi>>[2],
timeoutMs: number,
progressContext: string,
) {
const maxTimeoutMs = Math.max(1, timeoutMs);
const controller = new AbortController();
@ -252,13 +276,16 @@ async function completeSimpleWithTimeout<TApi extends Api>(
hardTimer.unref?.();
});
try {
return await Promise.race([
completeSimple(model, context, {
...options,
signal: controller.signal,
}),
timeout,
]);
return await withLiveHeartbeat(
Promise.race([
completeSimple(model, context, {
...options,
signal: controller.signal,
}),
timeout,
]),
progressContext,
);
} finally {
clearTimeout(abortTimer);
if (hardTimer) {
@ -271,6 +298,7 @@ async function completeOkWithRetry(params: {
model: Model<Api>;
apiKey: string;
timeoutMs: number;
progressLabel: string;
}) {
const runOnce = async (maxTokens: number) => {
const res = await completeSimpleWithTimeout(
@ -291,6 +319,7 @@ async function completeOkWithRetry(params: {
maxTokens,
},
params.timeoutMs,
`${params.progressLabel}: prompt call (maxTokens=${maxTokens})`,
);
const text = res.content
.filter((block) => block.type === "text")
@ -395,6 +424,9 @@ describeLive("live models (profile keys)", () => {
);
}
logProgress(`[live-models] running ${selectedCandidates.length} models`);
logProgress(
`[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)}`,
);
const total = selectedCandidates.length;
for (const [index, entry] of selectedCandidates.entries()) {
@ -441,6 +473,7 @@ describeLive("live models (profile keys)", () => {
maxTokens: 128,
},
perModelTimeoutMs,
`${progressLabel}: tool-only regression first call`,
);
let toolCall = first.content.find((b) => b.type === "toolCall");
@ -470,6 +503,7 @@ describeLive("live models (profile keys)", () => {
maxTokens: 128,
},
perModelTimeoutMs,
`${progressLabel}: tool-only regression retry ${i + 1}`,
);
toolCall = first.content.find((b) => b.type === "toolCall");
@ -514,6 +548,7 @@ describeLive("live models (profile keys)", () => {
maxTokens: 256,
},
perModelTimeoutMs,
`${progressLabel}: tool-only regression followup`,
);
const secondText = second.content
@ -530,6 +565,7 @@ describeLive("live models (profile keys)", () => {
model,
apiKey,
timeoutMs: perModelTimeoutMs,
progressLabel,
});
if (ok.res.stopReason === "error") {

View File

@ -55,6 +55,10 @@ const GATEWAY_LIVE_PROBE_TIMEOUT_MS = Math.max(
30_000,
toInt(process.env.OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS, 90_000),
);
const GATEWAY_LIVE_HEARTBEAT_MS = Math.max(
1_000,
toInt(process.env.OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS, 30_000),
);
const GATEWAY_LIVE_MAX_MODELS = resolveGatewayLiveMaxModels();
const GATEWAY_LIVE_SUITE_TIMEOUT_MS = resolveGatewayLiveSuiteTimeoutMs(GATEWAY_LIVE_MAX_MODELS);
@ -108,6 +112,15 @@ function isGatewayLiveProbeTimeout(error: string): boolean {
async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
const startedAt = Date.now();
let heartbeatCount = 0;
const heartbeat = setInterval(() => {
heartbeatCount += 1;
logProgress(
`${context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
);
}, GATEWAY_LIVE_HEARTBEAT_MS);
heartbeat.unref?.();
try {
return await Promise.race([
operation,
@ -118,9 +131,15 @@ async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: st
}),
]);
} finally {
clearInterval(heartbeat);
if (timeoutHandle) {
clearTimeout(timeoutHandle);
}
if (heartbeatCount > 0) {
logProgress(
`${context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`,
);
}
}
}
@ -168,7 +187,7 @@ function capByProviderSpread<T>(
}
// Emit a "[live]"-tagged progress line for the gateway live suites.
// NOTE(review): the diff rendering shows both the old console.log line and
// its stderr replacement without +/- markers; per the commit message the
// post-commit function writes to stderr so heartbeat output remains visible
// under Vitest's console capture — confirm only one line survives.
function logProgress(message: string): void {
console.log(`[live] ${message}`);
process.stderr.write(`[live] ${message}\n`);
}
function enterProductionEnvForLiveRun() {
@ -568,6 +587,7 @@ async function waitForSessionAssistantText(params: {
context: string;
}) {
const startedAt = Date.now();
let lastHeartbeatAt = startedAt;
let delayMs = 50;
while (Date.now() - startedAt < GATEWAY_LIVE_PROBE_TIMEOUT_MS) {
const assistantTexts = readSessionAssistantTexts(params.sessionKey);
@ -580,6 +600,12 @@ async function waitForSessionAssistantText(params: {
return freshText;
}
}
if (Date.now() - lastHeartbeatAt >= GATEWAY_LIVE_HEARTBEAT_MS) {
lastHeartbeatAt = Date.now();
logProgress(
`${params.context}: waiting for transcript (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
);
}
await new Promise((resolve) => setTimeout(resolve, delayMs));
delayMs = Math.min(delayMs * 2, 250);
}
@ -863,6 +889,9 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
logProgress(
`[${params.label}] running ${params.candidates.length} models (thinking=${params.thinkingLevel})`,
);
logProgress(
`[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s`,
);
const anthropicKeys = collectAnthropicApiKeys();
if (anthropicKeys.length > 0) {
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];