mirror of https://github.com/openclaw/openclaw.git
test: improve live test progress feedback
This commit is contained in:
parent
2a1acec6a7
commit
1efda3d041
|
|
@ -130,6 +130,10 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
|
|||
- Prefer running narrowed subsets instead of “everything”
|
||||
- Live runs will source `~/.profile` to pick up missing API keys
|
||||
- API key rotation (provider-specific): set `*_API_KEYS` with comma/semicolon format or `*_API_KEY_1`, `*_API_KEY_2` (for example `OPENAI_API_KEYS`, `ANTHROPIC_API_KEYS`, `GEMINI_API_KEYS`) or per-live override via `OPENCLAW_LIVE_*_KEY`; tests retry on rate limit responses.
|
||||
- Progress/heartbeat output:
|
||||
- Live suites now emit progress lines to stderr so long provider calls are visibly active even when Vitest console capture is quiet.
|
||||
- Tune direct-model heartbeats with `OPENCLAW_LIVE_HEARTBEAT_MS`.
|
||||
- Tune gateway/probe heartbeats with `OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS`.
|
||||
|
||||
## Which suite should I run?
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
|
|||
const LIVE = isTruthyEnvValue(process.env.LIVE) || isTruthyEnvValue(process.env.OPENCLAW_LIVE_TEST);
|
||||
const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
|
||||
const REQUIRE_PROFILE_KEYS = isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS);
|
||||
const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000));
|
||||
|
||||
const describeLive = LIVE ? describe : describe.skip;
|
||||
|
||||
|
|
@ -43,7 +44,29 @@ function parseModelFilter(raw?: string): Set<string> | null {
|
|||
}
|
||||
|
||||
function logProgress(message: string): void {
|
||||
console.log(`[live] ${message}`);
|
||||
process.stderr.write(`[live] ${message}\n`);
|
||||
}
|
||||
|
||||
function formatElapsedSeconds(ms: number): string {
|
||||
return `${Math.max(1, Math.round(ms / 1_000))}s`;
|
||||
}
|
||||
|
||||
async function withLiveHeartbeat<T>(operation: Promise<T>, context: string): Promise<T> {
|
||||
const startedAt = Date.now();
|
||||
let heartbeatCount = 0;
|
||||
const timer = setInterval(() => {
|
||||
heartbeatCount += 1;
|
||||
logProgress(`${context}: still running (${formatElapsedSeconds(Date.now() - startedAt)})`);
|
||||
}, LIVE_HEARTBEAT_MS);
|
||||
timer.unref?.();
|
||||
try {
|
||||
return await operation;
|
||||
} finally {
|
||||
clearInterval(timer);
|
||||
if (heartbeatCount > 0) {
|
||||
logProgress(`${context}: completed after ${formatElapsedSeconds(Date.now() - startedAt)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function formatFailurePreview(
|
||||
|
|
@ -237,6 +260,7 @@ async function completeSimpleWithTimeout<TApi extends Api>(
|
|||
context: Parameters<typeof completeSimple<TApi>>[1],
|
||||
options: Parameters<typeof completeSimple<TApi>>[2],
|
||||
timeoutMs: number,
|
||||
progressContext: string,
|
||||
) {
|
||||
const maxTimeoutMs = Math.max(1, timeoutMs);
|
||||
const controller = new AbortController();
|
||||
|
|
@ -252,13 +276,16 @@ async function completeSimpleWithTimeout<TApi extends Api>(
|
|||
hardTimer.unref?.();
|
||||
});
|
||||
try {
|
||||
return await Promise.race([
|
||||
completeSimple(model, context, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
}),
|
||||
timeout,
|
||||
]);
|
||||
return await withLiveHeartbeat(
|
||||
Promise.race([
|
||||
completeSimple(model, context, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
}),
|
||||
timeout,
|
||||
]),
|
||||
progressContext,
|
||||
);
|
||||
} finally {
|
||||
clearTimeout(abortTimer);
|
||||
if (hardTimer) {
|
||||
|
|
@ -271,6 +298,7 @@ async function completeOkWithRetry(params: {
|
|||
model: Model<Api>;
|
||||
apiKey: string;
|
||||
timeoutMs: number;
|
||||
progressLabel: string;
|
||||
}) {
|
||||
const runOnce = async (maxTokens: number) => {
|
||||
const res = await completeSimpleWithTimeout(
|
||||
|
|
@ -291,6 +319,7 @@ async function completeOkWithRetry(params: {
|
|||
maxTokens,
|
||||
},
|
||||
params.timeoutMs,
|
||||
`${params.progressLabel}: prompt call (maxTokens=${maxTokens})`,
|
||||
);
|
||||
const text = res.content
|
||||
.filter((block) => block.type === "text")
|
||||
|
|
@ -395,6 +424,9 @@ describeLive("live models (profile keys)", () => {
|
|||
);
|
||||
}
|
||||
logProgress(`[live-models] running ${selectedCandidates.length} models`);
|
||||
logProgress(
|
||||
`[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)}`,
|
||||
);
|
||||
const total = selectedCandidates.length;
|
||||
|
||||
for (const [index, entry] of selectedCandidates.entries()) {
|
||||
|
|
@ -441,6 +473,7 @@ describeLive("live models (profile keys)", () => {
|
|||
maxTokens: 128,
|
||||
},
|
||||
perModelTimeoutMs,
|
||||
`${progressLabel}: tool-only regression first call`,
|
||||
);
|
||||
|
||||
let toolCall = first.content.find((b) => b.type === "toolCall");
|
||||
|
|
@ -470,6 +503,7 @@ describeLive("live models (profile keys)", () => {
|
|||
maxTokens: 128,
|
||||
},
|
||||
perModelTimeoutMs,
|
||||
`${progressLabel}: tool-only regression retry ${i + 1}`,
|
||||
);
|
||||
|
||||
toolCall = first.content.find((b) => b.type === "toolCall");
|
||||
|
|
@ -514,6 +548,7 @@ describeLive("live models (profile keys)", () => {
|
|||
maxTokens: 256,
|
||||
},
|
||||
perModelTimeoutMs,
|
||||
`${progressLabel}: tool-only regression followup`,
|
||||
);
|
||||
|
||||
const secondText = second.content
|
||||
|
|
@ -530,6 +565,7 @@ describeLive("live models (profile keys)", () => {
|
|||
model,
|
||||
apiKey,
|
||||
timeoutMs: perModelTimeoutMs,
|
||||
progressLabel,
|
||||
});
|
||||
|
||||
if (ok.res.stopReason === "error") {
|
||||
|
|
|
|||
|
|
@ -55,6 +55,10 @@ const GATEWAY_LIVE_PROBE_TIMEOUT_MS = Math.max(
|
|||
30_000,
|
||||
toInt(process.env.OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS, 90_000),
|
||||
);
|
||||
const GATEWAY_LIVE_HEARTBEAT_MS = Math.max(
|
||||
1_000,
|
||||
toInt(process.env.OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS, 30_000),
|
||||
);
|
||||
const GATEWAY_LIVE_MAX_MODELS = resolveGatewayLiveMaxModels();
|
||||
const GATEWAY_LIVE_SUITE_TIMEOUT_MS = resolveGatewayLiveSuiteTimeoutMs(GATEWAY_LIVE_MAX_MODELS);
|
||||
|
||||
|
|
@ -108,6 +112,15 @@ function isGatewayLiveProbeTimeout(error: string): boolean {
|
|||
|
||||
async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
|
||||
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
|
||||
const startedAt = Date.now();
|
||||
let heartbeatCount = 0;
|
||||
const heartbeat = setInterval(() => {
|
||||
heartbeatCount += 1;
|
||||
logProgress(
|
||||
`${context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
|
||||
);
|
||||
}, GATEWAY_LIVE_HEARTBEAT_MS);
|
||||
heartbeat.unref?.();
|
||||
try {
|
||||
return await Promise.race([
|
||||
operation,
|
||||
|
|
@ -118,9 +131,15 @@ async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: st
|
|||
}),
|
||||
]);
|
||||
} finally {
|
||||
clearInterval(heartbeat);
|
||||
if (timeoutHandle) {
|
||||
clearTimeout(timeoutHandle);
|
||||
}
|
||||
if (heartbeatCount > 0) {
|
||||
logProgress(
|
||||
`${context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -168,7 +187,7 @@ function capByProviderSpread<T>(
|
|||
}
|
||||
|
||||
function logProgress(message: string): void {
|
||||
console.log(`[live] ${message}`);
|
||||
process.stderr.write(`[live] ${message}\n`);
|
||||
}
|
||||
|
||||
function enterProductionEnvForLiveRun() {
|
||||
|
|
@ -568,6 +587,7 @@ async function waitForSessionAssistantText(params: {
|
|||
context: string;
|
||||
}) {
|
||||
const startedAt = Date.now();
|
||||
let lastHeartbeatAt = startedAt;
|
||||
let delayMs = 50;
|
||||
while (Date.now() - startedAt < GATEWAY_LIVE_PROBE_TIMEOUT_MS) {
|
||||
const assistantTexts = readSessionAssistantTexts(params.sessionKey);
|
||||
|
|
@ -580,6 +600,12 @@ async function waitForSessionAssistantText(params: {
|
|||
return freshText;
|
||||
}
|
||||
}
|
||||
if (Date.now() - lastHeartbeatAt >= GATEWAY_LIVE_HEARTBEAT_MS) {
|
||||
lastHeartbeatAt = Date.now();
|
||||
logProgress(
|
||||
`${params.context}: waiting for transcript (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
|
||||
);
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
delayMs = Math.min(delayMs * 2, 250);
|
||||
}
|
||||
|
|
@ -863,6 +889,9 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
|||
logProgress(
|
||||
`[${params.label}] running ${params.candidates.length} models (thinking=${params.thinkingLevel})`,
|
||||
);
|
||||
logProgress(
|
||||
`[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s`,
|
||||
);
|
||||
const anthropicKeys = collectAnthropicApiKeys();
|
||||
if (anthropicKeys.length > 0) {
|
||||
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
|
||||
|
|
|
|||
Loading…
Reference in New Issue