test: improve live test progress feedback

This commit is contained in:
Peter Steinberger 2026-03-22 20:57:04 +00:00
parent 2a1acec6a7
commit 1efda3d041
3 changed files with 78 additions and 9 deletions

View File

@ -130,6 +130,10 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
- Prefer running narrowed subsets instead of “everything”
- Live runs will source `~/.profile` to pick up missing API keys
- API key rotation (provider-specific): set `*_API_KEYS` with comma/semicolon format or `*_API_KEY_1`, `*_API_KEY_2` (for example `OPENAI_API_KEYS`, `ANTHROPIC_API_KEYS`, `GEMINI_API_KEYS`) or per-live override via `OPENCLAW_LIVE_*_KEY`; tests retry on rate limit responses.
- Progress/heartbeat output:
  - Live suites now emit progress lines to stderr so long provider calls are visibly active even when Vitest console capture is quiet.
  - Tune direct-model heartbeats with `OPENCLAW_LIVE_HEARTBEAT_MS`.
  - Tune gateway/probe heartbeats with `OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS`.
## Which suite should I run?

View File

@ -19,6 +19,7 @@ import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
const LIVE = isTruthyEnvValue(process.env.LIVE) || isTruthyEnvValue(process.env.OPENCLAW_LIVE_TEST);
const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
const REQUIRE_PROFILE_KEYS = isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS);
const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000));
const describeLive = LIVE ? describe : describe.skip;
@ -43,7 +44,29 @@ function parseModelFilter(raw?: string): Set<string> | null {
}
// Emit a "[live]"-tagged progress line for long-running live-test runs.
// NOTE(review): this diff view shows both the old console.log line and its
// stderr replacement without +/- markers; per the commit message the
// post-commit function writes to stderr so progress stays visible even when
// Vitest captures/suppresses console output — confirm only one line survives.
function logProgress(message: string): void {
console.log(`[live] ${message}`);
process.stderr.write(`[live] ${message}\n`);
}
/**
 * Renders a millisecond duration as whole seconds for progress lines,
 * e.g. 30_000 -> "30s". Durations that round below one second are
 * reported as "1s" so elapsed output never shows "0s".
 */
function formatElapsedSeconds(ms: number): string {
  const seconds = Math.round(ms / 1_000);
  return `${seconds < 1 ? 1 : seconds}s`;
}
/**
 * Awaits `operation` while logging a periodic "still running" heartbeat
 * every LIVE_HEARTBEAT_MS, so long provider calls are visibly active.
 * When at least one heartbeat fired, a final "completed after" line is
 * logged too; fast operations stay silent.
 */
async function withLiveHeartbeat<T>(operation: Promise<T>, context: string): Promise<T> {
  const start = Date.now();
  let ticks = 0;
  const interval = setInterval(() => {
    ticks += 1;
    logProgress(`${context}: still running (${formatElapsedSeconds(Date.now() - start)})`);
  }, LIVE_HEARTBEAT_MS);
  // Don't let the heartbeat timer keep the process alive on its own.
  interval.unref?.();
  try {
    return await operation;
  } finally {
    clearInterval(interval);
    if (ticks > 0) {
      logProgress(`${context}: completed after ${formatElapsedSeconds(Date.now() - start)}`);
    }
  }
}
function formatFailurePreview(
@ -237,6 +260,7 @@ async function completeSimpleWithTimeout<TApi extends Api>(
context: Parameters<typeof completeSimple<TApi>>[1],
options: Parameters<typeof completeSimple<TApi>>[2],
timeoutMs: number,
progressContext: string,
) {
const maxTimeoutMs = Math.max(1, timeoutMs);
const controller = new AbortController();
@ -252,13 +276,16 @@ async function completeSimpleWithTimeout<TApi extends Api>(
hardTimer.unref?.();
});
try {
return await Promise.race([
completeSimple(model, context, {
...options,
signal: controller.signal,
}),
timeout,
]);
return await withLiveHeartbeat(
Promise.race([
completeSimple(model, context, {
...options,
signal: controller.signal,
}),
timeout,
]),
progressContext,
);
} finally {
clearTimeout(abortTimer);
if (hardTimer) {
@ -271,6 +298,7 @@ async function completeOkWithRetry(params: {
model: Model<Api>;
apiKey: string;
timeoutMs: number;
progressLabel: string;
}) {
const runOnce = async (maxTokens: number) => {
const res = await completeSimpleWithTimeout(
@ -291,6 +319,7 @@ async function completeOkWithRetry(params: {
maxTokens,
},
params.timeoutMs,
`${params.progressLabel}: prompt call (maxTokens=${maxTokens})`,
);
const text = res.content
.filter((block) => block.type === "text")
@ -395,6 +424,9 @@ describeLive("live models (profile keys)", () => {
);
}
logProgress(`[live-models] running ${selectedCandidates.length} models`);
logProgress(
`[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)}`,
);
const total = selectedCandidates.length;
for (const [index, entry] of selectedCandidates.entries()) {
@ -441,6 +473,7 @@ describeLive("live models (profile keys)", () => {
maxTokens: 128,
},
perModelTimeoutMs,
`${progressLabel}: tool-only regression first call`,
);
let toolCall = first.content.find((b) => b.type === "toolCall");
@ -470,6 +503,7 @@ describeLive("live models (profile keys)", () => {
maxTokens: 128,
},
perModelTimeoutMs,
`${progressLabel}: tool-only regression retry ${i + 1}`,
);
toolCall = first.content.find((b) => b.type === "toolCall");
@ -514,6 +548,7 @@ describeLive("live models (profile keys)", () => {
maxTokens: 256,
},
perModelTimeoutMs,
`${progressLabel}: tool-only regression followup`,
);
const secondText = second.content
@ -530,6 +565,7 @@ describeLive("live models (profile keys)", () => {
model,
apiKey,
timeoutMs: perModelTimeoutMs,
progressLabel,
});
if (ok.res.stopReason === "error") {

View File

@ -55,6 +55,10 @@ const GATEWAY_LIVE_PROBE_TIMEOUT_MS = Math.max(
30_000,
toInt(process.env.OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS, 90_000),
);
const GATEWAY_LIVE_HEARTBEAT_MS = Math.max(
1_000,
toInt(process.env.OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS, 30_000),
);
const GATEWAY_LIVE_MAX_MODELS = resolveGatewayLiveMaxModels();
const GATEWAY_LIVE_SUITE_TIMEOUT_MS = resolveGatewayLiveSuiteTimeoutMs(GATEWAY_LIVE_MAX_MODELS);
@ -108,6 +112,15 @@ function isGatewayLiveProbeTimeout(error: string): boolean {
async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
const startedAt = Date.now();
let heartbeatCount = 0;
const heartbeat = setInterval(() => {
heartbeatCount += 1;
logProgress(
`${context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
);
}, GATEWAY_LIVE_HEARTBEAT_MS);
heartbeat.unref?.();
try {
return await Promise.race([
operation,
@ -118,9 +131,15 @@ async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: st
}),
]);
} finally {
clearInterval(heartbeat);
if (timeoutHandle) {
clearTimeout(timeoutHandle);
}
if (heartbeatCount > 0) {
logProgress(
`${context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`,
);
}
}
}
@ -168,7 +187,7 @@ function capByProviderSpread<T>(
}
// Emit a "[live]"-tagged progress line for the gateway live suites.
// NOTE(review): the diff rendering shows both the old console.log line and
// its stderr replacement without +/- markers; per the commit message the
// post-commit function writes to stderr so heartbeat output remains visible
// under Vitest's console capture — confirm only one line survives.
function logProgress(message: string): void {
console.log(`[live] ${message}`);
process.stderr.write(`[live] ${message}\n`);
}
function enterProductionEnvForLiveRun() {
@ -568,6 +587,7 @@ async function waitForSessionAssistantText(params: {
context: string;
}) {
const startedAt = Date.now();
let lastHeartbeatAt = startedAt;
let delayMs = 50;
while (Date.now() - startedAt < GATEWAY_LIVE_PROBE_TIMEOUT_MS) {
const assistantTexts = readSessionAssistantTexts(params.sessionKey);
@ -580,6 +600,12 @@ async function waitForSessionAssistantText(params: {
return freshText;
}
}
if (Date.now() - lastHeartbeatAt >= GATEWAY_LIVE_HEARTBEAT_MS) {
lastHeartbeatAt = Date.now();
logProgress(
`${params.context}: waiting for transcript (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
);
}
await new Promise((resolve) => setTimeout(resolve, delayMs));
delayMs = Math.min(delayMs * 2, 250);
}
@ -863,6 +889,9 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
logProgress(
`[${params.label}] running ${params.candidates.length} models (thinking=${params.thinkingLevel})`,
);
logProgress(
`[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s`,
);
const anthropicKeys = collectAnthropicApiKeys();
if (anthropicKeys.length > 0) {
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];