diff --git a/.github/workflows/openclaw-npm-release.yml b/.github/workflows/openclaw-npm-release.yml index 5a88685d776..c4122661c90 100644 --- a/.github/workflows/openclaw-npm-release.yml +++ b/.github/workflows/openclaw-npm-release.yml @@ -129,6 +129,31 @@ jobs: - name: Verify release contents run: pnpm release:check + - name: Validate live cache credentials + if: ${{ github.ref == 'refs/heads/main' }} + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + set -euo pipefail + if [[ -z "${OPENAI_API_KEY}" ]]; then + echo "Missing OPENAI_API_KEY secret for release live cache validation." >&2 + exit 1 + fi + if [[ -z "${ANTHROPIC_API_KEY}" ]]; then + echo "Missing ANTHROPIC_API_KEY secret for release live cache validation." >&2 + exit 1 + fi + + - name: Verify live prompt cache floors + if: ${{ github.ref == 'refs/heads/main' }} + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENCLAW_LIVE_CACHE_TEST: "1" + OPENCLAW_LIVE_TEST: "1" + run: pnpm test:live:cache + - name: Pack prepared npm tarball id: packed_tarball env: diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index fa1f31a9f58..68aea25f529 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -41,6 +41,10 @@ OpenClaw has three public release lanes: `dist/*` release artifacts and Control UI bundle exist for the pack validation step - Run `pnpm release:check` before every tagged release +- Main-branch npm preflight also runs + `OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_CACHE_TEST=1 pnpm test:live:cache` + before packaging the tarball, using both `OPENAI_API_KEY` and + `ANTHROPIC_API_KEY` workflow secrets - Run `RELEASE_TAG=vYYYY.M.D node --import tsx scripts/openclaw-npm-release-check.ts` (or the matching beta/correction tag) before approval - After npm publish, run diff --git a/docs/reference/prompt-caching.md b/docs/reference/prompt-caching.md 
index 4f97a4dd40a..09ac3cbd4ce 100644 --- a/docs/reference/prompt-caching.md +++ b/docs/reference/prompt-caching.md @@ -157,10 +157,19 @@ OpenClaw exposes dedicated cache-trace diagnostics for embedded agent runs. ## Live regression tests -OpenClaw keeps provider-specific live cache probes for repeated prefixes, tool turns, image turns, and MCP-style tool transcripts. +OpenClaw keeps one combined live cache regression gate for repeated prefixes, tool turns, image turns, MCP-style tool transcripts, and an Anthropic no-cache control. -- `src/agents/pi-embedded-runner.cache.live.test.ts` -- `src/agents/pi-mcp-style.cache.live.test.ts` +- `src/agents/live-cache-regression.live.test.ts` +- `src/agents/live-cache-regression-baseline.ts` + +Run the narrow live gate with: + +```sh +OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_CACHE_TEST=1 pnpm test:live:cache +``` + +The baseline file stores the most recent observed live numbers plus the provider-specific regression floors used by the test. +The runner also uses fresh per-run session IDs and prompt namespaces so previous cache state does not pollute the current regression sample. These tests intentionally do not use identical success criteria across providers. 
@@ -180,12 +189,14 @@ These tests intentionally do not use identical success criteria across providers - image transcript: `cacheRead >= 3840`, hit rate `>= 0.82` - MCP-style transcript: `cacheRead >= 4096`, hit rate `>= 0.85` -Fresh OpenAI verification on 2026-04-04 landed at: +Fresh combined live verification on 2026-04-04 landed at: -- stable prefix: `cacheRead=4864`, hit rate `0.971` -- tool transcript: `cacheRead=4608`, hit rate `0.900` -- image transcript: `cacheRead=4864`, hit rate `0.959` -- MCP-style transcript: `cacheRead=4608`, hit rate `0.895` +- stable prefix: `cacheRead=4864`, hit rate `0.966` +- tool transcript: `cacheRead=4608`, hit rate `0.896` +- image transcript: `cacheRead=4864`, hit rate `0.954` +- MCP-style transcript: `cacheRead=4608`, hit rate `0.891` + +Recent local wall-clock time for the combined gate was about `88s`. Why the assertions differ: diff --git a/package.json b/package.json index 3f1f6119cf8..d02b99974bb 100644 --- a/package.json +++ b/package.json @@ -1149,6 +1149,7 @@ "test:install:e2e:openai": "OPENCLAW_E2E_MODELS=openai bash scripts/test-install-sh-e2e-docker.sh", "test:install:smoke": "bash scripts/test-install-sh-docker.sh", "test:live": "node scripts/test-live.mjs", + "test:live:cache": "bun scripts/check-live-cache.ts", "test:live:gateway-profiles": "node scripts/test-live.mjs -- src/gateway/gateway-models.profiles.live.test.ts", "test:live:models-profiles": "node scripts/test-live.mjs -- src/agents/models.profiles.live.test.ts", "test:max": "OPENCLAW_VITEST_MAX_WORKERS=8 vitest run --config vitest.config.ts", diff --git a/scripts/check-live-cache.ts b/scripts/check-live-cache.ts new file mode 100644 index 00000000000..e83a8cc3246 --- /dev/null +++ b/scripts/check-live-cache.ts @@ -0,0 +1,18 @@ +import { runLiveCacheRegression } from "../src/agents/live-cache-regression-runner.js"; +import { LIVE_CACHE_TEST_ENABLED, logLiveCache } from "../src/agents/live-cache-test-support.js"; + +if (!LIVE_CACHE_TEST_ENABLED) { + 
logLiveCache("skipped; set OPENCLAW_LIVE_TEST=1 and OPENCLAW_LIVE_CACHE_TEST=1"); + process.exit(0); +} + +const result = await runLiveCacheRegression(); +if (result.regressions.length > 0) { + process.stderr.write("\n[live-cache] regressions detected:\n"); + for (const regression of result.regressions) { + process.stderr.write(`- ${regression}\n`); + } + process.exitCode = 1; +} else { + process.stderr.write("\n[live-cache] all regression floors satisfied\n"); +} diff --git a/src/agents/live-cache-regression-baseline.ts b/src/agents/live-cache-regression-baseline.ts new file mode 100644 index 00000000000..a8d628272b4 --- /dev/null +++ b/src/agents/live-cache-regression-baseline.ts @@ -0,0 +1,79 @@ +export type LiveCacheFloor = { + observedCacheRead?: number; + observedCacheWrite?: number; + observedHitRate?: number; + minCacheRead?: number; + minCacheWrite?: number; + minHitRate?: number; + maxCacheRead?: number; + maxCacheWrite?: number; +}; + +export const LIVE_CACHE_REGRESSION_BASELINE = { + anthropic: { + disabled: { + observedCacheRead: 0, + observedCacheWrite: 0, + maxCacheRead: 32, + maxCacheWrite: 32, + }, + image: { + observedCacheRead: 5_660, + observedCacheWrite: 85, + observedHitRate: 0.985, + minCacheRead: 4_500, + minCacheWrite: 1, + minHitRate: 0.97, + }, + mcp: { + observedCacheRead: 6_240, + observedCacheWrite: 113, + observedHitRate: 0.982, + minCacheRead: 5_800, + minCacheWrite: 1, + minHitRate: 0.97, + }, + stable: { + observedCacheRead: 5_660, + observedCacheWrite: 18, + observedHitRate: 0.996, + minCacheRead: 5_400, + minCacheWrite: 1, + minHitRate: 0.97, + }, + tool: { + observedCacheRead: 6_223, + observedCacheWrite: 97, + observedHitRate: 0.984, + minCacheRead: 5_000, + minCacheWrite: 1, + minHitRate: 0.97, + }, + }, + openai: { + image: { + observedCacheRead: 4_864, + observedHitRate: 0.954, + minCacheRead: 3_840, + minHitRate: 0.82, + }, + mcp: { + observedCacheRead: 4_608, + observedHitRate: 0.891, + minCacheRead: 4_096, + minHitRate: 
0.85, + }, + stable: { + observedCacheRead: 4_864, + observedHitRate: 0.966, + minCacheRead: 4_608, + minHitRate: 0.9, + }, + tool: { + observedCacheRead: 4_608, + observedHitRate: 0.896, + minCacheRead: 4_096, + minHitRate: 0.85, + }, + }, +} as const satisfies Record<string, Record<string, LiveCacheFloor>>; diff --git a/src/agents/live-cache-regression-runner.ts b/src/agents/live-cache-regression-runner.ts new file mode 100644 index 00000000000..3ef1083a0bb --- /dev/null +++ b/src/agents/live-cache-regression-runner.ts @@ -0,0 +1,472 @@ +import fs from "node:fs/promises"; +import type { AssistantMessage, Message, Tool } from "@mariozechner/pi-ai"; +import { Type } from "@sinclair/typebox"; +import { LIVE_CACHE_REGRESSION_BASELINE } from "./live-cache-regression-baseline.js"; +import { + buildAssistantHistoryTurn, + buildStableCachePrefix, + completeSimpleWithLiveTimeout, + computeCacheHitRate, + extractAssistantText, + logLiveCache, + resolveLiveDirectModel, +} from "./live-cache-test-support.js"; + +const OPENAI_TIMEOUT_MS = 120_000; +const ANTHROPIC_TIMEOUT_MS = 120_000; +const OPENAI_PREFIX = buildStableCachePrefix("openai"); +const OPENAI_MCP_PREFIX = buildStableCachePrefix("openai-mcp-style"); +const ANTHROPIC_PREFIX = buildStableCachePrefix("anthropic"); +const LIVE_TEST_PNG_URL = new URL( + "../../apps/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png", + import.meta.url, +); + +type LiveResolvedModel = Awaited<ReturnType<typeof resolveLiveDirectModel>>; +type ProviderKey = keyof typeof LIVE_CACHE_REGRESSION_BASELINE; +type CacheLane = "image" | "mcp" | "stable" | "tool"; +type CacheRun = { + hitRate: number; + suffix: string; + text: string; + usage: AssistantMessage["usage"]; +}; +type LaneResult = { + best?: CacheRun; + disabled?: CacheRun; + warmup?: CacheRun; +}; + +export type LiveCacheRegressionResult = { + regressions: string[]; + summary: Record<ProviderKey, Record<string, unknown>>; +}; + +const NOOP_TOOL: Tool = { + name: "noop", + description: "Return ok.", + parameters: Type.Object({}, { additionalProperties: false }), +}; + +const MCP_TOOL: Tool = { + 
name: "bundleProbe__bundle_probe", + description: "Return bundle MCP probe text.", + parameters: Type.Object({}, { additionalProperties: false }), +}; + +function makeUserTurn(content: Extract<Message, { role: "user" }>["content"]): Message { + return { + role: "user", + content, + timestamp: Date.now(), + }; +} + +function makeImageUserTurn(text: string, pngBase64: string): Message { + return makeUserTurn([ + { type: "text", text }, + { type: "image", mimeType: "image/png", data: pngBase64 }, + ]); +} + +function makeToolResultMessage( + toolCallId: string, + toolName: string, + text: string, +): Extract<Message, { role: "toolResult" }> { + return { + role: "toolResult", + toolCallId, + toolName, + content: [{ type: "text", text }], + isError: false, + timestamp: Date.now(), + }; +} + +function extractFirstToolCall(message: AssistantMessage) { + return message.content.find((block) => block.type === "toolCall"); +} + +function assert(condition: unknown, message: string): asserts condition { + if (!condition) { + throw new Error(message); + } +} + +async function runToolOnlyTurn(params: { + apiKey: string; + cacheRetention: "none" | "short" | "long"; + model: LiveResolvedModel["model"]; + providerTag: "anthropic" | "openai"; + sessionId: string; + systemPrompt: string; + tool: Tool; +}) { + const timeoutMs = params.providerTag === "openai" ? OPENAI_TIMEOUT_MS : ANTHROPIC_TIMEOUT_MS; + const options = { + apiKey: params.apiKey, + cacheRetention: params.cacheRetention, + sessionId: params.sessionId, + maxTokens: 128, + temperature: 0, + ...(params.providerTag === "openai" ? { reasoning: "none" as unknown as never } : {}), + }; + let prompt = `Call the tool \`${params.tool.name}\` with {}. 
IMPORTANT: respond ONLY with the tool call and no other text.`; + let response = await completeSimpleWithLiveTimeout( + params.model, + { + systemPrompt: params.systemPrompt, + messages: [makeUserTurn(prompt)], + tools: [params.tool], + }, + options, + `${params.providerTag} ${params.tool.name} tool-only turn`, + timeoutMs, + ); + + let toolCall = extractFirstToolCall(response); + let text = extractAssistantText(response); + for (let attempt = 0; attempt < 2 && (!toolCall || text.length > 0); attempt += 1) { + prompt = `Return only a tool call for \`${params.tool.name}\` with {}. No text.`; + response = await completeSimpleWithLiveTimeout( + params.model, + { + systemPrompt: params.systemPrompt, + messages: [makeUserTurn(prompt)], + tools: [params.tool], + }, + options, + `${params.providerTag} ${params.tool.name} tool-only retry ${attempt + 1}`, + timeoutMs, + ); + toolCall = extractFirstToolCall(response); + text = extractAssistantText(response); + } + + assert(toolCall, `expected tool call for ${params.tool.name}`); + assert( + text.length === 0, + `expected tool-only response for ${params.tool.name}, got ${JSON.stringify(text)}`, + ); + assert(toolCall.type === "toolCall", `expected toolCall block for ${params.tool.name}`); + + return { + prompt, + response, + toolCall, + }; +} + +async function completeCacheProbe(params: { + apiKey: string; + cacheRetention: "none" | "short" | "long"; + messages: Message[]; + model: LiveResolvedModel["model"]; + providerTag: "anthropic" | "openai"; + sessionId: string; + suffix: string; + systemPrompt: string; + tools?: Tool[]; + maxTokens?: number; +}): Promise<CacheRun> { + const timeoutMs = params.providerTag === "openai" ? OPENAI_TIMEOUT_MS : ANTHROPIC_TIMEOUT_MS; + const response = await completeSimpleWithLiveTimeout( + params.model, + { + systemPrompt: params.systemPrompt, + messages: params.messages, + ...(params.tools ? 
{ tools: params.tools } : {}), + }, + { + apiKey: params.apiKey, + cacheRetention: params.cacheRetention, + sessionId: params.sessionId, + maxTokens: params.maxTokens ?? 64, + temperature: 0, + ...(params.providerTag === "openai" ? { reasoning: "none" as unknown as never } : {}), + }, + `${params.providerTag} cache lane ${params.suffix}`, + timeoutMs, + ); + const text = extractAssistantText(response); + assert( + text.toLowerCase().includes(params.suffix.toLowerCase()), + `expected response to contain ${params.suffix}, got ${JSON.stringify(text)}`, + ); + return { + suffix: params.suffix, + text, + usage: response.usage, + hitRate: computeCacheHitRate(response.usage), + }; +} + +async function runRepeatedLane(params: { + lane: CacheLane; + providerTag: "anthropic" | "openai"; + fixture: LiveResolvedModel; + runToken: string; + sessionId: string; + pngBase64: string; +}): Promise<LaneResult> { + const suffixBase = `${params.providerTag}-${params.lane}`; + const systemPromptBase = + params.providerTag === "openai" + ? params.lane === "mcp" + ? OPENAI_MCP_PREFIX + : OPENAI_PREFIX + : ANTHROPIC_PREFIX; + const systemPrompt = `${systemPromptBase}\nRun token: ${params.runToken}\nLane: ${params.providerTag}-${params.lane}\n`; + + const run = + params.lane === "stable" + ? (suffix: string) => + completeCacheProbe({ + apiKey: params.fixture.apiKey, + cacheRetention: "short", + messages: [makeUserTurn(`Reply with exactly CACHE-OK ${suffix}.`)], + model: params.fixture.model, + providerTag: params.providerTag, + sessionId: params.sessionId, + suffix, + systemPrompt, + maxTokens: 32, + }) + : params.lane === "image" + ? (suffix: string) => + completeCacheProbe({ + apiKey: params.fixture.apiKey, + cacheRetention: "short", + messages: [ + makeImageUserTurn( + "An image is attached. 
Ignore image semantics but keep the bytes in history.", + params.pngBase64, + ), + buildAssistantHistoryTurn("IMAGE HISTORY ACKNOWLEDGED", params.fixture.model), + makeUserTurn("Keep the earlier image turn stable in context."), + buildAssistantHistoryTurn("IMAGE HISTORY PRESERVED", params.fixture.model), + makeUserTurn(`Reply with exactly CACHE-OK ${suffix}.`), + ], + model: params.fixture.model, + providerTag: params.providerTag, + sessionId: params.sessionId, + suffix, + systemPrompt, + }) + : async (suffix: string) => { + const tool = params.lane === "mcp" ? MCP_TOOL : NOOP_TOOL; + const toolText = params.lane === "mcp" ? "FROM-BUNDLE" : "ok"; + const historyPrefix = params.lane === "mcp" ? "MCP TOOL HISTORY" : "TOOL HISTORY"; + const toolTurn = await runToolOnlyTurn({ + apiKey: params.fixture.apiKey, + cacheRetention: "short", + model: params.fixture.model, + providerTag: params.providerTag, + sessionId: params.sessionId, + systemPrompt, + tool, + }); + return await completeCacheProbe({ + apiKey: params.fixture.apiKey, + cacheRetention: "short", + messages: [ + makeUserTurn(toolTurn.prompt), + toolTurn.response, + makeToolResultMessage(toolTurn.toolCall.id, tool.name, toolText), + buildAssistantHistoryTurn(`${historyPrefix} ACKNOWLEDGED`, params.fixture.model), + makeUserTurn( + params.lane === "mcp" + ? "Keep the MCP tool output stable in history." + : "Keep the tool output stable in history.", + ), + buildAssistantHistoryTurn(`${historyPrefix} PRESERVED`, params.fixture.model), + makeUserTurn(`Reply with exactly CACHE-OK ${suffix}.`), + ], + model: params.fixture.model, + providerTag: params.providerTag, + sessionId: params.sessionId, + suffix, + systemPrompt, + tools: [tool], + }); + }; + + const warmup = await run(`${suffixBase}-warmup`); + const hitA = await run(`${suffixBase}-hit-a`); + const hitB = await run(`${suffixBase}-hit-b`); + const best = (hitA.usage.cacheRead ?? 0) >= (hitB.usage.cacheRead ?? 0) ? 
hitA : hitB; + return { best, warmup }; +} + +async function runAnthropicDisabledLane(params: { + fixture: LiveResolvedModel; + runToken: string; + sessionId: string; +}): Promise<LaneResult> { + const disabled = await completeCacheProbe({ + apiKey: params.fixture.apiKey, + cacheRetention: "none", + messages: [makeUserTurn("Reply with exactly CACHE-OK anthropic-disabled.")], + model: params.fixture.model, + providerTag: "anthropic", + sessionId: params.sessionId, + suffix: "anthropic-disabled", + systemPrompt: `${ANTHROPIC_PREFIX}\nRun token: ${params.runToken}\nLane: anthropic-disabled\n`, + maxTokens: 32, + }); + return { disabled }; +} + +function formatUsage(usage: AssistantMessage["usage"]) { + return `cacheRead=${usage.cacheRead ?? 0} cacheWrite=${usage.cacheWrite ?? 0} input=${usage.input ?? 0}`; +} + +function assertAgainstBaseline(params: { + lane: string; + provider: ProviderKey; + result: LaneResult; + regressions: string[]; +}) { + const floor = + LIVE_CACHE_REGRESSION_BASELINE[params.provider][ + params.lane as keyof (typeof LIVE_CACHE_REGRESSION_BASELINE)[typeof params.provider] + ]; + if (!floor) { + params.regressions.push(`${params.provider}:${params.lane} missing baseline entry`); + return; + } + + if (params.result.best) { + const usage = params.result.best.usage; + if ((usage.cacheRead ?? 0) < (floor.minCacheRead ?? 0)) { + params.regressions.push( + `${params.provider}:${params.lane} cacheRead=${usage.cacheRead ?? 0} < min=${floor.minCacheRead}`, + ); + } + if (params.result.best.hitRate < (floor.minHitRate ?? 0)) { + params.regressions.push( + `${params.provider}:${params.lane} hitRate=${params.result.best.hitRate.toFixed(3)} < min=${floor.minHitRate?.toFixed(3)}`, + ); + } + } + + if (params.result.warmup) { + const warmupUsage = params.result.warmup.usage; + if ((warmupUsage.cacheWrite ?? 0) < (floor.minCacheWrite ?? 0)) { + params.regressions.push( + `${params.provider}:${params.lane} warmup cacheWrite=${warmupUsage.cacheWrite ?? 
0} < min=${floor.minCacheWrite}`, + ); + } + } + + if (params.result.disabled) { + const usage = params.result.disabled.usage; + if ((usage.cacheRead ?? 0) > (floor.maxCacheRead ?? Number.POSITIVE_INFINITY)) { + params.regressions.push( + `${params.provider}:${params.lane} cacheRead=${usage.cacheRead ?? 0} > max=${floor.maxCacheRead}`, + ); + } + if ((usage.cacheWrite ?? 0) > (floor.maxCacheWrite ?? Number.POSITIVE_INFINITY)) { + params.regressions.push( + `${params.provider}:${params.lane} cacheWrite=${usage.cacheWrite ?? 0} > max=${floor.maxCacheWrite}`, + ); + } + } +} + +export async function runLiveCacheRegression(): Promise<LiveCacheRegressionResult> { + const pngBase64 = (await fs.readFile(LIVE_TEST_PNG_URL)).toString("base64"); + const runToken = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; + const openai = await resolveLiveDirectModel({ + provider: "openai", + api: "openai-responses", + envVar: "OPENCLAW_LIVE_OPENAI_CACHE_MODEL", + preferredModelIds: ["gpt-5.4-mini", "gpt-5.4", "gpt-5.2"], + }); + const anthropic = await resolveLiveDirectModel({ + provider: "anthropic", + api: "anthropic-messages", + envVar: "OPENCLAW_LIVE_ANTHROPIC_CACHE_MODEL", + preferredModelIds: ["claude-sonnet-4-6", "claude-sonnet-4-5", "claude-haiku-3-5"], + }); + + const regressions: string[] = []; + const summary: Record<ProviderKey, Record<string, unknown>> = { + anthropic: {}, + openai: {}, + }; + + for (const lane of ["stable", "tool", "image", "mcp"] as const) { + const openaiResult = await runRepeatedLane({ + lane, + providerTag: "openai", + fixture: openai, + runToken, + sessionId: `live-cache-regression-${runToken}-openai-${lane}`, + pngBase64, + }); + logLiveCache( + `openai ${lane} warmup ${formatUsage(openaiResult.warmup?.usage ?? {})} rate=${openaiResult.warmup?.hitRate.toFixed(3) ?? "0.000"}`, + ); + logLiveCache( + `openai ${lane} best ${formatUsage(openaiResult.best?.usage ?? {})} rate=${openaiResult.best?.hitRate.toFixed(3) ?? 
"0.000"}`, + ); + summary.openai[lane] = { + best: openaiResult.best?.usage, + hitRate: openaiResult.best?.hitRate, + warmup: openaiResult.warmup?.usage, + }; + assertAgainstBaseline({ + lane, + provider: "openai", + result: openaiResult, + regressions, + }); + + const anthropicResult = await runRepeatedLane({ + lane, + providerTag: "anthropic", + fixture: anthropic, + runToken, + sessionId: `live-cache-regression-${runToken}-anthropic-${lane}`, + pngBase64, + }); + logLiveCache( + `anthropic ${lane} warmup ${formatUsage(anthropicResult.warmup?.usage ?? {})} rate=${anthropicResult.warmup?.hitRate.toFixed(3) ?? "0.000"}`, + ); + logLiveCache( + `anthropic ${lane} best ${formatUsage(anthropicResult.best?.usage ?? {})} rate=${anthropicResult.best?.hitRate.toFixed(3) ?? "0.000"}`, + ); + summary.anthropic[lane] = { + best: anthropicResult.best?.usage, + hitRate: anthropicResult.best?.hitRate, + warmup: anthropicResult.warmup?.usage, + }; + assertAgainstBaseline({ + lane, + provider: "anthropic", + result: anthropicResult, + regressions, + }); + } + + const disabled = await runAnthropicDisabledLane({ + fixture: anthropic, + runToken, + sessionId: `live-cache-regression-${runToken}-anthropic-disabled`, + }); + logLiveCache(`anthropic disabled ${formatUsage(disabled.disabled?.usage ?? 
{})}`); + summary.anthropic.disabled = { + disabled: disabled.disabled?.usage, + }; + assertAgainstBaseline({ + lane: "disabled", + provider: "anthropic", + result: disabled, + regressions, + }); + + logLiveCache(`cache regression summary ${JSON.stringify(summary)}`); + return { regressions, summary }; +} diff --git a/src/agents/live-cache-regression.live.test.ts b/src/agents/live-cache-regression.live.test.ts new file mode 100644 index 00000000000..8f4fb89ec6a --- /dev/null +++ b/src/agents/live-cache-regression.live.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "vitest"; +import { runLiveCacheRegression } from "./live-cache-regression-runner.js"; +import { LIVE_CACHE_TEST_ENABLED } from "./live-cache-test-support.js"; + +const describeCacheLive = LIVE_CACHE_TEST_ENABLED ? describe : describe.skip; + +describeCacheLive("live cache regression", () => { + it( + "matches the stored provider cache baselines", + async () => { + const result = await runLiveCacheRegression(); + expect(result.regressions).toEqual([]); + }, + 30 * 60_000, + ); +});