mirror of https://github.com/openclaw/openclaw.git
fix(agents): split system prompt cache prefix by transport (#59054)
* fix(agents): restore Anthropic prompt cache seam * fix(agents): strip cache boundary for completions * fix(agents): strip cache boundary for cli backends * chore(changelog): note cross-transport cache boundary rollout * fix(agents): route default stream fallbacks through boundary shapers * fix(agents): strip cache boundary for provider streams
This commit is contained in:
parent
b0e1551eb8
commit
64f28906de
|
|
@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Tests/runtime: trim local unit-test import/runtime fan-out across browser, WhatsApp, cron, task, and reply flows so owner suites start faster with lower shared-worker overhead while preserving the same focused behavior coverage. (#60249) Thanks @shakkernerd.
|
||||
- Tests/secrets runtime: restore split secrets suite cache and env isolation cleanup so broader runs do not leak stale plugin or provider snapshot state. (#60395) Thanks @shakkernerd.
|
||||
- Memory/dreaming (experimental): add opt-in weighted short-term recall promotion to `MEMORY.md`, managed dreaming modes (`off|core|rem|deep`), and a `/dreaming` command plus Dreams UI so durable memory promotion can run on background cadence without manual scheduling. (#60569) Thanks @vignesh07.
|
||||
- Agents/system prompts: add an internal cache-prefix boundary across Anthropic-family, OpenAI-family, Google, and CLI transport shaping so stable system-prompt prefixes stay reusable without leaking internal cache markers to provider payloads. (#59054)
|
||||
|
||||
### Fixes
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import {
|
|||
applyAnthropicPayloadPolicyToParams,
|
||||
resolveAnthropicPayloadPolicy,
|
||||
} from "./anthropic-payload-policy.js";
|
||||
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
|
||||
|
||||
type TestPayload = {
|
||||
messages: Array<{ role: string; content: unknown }>;
|
||||
|
|
@ -102,4 +103,65 @@ describe("anthropic payload policy", () => {
|
|||
content: [{ type: "text", text: "Hello", cache_control: { type: "ephemeral" } }],
|
||||
});
|
||||
});
|
||||
|
||||
it("splits cached stable system content from uncached dynamic content", () => {
|
||||
const policy = resolveAnthropicPayloadPolicy({
|
||||
provider: "anthropic",
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "https://api.anthropic.com/v1",
|
||||
cacheRetention: "long",
|
||||
enableCacheControl: true,
|
||||
});
|
||||
const payload: TestPayload = {
|
||||
system: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic lab suffix`,
|
||||
},
|
||||
],
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
};
|
||||
|
||||
applyAnthropicPayloadPolicyToParams(payload, policy);
|
||||
|
||||
expect(payload.system).toEqual([
|
||||
{
|
||||
type: "text",
|
||||
text: "Stable prefix",
|
||||
cache_control: { type: "ephemeral", ttl: "1h" },
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "Dynamic lab suffix",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("strips the boundary even when cache retention is disabled", () => {
|
||||
const policy = resolveAnthropicPayloadPolicy({
|
||||
provider: "anthropic",
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "https://api.anthropic.com/v1",
|
||||
cacheRetention: "none",
|
||||
enableCacheControl: true,
|
||||
});
|
||||
const payload: TestPayload = {
|
||||
system: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic lab suffix`,
|
||||
},
|
||||
],
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
};
|
||||
|
||||
applyAnthropicPayloadPolicyToParams(payload, policy);
|
||||
|
||||
expect(payload.system).toEqual([
|
||||
{
|
||||
type: "text",
|
||||
text: "Stable prefix\nDynamic lab suffix",
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
import { resolveProviderRequestCapabilities } from "./provider-attribution.js";
|
||||
import {
|
||||
splitSystemPromptCacheBoundary,
|
||||
stripSystemPromptCacheBoundary,
|
||||
} from "./system-prompt-cache-boundary.js";
|
||||
|
||||
export type AnthropicServiceTier = "auto" | "standard_only";
|
||||
|
||||
|
|
@ -46,13 +50,57 @@ function applyAnthropicCacheControlToSystem(
|
|||
return;
|
||||
}
|
||||
|
||||
const normalizedBlocks: Array<unknown> = [];
|
||||
for (const block of system) {
|
||||
if (!block || typeof block !== "object") {
|
||||
normalizedBlocks.push(block);
|
||||
continue;
|
||||
}
|
||||
const record = block as Record<string, unknown>;
|
||||
if (record.type !== "text" || typeof record.text !== "string") {
|
||||
normalizedBlocks.push(block);
|
||||
continue;
|
||||
}
|
||||
const split = splitSystemPromptCacheBoundary(record.text);
|
||||
if (!split) {
|
||||
if (record.cache_control === undefined) {
|
||||
record.cache_control = cacheControl;
|
||||
}
|
||||
normalizedBlocks.push(record);
|
||||
continue;
|
||||
}
|
||||
|
||||
const { cache_control: existingCacheControl, ...rest } = record;
|
||||
if (split.stablePrefix) {
|
||||
normalizedBlocks.push({
|
||||
...rest,
|
||||
text: split.stablePrefix,
|
||||
cache_control: existingCacheControl ?? cacheControl,
|
||||
});
|
||||
}
|
||||
if (split.dynamicSuffix) {
|
||||
normalizedBlocks.push({
|
||||
...rest,
|
||||
text: split.dynamicSuffix,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
system.splice(0, system.length, ...normalizedBlocks);
|
||||
}
|
||||
|
||||
/**
 * Removes the internal system-prompt cache boundary marker from every text
 * block of an Anthropic `system` payload, mutating the blocks in place.
 *
 * This is the path taken when no cache-control policy is active: the marker
 * must still be removed from the text, but no `cache_control` tag is added.
 *
 * @param system - Candidate `system` payload value. Anything that is not an
 *   array is left untouched; non-object entries and non-text blocks inside
 *   the array are skipped.
 */
function stripAnthropicSystemPromptBoundary(system: unknown): void {
  if (!Array.isArray(system)) {
    return;
  }

  for (const block of system) {
    if (!block || typeof block !== "object") {
      continue;
    }
    const record = block as Record<string, unknown>;
    // Only well-formed text blocks are rewritten. No cache_control is assigned
    // here: this function has no cache policy in scope.
    if (record.type === "text" && typeof record.text === "string") {
      record.text = stripSystemPromptCacheBoundary(record.text);
    }
  }
}
|
||||
|
|
@ -136,11 +184,16 @@ export function applyAnthropicPayloadPolicyToParams(
|
|||
payloadObj.service_tier = policy.serviceTier;
|
||||
}
|
||||
|
||||
if (policy.cacheControl) {
|
||||
applyAnthropicCacheControlToSystem(payloadObj.system, policy.cacheControl);
|
||||
} else {
|
||||
stripAnthropicSystemPromptBoundary(payloadObj.system);
|
||||
}
|
||||
|
||||
if (!policy.cacheControl) {
|
||||
return;
|
||||
}
|
||||
|
||||
applyAnthropicCacheControlToSystem(payloadObj.system, policy.cacheControl);
|
||||
// Preserve Anthropic cache-write scope by only tagging the trailing user turn.
|
||||
applyAnthropicCacheControlToMessages(payloadObj.messages, policy.cacheControl);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { MAX_IMAGE_BYTES } from "../media/constants.js";
|
|||
import { buildCliArgs, loadPromptRefImages } from "./cli-runner/helpers.js";
|
||||
import * as promptImageUtils from "./pi-embedded-runner/run/images.js";
|
||||
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
|
||||
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
|
||||
import * as toolImages from "./tool-images.js";
|
||||
|
||||
describe("loadPromptRefImages", () => {
|
||||
|
|
@ -117,4 +118,19 @@ describe("buildCliArgs", () => {
|
|||
}),
|
||||
).toEqual(["exec", "resume", "thread-123", "--model", "gpt-5.4"]);
|
||||
});
|
||||
|
||||
it("strips the internal cache boundary from CLI system prompt args", () => {
|
||||
expect(
|
||||
buildCliArgs({
|
||||
backend: {
|
||||
command: "claude",
|
||||
systemPromptArg: "--append-system-prompt",
|
||||
},
|
||||
baseArgs: ["-p"],
|
||||
modelId: "claude-sonnet-4-6",
|
||||
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
|
||||
useResume: false,
|
||||
}),
|
||||
).toEqual(["-p", "--append-system-prompt", "Stable prefix\nDynamic suffix"]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import type { EmbeddedContextFile } from "../pi-embedded-helpers.js";
|
|||
import { detectImageReferences, loadImageFromRef } from "../pi-embedded-runner/run/images.js";
|
||||
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
|
||||
import { detectRuntimeShell } from "../shell-utils.js";
|
||||
import { stripSystemPromptCacheBoundary } from "../system-prompt-cache-boundary.js";
|
||||
import { buildSystemPromptParams } from "../system-prompt-params.js";
|
||||
import { buildAgentSystemPrompt } from "../system-prompt.js";
|
||||
import { sanitizeImageBlocks } from "../tool-images.js";
|
||||
|
|
@ -253,7 +254,7 @@ export function buildCliArgs(params: {
|
|||
args.push(params.backend.modelArg, params.modelId);
|
||||
}
|
||||
if (!params.useResume && params.systemPrompt && params.backend.systemPromptArg) {
|
||||
args.push(params.backend.systemPromptArg, params.systemPrompt);
|
||||
args.push(params.backend.systemPromptArg, stripSystemPromptCacheBoundary(params.systemPrompt));
|
||||
}
|
||||
if (!params.useResume && params.sessionId) {
|
||||
if (params.backend.sessionArgs && params.backend.sessionArgs.length > 0) {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import {
|
|||
import { parseGeminiAuth } from "../infra/gemini-auth.js";
|
||||
import { normalizeGoogleApiBaseUrl } from "../infra/google-api-base-url.js";
|
||||
import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
|
||||
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
|
||||
import { transformTransportMessages } from "./transport-message-transform.js";
|
||||
import {
|
||||
createEmptyTransportUsage,
|
||||
|
|
@ -445,7 +446,11 @@ export function buildGoogleGenerativeAiParams(
|
|||
}
|
||||
if (context.systemPrompt) {
|
||||
params.systemInstruction = {
|
||||
parts: [{ text: sanitizeTransportPayloadText(context.systemPrompt) }],
|
||||
parts: [
|
||||
{
|
||||
text: sanitizeTransportPayloadText(stripSystemPromptCacheBoundary(context.systemPrompt)),
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
if (context.tools?.length) {
|
||||
|
|
|
|||
|
|
@ -10,10 +10,12 @@ import {
|
|||
import { attachModelProviderRequestTransport } from "./provider-request-config.js";
|
||||
import {
|
||||
buildTransportAwareSimpleStreamFn,
|
||||
createBoundaryAwareStreamFnForModel,
|
||||
isTransportAwareApiSupported,
|
||||
prepareTransportAwareSimpleModel,
|
||||
resolveTransportAwareSimpleApi,
|
||||
} from "./provider-transport-stream.js";
|
||||
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
|
||||
|
||||
describe("openai transport stream", () => {
|
||||
it("reports the supported transport-aware APIs", () => {
|
||||
|
|
@ -24,6 +26,51 @@ describe("openai transport stream", () => {
|
|||
expect(isTransportAwareApiSupported("google-generative-ai")).toBe(true);
|
||||
});
|
||||
|
||||
it("builds boundary-aware stream shapers for supported default agent transports", () => {
|
||||
expect(
|
||||
createBoundaryAwareStreamFnForModel({
|
||||
id: "gpt-5.4",
|
||||
name: "GPT-5.4",
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-responses">),
|
||||
).toBeTypeOf("function");
|
||||
expect(
|
||||
createBoundaryAwareStreamFnForModel({
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6",
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"anthropic-messages">),
|
||||
).toBeTypeOf("function");
|
||||
expect(
|
||||
createBoundaryAwareStreamFnForModel({
|
||||
id: "gemini-3.1-pro-preview",
|
||||
name: "Gemini 3.1 Pro Preview",
|
||||
api: "google-generative-ai",
|
||||
provider: "google",
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"google-generative-ai">),
|
||||
).toBeTypeOf("function");
|
||||
});
|
||||
|
||||
it("prepares a custom simple-completion api alias when transport overrides are attached", () => {
|
||||
const model = attachModelProviderRequestTransport(
|
||||
{
|
||||
|
|
@ -439,6 +486,31 @@ describe("openai transport stream", () => {
|
|||
expect(params.input?.[0]).toMatchObject({ role: "developer" });
|
||||
});
|
||||
|
||||
it("strips the internal cache boundary from OpenAI system prompts", () => {
|
||||
const params = buildOpenAIResponsesParams(
|
||||
{
|
||||
id: "gpt-5.4",
|
||||
name: "GPT-5.4",
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-responses">,
|
||||
{
|
||||
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
|
||||
messages: [],
|
||||
tools: [],
|
||||
} as never,
|
||||
undefined,
|
||||
) as { input?: Array<{ content?: string }> };
|
||||
|
||||
expect(params.input?.[0]?.content).toBe("Stable prefix\nDynamic suffix");
|
||||
});
|
||||
|
||||
it("defaults responses tool schemas to strict on native OpenAI routes", () => {
|
||||
const params = buildOpenAIResponsesParams(
|
||||
{
|
||||
|
|
@ -689,6 +761,31 @@ describe("openai transport stream", () => {
|
|||
expect(params.messages?.[0]).toMatchObject({ role: "system" });
|
||||
});
|
||||
|
||||
it("strips the internal cache boundary from OpenAI completions system prompts", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
id: "gpt-4.1",
|
||||
name: "GPT-4.1",
|
||||
api: "openai-completions",
|
||||
provider: "openai",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
{
|
||||
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
|
||||
messages: [],
|
||||
tools: [],
|
||||
} as never,
|
||||
undefined,
|
||||
) as { messages?: Array<{ content?: string }> };
|
||||
|
||||
expect(params.messages?.[0]?.content).toBe("Stable prefix\nDynamic suffix");
|
||||
});
|
||||
|
||||
it("uses system role and streaming usage compat for native ModelStudio completions providers", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import {
|
|||
} from "./openai-responses-payload-policy.js";
|
||||
import { resolveProviderRequestCapabilities } from "./provider-attribution.js";
|
||||
import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
|
||||
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
|
||||
import { transformTransportMessages } from "./transport-message-transform.js";
|
||||
import { mergeTransportMetadata, sanitizeTransportPayloadText } from "./transport-stream-shared.js";
|
||||
|
||||
|
|
@ -225,7 +226,7 @@ function convertResponsesMessages(
|
|||
if (includeSystemPrompt && context.systemPrompt) {
|
||||
messages.push({
|
||||
role: model.reasoning && options?.supportsDeveloperRole !== false ? "developer" : "system",
|
||||
content: sanitizeTransportPayloadText(context.systemPrompt),
|
||||
content: sanitizeTransportPayloadText(stripSystemPromptCacheBoundary(context.systemPrompt)),
|
||||
});
|
||||
}
|
||||
let msgIndex = 0;
|
||||
|
|
@ -1294,9 +1295,15 @@ export function buildOpenAICompletionsParams(
|
|||
options: OpenAICompletionsOptions | undefined,
|
||||
) {
|
||||
const compat = getCompat(model);
|
||||
const completionsContext = context.systemPrompt
|
||||
? {
|
||||
...context,
|
||||
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
|
||||
}
|
||||
: context;
|
||||
const params: Record<string, unknown> = {
|
||||
model: model.id,
|
||||
messages: convertMessages(model as never, context, compat as never),
|
||||
messages: convertMessages(model as never, completionsContext, compat as never),
|
||||
stream: true,
|
||||
};
|
||||
if (compat.supportsUsageInStreaming) {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import type {
|
|||
} from "./openai-ws-connection.js";
|
||||
import { resolveOpenAITextVerbosity } from "./pi-embedded-runner/openai-stream-wrappers.js";
|
||||
import { resolveProviderRequestPolicyConfig } from "./provider-request-config.js";
|
||||
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
|
||||
|
||||
type WsModel = Parameters<StreamFn>[0];
|
||||
type WsContext = Parameters<StreamFn>[1];
|
||||
|
|
@ -106,7 +107,9 @@ export function buildOpenAIWebSocketResponseCreatePayload(params: {
|
|||
model: params.model.id,
|
||||
...(supportsResponsesStoreField ? { store: false } : {}),
|
||||
input: params.turnInput.inputItems,
|
||||
instructions: params.context.systemPrompt ?? undefined,
|
||||
instructions: params.context.systemPrompt
|
||||
? stripSystemPromptCacheBoundary(params.context.systemPrompt)
|
||||
: undefined,
|
||||
tools: params.tools.length > 0 ? params.tools : undefined,
|
||||
...(params.turnInput.previousResponseId
|
||||
? { previous_response_id: params.turnInput.previousResponseId }
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import {
|
|||
releaseWsSession,
|
||||
} from "./openai-ws-stream.js";
|
||||
import { log } from "./pi-embedded-runner/logger.js";
|
||||
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Mock OpenAIWebSocketManager
|
||||
|
|
@ -1853,6 +1854,36 @@ describe("createOpenAIWebSocketStreamFn", () => {
|
|||
expect((sent.tools ?? []).length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("strips the internal cache boundary from websocket instructions", async () => {
|
||||
const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-boundary");
|
||||
const ctx = {
|
||||
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
|
||||
messages: [userMsg("Hello")] as Parameters<typeof convertMessagesToInputItems>[0],
|
||||
tools: [],
|
||||
};
|
||||
|
||||
const stream = streamFn(
|
||||
modelStub as Parameters<typeof streamFn>[0],
|
||||
ctx as Parameters<typeof streamFn>[1],
|
||||
);
|
||||
|
||||
await new Promise((r) => setImmediate(r));
|
||||
const manager = MockManager.lastInstance!;
|
||||
manager.simulateEvent({
|
||||
type: "response.completed",
|
||||
response: makeResponseObject("resp_boundary", "ok"),
|
||||
});
|
||||
|
||||
for await (const _ of await resolveStream(stream)) {
|
||||
// consume
|
||||
}
|
||||
|
||||
const sent = manager.sentEvents[0] as {
|
||||
instructions?: string;
|
||||
};
|
||||
expect(sent.instructions).toBe("Stable prefix\nDynamic suffix");
|
||||
});
|
||||
|
||||
it("falls back to HTTP after the websocket send retry budget is exhausted", async () => {
|
||||
const sessionId = "sess-send-fail-reset";
|
||||
const streamFn = createOpenAIWebSocketStreamFn("sk-test", sessionId);
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ import {
|
|||
buildStreamErrorAssistantMessage,
|
||||
} from "./stream-message-shared.js";
|
||||
import { mergeTransportMetadata } from "./transport-stream-shared.js";
|
||||
import { stripSystemPromptCacheBoundary } from "./system-prompt-cache-boundary.js";
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Per-session state
|
||||
|
|
@ -590,7 +591,9 @@ export function createOpenAIWebSocketStreamFn(
|
|||
manager: session.manager,
|
||||
modelId: model.id,
|
||||
tools: convertTools(context.tools),
|
||||
instructions: context.systemPrompt ?? undefined,
|
||||
instructions: context.systemPrompt
|
||||
? stripSystemPromptCacheBoundary(context.systemPrompt)
|
||||
: undefined,
|
||||
metadata: resolveProviderTransportTurnState(model, {
|
||||
sessionId,
|
||||
turnId,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import type {
|
|||
} from "../../../plugins/types.js";
|
||||
import { isCronSessionKey, isSubagentSessionKey } from "../../../routing/session-key.js";
|
||||
import { joinPresentTextSegments } from "../../../shared/text/join-segments.js";
|
||||
import { prependSystemPromptAdditionAfterCacheBoundary } from "../../system-prompt-cache-boundary.js";
|
||||
import { resolveEffectiveToolFsWorkspaceOnly } from "../../tool-fs-policy.js";
|
||||
import type { CompactEmbeddedPiSessionParams } from "../compact.js";
|
||||
import { buildEmbeddedCompactionRuntimeContext } from "../compaction-runtime-context.js";
|
||||
|
|
@ -109,10 +110,7 @@ export function prependSystemPromptAddition(params: {
|
|||
systemPrompt: string;
|
||||
systemPromptAddition?: string;
|
||||
}): string {
|
||||
if (!params.systemPromptAddition) {
|
||||
return params.systemPrompt;
|
||||
}
|
||||
return `${params.systemPromptAddition}\n\n${params.systemPrompt}`;
|
||||
return prependSystemPromptAdditionAfterCacheBoundary(params);
|
||||
}
|
||||
|
||||
/** Build runtime context passed into context-engine afterTurn hooks. */
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { streamSimple } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../../config/config.js";
|
||||
import {
|
||||
|
|
@ -7,6 +8,7 @@ import {
|
|||
wrapOllamaCompatNumCtx,
|
||||
} from "../../../plugin-sdk/ollama.js";
|
||||
import { appendBootstrapPromptWarning } from "../../bootstrap-budget.js";
|
||||
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "../../system-prompt-cache-boundary.js";
|
||||
import { buildAgentSystemPrompt } from "../../system-prompt.js";
|
||||
import {
|
||||
buildAfterTurnRuntimeContext,
|
||||
|
|
@ -247,6 +249,65 @@ describe("resolveEmbeddedAgentStreamFn", () => {
|
|||
});
|
||||
expect(providerStreamFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("strips the internal cache boundary before provider-owned stream calls", async () => {
|
||||
const providerStreamFn = vi.fn(async (_model, context) => context);
|
||||
const streamFn = resolveEmbeddedAgentStreamFn({
|
||||
currentStreamFn: undefined,
|
||||
providerStreamFn,
|
||||
shouldUseWebSocketTransport: false,
|
||||
sessionId: "session-1",
|
||||
model: {
|
||||
api: "openai-completions",
|
||||
provider: "demo-provider",
|
||||
id: "demo-model",
|
||||
} as never,
|
||||
});
|
||||
|
||||
await expect(
|
||||
streamFn(
|
||||
{ provider: "demo-provider", id: "demo-model" } as never,
|
||||
{
|
||||
systemPrompt: `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`,
|
||||
} as never,
|
||||
{},
|
||||
),
|
||||
).resolves.toMatchObject({
|
||||
systemPrompt: "Stable prefix\nDynamic suffix",
|
||||
});
|
||||
expect(providerStreamFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("routes supported default streamSimple fallbacks through boundary-aware transports", () => {
|
||||
const streamFn = resolveEmbeddedAgentStreamFn({
|
||||
currentStreamFn: undefined,
|
||||
shouldUseWebSocketTransport: false,
|
||||
sessionId: "session-1",
|
||||
model: {
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
id: "gpt-5.4",
|
||||
} as never,
|
||||
});
|
||||
|
||||
expect(streamFn).not.toBe(streamSimple);
|
||||
});
|
||||
|
||||
it("keeps explicit custom currentStreamFn values unchanged", () => {
|
||||
const currentStreamFn = vi.fn();
|
||||
const streamFn = resolveEmbeddedAgentStreamFn({
|
||||
currentStreamFn: currentStreamFn as never,
|
||||
shouldUseWebSocketTransport: false,
|
||||
sessionId: "session-1",
|
||||
model: {
|
||||
api: "openai-responses",
|
||||
provider: "openai",
|
||||
id: "gpt-5.4",
|
||||
} as never,
|
||||
});
|
||||
|
||||
expect(streamFn).toBe(currentStreamFn);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveAttemptFsWorkspaceOnly", () => {
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ import { applyPiAutoCompactionGuard } from "../../pi-settings.js";
|
|||
import { toClientToolDefinitions } from "../../pi-tool-definition-adapter.js";
|
||||
import { createOpenClawCodingTools, resolveToolLoopDetectionConfig } from "../../pi-tools.js";
|
||||
import { registerProviderStreamForModel } from "../../provider-stream.js";
|
||||
import { createBoundaryAwareStreamFnForModel } from "../../provider-transport-stream.js";
|
||||
import { resolveSandboxContext } from "../../sandbox.js";
|
||||
import { resolveSandboxRuntimeStatus } from "../../sandbox/runtime-status.js";
|
||||
import { repairSessionFileIfNeeded } from "../../session-file-repair.js";
|
||||
|
|
@ -90,6 +91,7 @@ import {
|
|||
applySkillEnvOverridesFromSnapshot,
|
||||
resolveSkillsPromptForRun,
|
||||
} from "../../skills.js";
|
||||
import { stripSystemPromptCacheBoundary } from "../../system-prompt-cache-boundary.js";
|
||||
import { buildSystemPromptParams } from "../../system-prompt-params.js";
|
||||
import { buildSystemPromptReport } from "../../system-prompt-report.js";
|
||||
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
|
||||
|
|
@ -227,6 +229,13 @@ export function resolveEmbeddedAgentStreamFn(params: {
|
|||
}): StreamFn {
|
||||
if (params.providerStreamFn) {
|
||||
const inner = params.providerStreamFn;
|
||||
const normalizeContext = (context: Parameters<StreamFn>[1]) =>
|
||||
context.systemPrompt
|
||||
? {
|
||||
...context,
|
||||
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
|
||||
}
|
||||
: context;
|
||||
// Provider-owned transports bypass pi-coding-agent's default auth lookup,
|
||||
// so keep injecting the resolved runtime apiKey for streamSimple-compatible
|
||||
// transports that still read credentials from options.apiKey.
|
||||
|
|
@ -234,10 +243,13 @@ export function resolveEmbeddedAgentStreamFn(params: {
|
|||
const { authStorage, model } = params;
|
||||
return async (m, context, options) => {
|
||||
const apiKey = await authStorage.getApiKey(model.provider);
|
||||
return inner(m, context, { ...options, apiKey: apiKey ?? options?.apiKey });
|
||||
return inner(m, normalizeContext(context), {
|
||||
...options,
|
||||
apiKey: apiKey ?? options?.apiKey,
|
||||
});
|
||||
};
|
||||
}
|
||||
return inner;
|
||||
return (m, context, options) => inner(m, normalizeContext(context), options);
|
||||
}
|
||||
|
||||
const currentStreamFn = params.currentStreamFn ?? streamSimple;
|
||||
|
|
@ -253,6 +265,13 @@ export function resolveEmbeddedAgentStreamFn(params: {
|
|||
return createAnthropicVertexStreamFnForModel(params.model);
|
||||
}
|
||||
|
||||
if (params.currentStreamFn === undefined || params.currentStreamFn === streamSimple) {
|
||||
const boundaryAwareStreamFn = createBoundaryAwareStreamFnForModel(params.model);
|
||||
if (boundaryAwareStreamFn) {
|
||||
return boundaryAwareStreamFn;
|
||||
}
|
||||
}
|
||||
|
||||
return currentStreamFn;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,23 @@ const SIMPLE_TRANSPORT_API_ALIAS: Record<string, Api> = {
|
|||
"google-generative-ai": "openclaw-google-generative-ai-transport",
|
||||
};
|
||||
|
||||
/**
 * Creates the transport stream function matching an API identifier.
 *
 * @param api - API identifier to resolve.
 * @returns A newly created stream function for the matching transport, or
 *   `undefined` when `api` is not one of the identifiers handled here.
 */
function createSupportedTransportStreamFn(api: Api): StreamFn | undefined {
  if (api === "openai-responses") {
    return createOpenAIResponsesTransportStreamFn();
  }
  if (api === "openai-completions") {
    return createOpenAICompletionsTransportStreamFn();
  }
  if (api === "azure-openai-responses") {
    return createAzureOpenAIResponsesTransportStreamFn();
  }
  if (api === "anthropic-messages") {
    return createAnthropicMessagesTransportStreamFn();
  }
  if (api === "google-generative-ai") {
    return createGoogleGenerativeAiTransportStreamFn();
  }
  // Any other API has no transport-aware implementation in this factory.
  return undefined;
}
|
||||
|
||||
function hasTransportOverrides(model: Model<Api>): boolean {
|
||||
const request = getModelProviderRequestTransport(model);
|
||||
return Boolean(request?.proxy || request?.tls);
|
||||
|
|
@ -47,20 +64,14 @@ export function createTransportAwareStreamFnForModel(model: Model<Api>): StreamF
|
|||
`Model-provider request.proxy/request.tls is not yet supported for api "${model.api}"`,
|
||||
);
|
||||
}
|
||||
switch (model.api) {
|
||||
case "openai-responses":
|
||||
return createOpenAIResponsesTransportStreamFn();
|
||||
case "openai-completions":
|
||||
return createOpenAICompletionsTransportStreamFn();
|
||||
case "azure-openai-responses":
|
||||
return createAzureOpenAIResponsesTransportStreamFn();
|
||||
case "anthropic-messages":
|
||||
return createAnthropicMessagesTransportStreamFn();
|
||||
case "google-generative-ai":
|
||||
return createGoogleGenerativeAiTransportStreamFn();
|
||||
default:
|
||||
return undefined;
|
||||
return createSupportedTransportStreamFn(model.api);
|
||||
}
|
||||
|
||||
/**
 * Resolves the transport stream function for a model's API when that API is
 * reported as supported by `isTransportAwareApiSupported`.
 *
 * NOTE(review): "boundary-aware" presumably means the returned transports
 * handle the system-prompt cache boundary marker themselves; confirm against
 * the individual transport implementations.
 *
 * @param model - Model whose `api` selects the transport.
 * @returns The transport stream function, or `undefined` when the model's API
 *   is not supported.
 */
export function createBoundaryAwareStreamFnForModel(model: Model<Api>): StreamFn | undefined {
  return isTransportAwareApiSupported(model.api)
    ? createSupportedTransportStreamFn(model.api)
    : undefined;
}
|
||||
|
||||
export function prepareTransportAwareSimpleModel<TApi extends Api>(model: Model<TApi>): Model<Api> {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
prependSystemPromptAdditionAfterCacheBoundary,
|
||||
splitSystemPromptCacheBoundary,
|
||||
stripSystemPromptCacheBoundary,
|
||||
SYSTEM_PROMPT_CACHE_BOUNDARY,
|
||||
} from "./system-prompt-cache-boundary.js";
|
||||
|
||||
describe("system prompt cache boundary helpers", () => {
  // Shared fixture: one stable region and one dynamic region around the marker.
  const promptWithBoundary = `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Dynamic suffix`;

  it("splits stable and dynamic prompt regions", () => {
    const regions = splitSystemPromptCacheBoundary(promptWithBoundary);

    expect(regions).toEqual({
      stablePrefix: "Stable prefix",
      dynamicSuffix: "Dynamic suffix",
    });
  });

  it("strips the internal marker from prompt text", () => {
    const stripped = stripSystemPromptCacheBoundary(promptWithBoundary);

    expect(stripped).toBe("Stable prefix\nDynamic suffix");
  });

  it("inserts prompt additions after the cache boundary", () => {
    const combined = prependSystemPromptAdditionAfterCacheBoundary({
      systemPrompt: promptWithBoundary,
      systemPromptAddition: "Per-turn lab context",
    });

    expect(combined).toBe(
      `Stable prefix${SYSTEM_PROMPT_CACHE_BOUNDARY}Per-turn lab context\n\nDynamic suffix`,
    );
  });
});
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/**
 * Internal marker placed inside a system prompt to separate its stable prefix
 * from its dynamic suffix. The helpers in this module split on it or remove it.
 */
export const SYSTEM_PROMPT_CACHE_BOUNDARY = "\n<!-- OPENCLAW_CACHE_BOUNDARY -->\n";

/**
 * Removes every occurrence of the cache boundary marker from `text`, leaving
 * a single newline where each marker was.
 *
 * @param text - Prompt text that may contain the marker.
 * @returns The text with all markers replaced by `"\n"`.
 */
export function stripSystemPromptCacheBoundary(text: string): string {
  const segments = text.split(SYSTEM_PROMPT_CACHE_BOUNDARY);
  return segments.join("\n");
}
|
||||
|
||||
/**
 * Splits a system prompt at the first cache boundary marker.
 *
 * The stable prefix is everything before the first marker, with trailing
 * whitespace trimmed. The dynamic suffix is everything after it, with leading
 * whitespace trimmed. Any further markers inside the suffix are replaced with
 * a newline, so the internal marker never survives in either returned region.
 *
 * @param text - Prompt text that may contain the marker.
 * @returns The two regions, or `undefined` when `text` contains no marker.
 */
export function splitSystemPromptCacheBoundary(
  text: string,
): { stablePrefix: string; dynamicSuffix: string } | undefined {
  const boundaryIndex = text.indexOf(SYSTEM_PROMPT_CACHE_BOUNDARY);
  if (boundaryIndex === -1) {
    return undefined;
  }

  const afterBoundary = text.slice(boundaryIndex + SYSTEM_PROMPT_CACHE_BOUNDARY.length);
  return {
    stablePrefix: text.slice(0, boundaryIndex).trimEnd(),
    // Callers forward the suffix as-is, so strip residual markers here rather
    // than relying on every caller to do it.
    dynamicSuffix: stripSystemPromptCacheBoundary(afterBoundary).trimStart(),
  };
}
|
||||
|
||||
/**
 * Adds `systemPromptAddition` to a system prompt without disturbing the
 * stable prefix that precedes the cache boundary marker.
 *
 * - No addition (missing or empty): the prompt is returned unchanged.
 * - Prompt without a marker: the addition is placed in front of the whole
 *   prompt, separated by a blank line.
 * - Prompt with a marker: the addition is placed directly after the marker,
 *   ahead of any existing dynamic suffix.
 *
 * @param params.systemPrompt - Prompt text, optionally containing the marker.
 * @param params.systemPromptAddition - Text to insert.
 * @returns The combined prompt.
 */
export function prependSystemPromptAdditionAfterCacheBoundary(params: {
  systemPrompt: string;
  systemPromptAddition?: string;
}): string {
  const { systemPrompt, systemPromptAddition: addition } = params;
  if (!addition) {
    return systemPrompt;
  }

  const regions = splitSystemPromptCacheBoundary(systemPrompt);
  if (regions === undefined) {
    return `${addition}\n\n${systemPrompt}`;
  }

  // The addition leads the dynamic region; an empty suffix adds no separator.
  const dynamicRegion = regions.dynamicSuffix
    ? `${addition}\n\n${regions.dynamicSuffix}`
    : addition;
  return `${regions.stablePrefix}${SYSTEM_PROMPT_CACHE_BOUNDARY}${dynamicRegion}`;
}
|
||||
|
|
@ -10,6 +10,7 @@ import type { ResolvedTimeFormat } from "./date-time.js";
|
|||
import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
|
||||
import type { EmbeddedSandboxInfo } from "./pi-embedded-runner/types.js";
|
||||
import { sanitizeForPromptLiteral } from "./sanitize-for-prompt.js";
|
||||
import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "./system-prompt-cache-boundary.js";
|
||||
|
||||
/**
|
||||
* Controls which hardcoded sections are included in the system prompt.
|
||||
|
|
@ -584,12 +585,6 @@ export function buildAgentSystemPrompt(params: {
|
|||
...buildVoiceSection({ isMinimal, ttsHint: params.ttsHint }),
|
||||
];
|
||||
|
||||
if (extraSystemPrompt) {
|
||||
// Use "Subagent Context" header for minimal mode (subagents), otherwise "Group Chat Context"
|
||||
const contextHeader =
|
||||
promptMode === "minimal" ? "## Subagent Context" : "## Group Chat Context";
|
||||
lines.push(contextHeader, extraSystemPrompt, "");
|
||||
}
|
||||
if (params.reactionGuidance) {
|
||||
const { level, channel } = params.reactionGuidance;
|
||||
const guidanceText =
|
||||
|
|
@ -660,6 +655,18 @@ export function buildAgentSystemPrompt(params: {
|
|||
);
|
||||
}
|
||||
|
||||
// Keep large stable prompt context above this seam so Anthropic-family
|
||||
// transports can reuse it across labs and turns. Dynamic group/session
|
||||
// additions below it are the primary cache invalidators.
|
||||
lines.push(SYSTEM_PROMPT_CACHE_BOUNDARY);
|
||||
|
||||
if (extraSystemPrompt) {
|
||||
// Use "Subagent Context" header for minimal mode (subagents), otherwise "Group Chat Context"
|
||||
const contextHeader =
|
||||
promptMode === "minimal" ? "## Subagent Context" : "## Group Chat Context";
|
||||
lines.push(contextHeader, extraSystemPrompt, "");
|
||||
}
|
||||
|
||||
// Skip heartbeats for subagent/none modes
|
||||
if (!isMinimal && heartbeatPrompt) {
|
||||
lines.push(
|
||||
|
|
|
|||
Loading…
Reference in New Issue