diff --git a/CHANGELOG.md b/CHANGELOG.md index 908d9660c76..e2ba6d40eea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ Docs: https://docs.openclaw.ai - Slack/Subagent completion delivery: stop forcing bound conversation IDs into `threadId` so Slack completion announces do not send invalid `thread_ts` for DMs/top-level channels. Landed from contributor PR #31105 by @stakeswky. Thanks @stakeswky. - Signal/Loop protection: evaluate own-account detection before sync-message filtering (including UUID-only `accountUuid` configs) so `sentTranscript` sync events cannot bypass loop protection and self-reply loops. Landed from contributor PR #31093 by @kevinWangSheng. Thanks @kevinWangSheng. - Gateway/Control UI origins: support wildcard `"*"` in `gateway.controlUi.allowedOrigins` for trusted remote access setups. Landed from contributor PR #31088 by @frankekn. Thanks @frankekn. +- Cron/Isolated CLI timeout ratio: avoid reusing persisted CLI session IDs on fresh isolated cron runs so the fresh watchdog profile is used and jobs do not abort at roughly one-third of configured `timeoutSeconds`. (#30140) Thanks @ningding97. - Security/Sandbox media reads: eliminate sandbox media TOCTOU symlink-retarget escapes by enforcing root-scoped boundary-safe reads at attachment/image load time and consolidating shared safe-read helpers across sandbox media callsites. This ships in the next npm release. Thanks @tdjackey for reporting. - Security/Subagents sandbox inheritance: block sandboxed sessions from spawning cross-agent subagents that would run unsandboxed, preventing runtime sandbox downgrade via `sessions_spawn agentId`. Thanks @tdjackey for reporting. - Security/Workspace safe writes: harden `writeFileWithinRoot` against symlink-retarget TOCTOU races by opening existing files without truncation, creating missing files with exclusive create, deferring truncation until post-open identity+boundary validation, and removing out-of-root create artifacts on blocked races; added regression tests for truncate/create race paths. This ships in the next npm release (`2026.3.1`). Thanks @tdjackey for reporting. diff --git a/src/cron/isolated-agent/run.skill-filter.test.ts b/src/cron/isolated-agent/run.skill-filter.test.ts index 2b6e4bbf7be..5e4c410af62 100644 --- a/src/cron/isolated-agent/run.skill-filter.test.ts +++ b/src/cron/isolated-agent/run.skill-filter.test.ts @@ -95,12 +95,14 @@ vi.mock("../../agents/subagent-announce.js", () => ({ runSubagentAnnounceFlow: vi.fn().mockResolvedValue(true), })); +const runCliAgentMock = vi.fn(); vi.mock("../../agents/cli-runner.js", () => ({ - runCliAgent: vi.fn(), + runCliAgent: runCliAgentMock, })); +const getCliSessionIdMock = vi.fn().mockReturnValue(undefined); vi.mock("../../agents/cli-session.js", () => ({ - getCliSessionId: vi.fn().mockReturnValue(undefined), + getCliSessionId: getCliSessionIdMock, setCliSessionId: vi.fn(), })); @@ -494,4 +496,82 @@ describe("runCronIsolatedAgentTurn — skill filter", () => { expect(runWithModelFallbackMock).not.toHaveBeenCalled(); }); }); + + describe("CLI session handoff (issue #29774)", () => { + it("does not pass stored cliSessionId on fresh isolated runs (isNewSession=true)", async () => { + // Simulate a persisted CLI session ID from a previous run. + getCliSessionIdMock.mockReturnValue("prev-cli-session-abc"); + isCliProviderMock.mockReturnValue(true); + runCliAgentMock.mockResolvedValue({ + payloads: [{ text: "output" }], + meta: { agentMeta: { sessionId: "new-cli-session-xyz", usage: { input: 5, output: 10 } } }, + }); + // Make runWithModelFallback invoke the run callback so the CLI path executes. + runWithModelFallbackMock.mockImplementationOnce( + async (params: { run: (provider: string, model: string) => Promise }) => { + const result = await params.run("claude-cli", "claude-opus-4-6"); + return { result, provider: "claude-cli", model: "claude-opus-4-6", attempts: [] }; + }, + ); + resolveCronSessionMock.mockReturnValue({ + storePath: "/tmp/store.json", + store: {}, + sessionEntry: { + sessionId: "test-session-fresh", + updatedAt: 0, + systemSent: false, + skillsSnapshot: undefined, + // A stored CLI session ID that should NOT be reused on fresh runs. + cliSessionIds: { "claude-cli": "prev-cli-session-abc" }, + }, + systemSent: false, + isNewSession: true, + }); + + await runCronIsolatedAgentTurn(makeParams()); + + expect(runCliAgentMock).toHaveBeenCalledOnce(); + // Fresh session: cliSessionId must be undefined, not the stored value. + expect(runCliAgentMock.mock.calls[0][0]).toHaveProperty("cliSessionId", undefined); + }); + + it("reuses stored cliSessionId on continuation runs (isNewSession=false)", async () => { + getCliSessionIdMock.mockReturnValue("existing-cli-session-def"); + isCliProviderMock.mockReturnValue(true); + runCliAgentMock.mockResolvedValue({ + payloads: [{ text: "output" }], + meta: { + agentMeta: { sessionId: "existing-cli-session-def", usage: { input: 5, output: 10 } }, + }, + }); + runWithModelFallbackMock.mockImplementationOnce( + async (params: { run: (provider: string, model: string) => Promise }) => { + const result = await params.run("claude-cli", "claude-opus-4-6"); + return { result, provider: "claude-cli", model: "claude-opus-4-6", attempts: [] }; + }, + ); + resolveCronSessionMock.mockReturnValue({ + storePath: "/tmp/store.json", + store: {}, + sessionEntry: { + sessionId: "test-session-continuation", + updatedAt: 0, + systemSent: false, + skillsSnapshot: undefined, + cliSessionIds: { "claude-cli": "existing-cli-session-def" }, + }, + systemSent: false, + isNewSession: false, + }); + + await runCronIsolatedAgentTurn(makeParams()); + + expect(runCliAgentMock).toHaveBeenCalledOnce(); + // Continuation: cliSessionId should be passed through for session resume. + expect(runCliAgentMock.mock.calls[0][0]).toHaveProperty( + "cliSessionId", + "existing-cli-session-def", + ); + }); + }); }); diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index ff36c508097..cf9cfcce716 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -463,7 +463,14 @@ export async function runCronIsolatedAgentTurn(params: { throw new Error(abortReason()); } if (isCliProvider(providerOverride, cfgWithAgentDefaults)) { - const cliSessionId = getCliSessionId(cronSession.sessionEntry, providerOverride); + // Fresh isolated cron sessions must not reuse a stored CLI session ID. + // Passing an existing ID activates the resume watchdog profile + // (noOutputTimeoutRatio 0.3, maxMs 180 s) instead of the fresh profile + // (ratio 0.8, maxMs 600 s), causing jobs to time out at roughly 1/3 of + // the configured timeoutSeconds. See: https://github.com/openclaw/openclaw/issues/29774 + const cliSessionId = cronSession.isNewSession + ? undefined + : getCliSessionId(cronSession.sessionEntry, providerOverride); return runCliAgent({ sessionId: cronSession.sessionEntry.sessionId, sessionKey: agentSessionKey, diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index 5a8753d4aad..fdc097f6c5c 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -1513,4 +1513,83 @@ describe("Cron issue regressions", () => { expect(jobs.find((job) => job.id === first.id)?.state.lastStatus).toBe("ok"); expect(jobs.find((job) => job.id === second.id)?.state.lastStatus).toBe("ok"); }); + + // Regression: isolated cron runs must not abort at 1/3 of configured timeoutSeconds. + // The bug (issue #29774) caused the CLI-provider resume watchdog (ratio 0.3, maxMs 180 s) + // to be applied on fresh sessions because a persisted cliSessionId was passed to + // runCliAgent even when isNewSession=true. At the service level this manifests as a + // job abort that fires much sooner than the configured outer timeout. + it("outer cron timeout fires at configured timeoutSeconds, not at 1/3 (#29774)", async () => { + vi.useRealTimers(); + const store = await makeStorePath(); + const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z"); + + // Use a short but observable timeout: 300 ms. + // Before the fix, premature timeout would fire at ~100 ms (1/3 of 300 ms). + const timeoutSeconds = 0.3; + const cronJob = createIsolatedRegressionJob({ + id: "timeout-fraction-29774", + name: "timeout fraction regression", + scheduledAt, + schedule: { kind: "at", at: new Date(scheduledAt).toISOString() }, + payload: { kind: "agentTurn", message: "work", timeoutSeconds }, + state: { nextRunAtMs: scheduledAt }, + }); + await writeCronJobs(store.storePath, [cronJob]); + + const tempFile = path.join(os.tmpdir(), `cron-29774-${Date.now()}.txt`); + let now = scheduledAt; + const wallStart = Date.now(); + let abortWallMs: number | undefined; + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async ({ abortSignal }: { abortSignal?: AbortSignal }) => { + // Real side effect: confirm the job actually started. + await fs.writeFile(tempFile, "started", "utf-8"); + await new Promise((resolve) => { + if (!abortSignal) { + resolve(); + return; + } + if (abortSignal.aborted) { + abortWallMs = Date.now(); + resolve(); + return; + } + abortSignal.addEventListener( + "abort", + () => { + abortWallMs = Date.now(); + resolve(); + }, + { once: true }, + ); + }); + now += 5; + return { status: "ok" as const, summary: "done" }; + }), + }); + + await onTimer(state); + + // Confirm job started (real side effect). + await expect(fs.readFile(tempFile, "utf-8")).resolves.toBe("started"); + await fs.unlink(tempFile).catch(() => {}); + + // The outer cron timeout fires at timeoutSeconds * 1000 = 300 ms. + // The abort must not have fired at ~100 ms (the 1/3 regression value). + // Allow generous lower bound (80%) to keep the test stable on loaded CI runners. + const elapsedMs = (abortWallMs ?? Date.now()) - wallStart; + expect(elapsedMs).toBeGreaterThanOrEqual(timeoutSeconds * 1000 * 0.8); + + const job = state.store?.jobs.find((entry) => entry.id === "timeout-fraction-29774"); + expect(job?.state.lastStatus).toBe("error"); + expect(job?.state.lastError).toContain("timed out"); + }); });