From 40939b29a283a8627e6e72c898fa6f3ad5cedd2f Mon Sep 17 00:00:00 2001 From: SK Akram Date: Sun, 15 Feb 2026 12:28:46 +0000 Subject: [PATCH] feat: add stuck loop detection and exponential backoff infrastructure for agent polling --- src/agents/command-poll-backoff.test.ts | 173 ++++++++++++++++ src/agents/command-poll-backoff.ts | 82 ++++++++ src/agents/pi-tools.before-tool-call.ts | 26 +++ src/agents/tool-loop-detection.test.ts | 253 ++++++++++++++++++++++++ src/agents/tool-loop-detection.ts | 123 ++++++++++++ src/logging/diagnostic-session-state.ts | 8 + src/logging/diagnostic.ts | 8 + 7 files changed, 673 insertions(+) create mode 100644 src/agents/command-poll-backoff.test.ts create mode 100644 src/agents/command-poll-backoff.ts create mode 100644 src/agents/tool-loop-detection.test.ts create mode 100644 src/agents/tool-loop-detection.ts diff --git a/src/agents/command-poll-backoff.test.ts b/src/agents/command-poll-backoff.test.ts new file mode 100644 index 00000000000..a83272b386f --- /dev/null +++ b/src/agents/command-poll-backoff.test.ts @@ -0,0 +1,173 @@ +import { describe, expect, it } from "vitest"; +import type { SessionState } from "../logging/diagnostic-session-state.js"; +import { + calculateBackoffMs, + getCommandPollSuggestion, + pruneStaleCommandPolls, + recordCommandPoll, + resetCommandPollCount, +} from "./command-poll-backoff.js"; + +describe("command-poll-backoff", () => { + describe("calculateBackoffMs", () => { + it("returns 5s for first poll", () => { + expect(calculateBackoffMs(0)).toBe(5000); + }); + + it("returns 10s for second poll", () => { + expect(calculateBackoffMs(1)).toBe(10000); + }); + + it("returns 30s for third poll", () => { + expect(calculateBackoffMs(2)).toBe(30000); + }); + + it("returns 60s for fourth and subsequent polls (capped)", () => { + expect(calculateBackoffMs(3)).toBe(60000); + expect(calculateBackoffMs(4)).toBe(60000); + expect(calculateBackoffMs(10)).toBe(60000); + expect(calculateBackoffMs(100)).toBe(60000); + }); + }); + + describe("recordCommandPoll", () => { + it("returns 5s on first no-output poll", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + const retryMs = recordCommandPoll(state, "cmd-123", false); + expect(retryMs).toBe(5000); + expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0); // First poll = index 0 + }); + + it("increments count and increases backoff on consecutive no-output polls", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + expect(recordCommandPoll(state, "cmd-123", false)).toBe(5000); // count=0 -> 5s + expect(recordCommandPoll(state, "cmd-123", false)).toBe(10000); // count=1 -> 10s + expect(recordCommandPoll(state, "cmd-123", false)).toBe(30000); // count=2 -> 30s + expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=3 -> 60s + expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=4 -> 60s (capped) + + expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(4); // 5 polls = index 4 + }); + + it("resets count when poll returns new output", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + recordCommandPoll(state, "cmd-123", false); + recordCommandPoll(state, "cmd-123", false); + recordCommandPoll(state, "cmd-123", false); + expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(2); // 3 polls = index 2 + + // New output resets count + const retryMs = recordCommandPoll(state, "cmd-123", true); + expect(retryMs).toBe(5000); // Back to first poll delay + expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0); + }); + + it("tracks different commands independently", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + recordCommandPoll(state, "cmd-1", false); + recordCommandPoll(state, "cmd-1", false); + recordCommandPoll(state, "cmd-2", false); + + expect(state.commandPollCounts?.get("cmd-1")?.count).toBe(1); // 2 polls = index 1 + expect(state.commandPollCounts?.get("cmd-2")?.count).toBe(0); // 1 poll = index 0 + }); + }); + + describe("getCommandPollSuggestion", () => { + it("returns undefined for untracked command", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + expect(getCommandPollSuggestion(state, "unknown")).toBeUndefined(); + }); + + it("returns current backoff for tracked command", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + recordCommandPoll(state, "cmd-123", false); + recordCommandPoll(state, "cmd-123", false); + + expect(getCommandPollSuggestion(state, "cmd-123")).toBe(10000); + }); + }); + + describe("resetCommandPollCount", () => { + it("removes command from tracking", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + recordCommandPoll(state, "cmd-123", false); + expect(state.commandPollCounts?.has("cmd-123")).toBe(true); + + resetCommandPollCount(state, "cmd-123"); + expect(state.commandPollCounts?.has("cmd-123")).toBe(false); + }); + + it("is safe to call on untracked command", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + expect(() => resetCommandPollCount(state, "unknown")).not.toThrow(); + }); + }); + + describe("pruneStaleCommandPolls", () => { + it("removes polls older than maxAge", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + commandPollCounts: new Map([ + ["cmd-old", { count: 5, lastPollAt: Date.now() - 7200000 }], // 2 hours ago + ["cmd-new", { count: 3, lastPollAt: Date.now() - 1000 }], // 1 second ago + ]), + }; + + pruneStaleCommandPolls(state, 3600000); // 1 hour max age + + expect(state.commandPollCounts?.has("cmd-old")).toBe(false); + expect(state.commandPollCounts?.has("cmd-new")).toBe(true); + }); + + it("handles empty state gracefully", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "idle", + queueDepth: 0, + }; + + expect(() => pruneStaleCommandPolls(state)).not.toThrow(); + }); + }); +}); diff --git a/src/agents/command-poll-backoff.ts b/src/agents/command-poll-backoff.ts new file mode 100644 index 00000000000..d26134892f0 --- /dev/null +++ b/src/agents/command-poll-backoff.ts @@ -0,0 +1,82 @@ +import type { SessionState } from "../logging/diagnostic-session-state.js"; + +// Exponential backoff schedule for command polling +const BACKOFF_SCHEDULE_MS = [5000, 10000, 30000, 60000]; + +/** + * Calculate suggested retry delay based on consecutive no-output poll count. + * Implements exponential backoff schedule: 5s → 10s → 30s → 60s (capped). + */ +export function calculateBackoffMs(consecutiveNoOutputPolls: number): number { + const index = Math.min(consecutiveNoOutputPolls, BACKOFF_SCHEDULE_MS.length - 1); + return BACKOFF_SCHEDULE_MS[index] ?? 60000; +} + +/** + * Record a command poll and return suggested retry delay. + * @param state Session state to track polling in + * @param commandId Unique identifier for the command being polled + * @param hasNewOutput Whether this poll returned new output + * @returns Suggested delay in milliseconds before next poll + */ +export function recordCommandPoll( + state: SessionState, + commandId: string, + hasNewOutput: boolean, +): number { + if (!state.commandPollCounts) { + state.commandPollCounts = new Map(); + } + + const existing = state.commandPollCounts.get(commandId); + const now = Date.now(); + + if (hasNewOutput) { + state.commandPollCounts.set(commandId, { count: 0, lastPollAt: now }); + return BACKOFF_SCHEDULE_MS[0] ?? 5000; + } + + const newCount = (existing?.count ?? -1) + 1; + state.commandPollCounts.set(commandId, { count: newCount, lastPollAt: now }); + + return calculateBackoffMs(newCount); +} + +/** + * Get current suggested backoff for a command without modifying state. + * Useful for checking current backoff level. + */ +export function getCommandPollSuggestion( + state: SessionState, + commandId: string, +): number | undefined { + const pollData = state.commandPollCounts?.get(commandId); + if (!pollData) { + return undefined; + } + return calculateBackoffMs(pollData.count); +} + +/** + * Reset poll count for a command (e.g., when command completes). + */ +export function resetCommandPollCount(state: SessionState, commandId: string): void { + state.commandPollCounts?.delete(commandId); +} + +/** + * Prune stale command poll records (older than 1 hour). + * Call periodically to prevent memory bloat. + */ +export function pruneStaleCommandPolls(state: SessionState, maxAgeMs = 3600000): void { + if (!state.commandPollCounts) { + return; + } + + const now = Date.now(); + for (const [commandId, data] of state.commandPollCounts.entries()) { + if (now - data.lastPollAt > maxAgeMs) { + state.commandPollCounts.delete(commandId); + } + } +} diff --git a/src/agents/pi-tools.before-tool-call.ts b/src/agents/pi-tools.before-tool-call.ts index 26761f3127f..8ed84f67541 100644 --- a/src/agents/pi-tools.before-tool-call.ts +++ b/src/agents/pi-tools.before-tool-call.ts @@ -25,6 +25,32 @@ export async function runBeforeToolCallHook(args: { const toolName = normalizeToolName(args.toolName || "tool"); const params = args.params; + if (args.ctx?.sessionKey) { + const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js"); + const { detectToolCallLoop, recordToolCall } = await import("./tool-loop-detection.js"); + + const sessionState = getDiagnosticSessionState({ + sessionKey: args.ctx.sessionKey, + sessionId: args.ctx?.agentId, + }); + + const loopResult = detectToolCallLoop(sessionState, toolName, params); + + if (loopResult.stuck) { + if (loopResult.level === "critical") { + log.error(`Blocking ${toolName} due to critical loop: ${loopResult.message}`); + return { + blocked: true, + reason: loopResult.message, + }; + } else { + log.warn(`Loop warning for ${toolName}: ${loopResult.message}`); + } + } + + recordToolCall(sessionState, toolName, params); + } + const hookRunner = getGlobalHookRunner(); if (!hookRunner?.hasHooks("before_tool_call")) { return { blocked: false, params: args.params }; diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts new file mode 100644 index 00000000000..0c05b041964 --- /dev/null +++ b/src/agents/tool-loop-detection.test.ts @@ -0,0 +1,253 @@ +import { describe, expect, it } from "vitest"; +import type { SessionState } from "../logging/diagnostic-session-state.js"; +import { + CRITICAL_THRESHOLD, + TOOL_CALL_HISTORY_SIZE, + WARNING_THRESHOLD, + detectToolCallLoop, + getToolCallStats, + hashToolCall, + recordToolCall, +} from "./tool-loop-detection.js"; + +describe("tool-loop-detection", () => { + describe("hashToolCall", () => { + it("creates consistent hash for same tool and params", () => { + const hash1 = hashToolCall("read", { path: "/file.txt" }); + const hash2 = hashToolCall("read", { path: "/file.txt" }); + expect(hash1).toBe(hash2); + }); + + it("creates different hashes for different params", () => { + const hash1 = hashToolCall("read", { path: "/file1.txt" }); + const hash2 = hashToolCall("read", { path: "/file2.txt" }); + expect(hash1).not.toBe(hash2); + }); + + it("creates different hashes for different tools", () => { + const hash1 = hashToolCall("read", { path: "/file.txt" }); + const hash2 = hashToolCall("write", { path: "/file.txt" }); + expect(hash1).not.toBe(hash2); + }); + + it("handles non-object params", () => { + expect(() => hashToolCall("tool", "string-param")).not.toThrow(); + expect(() => hashToolCall("tool", 123)).not.toThrow(); + expect(() => hashToolCall("tool", null)).not.toThrow(); + }); + + it("produces deterministic hashes regardless of key order", () => { + const hash1 = hashToolCall("tool", { a: 1, b: 2 }); + const hash2 = hashToolCall("tool", { b: 2, a: 1 }); + expect(hash1).toBe(hash2); + }); + }); + + describe("recordToolCall", () => { + it("adds tool call to empty history", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + recordToolCall(state, "read", { path: "/file.txt" }); + + expect(state.toolCallHistory).toHaveLength(1); + expect(state.toolCallHistory?.[0]?.toolName).toBe("read"); + }); + + it("maintains sliding window of last N calls", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Record more than TOOL_CALL_HISTORY_SIZE calls + for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i++) { + recordToolCall(state, "tool", { iteration: i }); + } + + expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE); + + // Oldest calls should be removed + const oldestCall = state.toolCallHistory?.[0]; + expect(oldestCall?.argsHash).toContain("iteration"); + expect(oldestCall?.argsHash).not.toContain('"iteration":0'); // Call 0 should be evicted + }); + + it("records timestamp for each call", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + const before = Date.now(); + recordToolCall(state, "tool", { arg: 1 }); + const after = Date.now(); + + const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0; + expect(timestamp).toBeGreaterThanOrEqual(before); + expect(timestamp).toBeLessThanOrEqual(after); + }); + }); + + describe("detectToolCallLoop", () => { + it("does not flag unique tool calls", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Record 15 unique calls + for (let i = 0; i < 15; i++) { + recordToolCall(state, "read", { path: `/file${i}.txt` }); + } + + const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" }); + expect(result.stuck).toBe(false); + }); + + it("returns warning after WARNING_THRESHOLD identical calls", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Record WARNING_THRESHOLD identical calls + for (let i = 0; i < WARNING_THRESHOLD; i++) { + recordToolCall(state, "command_status", { commandId: "cmd-123" }); + } + + const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" }); + + expect(result.stuck).toBe(true); + if (result.stuck) { + expect(result.level).toBe("warning"); + expect(result.message).toContain("WARNING"); + expect(result.message).toContain("10 times"); + } + }); + + it("returns critical after CRITICAL_THRESHOLD identical calls", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Record CRITICAL_THRESHOLD identical calls + for (let i = 0; i < CRITICAL_THRESHOLD; i++) { + recordToolCall(state, "command_status", { commandId: "cmd-123" }); + } + + const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" }); + + expect(result.stuck).toBe(true); + if (result.stuck) { + expect(result.level).toBe("critical"); + expect(result.message).toContain("CRITICAL"); + expect(result.message).toContain("20 times"); + } + }); + + it("only counts identical tool+params combinations", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Mix different calls - only 5 identical calls for cmd-123 + for (let i = 0; i < 5; i++) { + recordToolCall(state, "command_status", { commandId: "cmd-123" }); + recordToolCall(state, "command_status", { commandId: "cmd-456" }); // Different params + recordToolCall(state, "read", { path: "/file.txt" }); // Different tool + } + + const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" }); + + // Should be below warning threshold (only 5 cmd-123 calls in history) + expect(result.stuck).toBe(false); + }); + + it("handles empty history", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + const result = detectToolCallLoop(state, "tool", { arg: 1 }); + expect(result.stuck).toBe(false); + }); + }); + + describe("getToolCallStats", () => { + it("returns zero stats for empty history", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + const stats = getToolCallStats(state); + expect(stats.totalCalls).toBe(0); + expect(stats.uniquePatterns).toBe(0); + expect(stats.mostFrequent).toBeNull(); + }); + + it("counts total calls and unique patterns", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Add 5 identical calls + for (let i = 0; i < 5; i++) { + recordToolCall(state, "read", { path: "/file.txt" }); + } + + // Add 3 different unique calls + recordToolCall(state, "write", { path: "/output.txt" }); + recordToolCall(state, "list", { dir: "/home" }); + recordToolCall(state, "read", { path: "/other.txt" }); + + const stats = getToolCallStats(state); + expect(stats.totalCalls).toBe(8); + expect(stats.uniquePatterns).toBe(4); // read file.txt, write, list, read other.txt + }); + + it("identifies most frequent pattern", () => { + const state: SessionState = { + lastActivity: Date.now(), + state: "processing", + queueDepth: 0, + }; + + // Pattern 1: 3 calls + for (let i = 0; i < 3; i++) { + recordToolCall(state, "read", { path: "/file1.txt" }); + } + + // Pattern 2: 7 calls (most frequent) + for (let i = 0; i < 7; i++) { + recordToolCall(state, "read", { path: "/file2.txt" }); + } + + // Pattern 3: 2 calls + for (let i = 0; i < 2; i++) { + recordToolCall(state, "write", { path: "/output.txt" }); + } + + const stats = getToolCallStats(state); + expect(stats.mostFrequent?.toolName).toBe("read"); + expect(stats.mostFrequent?.count).toBe(7); + }); + }); +}); diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts new file mode 100644 index 00000000000..32cccb983c0 --- /dev/null +++ b/src/agents/tool-loop-detection.ts @@ -0,0 +1,123 @@ +import type { SessionState } from "../logging/diagnostic-session-state.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; + +const log = createSubsystemLogger("agents/loop-detection"); + +export type LoopDetectionResult = + | { stuck: false } + | { stuck: true; level: "warning" | "critical"; message: string }; + +export const TOOL_CALL_HISTORY_SIZE = 30; +export const WARNING_THRESHOLD = 10; +export const CRITICAL_THRESHOLD = 20; + +/** + * Hash a tool call for pattern matching. + * Uses tool name + deterministic JSON stringification of params. + */ +export function hashToolCall(toolName: string, params: unknown): string { + try { + const paramsStr = + typeof params === "object" && params !== null + ? JSON.stringify(params, Object.keys(params).toSorted()) + : String(params); + return `${toolName}:${paramsStr}`; + } catch { + return `${toolName}:${String(params)}`; + } +} + +/** + * Detect if an agent is stuck in a repetitive tool call loop. + * Checks if the same tool+params combination has been called excessively. + */ +export function detectToolCallLoop( + state: SessionState, + toolName: string, + params: unknown, +): LoopDetectionResult { + const history = state.toolCallHistory ?? []; + const currentHash = hashToolCall(toolName, params); + + // Count occurrences of this exact call in recent history + const recentCount = history.filter( + (h) => h.toolName === toolName && h.argsHash === currentHash, + ).length; + + if (recentCount >= CRITICAL_THRESHOLD) { + log.error( + `Critical loop detected: ${toolName} called ${recentCount} times with identical arguments`, + ); + return { + stuck: true, + level: "critical", + message: `CRITICAL: Called ${toolName} with identical arguments ${recentCount} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`, + }; + } + + if (recentCount >= WARNING_THRESHOLD) { + log.warn(`Loop warning: ${toolName} called ${recentCount} times with identical arguments`); + return { + stuck: true, + level: "warning", + message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`, + }; + } + + return { stuck: false }; +} + +/** + * Record a tool call in the session's history for loop detection. + * Maintains sliding window of last N calls. + */ +export function recordToolCall(state: SessionState, toolName: string, params: unknown): void { + if (!state.toolCallHistory) { + state.toolCallHistory = []; + } + + state.toolCallHistory.push({ + toolName, + argsHash: hashToolCall(toolName, params), + timestamp: Date.now(), + }); + + if (state.toolCallHistory.length > TOOL_CALL_HISTORY_SIZE) { + state.toolCallHistory.shift(); + } +} + +/** + * Get current tool call statistics for a session (for debugging/monitoring). + */ +export function getToolCallStats(state: SessionState): { + totalCalls: number; + uniquePatterns: number; + mostFrequent: { toolName: string; count: number } | null; +} { + const history = state.toolCallHistory ?? []; + const patterns = new Map(); + + for (const call of history) { + const key = call.argsHash; + const existing = patterns.get(key); + if (existing) { + existing.count += 1; + } else { + patterns.set(key, { toolName: call.toolName, count: 1 }); + } + } + + let mostFrequent: { toolName: string; count: number } | null = null; + for (const pattern of patterns.values()) { + if (!mostFrequent || pattern.count > mostFrequent.count) { + mostFrequent = pattern; + } + } + + return { + totalCalls: history.length, + uniquePatterns: patterns.size, + mostFrequent, + }; +} diff --git a/src/logging/diagnostic-session-state.ts b/src/logging/diagnostic-session-state.ts index aa5224550b8..69af560a537 100644 --- a/src/logging/diagnostic-session-state.ts +++ b/src/logging/diagnostic-session-state.ts @@ -6,6 +6,14 @@ export type SessionState = { lastActivity: number; state: SessionStateValue; queueDepth: number; + toolCallHistory?: ToolCallRecord[]; + commandPollCounts?: Map; +}; + +export type ToolCallRecord = { + toolName: string; + argsHash: string; + timestamp: number; }; export type SessionRef = { diff --git a/src/logging/diagnostic.ts b/src/logging/diagnostic.ts index 73a3ac8938b..e8c0dc2ca1e 100644 --- a/src/logging/diagnostic.ts +++ b/src/logging/diagnostic.ts @@ -314,6 +314,14 @@ export function startDiagnosticHeartbeat() { queued: totalQueued, }); + import("../agents/command-poll-backoff.js") + .then(({ pruneStaleCommandPolls }) => { + for (const [, state] of diagnosticSessionStates) { + pruneStaleCommandPolls(state); + } + }) + .catch(() => {}); + for (const [, state] of diagnosticSessionStates) { const ageMs = now - state.lastActivity; if (state.state === "processing" && ageMs > 120_000) {