feat: add stuck loop detection and exponential backoff infrastructure for agent polling

This commit is contained in:
SK Akram 2026-02-15 12:28:46 +00:00 committed by Gustavo Madeira Santana
parent 1f99d82712
commit 40939b29a2
7 changed files with 673 additions and 0 deletions

View File

@ -0,0 +1,173 @@
import { describe, expect, it } from "vitest";
import type { SessionState } from "../logging/diagnostic-session-state.js";
import {
calculateBackoffMs,
getCommandPollSuggestion,
pruneStaleCommandPolls,
recordCommandPoll,
resetCommandPollCount,
} from "./command-poll-backoff.js";
describe("command-poll-backoff", () => {
describe("calculateBackoffMs", () => {
it("returns 5s for first poll", () => {
expect(calculateBackoffMs(0)).toBe(5000);
});
it("returns 10s for second poll", () => {
expect(calculateBackoffMs(1)).toBe(10000);
});
it("returns 30s for third poll", () => {
expect(calculateBackoffMs(2)).toBe(30000);
});
it("returns 60s for fourth and subsequent polls (capped)", () => {
expect(calculateBackoffMs(3)).toBe(60000);
expect(calculateBackoffMs(4)).toBe(60000);
expect(calculateBackoffMs(10)).toBe(60000);
expect(calculateBackoffMs(100)).toBe(60000);
});
});
describe("recordCommandPoll", () => {
it("returns 5s on first no-output poll", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
const retryMs = recordCommandPoll(state, "cmd-123", false);
expect(retryMs).toBe(5000);
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0); // First poll = index 0
});
it("increments count and increases backoff on consecutive no-output polls", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
expect(recordCommandPoll(state, "cmd-123", false)).toBe(5000); // count=0 -> 5s
expect(recordCommandPoll(state, "cmd-123", false)).toBe(10000); // count=1 -> 10s
expect(recordCommandPoll(state, "cmd-123", false)).toBe(30000); // count=2 -> 30s
expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=3 -> 60s
expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=4 -> 60s (capped)
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(4); // 5 polls = index 4
});
it("resets count when poll returns new output", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
recordCommandPoll(state, "cmd-123", false);
recordCommandPoll(state, "cmd-123", false);
recordCommandPoll(state, "cmd-123", false);
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(2); // 3 polls = index 2
// New output resets count
const retryMs = recordCommandPoll(state, "cmd-123", true);
expect(retryMs).toBe(5000); // Back to first poll delay
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0);
});
it("tracks different commands independently", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
recordCommandPoll(state, "cmd-1", false);
recordCommandPoll(state, "cmd-1", false);
recordCommandPoll(state, "cmd-2", false);
expect(state.commandPollCounts?.get("cmd-1")?.count).toBe(1); // 2 polls = index 1
expect(state.commandPollCounts?.get("cmd-2")?.count).toBe(0); // 1 poll = index 0
});
});
describe("getCommandPollSuggestion", () => {
it("returns undefined for untracked command", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
expect(getCommandPollSuggestion(state, "unknown")).toBeUndefined();
});
it("returns current backoff for tracked command", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
recordCommandPoll(state, "cmd-123", false);
recordCommandPoll(state, "cmd-123", false);
expect(getCommandPollSuggestion(state, "cmd-123")).toBe(10000);
});
});
describe("resetCommandPollCount", () => {
it("removes command from tracking", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
recordCommandPoll(state, "cmd-123", false);
expect(state.commandPollCounts?.has("cmd-123")).toBe(true);
resetCommandPollCount(state, "cmd-123");
expect(state.commandPollCounts?.has("cmd-123")).toBe(false);
});
it("is safe to call on untracked command", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
expect(() => resetCommandPollCount(state, "unknown")).not.toThrow();
});
});
describe("pruneStaleCommandPolls", () => {
it("removes polls older than maxAge", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
commandPollCounts: new Map([
["cmd-old", { count: 5, lastPollAt: Date.now() - 7200000 }], // 2 hours ago
["cmd-new", { count: 3, lastPollAt: Date.now() - 1000 }], // 1 second ago
]),
};
pruneStaleCommandPolls(state, 3600000); // 1 hour max age
expect(state.commandPollCounts?.has("cmd-old")).toBe(false);
expect(state.commandPollCounts?.has("cmd-new")).toBe(true);
});
it("handles empty state gracefully", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "idle",
queueDepth: 0,
};
expect(() => pruneStaleCommandPolls(state)).not.toThrow();
});
});
});

View File

@ -0,0 +1,82 @@
import type { SessionState } from "../logging/diagnostic-session-state.js";
// Exponential backoff schedule for command polling
const BACKOFF_SCHEDULE_MS = [5000, 10000, 30000, 60000];
/**
* Calculate suggested retry delay based on consecutive no-output poll count.
* Implements exponential backoff schedule: 5s 10s 30s 60s (capped).
*/
export function calculateBackoffMs(consecutiveNoOutputPolls: number): number {
const index = Math.min(consecutiveNoOutputPolls, BACKOFF_SCHEDULE_MS.length - 1);
return BACKOFF_SCHEDULE_MS[index] ?? 60000;
}
/**
* Record a command poll and return suggested retry delay.
* @param state Session state to track polling in
* @param commandId Unique identifier for the command being polled
* @param hasNewOutput Whether this poll returned new output
* @returns Suggested delay in milliseconds before next poll
*/
export function recordCommandPoll(
state: SessionState,
commandId: string,
hasNewOutput: boolean,
): number {
if (!state.commandPollCounts) {
state.commandPollCounts = new Map();
}
const existing = state.commandPollCounts.get(commandId);
const now = Date.now();
if (hasNewOutput) {
state.commandPollCounts.set(commandId, { count: 0, lastPollAt: now });
return BACKOFF_SCHEDULE_MS[0] ?? 5000;
}
const newCount = (existing?.count ?? -1) + 1;
state.commandPollCounts.set(commandId, { count: newCount, lastPollAt: now });
return calculateBackoffMs(newCount);
}
/**
* Get current suggested backoff for a command without modifying state.
* Useful for checking current backoff level.
*/
export function getCommandPollSuggestion(
state: SessionState,
commandId: string,
): number | undefined {
const pollData = state.commandPollCounts?.get(commandId);
if (!pollData) {
return undefined;
}
return calculateBackoffMs(pollData.count);
}
/**
* Reset poll count for a command (e.g., when command completes).
*/
export function resetCommandPollCount(state: SessionState, commandId: string): void {
state.commandPollCounts?.delete(commandId);
}
/**
* Prune stale command poll records (older than 1 hour).
* Call periodically to prevent memory bloat.
*/
export function pruneStaleCommandPolls(state: SessionState, maxAgeMs = 3600000): void {
if (!state.commandPollCounts) {
return;
}
const now = Date.now();
for (const [commandId, data] of state.commandPollCounts.entries()) {
if (now - data.lastPollAt > maxAgeMs) {
state.commandPollCounts.delete(commandId);
}
}
}

View File

@ -25,6 +25,32 @@ export async function runBeforeToolCallHook(args: {
const toolName = normalizeToolName(args.toolName || "tool");
const params = args.params;
if (args.ctx?.sessionKey) {
const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
const { detectToolCallLoop, recordToolCall } = await import("./tool-loop-detection.js");
const sessionState = getDiagnosticSessionState({
sessionKey: args.ctx.sessionKey,
sessionId: args.ctx?.agentId,
});
const loopResult = detectToolCallLoop(sessionState, toolName, params);
if (loopResult.stuck) {
if (loopResult.level === "critical") {
log.error(`Blocking ${toolName} due to critical loop: ${loopResult.message}`);
return {
blocked: true,
reason: loopResult.message,
};
} else {
log.warn(`Loop warning for ${toolName}: ${loopResult.message}`);
}
}
recordToolCall(sessionState, toolName, params);
}
const hookRunner = getGlobalHookRunner();
if (!hookRunner?.hasHooks("before_tool_call")) {
return { blocked: false, params: args.params };

View File

@ -0,0 +1,253 @@
import { describe, expect, it } from "vitest";
import type { SessionState } from "../logging/diagnostic-session-state.js";
import {
CRITICAL_THRESHOLD,
TOOL_CALL_HISTORY_SIZE,
WARNING_THRESHOLD,
detectToolCallLoop,
getToolCallStats,
hashToolCall,
recordToolCall,
} from "./tool-loop-detection.js";
describe("tool-loop-detection", () => {
describe("hashToolCall", () => {
it("creates consistent hash for same tool and params", () => {
const hash1 = hashToolCall("read", { path: "/file.txt" });
const hash2 = hashToolCall("read", { path: "/file.txt" });
expect(hash1).toBe(hash2);
});
it("creates different hashes for different params", () => {
const hash1 = hashToolCall("read", { path: "/file1.txt" });
const hash2 = hashToolCall("read", { path: "/file2.txt" });
expect(hash1).not.toBe(hash2);
});
it("creates different hashes for different tools", () => {
const hash1 = hashToolCall("read", { path: "/file.txt" });
const hash2 = hashToolCall("write", { path: "/file.txt" });
expect(hash1).not.toBe(hash2);
});
it("handles non-object params", () => {
expect(() => hashToolCall("tool", "string-param")).not.toThrow();
expect(() => hashToolCall("tool", 123)).not.toThrow();
expect(() => hashToolCall("tool", null)).not.toThrow();
});
it("produces deterministic hashes regardless of key order", () => {
const hash1 = hashToolCall("tool", { a: 1, b: 2 });
const hash2 = hashToolCall("tool", { b: 2, a: 1 });
expect(hash1).toBe(hash2);
});
});
describe("recordToolCall", () => {
it("adds tool call to empty history", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
recordToolCall(state, "read", { path: "/file.txt" });
expect(state.toolCallHistory).toHaveLength(1);
expect(state.toolCallHistory?.[0]?.toolName).toBe("read");
});
it("maintains sliding window of last N calls", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Record more than TOOL_CALL_HISTORY_SIZE calls
for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i++) {
recordToolCall(state, "tool", { iteration: i });
}
expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);
// Oldest calls should be removed
const oldestCall = state.toolCallHistory?.[0];
expect(oldestCall?.argsHash).toContain("iteration");
expect(oldestCall?.argsHash).not.toContain('"iteration":0'); // Call 0 should be evicted
});
it("records timestamp for each call", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
const before = Date.now();
recordToolCall(state, "tool", { arg: 1 });
const after = Date.now();
const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0;
expect(timestamp).toBeGreaterThanOrEqual(before);
expect(timestamp).toBeLessThanOrEqual(after);
});
});
describe("detectToolCallLoop", () => {
it("does not flag unique tool calls", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Record 15 unique calls
for (let i = 0; i < 15; i++) {
recordToolCall(state, "read", { path: `/file${i}.txt` });
}
const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" });
expect(result.stuck).toBe(false);
});
it("returns warning after WARNING_THRESHOLD identical calls", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Record WARNING_THRESHOLD identical calls
for (let i = 0; i < WARNING_THRESHOLD; i++) {
recordToolCall(state, "command_status", { commandId: "cmd-123" });
}
const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" });
expect(result.stuck).toBe(true);
if (result.stuck) {
expect(result.level).toBe("warning");
expect(result.message).toContain("WARNING");
expect(result.message).toContain("10 times");
}
});
it("returns critical after CRITICAL_THRESHOLD identical calls", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Record CRITICAL_THRESHOLD identical calls
for (let i = 0; i < CRITICAL_THRESHOLD; i++) {
recordToolCall(state, "command_status", { commandId: "cmd-123" });
}
const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" });
expect(result.stuck).toBe(true);
if (result.stuck) {
expect(result.level).toBe("critical");
expect(result.message).toContain("CRITICAL");
expect(result.message).toContain("20 times");
}
});
it("only counts identical tool+params combinations", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Mix different calls - only 5 identical calls for cmd-123
for (let i = 0; i < 5; i++) {
recordToolCall(state, "command_status", { commandId: "cmd-123" });
recordToolCall(state, "command_status", { commandId: "cmd-456" }); // Different params
recordToolCall(state, "read", { path: "/file.txt" }); // Different tool
}
const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" });
// Should be below warning threshold (only 5 cmd-123 calls in history)
expect(result.stuck).toBe(false);
});
it("handles empty history", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
const result = detectToolCallLoop(state, "tool", { arg: 1 });
expect(result.stuck).toBe(false);
});
});
describe("getToolCallStats", () => {
it("returns zero stats for empty history", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
const stats = getToolCallStats(state);
expect(stats.totalCalls).toBe(0);
expect(stats.uniquePatterns).toBe(0);
expect(stats.mostFrequent).toBeNull();
});
it("counts total calls and unique patterns", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Add 5 identical calls
for (let i = 0; i < 5; i++) {
recordToolCall(state, "read", { path: "/file.txt" });
}
// Add 3 different unique calls
recordToolCall(state, "write", { path: "/output.txt" });
recordToolCall(state, "list", { dir: "/home" });
recordToolCall(state, "read", { path: "/other.txt" });
const stats = getToolCallStats(state);
expect(stats.totalCalls).toBe(8);
expect(stats.uniquePatterns).toBe(4); // read file.txt, write, list, read other.txt
});
it("identifies most frequent pattern", () => {
const state: SessionState = {
lastActivity: Date.now(),
state: "processing",
queueDepth: 0,
};
// Pattern 1: 3 calls
for (let i = 0; i < 3; i++) {
recordToolCall(state, "read", { path: "/file1.txt" });
}
// Pattern 2: 7 calls (most frequent)
for (let i = 0; i < 7; i++) {
recordToolCall(state, "read", { path: "/file2.txt" });
}
// Pattern 3: 2 calls
for (let i = 0; i < 2; i++) {
recordToolCall(state, "write", { path: "/output.txt" });
}
const stats = getToolCallStats(state);
expect(stats.mostFrequent?.toolName).toBe("read");
expect(stats.mostFrequent?.count).toBe(7);
});
});
});

View File

@ -0,0 +1,123 @@
import type { SessionState } from "../logging/diagnostic-session-state.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
const log = createSubsystemLogger("agents/loop-detection");
export type LoopDetectionResult =
| { stuck: false }
| { stuck: true; level: "warning" | "critical"; message: string };
export const TOOL_CALL_HISTORY_SIZE = 30;
export const WARNING_THRESHOLD = 10;
export const CRITICAL_THRESHOLD = 20;
/**
* Hash a tool call for pattern matching.
* Uses tool name + deterministic JSON stringification of params.
*/
export function hashToolCall(toolName: string, params: unknown): string {
try {
const paramsStr =
typeof params === "object" && params !== null
? JSON.stringify(params, Object.keys(params).toSorted())
: String(params);
return `${toolName}:${paramsStr}`;
} catch {
return `${toolName}:${String(params)}`;
}
}
/**
* Detect if an agent is stuck in a repetitive tool call loop.
* Checks if the same tool+params combination has been called excessively.
*/
export function detectToolCallLoop(
state: SessionState,
toolName: string,
params: unknown,
): LoopDetectionResult {
const history = state.toolCallHistory ?? [];
const currentHash = hashToolCall(toolName, params);
// Count occurrences of this exact call in recent history
const recentCount = history.filter(
(h) => h.toolName === toolName && h.argsHash === currentHash,
).length;
if (recentCount >= CRITICAL_THRESHOLD) {
log.error(
`Critical loop detected: ${toolName} called ${recentCount} times with identical arguments`,
);
return {
stuck: true,
level: "critical",
message: `CRITICAL: Called ${toolName} with identical arguments ${recentCount} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`,
};
}
if (recentCount >= WARNING_THRESHOLD) {
log.warn(`Loop warning: ${toolName} called ${recentCount} times with identical arguments`);
return {
stuck: true,
level: "warning",
message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`,
};
}
return { stuck: false };
}
/**
* Record a tool call in the session's history for loop detection.
* Maintains sliding window of last N calls.
*/
export function recordToolCall(state: SessionState, toolName: string, params: unknown): void {
if (!state.toolCallHistory) {
state.toolCallHistory = [];
}
state.toolCallHistory.push({
toolName,
argsHash: hashToolCall(toolName, params),
timestamp: Date.now(),
});
if (state.toolCallHistory.length > TOOL_CALL_HISTORY_SIZE) {
state.toolCallHistory.shift();
}
}
/**
* Get current tool call statistics for a session (for debugging/monitoring).
*/
export function getToolCallStats(state: SessionState): {
totalCalls: number;
uniquePatterns: number;
mostFrequent: { toolName: string; count: number } | null;
} {
const history = state.toolCallHistory ?? [];
const patterns = new Map<string, { toolName: string; count: number }>();
for (const call of history) {
const key = call.argsHash;
const existing = patterns.get(key);
if (existing) {
existing.count += 1;
} else {
patterns.set(key, { toolName: call.toolName, count: 1 });
}
}
let mostFrequent: { toolName: string; count: number } | null = null;
for (const pattern of patterns.values()) {
if (!mostFrequent || pattern.count > mostFrequent.count) {
mostFrequent = pattern;
}
}
return {
totalCalls: history.length,
uniquePatterns: patterns.size,
mostFrequent,
};
}

View File

@ -6,6 +6,14 @@ export type SessionState = {
lastActivity: number;
state: SessionStateValue;
queueDepth: number;
toolCallHistory?: ToolCallRecord[];
commandPollCounts?: Map<string, { count: number; lastPollAt: number }>;
};
export type ToolCallRecord = {
toolName: string;
argsHash: string;
timestamp: number;
};
export type SessionRef = {

View File

@ -314,6 +314,14 @@ export function startDiagnosticHeartbeat() {
queued: totalQueued,
});
import("../agents/command-poll-backoff.js")
.then(({ pruneStaleCommandPolls }) => {
for (const [, state] of diagnosticSessionStates) {
pruneStaleCommandPolls(state);
}
})
.catch(() => {});
for (const [, state] of diagnosticSessionStates) {
const ageMs = now - state.lastActivity;
if (state.state === "processing" && ageMs > 120_000) {