mirror of https://github.com/openclaw/openclaw.git
feat: add stuck loop detection and exponential backoff infrastructure for agent polling
This commit is contained in:
parent
1f99d82712
commit
40939b29a2
|
|
@ -0,0 +1,173 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import {
|
||||
calculateBackoffMs,
|
||||
getCommandPollSuggestion,
|
||||
pruneStaleCommandPolls,
|
||||
recordCommandPoll,
|
||||
resetCommandPollCount,
|
||||
} from "./command-poll-backoff.js";
|
||||
|
||||
describe("command-poll-backoff", () => {
|
||||
describe("calculateBackoffMs", () => {
|
||||
it("returns 5s for first poll", () => {
|
||||
expect(calculateBackoffMs(0)).toBe(5000);
|
||||
});
|
||||
|
||||
it("returns 10s for second poll", () => {
|
||||
expect(calculateBackoffMs(1)).toBe(10000);
|
||||
});
|
||||
|
||||
it("returns 30s for third poll", () => {
|
||||
expect(calculateBackoffMs(2)).toBe(30000);
|
||||
});
|
||||
|
||||
it("returns 60s for fourth and subsequent polls (capped)", () => {
|
||||
expect(calculateBackoffMs(3)).toBe(60000);
|
||||
expect(calculateBackoffMs(4)).toBe(60000);
|
||||
expect(calculateBackoffMs(10)).toBe(60000);
|
||||
expect(calculateBackoffMs(100)).toBe(60000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordCommandPoll", () => {
|
||||
it("returns 5s on first no-output poll", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
const retryMs = recordCommandPoll(state, "cmd-123", false);
|
||||
expect(retryMs).toBe(5000);
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0); // First poll = index 0
|
||||
});
|
||||
|
||||
it("increments count and increases backoff on consecutive no-output polls", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(5000); // count=0 -> 5s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(10000); // count=1 -> 10s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(30000); // count=2 -> 30s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=3 -> 60s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=4 -> 60s (capped)
|
||||
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(4); // 5 polls = index 4
|
||||
});
|
||||
|
||||
it("resets count when poll returns new output", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(2); // 3 polls = index 2
|
||||
|
||||
// New output resets count
|
||||
const retryMs = recordCommandPoll(state, "cmd-123", true);
|
||||
expect(retryMs).toBe(5000); // Back to first poll delay
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0);
|
||||
});
|
||||
|
||||
it("tracks different commands independently", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-1", false);
|
||||
recordCommandPoll(state, "cmd-1", false);
|
||||
recordCommandPoll(state, "cmd-2", false);
|
||||
|
||||
expect(state.commandPollCounts?.get("cmd-1")?.count).toBe(1); // 2 polls = index 1
|
||||
expect(state.commandPollCounts?.get("cmd-2")?.count).toBe(0); // 1 poll = index 0
|
||||
});
|
||||
});
|
||||
|
||||
describe("getCommandPollSuggestion", () => {
|
||||
it("returns undefined for untracked command", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
expect(getCommandPollSuggestion(state, "unknown")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns current backoff for tracked command", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
|
||||
expect(getCommandPollSuggestion(state, "cmd-123")).toBe(10000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resetCommandPollCount", () => {
|
||||
it("removes command from tracking", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
expect(state.commandPollCounts?.has("cmd-123")).toBe(true);
|
||||
|
||||
resetCommandPollCount(state, "cmd-123");
|
||||
expect(state.commandPollCounts?.has("cmd-123")).toBe(false);
|
||||
});
|
||||
|
||||
it("is safe to call on untracked command", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
expect(() => resetCommandPollCount(state, "unknown")).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("pruneStaleCommandPolls", () => {
|
||||
it("removes polls older than maxAge", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
commandPollCounts: new Map([
|
||||
["cmd-old", { count: 5, lastPollAt: Date.now() - 7200000 }], // 2 hours ago
|
||||
["cmd-new", { count: 3, lastPollAt: Date.now() - 1000 }], // 1 second ago
|
||||
]),
|
||||
};
|
||||
|
||||
pruneStaleCommandPolls(state, 3600000); // 1 hour max age
|
||||
|
||||
expect(state.commandPollCounts?.has("cmd-old")).toBe(false);
|
||||
expect(state.commandPollCounts?.has("cmd-new")).toBe(true);
|
||||
});
|
||||
|
||||
it("handles empty state gracefully", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "idle",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
expect(() => pruneStaleCommandPolls(state)).not.toThrow();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
|
||||
// Exponential backoff schedule for command polling
|
||||
const BACKOFF_SCHEDULE_MS = [5000, 10000, 30000, 60000];
|
||||
|
||||
/**
|
||||
* Calculate suggested retry delay based on consecutive no-output poll count.
|
||||
* Implements exponential backoff schedule: 5s → 10s → 30s → 60s (capped).
|
||||
*/
|
||||
export function calculateBackoffMs(consecutiveNoOutputPolls: number): number {
|
||||
const index = Math.min(consecutiveNoOutputPolls, BACKOFF_SCHEDULE_MS.length - 1);
|
||||
return BACKOFF_SCHEDULE_MS[index] ?? 60000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a command poll and return suggested retry delay.
|
||||
* @param state Session state to track polling in
|
||||
* @param commandId Unique identifier for the command being polled
|
||||
* @param hasNewOutput Whether this poll returned new output
|
||||
* @returns Suggested delay in milliseconds before next poll
|
||||
*/
|
||||
export function recordCommandPoll(
|
||||
state: SessionState,
|
||||
commandId: string,
|
||||
hasNewOutput: boolean,
|
||||
): number {
|
||||
if (!state.commandPollCounts) {
|
||||
state.commandPollCounts = new Map();
|
||||
}
|
||||
|
||||
const existing = state.commandPollCounts.get(commandId);
|
||||
const now = Date.now();
|
||||
|
||||
if (hasNewOutput) {
|
||||
state.commandPollCounts.set(commandId, { count: 0, lastPollAt: now });
|
||||
return BACKOFF_SCHEDULE_MS[0] ?? 5000;
|
||||
}
|
||||
|
||||
const newCount = (existing?.count ?? -1) + 1;
|
||||
state.commandPollCounts.set(commandId, { count: newCount, lastPollAt: now });
|
||||
|
||||
return calculateBackoffMs(newCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current suggested backoff for a command without modifying state.
|
||||
* Useful for checking current backoff level.
|
||||
*/
|
||||
export function getCommandPollSuggestion(
|
||||
state: SessionState,
|
||||
commandId: string,
|
||||
): number | undefined {
|
||||
const pollData = state.commandPollCounts?.get(commandId);
|
||||
if (!pollData) {
|
||||
return undefined;
|
||||
}
|
||||
return calculateBackoffMs(pollData.count);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset poll count for a command (e.g., when command completes).
|
||||
*/
|
||||
export function resetCommandPollCount(state: SessionState, commandId: string): void {
|
||||
state.commandPollCounts?.delete(commandId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prune stale command poll records (older than 1 hour).
|
||||
* Call periodically to prevent memory bloat.
|
||||
*/
|
||||
export function pruneStaleCommandPolls(state: SessionState, maxAgeMs = 3600000): void {
|
||||
if (!state.commandPollCounts) {
|
||||
return;
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
for (const [commandId, data] of state.commandPollCounts.entries()) {
|
||||
if (now - data.lastPollAt > maxAgeMs) {
|
||||
state.commandPollCounts.delete(commandId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,32 @@ export async function runBeforeToolCallHook(args: {
|
|||
const toolName = normalizeToolName(args.toolName || "tool");
|
||||
const params = args.params;
|
||||
|
||||
if (args.ctx?.sessionKey) {
|
||||
const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
|
||||
const { detectToolCallLoop, recordToolCall } = await import("./tool-loop-detection.js");
|
||||
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
});
|
||||
|
||||
const loopResult = detectToolCallLoop(sessionState, toolName, params);
|
||||
|
||||
if (loopResult.stuck) {
|
||||
if (loopResult.level === "critical") {
|
||||
log.error(`Blocking ${toolName} due to critical loop: ${loopResult.message}`);
|
||||
return {
|
||||
blocked: true,
|
||||
reason: loopResult.message,
|
||||
};
|
||||
} else {
|
||||
log.warn(`Loop warning for ${toolName}: ${loopResult.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
recordToolCall(sessionState, toolName, params);
|
||||
}
|
||||
|
||||
const hookRunner = getGlobalHookRunner();
|
||||
if (!hookRunner?.hasHooks("before_tool_call")) {
|
||||
return { blocked: false, params: args.params };
|
||||
|
|
|
|||
|
|
@ -0,0 +1,253 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import {
|
||||
CRITICAL_THRESHOLD,
|
||||
TOOL_CALL_HISTORY_SIZE,
|
||||
WARNING_THRESHOLD,
|
||||
detectToolCallLoop,
|
||||
getToolCallStats,
|
||||
hashToolCall,
|
||||
recordToolCall,
|
||||
} from "./tool-loop-detection.js";
|
||||
|
||||
describe("tool-loop-detection", () => {
|
||||
describe("hashToolCall", () => {
|
||||
it("creates consistent hash for same tool and params", () => {
|
||||
const hash1 = hashToolCall("read", { path: "/file.txt" });
|
||||
const hash2 = hashToolCall("read", { path: "/file.txt" });
|
||||
expect(hash1).toBe(hash2);
|
||||
});
|
||||
|
||||
it("creates different hashes for different params", () => {
|
||||
const hash1 = hashToolCall("read", { path: "/file1.txt" });
|
||||
const hash2 = hashToolCall("read", { path: "/file2.txt" });
|
||||
expect(hash1).not.toBe(hash2);
|
||||
});
|
||||
|
||||
it("creates different hashes for different tools", () => {
|
||||
const hash1 = hashToolCall("read", { path: "/file.txt" });
|
||||
const hash2 = hashToolCall("write", { path: "/file.txt" });
|
||||
expect(hash1).not.toBe(hash2);
|
||||
});
|
||||
|
||||
it("handles non-object params", () => {
|
||||
expect(() => hashToolCall("tool", "string-param")).not.toThrow();
|
||||
expect(() => hashToolCall("tool", 123)).not.toThrow();
|
||||
expect(() => hashToolCall("tool", null)).not.toThrow();
|
||||
});
|
||||
|
||||
it("produces deterministic hashes regardless of key order", () => {
|
||||
const hash1 = hashToolCall("tool", { a: 1, b: 2 });
|
||||
const hash2 = hashToolCall("tool", { b: 2, a: 1 });
|
||||
expect(hash1).toBe(hash2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordToolCall", () => {
|
||||
it("adds tool call to empty history", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordToolCall(state, "read", { path: "/file.txt" });
|
||||
|
||||
expect(state.toolCallHistory).toHaveLength(1);
|
||||
expect(state.toolCallHistory?.[0]?.toolName).toBe("read");
|
||||
});
|
||||
|
||||
it("maintains sliding window of last N calls", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Record more than TOOL_CALL_HISTORY_SIZE calls
|
||||
for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i++) {
|
||||
recordToolCall(state, "tool", { iteration: i });
|
||||
}
|
||||
|
||||
expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);
|
||||
|
||||
// Oldest calls should be removed
|
||||
const oldestCall = state.toolCallHistory?.[0];
|
||||
expect(oldestCall?.argsHash).toContain("iteration");
|
||||
expect(oldestCall?.argsHash).not.toContain('"iteration":0'); // Call 0 should be evicted
|
||||
});
|
||||
|
||||
it("records timestamp for each call", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
const before = Date.now();
|
||||
recordToolCall(state, "tool", { arg: 1 });
|
||||
const after = Date.now();
|
||||
|
||||
const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0;
|
||||
expect(timestamp).toBeGreaterThanOrEqual(before);
|
||||
expect(timestamp).toBeLessThanOrEqual(after);
|
||||
});
|
||||
});
|
||||
|
||||
describe("detectToolCallLoop", () => {
|
||||
it("does not flag unique tool calls", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Record 15 unique calls
|
||||
for (let i = 0; i < 15; i++) {
|
||||
recordToolCall(state, "read", { path: `/file${i}.txt` });
|
||||
}
|
||||
|
||||
const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" });
|
||||
expect(result.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("returns warning after WARNING_THRESHOLD identical calls", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Record WARNING_THRESHOLD identical calls
|
||||
for (let i = 0; i < WARNING_THRESHOLD; i++) {
|
||||
recordToolCall(state, "command_status", { commandId: "cmd-123" });
|
||||
}
|
||||
|
||||
const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" });
|
||||
|
||||
expect(result.stuck).toBe(true);
|
||||
if (result.stuck) {
|
||||
expect(result.level).toBe("warning");
|
||||
expect(result.message).toContain("WARNING");
|
||||
expect(result.message).toContain("10 times");
|
||||
}
|
||||
});
|
||||
|
||||
it("returns critical after CRITICAL_THRESHOLD identical calls", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Record CRITICAL_THRESHOLD identical calls
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i++) {
|
||||
recordToolCall(state, "command_status", { commandId: "cmd-123" });
|
||||
}
|
||||
|
||||
const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" });
|
||||
|
||||
expect(result.stuck).toBe(true);
|
||||
if (result.stuck) {
|
||||
expect(result.level).toBe("critical");
|
||||
expect(result.message).toContain("CRITICAL");
|
||||
expect(result.message).toContain("20 times");
|
||||
}
|
||||
});
|
||||
|
||||
it("only counts identical tool+params combinations", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Mix different calls - only 5 identical calls for cmd-123
|
||||
for (let i = 0; i < 5; i++) {
|
||||
recordToolCall(state, "command_status", { commandId: "cmd-123" });
|
||||
recordToolCall(state, "command_status", { commandId: "cmd-456" }); // Different params
|
||||
recordToolCall(state, "read", { path: "/file.txt" }); // Different tool
|
||||
}
|
||||
|
||||
const result = detectToolCallLoop(state, "command_status", { commandId: "cmd-123" });
|
||||
|
||||
// Should be below warning threshold (only 5 cmd-123 calls in history)
|
||||
expect(result.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("handles empty history", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
const result = detectToolCallLoop(state, "tool", { arg: 1 });
|
||||
expect(result.stuck).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getToolCallStats", () => {
|
||||
it("returns zero stats for empty history", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
const stats = getToolCallStats(state);
|
||||
expect(stats.totalCalls).toBe(0);
|
||||
expect(stats.uniquePatterns).toBe(0);
|
||||
expect(stats.mostFrequent).toBeNull();
|
||||
});
|
||||
|
||||
it("counts total calls and unique patterns", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Add 5 identical calls
|
||||
for (let i = 0; i < 5; i++) {
|
||||
recordToolCall(state, "read", { path: "/file.txt" });
|
||||
}
|
||||
|
||||
// Add 3 different unique calls
|
||||
recordToolCall(state, "write", { path: "/output.txt" });
|
||||
recordToolCall(state, "list", { dir: "/home" });
|
||||
recordToolCall(state, "read", { path: "/other.txt" });
|
||||
|
||||
const stats = getToolCallStats(state);
|
||||
expect(stats.totalCalls).toBe(8);
|
||||
expect(stats.uniquePatterns).toBe(4); // read file.txt, write, list, read other.txt
|
||||
});
|
||||
|
||||
it("identifies most frequent pattern", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
// Pattern 1: 3 calls
|
||||
for (let i = 0; i < 3; i++) {
|
||||
recordToolCall(state, "read", { path: "/file1.txt" });
|
||||
}
|
||||
|
||||
// Pattern 2: 7 calls (most frequent)
|
||||
for (let i = 0; i < 7; i++) {
|
||||
recordToolCall(state, "read", { path: "/file2.txt" });
|
||||
}
|
||||
|
||||
// Pattern 3: 2 calls
|
||||
for (let i = 0; i < 2; i++) {
|
||||
recordToolCall(state, "write", { path: "/output.txt" });
|
||||
}
|
||||
|
||||
const stats = getToolCallStats(state);
|
||||
expect(stats.mostFrequent?.toolName).toBe("read");
|
||||
expect(stats.mostFrequent?.count).toBe(7);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/loop-detection");
|
||||
|
||||
export type LoopDetectionResult =
|
||||
| { stuck: false }
|
||||
| { stuck: true; level: "warning" | "critical"; message: string };
|
||||
|
||||
export const TOOL_CALL_HISTORY_SIZE = 30;
|
||||
export const WARNING_THRESHOLD = 10;
|
||||
export const CRITICAL_THRESHOLD = 20;
|
||||
|
||||
/**
|
||||
* Hash a tool call for pattern matching.
|
||||
* Uses tool name + deterministic JSON stringification of params.
|
||||
*/
|
||||
export function hashToolCall(toolName: string, params: unknown): string {
|
||||
try {
|
||||
const paramsStr =
|
||||
typeof params === "object" && params !== null
|
||||
? JSON.stringify(params, Object.keys(params).toSorted())
|
||||
: String(params);
|
||||
return `${toolName}:${paramsStr}`;
|
||||
} catch {
|
||||
return `${toolName}:${String(params)}`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an agent is stuck in a repetitive tool call loop.
|
||||
* Checks if the same tool+params combination has been called excessively.
|
||||
*/
|
||||
export function detectToolCallLoop(
|
||||
state: SessionState,
|
||||
toolName: string,
|
||||
params: unknown,
|
||||
): LoopDetectionResult {
|
||||
const history = state.toolCallHistory ?? [];
|
||||
const currentHash = hashToolCall(toolName, params);
|
||||
|
||||
// Count occurrences of this exact call in recent history
|
||||
const recentCount = history.filter(
|
||||
(h) => h.toolName === toolName && h.argsHash === currentHash,
|
||||
).length;
|
||||
|
||||
if (recentCount >= CRITICAL_THRESHOLD) {
|
||||
log.error(
|
||||
`Critical loop detected: ${toolName} called ${recentCount} times with identical arguments`,
|
||||
);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
message: `CRITICAL: Called ${toolName} with identical arguments ${recentCount} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`,
|
||||
};
|
||||
}
|
||||
|
||||
if (recentCount >= WARNING_THRESHOLD) {
|
||||
log.warn(`Loop warning: ${toolName} called ${recentCount} times with identical arguments`);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`,
|
||||
};
|
||||
}
|
||||
|
||||
return { stuck: false };
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a tool call in the session's history for loop detection.
|
||||
* Maintains sliding window of last N calls.
|
||||
*/
|
||||
export function recordToolCall(state: SessionState, toolName: string, params: unknown): void {
|
||||
if (!state.toolCallHistory) {
|
||||
state.toolCallHistory = [];
|
||||
}
|
||||
|
||||
state.toolCallHistory.push({
|
||||
toolName,
|
||||
argsHash: hashToolCall(toolName, params),
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
if (state.toolCallHistory.length > TOOL_CALL_HISTORY_SIZE) {
|
||||
state.toolCallHistory.shift();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current tool call statistics for a session (for debugging/monitoring).
|
||||
*/
|
||||
export function getToolCallStats(state: SessionState): {
|
||||
totalCalls: number;
|
||||
uniquePatterns: number;
|
||||
mostFrequent: { toolName: string; count: number } | null;
|
||||
} {
|
||||
const history = state.toolCallHistory ?? [];
|
||||
const patterns = new Map<string, { toolName: string; count: number }>();
|
||||
|
||||
for (const call of history) {
|
||||
const key = call.argsHash;
|
||||
const existing = patterns.get(key);
|
||||
if (existing) {
|
||||
existing.count += 1;
|
||||
} else {
|
||||
patterns.set(key, { toolName: call.toolName, count: 1 });
|
||||
}
|
||||
}
|
||||
|
||||
let mostFrequent: { toolName: string; count: number } | null = null;
|
||||
for (const pattern of patterns.values()) {
|
||||
if (!mostFrequent || pattern.count > mostFrequent.count) {
|
||||
mostFrequent = pattern;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
totalCalls: history.length,
|
||||
uniquePatterns: patterns.size,
|
||||
mostFrequent,
|
||||
};
|
||||
}
|
||||
|
|
@ -6,6 +6,14 @@ export type SessionState = {
|
|||
lastActivity: number;
|
||||
state: SessionStateValue;
|
||||
queueDepth: number;
|
||||
toolCallHistory?: ToolCallRecord[];
|
||||
commandPollCounts?: Map<string, { count: number; lastPollAt: number }>;
|
||||
};
|
||||
|
||||
export type ToolCallRecord = {
|
||||
toolName: string;
|
||||
argsHash: string;
|
||||
timestamp: number;
|
||||
};
|
||||
|
||||
export type SessionRef = {
|
||||
|
|
|
|||
|
|
@ -314,6 +314,14 @@ export function startDiagnosticHeartbeat() {
|
|||
queued: totalQueued,
|
||||
});
|
||||
|
||||
import("../agents/command-poll-backoff.js")
|
||||
.then(({ pruneStaleCommandPolls }) => {
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
pruneStaleCommandPolls(state);
|
||||
}
|
||||
})
|
||||
.catch(() => {});
|
||||
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
const ageMs = now - state.lastActivity;
|
||||
if (state.state === "processing" && ageMs > 120_000) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue