fix: block ping-pong loops at critical threshold

2026-02-16 14:29:20 -05:00 · 2026-02-16 14:29:20 -05:00 · de3f446b92
parent 7485f3cdda
commit de3f446b92
4 changed files with 99 additions and 1 deletion
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai
 - Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky.
 - Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow.
 - Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
-- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only (including ping-pong alternation warnings), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
+- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning on and then blocking ping-pong alternation loops (warn at 10, block at 20 alternating calls), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
 - Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
 - Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT.
 - Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang.
--- a/src/agents/pi-tools.before-tool-call.test.ts
+++ b/src/agents/pi-tools.before-tool-call.test.ts
@@ -165,6 +165,66 @@ describe("before_tool_call loop detection behavior", () => {
    }
  });

+  it("blocks ping-pong loops at critical threshold and emits critical diagnostic events", async () => {
+    const emitted: DiagnosticToolLoopEvent[] = [];
+    const stop = onDiagnosticEvent((evt) => {
+      if (evt.type === "tool.loop") {
+        emitted.push(evt);
+      }
+    });
+    try {
+      const readExecute = vi.fn().mockResolvedValue({
+        content: [{ type: "text", text: "read ok" }],
+        details: { ok: true },
+      });
+      const listExecute = vi.fn().mockResolvedValue({
+        content: [{ type: "text", text: "list ok" }],
+        details: { ok: true },
+      });
+      const readTool = wrapToolWithBeforeToolCallHook(
+        { name: "read", execute: readExecute } as unknown as AnyAgentTool,
+        {
+          agentId: "main",
+          sessionKey: "main",
+        },
+      );
+      const listTool = wrapToolWithBeforeToolCallHook(
+        { name: "list", execute: listExecute } as unknown as AnyAgentTool,
+        {
+          agentId: "main",
+          sessionKey: "main",
+        },
+      );
+
+      for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
+        if (i % 2 === 0) {
+          await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined);
+        } else {
+          await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined);
+        }
+      }
+
+      await expect(
+        listTool.execute(
+          `list-${CRITICAL_THRESHOLD - 1}`,
+          { dir: "/workspace" },
+          undefined,
+          undefined,
+        ),
+      ).rejects.toThrow("CRITICAL");
+
+      const loopEvent = emitted.at(-1);
+      expect(loopEvent?.type).toBe("tool.loop");
+      expect(loopEvent?.level).toBe("critical");
+      expect(loopEvent?.action).toBe("block");
+      expect(loopEvent?.detector).toBe("ping_pong");
+      expect(loopEvent?.count).toBe(CRITICAL_THRESHOLD);
+      expect(loopEvent?.toolName).toBe("list");
+    } finally {
+      stop();
+    }
+  });
+
  it("emits structured critical diagnostic events when blocking loops", async () => {
    const emitted: DiagnosticToolLoopEvent[] = [];
    const stop = onDiagnosticEvent((evt) => {
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@@ -264,6 +264,30 @@ describe("tool-loop-detection", () => {
      }
    });

+    it("blocks ping-pong alternating patterns at critical threshold", () => {
+      const state = createState();
+      const readParams = { path: "/a.txt" };
+      const listParams = { dir: "/workspace" };
+
+      for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
+        if (i % 2 === 0) {
+          recordToolCall(state, "read", readParams, `read-${i}`);
+        } else {
+          recordToolCall(state, "list", listParams, `list-${i}`);
+        }
+      }
+
+      const loopResult = detectToolCallLoop(state, "list", listParams);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("critical");
+        expect(loopResult.detector).toBe("ping_pong");
+        expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
+        expect(loopResult.message).toContain("CRITICAL");
+        expect(loopResult.message).toContain("ping-pong loop");
+      }
+    });
+
    it("does not flag ping-pong when alternation is broken", () => {
      const state = createState();
      recordToolCall(state, "read", { path: "/a.txt" }, "a1");
--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@@ -292,6 +292,20 @@ export function detectToolCallLoop(
    };
  }

+  if (pingPong.count >= CRITICAL_THRESHOLD) {
+    log.error(
+      `Critical ping-pong loop detected: alternating calls count=${pingPong.count} currentTool=${toolName}`,
+    );
+    return {
+      stuck: true,
+      level: "critical",
+      detector: "ping_pong",
+      count: pingPong.count,
+      message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`,
+      pairedToolName: pingPong.pairedToolName,
+    };
+  }
+
  if (pingPong.count >= WARNING_THRESHOLD) {
    log.warn(
      `Ping-pong loop warning: alternating calls count=${pingPong.count} currentTool=${toolName}`,