From bb2c010e078b8da21b3c6ef72bcf424117633aa6 Mon Sep 17 00:00:00 2001 From: Douglas Lardo Date: Sun, 29 Mar 2026 22:35:15 -0700 Subject: [PATCH] fix(delivery): treat Matrix "User not in room" as permanent delivery error (#57426) Merged via squash. Prepared head SHA: 6a777197cb69ccd2230e44a6bf743aa852e5bb7e Co-authored-by: dlardo <5000601+dlardo@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + src/infra/outbound/delivery-queue-recovery.ts | 1 + .../outbound/delivery-queue.policy.test.ts | 1 + .../outbound/delivery-queue.recovery.test.ts | 22 +++++++++++++++++++ 4 files changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b69434bd04f..9fe268fc514 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -93,6 +93,7 @@ Docs: https://docs.openclaw.ai - Config/update: stop `openclaw doctor` write-backs from persisting plugin-injected channel defaults, so `openclaw update` no longer seeds config keys that later break service refresh validation. (#56834) Thanks @openperf. - Agents/Anthropic failover: treat Anthropic `api_error` payloads with `An unexpected error occurred while processing the response` as transient so retry/fallback can engage instead of surfacing a terminal failure. (#57441) Thanks @zijiess and @vincentkoc. - Agents/compaction: keep late compaction-retry rejections handled after the aggregate timeout path wins without swallowing real pre-timeout wait failures, so timed-out retries no longer surface an unhandled rejection on later unsubscribe. (#57451) Thanks @mpz4life and @vincentkoc. +- Matrix/delivery recovery: treat Synapse `User not in room` replay failures as permanent during startup recovery so poisoned queued messages move to `failed/` instead of crash-looping Matrix after restart. (#57426) Thanks @dlardo. 
## 2026.3.28 diff --git a/src/infra/outbound/delivery-queue-recovery.ts b/src/infra/outbound/delivery-queue-recovery.ts index 6649cd8684d..26057f03e58 100644 --- a/src/infra/outbound/delivery-queue-recovery.ts +++ b/src/infra/outbound/delivery-queue-recovery.ts @@ -50,6 +50,7 @@ const PERMANENT_ERROR_PATTERNS: readonly RegExp[] = [ /recipient is not a valid/i, /outbound not configured for channel/i, /ambiguous discord recipient/i, + /User .* not in room/i, ]; function createEmptyRecoverySummary(): RecoverySummary { diff --git a/src/infra/outbound/delivery-queue.policy.test.ts b/src/infra/outbound/delivery-queue.policy.test.ts index 5924b20d611..a2da4aa7470 100644 --- a/src/infra/outbound/delivery-queue.policy.test.ts +++ b/src/infra/outbound/delivery-queue.policy.test.ts @@ -16,6 +16,7 @@ describe("delivery-queue policy", () => { "Forbidden: bot was kicked from the group chat", "chat_id is empty", "Outbound not configured for channel: demo-channel", + "MatrixError: [403] User @bot:matrix.example.com not in room !mixedCase:matrix.example.com", ])("returns true for permanent error: %s", (msg) => { expect(isPermanentDeliveryError(msg)).toBe(true); }); diff --git a/src/infra/outbound/delivery-queue.recovery.test.ts b/src/infra/outbound/delivery-queue.recovery.test.ts index f81ace05328..a8dbc8b7617 100644 --- a/src/infra/outbound/delivery-queue.recovery.test.ts +++ b/src/infra/outbound/delivery-queue.recovery.test.ts @@ -116,6 +116,28 @@ describe("delivery-queue recovery", () => { expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("permanent error")); }); + it("treats Matrix 'User not in room' as a permanent error", async () => { + const id = await enqueueDelivery( + { channel: "matrix", to: "!lowercased:matrix.example.com", payloads: [{ text: "hi" }] }, + tmpDir(), + ); + const deliver = vi + .fn() + .mockRejectedValue( + new Error( + "MatrixError: [403] User @bot:matrix.example.com not in room !lowercased:matrix.example.com", + ), + ); + const log = 
createRecoveryLog(); + const { result } = await runRecovery({ deliver, log }); + + expect(result.failed).toBe(1); + expect(result.recovered).toBe(0); + expect(await loadPendingDeliveries(tmpDir())).toHaveLength(0); + expect(fs.existsSync(path.join(tmpDir(), "delivery-queue", "failed", `${id}.json`))).toBe(true); + expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("permanent error")); + }); + it("passes skipQueue: true to prevent re-enqueueing during recovery", async () => { await enqueueDelivery( { channel: "demo-channel-a", to: "+1", payloads: [{ text: "a" }] },