From 30e80fb94702ea1d39520cd7e5fd89dbe53b601d Mon Sep 17 00:00:00 2001 From: Jonathan Jing Date: Tue, 24 Mar 2026 21:52:42 -0700 Subject: [PATCH] fix: isolate channel startup failures (#54215) (thanks @JonathanJing) * fix(gateway): isolate channel startup failures to prevent cascade When one channel (e.g., WhatsApp) fails to start due to missing runtime modules, it should not block other channels (e.g., Discord) from starting. Changes: - Use Promise.allSettled to start channels concurrently - Catch individual channel startup errors without affecting others - Add startup summary logging for observability Before: Sequential await startChannel() - if one throws, subsequent channels never start. After: Concurrent startup with per-channel error handling - all channels attempt to start, failures are logged but don't cascade. Fixes: P0 - WhatsApp runtime exception no longer blocks Discord startup * fix(gateway): keep channel startup isolation sequential * fix: isolate channel startup failures (#54215) (thanks @JonathanJing) --------- Co-authored-by: Ayaan Zaidi --- CHANGELOG.md | 1 + src/gateway/server-channels.test.ts | 51 ++++++++++++++++++++++------- src/gateway/server-channels.ts | 8 ++++- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3792ebab18a..ce3341c2757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai - Feishu/docx block ordering: preserve the document tree order from `docx.document.convert` when inserting blocks, fixing heading/paragraph/list misordering in newly written Feishu documents. (#40524) Thanks @TaoXieSZ. - Agents/cron: suppress the default heartbeat system prompt for cron-triggered embedded runs even when they target non-cron session keys, so cron tasks stop reading `HEARTBEAT.md` and polluting unrelated threads. (#53152) Thanks @Protocol-zero-0. - TUI/chat: preserve pending user messages when a slow local run emits an empty final event, but still defer and flush the needed history reload after the newer active run finishes so silent/tool-only runs do not stay incomplete. (#53130) Thanks @joelnishanth. +- Gateway/channels: keep channel startup sequential while isolating per-channel boot failures, so one broken channel no longer blocks later channels from starting. (#54215) Thanks @JonathanJing. - Docs/IRC: fix five `json55` code-fence typos in the IRC channel examples so Mintlify applies JSON5 syntax highlighting correctly. (#50842) Thanks @Hollychou924. - Telegram/forum topics: recover `#General` topic `1` routing when Telegram omits forum metadata, including native commands, interactive callbacks, inbound message context, and fallback error replies. (#53699) thanks @huntharo - Discord/config types: add missing `autoArchiveDuration` to `DiscordGuildChannelConfig` so TypeScript config definitions match the existing schema and runtime support. (#43427) Thanks @davidguttman. diff --git a/src/gateway/server-channels.test.ts b/src/gateway/server-channels.test.ts index 97ad3d8d473..1f2e218afaa 100644 --- a/src/gateway/server-channels.test.ts +++ b/src/gateway/server-channels.test.ts @@ -41,12 +41,15 @@ type TestAccount = { }; function createTestPlugin(params?: { + id?: ChannelId; + order?: number; account?: TestAccount; startAccount?: NonNullable["gateway"]>["startAccount"]; includeDescribeAccount?: boolean; resolveAccount?: ChannelPlugin["config"]["resolveAccount"]; isConfigured?: ChannelPlugin["config"]["isConfigured"]; }): ChannelPlugin { + const id = params?.id ?? "discord"; const account = params?.account ?? { enabled: true, configured: true }; const includeDescribeAccount = params?.includeDescribeAccount !== false; const config: ChannelPlugin["config"] = { @@ -67,13 +70,14 @@ function createTestPlugin(params?: { gateway.startAccount = params.startAccount; } return { - id: "discord", + id, meta: { - id: "discord", - label: "Discord", - selectionLabel: "Discord", - docsPath: "/channels/discord", + id, + label: id, + selectionLabel: id, + docsPath: `/channels/${id}`, blurb: "test stub", + ...(params?.order === undefined ? {} : { order: params.order }), }, capabilities: { chatTypes: ["direct"] }, config, @@ -89,13 +93,15 @@ function createDeferred(): { promise: Promise; resolve: () => void } { return { promise, resolve: resolvePromise }; } -function installTestRegistry(plugin: ChannelPlugin) { +function installTestRegistry(...plugins: ChannelPlugin[]) { const registry = createEmptyPluginRegistry(); - registry.channels.push({ - pluginId: plugin.id, - source: "test", - plugin, - }); + for (const plugin of plugins) { + registry.channels.push({ + pluginId: plugin.id, + source: "test", + plugin, + }); + } setActivePluginRegistry(registry); } @@ -103,11 +109,17 @@ function createManager(options?: { channelRuntime?: PluginRuntime["channel"]; resolveChannelRuntime?: () => PluginRuntime["channel"]; loadConfig?: () => Record; + channelIds?: ChannelId[]; }) { const log = createSubsystemLogger("gateway/server-channels-test"); const channelLogs = { discord: log } as Record; const runtime = runtimeForLogger(log); const channelRuntimeEnvs = { discord: runtime } as unknown as Record; + const channelIds = options?.channelIds ?? ["discord"]; + for (const channelId of channelIds) { + channelLogs[channelId] ??= log.child(channelId); + channelRuntimeEnvs[channelId] ??= runtime; + } return createChannelManager({ loadConfig: () => options?.loadConfig?.() ?? {}, channelLogs, @@ -268,6 +280,23 @@ describe("server-channels auto restart", () => { expect(startAccount).toHaveBeenCalledTimes(1); }); + it("continues starting later channels after one startup failure", async () => { + const failingStart = vi.fn(async () => { + throw new Error("missing runtime"); + }); + const succeedingStart = vi.fn(async () => {}); + installTestRegistry( + createTestPlugin({ id: "discord", order: 1, startAccount: failingStart }), + createTestPlugin({ id: "slack", order: 2, startAccount: succeedingStart }), + ); + const manager = createManager({ channelIds: ["discord", "slack"] }); + + await expect(manager.startChannels()).resolves.toBeUndefined(); + + expect(failingStart).toHaveBeenCalledTimes(1); + expect(succeedingStart).toHaveBeenCalledTimes(1); + }); + it("reuses plugin account resolution for health monitor overrides", () => { installTestRegistry( createTestPlugin({ diff --git a/src/gateway/server-channels.ts b/src/gateway/server-channels.ts index 9496426cf5a..29328ade445 100644 --- a/src/gateway/server-channels.ts +++ b/src/gateway/server-channels.ts @@ -496,7 +496,13 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage const startChannels = async () => { for (const plugin of listChannelPlugins()) { - await startChannel(plugin.id); + try { + await startChannel(plugin.id); + } catch (err) { + channelLogs[plugin.id]?.error?.( + `[${plugin.id}] channel startup failed: ${formatErrorMessage(err)}`, + ); + } } };