fix: isolate channel startup failures (#54215) (thanks @JonathanJing)

* fix(gateway): isolate channel startup failures to prevent cascade

When one channel (e.g., WhatsApp) fails to start due to missing runtime
modules, it should not block other channels (e.g., Discord) from starting.

Changes:
- Use Promise.allSettled to start channels concurrently
- Catch individual channel startup errors without affecting others
- Add startup summary logging for observability

Before: Sequential await startChannel() - if one throws, subsequent
channels never start.

After: Concurrent startup with per-channel error handling - all channels
attempt to start, failures are logged but don't cascade.

Fixes: P0 - WhatsApp runtime exception no longer blocks Discord startup

* fix(gateway): keep channel startup isolation sequential

* fix: isolate channel startup failures (#54215) (thanks @JonathanJing)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
Jonathan Jing 2026-03-24 21:52:42 -07:00 committed by GitHub
parent 8a463e7aa9
commit 30e80fb947
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 48 additions and 12 deletions

View File

@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai
- Feishu/docx block ordering: preserve the document tree order from `docx.document.convert` when inserting blocks, fixing heading/paragraph/list misordering in newly written Feishu documents. (#40524) Thanks @TaoXieSZ.
- Agents/cron: suppress the default heartbeat system prompt for cron-triggered embedded runs even when they target non-cron session keys, so cron tasks stop reading `HEARTBEAT.md` and polluting unrelated threads. (#53152) Thanks @Protocol-zero-0.
- TUI/chat: preserve pending user messages when a slow local run emits an empty final event, but still defer and flush the needed history reload after the newer active run finishes so silent/tool-only runs do not stay incomplete. (#53130) Thanks @joelnishanth.
- Gateway/channels: keep channel startup sequential while isolating per-channel boot failures, so one broken channel no longer blocks later channels from starting. (#54215) Thanks @JonathanJing.
- Docs/IRC: fix five `json55` code-fence typos in the IRC channel examples so Mintlify applies JSON5 syntax highlighting correctly. (#50842) Thanks @Hollychou924.
- Telegram/forum topics: recover `#General` topic `1` routing when Telegram omits forum metadata, including native commands, interactive callbacks, inbound message context, and fallback error replies. (#53699) Thanks @huntharo.
- Discord/config types: add missing `autoArchiveDuration` to `DiscordGuildChannelConfig` so TypeScript config definitions match the existing schema and runtime support. (#43427) Thanks @davidguttman.

View File

@ -41,12 +41,15 @@ type TestAccount = {
};
function createTestPlugin(params?: {
id?: ChannelId;
order?: number;
account?: TestAccount;
startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"];
includeDescribeAccount?: boolean;
resolveAccount?: ChannelPlugin<TestAccount>["config"]["resolveAccount"];
isConfigured?: ChannelPlugin<TestAccount>["config"]["isConfigured"];
}): ChannelPlugin<TestAccount> {
const id = params?.id ?? "discord";
const account = params?.account ?? { enabled: true, configured: true };
const includeDescribeAccount = params?.includeDescribeAccount !== false;
const config: ChannelPlugin<TestAccount>["config"] = {
@ -67,13 +70,14 @@ function createTestPlugin(params?: {
gateway.startAccount = params.startAccount;
}
return {
id: "discord",
id,
meta: {
id: "discord",
label: "Discord",
selectionLabel: "Discord",
docsPath: "/channels/discord",
id,
label: id,
selectionLabel: id,
docsPath: `/channels/${id}`,
blurb: "test stub",
...(params?.order === undefined ? {} : { order: params.order }),
},
capabilities: { chatTypes: ["direct"] },
config,
@ -89,13 +93,15 @@ function createDeferred(): { promise: Promise<void>; resolve: () => void } {
return { promise, resolve: resolvePromise };
}
/**
 * Builds a fresh, empty plugin registry, registers every given channel plugin
 * in it (in argument order, tagged with source "test"), and installs it as the
 * active plugin registry.
 *
 * Variadic so a test can register several channels at once; passing a single
 * plugin keeps working exactly as before.
 *
 * @param plugins Channel plugins to register; may be empty, which installs an
 *   empty registry.
 */
function installTestRegistry(...plugins: ChannelPlugin<TestAccount>[]): void {
  const registry = createEmptyPluginRegistry();
  for (const plugin of plugins) {
    registry.channels.push({
      pluginId: plugin.id,
      source: "test",
      plugin,
    });
  }
  setActivePluginRegistry(registry);
}
@ -103,11 +109,17 @@ function createManager(options?: {
channelRuntime?: PluginRuntime["channel"];
resolveChannelRuntime?: () => PluginRuntime["channel"];
loadConfig?: () => Record<string, unknown>;
channelIds?: ChannelId[];
}) {
const log = createSubsystemLogger("gateway/server-channels-test");
const channelLogs = { discord: log } as Record<ChannelId, SubsystemLogger>;
const runtime = runtimeForLogger(log);
const channelRuntimeEnvs = { discord: runtime } as unknown as Record<ChannelId, RuntimeEnv>;
const channelIds = options?.channelIds ?? ["discord"];
for (const channelId of channelIds) {
channelLogs[channelId] ??= log.child(channelId);
channelRuntimeEnvs[channelId] ??= runtime;
}
return createChannelManager({
loadConfig: () => options?.loadConfig?.() ?? {},
channelLogs,
@ -268,6 +280,23 @@ describe("server-channels auto restart", () => {
expect(startAccount).toHaveBeenCalledTimes(1);
});
// Regression test: a channel whose startAccount rejects must not prevent the
// remaining registered channels from being started, and startChannels() itself
// must still resolve.
it("continues starting later channels after one startup failure", async () => {
  // First channel: startAccount always rejects.
  const brokenStart = vi.fn(async () => {
    throw new Error("missing runtime");
  });
  // Second channel: startAccount resolves immediately.
  const healthyStart = vi.fn(async () => {});

  // The failing plugin gets the lower `order` value; presumably that places it
  // first in the startup sequence — confirm against listChannelPlugins().
  const plugins = [
    createTestPlugin({ id: "discord", order: 1, startAccount: brokenStart }),
    createTestPlugin({ id: "slack", order: 2, startAccount: healthyStart }),
  ];
  installTestRegistry(...plugins);

  const manager = createManager({ channelIds: ["discord", "slack"] });

  await expect(manager.startChannels()).resolves.toBeUndefined();

  // Both channels were attempted exactly once.
  expect(brokenStart).toHaveBeenCalledTimes(1);
  expect(healthyStart).toHaveBeenCalledTimes(1);
});
it("reuses plugin account resolution for health monitor overrides", () => {
installTestRegistry(
createTestPlugin({

View File

@ -496,7 +496,13 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
/**
 * Starts every channel plugin returned by listChannelPlugins(), one at a time
 * and in the order that list yields them.
 *
 * Each startChannel() call is awaited inside its own try/catch, so an error
 * thrown while starting one channel is caught here and the loop moves on to
 * the next plugin instead of aborting the whole startup.
 */
const startChannels = async (): Promise<void> => {
  for (const plugin of listChannelPlugins()) {
    try {
      // The only startChannel call for this plugin: it must live inside the
      // try so a failure is contained and the channel is started just once.
      await startChannel(plugin.id);
    } catch (err) {
      // Report through the channel's own logger when one is configured; the
      // optional chaining makes this a no-op if the logger or its `error`
      // method is absent.
      channelLogs[plugin.id]?.error?.(
        `[${plugin.id}] channel startup failed: ${formatErrorMessage(err)}`,
      );
    }
  }
};