feat(gateway): make health monitor stale threshold and max restarts configurable (openclaw#42107)

Verified:
- pnpm exec vitest --run src/config/config-misc.test.ts -t "gateway.channelHealthCheckMinutes"
- pnpm exec vitest --run src/gateway/server-channels.test.ts -t "health monitor"
- pnpm exec vitest --run src/gateway/channel-health-monitor.test.ts src/gateway/server/readiness.test.ts
- pnpm exec vitest --run extensions/feishu/src/outbound.test.ts
- pnpm exec tsc --noEmit

Co-authored-by: rstar327 <114364448+rstar327@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
rstar327 2026-03-14 22:21:56 -04:00 committed by GitHub
parent f00db91590
commit ba6064cc22
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 418 additions and 20 deletions

View File

@ -12314,14 +12314,14 @@
"filename": "src/config/schema.help.ts",
"hashed_secret": "9f4cda226d3868676ac7f86f59e4190eb94bd208",
"is_verified": false,
"line_number": 653
"line_number": 657
},
{
"type": "Secret Keyword",
"filename": "src/config/schema.help.ts",
"hashed_secret": "01822c8bbf6a8b136944b14182cb885100ec2eae",
"is_verified": false,
"line_number": 686
"line_number": 690
}
],
"src/config/schema.irc.ts": [
@ -12360,14 +12360,14 @@
"filename": "src/config/schema.labels.ts",
"hashed_secret": "e73c9fcad85cd4eecc74181ec4bdb31064d68439",
"is_verified": false,
"line_number": 217
"line_number": 219
},
{
"type": "Secret Keyword",
"filename": "src/config/schema.labels.ts",
"hashed_secret": "2eda7cd978f39eebec3bf03e4410a40e14167fff",
"is_verified": false,
"line_number": 326
"line_number": 328
}
],
"src/config/slack-http-config.test.ts": [

View File

@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
- Refactor/channels: remove the legacy channel shim directories and point channel-specific imports directly at the extension-owned implementations. (#45967) thanks @scoootscooob.
- Feishu/streaming: add `onReasoningStream` and `onReasoningEnd` support to streaming cards, so `/reasoning stream` renders thinking tokens as markdown blockquotes in the same card — matching the Telegram channel's reasoning lane behavior.
- Feishu/cards: add identity-aware structured card headers and note footers for Feishu replies and direct sends, while keeping that presentation wired through the shared outbound identity path. (#29938) Thanks @nszhsl.
- Gateway/health monitor: add configurable stale-event thresholds and restart limits, plus per-channel and per-account `healthMonitor.enabled` overrides, while keeping the existing global disable path on `gateway.channelHealthCheckMinutes=0`. (#42107) Thanks @rstar327.
### Fixes

View File

@ -57,6 +57,10 @@ export type BlueBubblesAccountConfig = {
allowPrivateNetwork?: boolean;
/** Per-group configuration keyed by chat GUID or identifier. */
groups?: Record<string, BlueBubblesGroupConfig>;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: {
enabled?: boolean;
};
};
export type BlueBubblesActionConfig = {

View File

@ -212,6 +212,49 @@ describe("gateway.channelHealthCheckMinutes", () => {
expect(res.issues[0]?.path).toBe("gateway.channelHealthCheckMinutes");
}
});
it("rejects stale thresholds shorter than the health check interval", () => {
  // Threshold (4) is below the explicitly configured interval (5).
  const gateway = {
    channelHealthCheckMinutes: 5,
    channelStaleEventThresholdMinutes: 4,
  };
  const outcome = validateConfigObject({ gateway });
  expect(outcome.ok).toBe(false);
  if (outcome.ok) {
    return;
  }
  // The first reported issue must point at the threshold field.
  expect(outcome.issues[0]?.path).toBe("gateway.channelStaleEventThresholdMinutes");
});
it("accepts stale thresholds that match or exceed the health check interval", () => {
  // Same interval each time: first an equal threshold, then a larger one.
  for (const channelStaleEventThresholdMinutes of [5, 6]) {
    const outcome = validateConfigObject({
      gateway: {
        channelHealthCheckMinutes: 5,
        channelStaleEventThresholdMinutes,
      },
    });
    expect(outcome.ok).toBe(true);
  }
});
it("rejects stale thresholds shorter than the default health check interval", () => {
  // No interval is configured here; only the threshold is supplied.
  const gateway = { channelStaleEventThresholdMinutes: 4 };
  const outcome = validateConfigObject({ gateway });
  expect(outcome.ok).toBe(false);
  if (outcome.ok) {
    return;
  }
  // The first reported issue must point at the threshold field.
  expect(outcome.issues[0]?.path).toBe("gateway.channelStaleEventThresholdMinutes");
});
});
describe("cron webhook schema", () => {

View File

@ -102,6 +102,10 @@ export const FIELD_HELP: Record<string, string> = {
"Explicit gateway-level tool denylist to block risky tools even if lower-level policies allow them. Use deny rules for emergency response and defense-in-depth hardening.",
"gateway.channelHealthCheckMinutes":
"Interval in minutes for automatic channel health probing and status updates. Use lower intervals for faster detection, or higher intervals to reduce periodic probe noise.",
"gateway.channelStaleEventThresholdMinutes":
"How many minutes a connected channel can go without receiving any event before the health monitor treats it as a stale socket and triggers a restart. Default: 30.",
"gateway.channelMaxRestartsPerHour":
"Maximum number of health-monitor-initiated channel restarts allowed within a rolling one-hour window. Once hit, further restarts are skipped until the window expires. Default: 10.",
"gateway.tailscale":
"Tailscale integration settings for Serve/Funnel exposure and lifecycle handling on gateway start/exit. Keep off unless your deployment intentionally relies on Tailscale ingress.",
"gateway.tailscale.mode":

View File

@ -84,6 +84,8 @@ export const FIELD_LABELS: Record<string, string> = {
"gateway.tools.allow": "Gateway Tool Allowlist",
"gateway.tools.deny": "Gateway Tool Denylist",
"gateway.channelHealthCheckMinutes": "Gateway Channel Health Check Interval (min)",
"gateway.channelStaleEventThresholdMinutes": "Gateway Channel Stale Event Threshold (min)",
"gateway.channelMaxRestartsPerHour": "Gateway Channel Max Restarts Per Hour",
"gateway.tailscale": "Gateway Tailscale",
"gateway.tailscale.mode": "Gateway Tailscale Mode",
"gateway.tailscale.resetOnExit": "Gateway Tailscale Reset on Exit",

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy,
MarkdownConfig,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js";
export type CommonChannelMessagingConfig = {
@ -43,6 +46,8 @@ export type CommonChannelMessagingConfig = {
blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
/** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */
responsePrefix?: string;
/** Max outbound media size in MB. */

View File

@ -18,6 +18,14 @@ export type ChannelHeartbeatVisibilityConfig = {
useIndicator?: boolean;
};
/** Per-channel / per-account overrides for the gateway channel health monitor. */
export type ChannelHealthMonitorConfig = {
/**
* Enable channel-health-monitor restarts for this channel or account.
* When omitted on an account, the channel-level value applies; when omitted
* on both, the account stays monitored. Setting
* `gateway.channelHealthCheckMinutes` to 0 still disables the monitor globally.
*/
enabled?: boolean;
};
export type ChannelDefaultsConfig = {
groupPolicy?: GroupPolicy;
/** Default heartbeat visibility for all channels. */
@ -39,6 +47,7 @@ export type ExtensionChannelConfig = {
defaultAccount?: string;
dmPolicy?: string;
groupPolicy?: GroupPolicy;
healthMonitor?: ChannelHealthMonitorConfig;
accounts?: Record<string, unknown>;
[key: string]: unknown;
};

View File

@ -8,7 +8,10 @@ import type {
OutboundRetryConfig,
ReplyToMode,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { SecretInput } from "./types.secrets.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -297,6 +300,8 @@ export type DiscordAccountConfig = {
guilds?: Record<string, DiscordGuildEntry>;
/** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Exec approval forwarding configuration. */
execApprovals?: DiscordExecApprovalConfig;
/** Agent-controlled interactive components (buttons, select menus). */

View File

@ -431,4 +431,16 @@ export type GatewayConfig = {
* Set to 0 to disable. Default: 5.
*/
channelHealthCheckMinutes?: number;
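/**
* Stale event threshold in minutes for the channel health monitor.
* A connected channel that receives no events for this duration is treated
* as a stale socket and restarted. Must be an integer >= 1 and, unless the
* health check is disabled (`channelHealthCheckMinutes` = 0), at least as
* large as `channelHealthCheckMinutes` (5 when unset). Default: 30.
*/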
channelStaleEventThresholdMinutes?: number;
/**
* Maximum number of health-monitor-initiated channel restarts per hour.
* Once this limit is reached, the monitor skips further restarts until
* the rolling window expires. Default: 10.
*/
channelMaxRestartsPerHour?: number;
};

View File

@ -4,6 +4,7 @@ import type {
GroupPolicy,
ReplyToMode,
} from "./types.base.js";
import type { ChannelHealthMonitorConfig } from "./types.channels.js";
import type { DmConfig } from "./types.messages.js";
import type { SecretRef } from "./types.secrets.js";
@ -99,6 +100,8 @@ export type GoogleChatAccountConfig = {
/** Per-action tool gating (default: true for all). */
actions?: GoogleChatActionConfig;
dm?: GoogleChatDmConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/**
* Typing indicator mode (default: "message").
* - "none": No indicator

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy,
MarkdownConfig,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -77,6 +80,8 @@ export type IMessageAccountConfig = {
>;
/** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */
responsePrefix?: string;
};

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy,
MarkdownConfig,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js";
import type { SecretInput } from "./types.secrets.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -114,6 +117,8 @@ export type MSTeamsConfig = {
sharePointSiteId?: string;
/** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */
responsePrefix?: string;
};

View File

@ -5,7 +5,10 @@ import type {
MarkdownConfig,
ReplyToMode,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -185,6 +188,8 @@ export type SlackAccountConfig = {
channels?: Record<string, SlackChannelConfig>;
/** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */
responsePrefix?: string;
/**

View File

@ -8,7 +8,10 @@ import type {
ReplyToMode,
SessionThreadBindingsConfig,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -179,6 +182,8 @@ export type TelegramAccountConfig = {
reactionLevel?: "off" | "ack" | "minimal" | "extensive";
/** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Controls whether link previews are shown in outbound messages. Default: true. */
linkPreview?: boolean;
/**

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy,
MarkdownConfig,
} from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -78,6 +81,8 @@ type WhatsAppSharedConfig = {
debounceMs?: number;
/** Heartbeat visibility settings. */
heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
};
type WhatsAppConfigCore = {

View File

@ -8,3 +8,10 @@ export const ChannelHeartbeatVisibilitySchema = z
})
.strict()
.optional();
/**
 * Schema for the `healthMonitor` override block on channel and account configs.
 * The object is strict and carries a single optional boolean, `enabled`;
 * the block as a whole is optional.
 */
export const ChannelHealthMonitorSchema = z
.object({
enabled: z.boolean().optional(),
})
.strict()
.optional();

View File

@ -13,7 +13,10 @@ import {
resolveTelegramCustomCommands,
} from "./telegram-custom-commands.js";
import { ToolPolicySchema } from "./zod-schema.agent-runtime.js";
import { ChannelHeartbeatVisibilitySchema } from "./zod-schema.channels.js";
import {
ChannelHealthMonitorSchema,
ChannelHeartbeatVisibilitySchema,
} from "./zod-schema.channels.js";
import {
BlockStreamingChunkSchema,
BlockStreamingCoalesceSchema,
@ -271,6 +274,7 @@ export const TelegramAccountSchemaBase = z
reactionNotifications: z.enum(["off", "own", "all"]).optional(),
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
linkPreview: z.boolean().optional(),
responsePrefix: z.string().optional(),
ackReaction: z.string().optional(),
@ -511,6 +515,7 @@ export const DiscordAccountSchema = z
dm: DiscordDmSchema.optional(),
guilds: z.record(z.string(), DiscordGuildSchema.optional()).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
execApprovals: z
.object({
enabled: z.boolean().optional(),
@ -782,6 +787,7 @@ export const GoogleChatAccountSchema = z
.strict()
.optional(),
dm: GoogleChatDmSchema.optional(),
healthMonitor: ChannelHealthMonitorSchema,
typingIndicator: z.enum(["none", "message", "reaction"]).optional(),
responsePrefix: z.string().optional(),
})
@ -898,6 +904,7 @@ export const SlackAccountSchema = z
dm: SlackDmSchema.optional(),
channels: z.record(z.string(), SlackChannelSchema.optional()).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(),
ackReaction: z.string().optional(),
typingReaction: z.string().optional(),
@ -1032,6 +1039,7 @@ export const SignalAccountSchemaBase = z
.optional(),
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(),
})
.strict();
@ -1145,6 +1153,7 @@ export const IrcAccountSchemaBase = z
blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
mediaMaxMb: z.number().positive().optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(),
})
.strict();
@ -1272,6 +1281,7 @@ export const IMessageAccountSchemaBase = z
)
.optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(),
})
.strict();
@ -1383,6 +1393,7 @@ export const BlueBubblesAccountSchemaBase = z
blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
groups: z.record(z.string(), BlueBubblesGroupConfigSchema.optional()).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(),
})
.strict();
@ -1499,6 +1510,7 @@ export const MSTeamsConfigSchema = z
/** SharePoint site ID for file uploads in group chats/channels (e.g., "contoso.sharepoint.com,guid1,guid2") */
sharePointSiteId: z.string().optional(),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(),
})
.strict()

View File

@ -1,6 +1,9 @@
import { z } from "zod";
import { ToolPolicySchema } from "./zod-schema.agent-runtime.js";
import { ChannelHeartbeatVisibilitySchema } from "./zod-schema.channels.js";
import {
ChannelHealthMonitorSchema,
ChannelHeartbeatVisibilitySchema,
} from "./zod-schema.channels.js";
import {
BlockStreamingCoalesceSchema,
DmConfigSchema,
@ -56,6 +59,7 @@ const WhatsAppSharedSchema = z.object({
ackReaction: WhatsAppAckReactionSchema,
debounceMs: z.number().int().nonnegative().optional().default(0),
heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
});
function enforceOpenDmPolicyAllowFromStar(params: {

View File

@ -696,6 +696,8 @@ export const OpenClawSchema = z
.strict()
.optional(),
channelHealthCheckMinutes: z.number().int().min(0).optional(),
channelStaleEventThresholdMinutes: z.number().int().min(1).optional(),
channelMaxRestartsPerHour: z.number().int().min(1).optional(),
tailscale: z
.object({
mode: z.union([z.literal("off"), z.literal("serve"), z.literal("funnel")]).optional(),
@ -833,6 +835,21 @@ export const OpenClawSchema = z
.optional(),
})
.strict()
// Cross-field check: the stale-event threshold is compared against the health check interval.
.superRefine((gateway, ctx) => {
// An unset interval is treated as 5 minutes for this comparison.
const effectiveHealthCheckMinutes = gateway.channelHealthCheckMinutes ?? 5;
if (
// Only an explicitly configured threshold is checked.
gateway.channelStaleEventThresholdMinutes != null &&
// An interval of 0 skips the comparison entirely.
effectiveHealthCheckMinutes !== 0 &&
gateway.channelStaleEventThresholdMinutes < effectiveHealthCheckMinutes
) {
// The issue is attached to the threshold field (path is relative to this gateway object).
ctx.addIssue({
code: z.ZodIssueCode.custom,
path: ["channelStaleEventThresholdMinutes"],
message:
"channelStaleEventThresholdMinutes should be >= channelHealthCheckMinutes to avoid delayed stale detection",
});
}
})
.optional(),
memory: MemorySchema,
skills: z

View File

@ -11,6 +11,7 @@ function createMockChannelManager(overrides?: Partial<ChannelManager>): ChannelM
startChannel: vi.fn(async () => {}),
stopChannel: vi.fn(async () => {}),
markChannelLoggedOut: vi.fn(),
isHealthMonitorEnabled: vi.fn(() => true),
isManuallyStopped: vi.fn(() => false),
resetRestartAttempts: vi.fn(),
...overrides,
@ -226,6 +227,53 @@ describe("channel-health-monitor", () => {
await expectNoStart(manager);
});
it("skips channels with health monitor disabled globally for that account", async () => {
  const snapshot = {
    discord: {
      default: { running: false, enabled: true, configured: true },
    },
  };
  // The manager reports the health monitor as disabled for every channel/account pair.
  const overrides = { isHealthMonitorEnabled: vi.fn(() => false) };
  await expectNoStart(createSnapshotManager(snapshot, overrides));
});
it("still restarts enabled accounts when another account on the same channel is disabled", async () => {
  const startedAt = Date.now() - 300_000;
  // Both accounts report the same status: running but not connected.
  const disconnectedAccount = () => ({
    running: true,
    connected: false,
    enabled: true,
    configured: true,
    lastStartAt: startedAt,
  });
  // Health monitoring is reported as disabled only for discord/quiet.
  const isHealthMonitorEnabled = vi.fn(
    (channelId: ChannelId, accountId: string) => channelId !== "discord" || accountId !== "quiet",
  );
  const manager = createSnapshotManager(
    {
      discord: {
        default: disconnectedAccount(),
        quiet: disconnectedAccount(),
      },
    },
    { isHealthMonitorEnabled },
  );

  const monitor = await startAndRunCheck(manager);

  // The monitored account is cycled ...
  expect(manager.stopChannel).toHaveBeenCalledWith("discord", "default");
  expect(manager.startChannel).toHaveBeenCalledWith("discord", "default");
  // ... while the opted-out account is left untouched.
  expect(manager.stopChannel).not.toHaveBeenCalledWith("discord", "quiet");
  expect(manager.startChannel).not.toHaveBeenCalledWith("discord", "quiet");
  monitor.stop();
});
it("restarts a stuck channel (running but not connected)", async () => {
const now = Date.now();
const manager = createSnapshotManager({

View File

@ -118,6 +118,9 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
if (!status) {
continue;
}
if (!channelManager.isHealthMonitorEnabled(channelId as ChannelId, accountId)) {
continue;
}
if (channelManager.isManuallyStopped(channelId as ChannelId, accountId)) {
continue;
}

View File

@ -41,6 +41,16 @@ const BASE_RELOAD_RULES: ReloadRule[] = [
kind: "hot",
actions: ["restart-health-monitor"],
},
{
prefix: "gateway.channelStaleEventThresholdMinutes",
kind: "hot",
actions: ["restart-health-monitor"],
},
{
prefix: "gateway.channelMaxRestartsPerHour",
kind: "hot",
actions: ["restart-health-monitor"],
},
// Stuck-session warning threshold is read by the diagnostics heartbeat loop.
{ prefix: "diagnostics.stuckSessionWarnMs", kind: "none" },
{ prefix: "hooks.gmail", kind: "hot", actions: ["restart-gmail-watcher"] },

View File

@ -44,12 +44,13 @@ function createTestPlugin(params?: {
account?: TestAccount;
startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"];
includeDescribeAccount?: boolean;
resolveAccount?: ChannelPlugin<TestAccount>["config"]["resolveAccount"];
}): ChannelPlugin<TestAccount> {
const account = params?.account ?? { enabled: true, configured: true };
const includeDescribeAccount = params?.includeDescribeAccount !== false;
const config: ChannelPlugin<TestAccount>["config"] = {
listAccountIds: () => [DEFAULT_ACCOUNT_ID],
resolveAccount: () => account,
resolveAccount: params?.resolveAccount ?? (() => account),
isEnabled: (resolved) => resolved.enabled !== false,
};
if (includeDescribeAccount) {
@ -88,13 +89,16 @@ function installTestRegistry(plugin: ChannelPlugin<TestAccount>) {
setActivePluginRegistry(registry);
}
function createManager(options?: { channelRuntime?: PluginRuntime["channel"] }) {
function createManager(options?: {
channelRuntime?: PluginRuntime["channel"];
loadConfig?: () => Record<string, unknown>;
}) {
const log = createSubsystemLogger("gateway/server-channels-test");
const channelLogs = { discord: log } as Record<ChannelId, SubsystemLogger>;
const runtime = runtimeForLogger(log);
const channelRuntimeEnvs = { discord: runtime } as Record<ChannelId, RuntimeEnv>;
return createChannelManager({
loadConfig: () => ({}),
loadConfig: () => options?.loadConfig?.() ?? {},
channelLogs,
channelRuntimeEnvs,
...(options?.channelRuntime ? { channelRuntime: options.channelRuntime } : {}),
@ -180,4 +184,104 @@ describe("server-channels auto restart", () => {
await manager.startChannels();
expect(startAccount).toHaveBeenCalledTimes(1);
});
it("reuses plugin account resolution for health monitor overrides", () => {
  type OverridableAccount = TestAccount & { healthMonitor?: { enabled?: boolean } };
  type DiscordAccountsConfig = {
    channels?: {
      discord?: {
        accounts?: Record<string, OverridableAccount>;
      };
    };
  };
  // Case-insensitive, space-to-dash key normalisation used by the fake plugin below.
  const slugify = (value: string) => value.toLowerCase().replaceAll(" ", "-");

  installTestRegistry(
    createTestPlugin({
      resolveAccount: (cfg, accountId) => {
        const accounts = (cfg as DiscordAccountsConfig).channels?.discord?.accounts;
        if (!accounts) {
          return { enabled: true, configured: true };
        }
        const requestedId = accountId ?? DEFAULT_ACCOUNT_ID;
        const exact = accounts[requestedId];
        if (exact) {
          return exact;
        }
        // No exact key: fall back to a normalised comparison of every configured key.
        const wanted = slugify(requestedId);
        const aliasKey = Object.keys(accounts).find((key) => slugify(key) === wanted);
        if (!aliasKey) {
          return {};
        }
        return accounts[aliasKey] ?? { enabled: true, configured: true };
      },
    }),
  );

  const loadConfig = () => ({
    channels: {
      discord: {
        accounts: {
          "Router D": {
            enabled: true,
            configured: true,
            healthMonitor: { enabled: false },
          },
        },
      },
    },
  });
  const manager = createManager({ loadConfig });

  // "router-d" only matches "Router D" through the plugin's own normalisation.
  expect(manager.isHealthMonitorEnabled("discord", "router-d")).toBe(false);
});
it("falls back to channel-level health monitor overrides when account resolution omits them", () => {
  // The resolved account carries no healthMonitor information at all.
  const resolveAccount = () => ({
    enabled: true,
    configured: true,
  });
  installTestRegistry(createTestPlugin({ resolveAccount }));

  // Only the channel-level block disables the monitor.
  const loadConfig = () => ({
    channels: {
      discord: {
        healthMonitor: { enabled: false },
      },
    },
  });
  const manager = createManager({ loadConfig });

  expect(manager.isHealthMonitorEnabled("discord", DEFAULT_ACCOUNT_ID)).toBe(false);
});
it("uses wrapped account config health monitor overrides", () => {
  // The resolved account nests its override under `config`.
  const resolveAccount = () => ({
    enabled: true,
    configured: true,
    config: {
      healthMonitor: { enabled: false },
    },
  });
  installTestRegistry(createTestPlugin({ resolveAccount }));

  // The channel-level block says "enabled"; the nested account override is expected to win.
  const loadConfig = () => ({
    channels: {
      discord: {
        healthMonitor: { enabled: true },
      },
    },
  });
  const manager = createManager({ loadConfig });

  expect(manager.isHealthMonitorEnabled("discord", DEFAULT_ACCOUNT_ID)).toBe(false);
});
});

View File

@ -105,6 +105,7 @@ export type ChannelManager = {
markChannelLoggedOut: (channelId: ChannelId, cleared: boolean, accountId?: string) => void;
isManuallyStopped: (channelId: ChannelId, accountId: string) => boolean;
resetRestartAttempts: (channelId: ChannelId, accountId: string) => void;
isHealthMonitorEnabled: (channelId: ChannelId, accountId: string) => boolean;
};
// Channel docking: lifecycle hooks (`plugin.gateway`) flow through this manager.
@ -119,6 +120,48 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
const restartKey = (channelId: ChannelId, accountId: string) => `${channelId}:${accountId}`;
/**
 * Decide whether the channel health monitor may act on one channel account.
 *
 * The first explicit boolean found wins, checked in this order:
 *   1. `healthMonitor.enabled` on the account returned by the plugin's `resolveAccount`,
 *   2. `config.healthMonitor.enabled` nested inside that resolved account,
 *   3. `healthMonitor.enabled` on the channel's own config entry.
 * When none of them is set, the account is treated as monitored.
 *
 * @param channelId - Channel whose plugin and config entry are consulted.
 * @param accountId - Account passed to the plugin's `resolveAccount`.
 * @returns `false` only when the winning override is explicitly `false`.
 */
const isHealthMonitorEnabled = (channelId: ChannelId, accountId: string): boolean => {
  type HealthMonitorCarrier = { healthMonitor?: { enabled?: boolean } };

  const cfg = loadConfig();
  // Plugins return their own account shape; only the override fields are read here.
  const account = getChannelPlugin(channelId)?.config.resolveAccount(cfg, accountId) as
    | (HealthMonitorCarrier & { config?: HealthMonitorCarrier })
    | undefined;
  const channel = cfg.channels?.[channelId] as HealthMonitorCarrier | undefined;

  // Most specific source first.
  const overridesByPrecedence = [
    account?.healthMonitor?.enabled,
    account?.config?.healthMonitor?.enabled,
    channel?.healthMonitor?.enabled,
  ];
  for (const override of overridesByPrecedence) {
    if (typeof override === "boolean") {
      return override;
    }
  }
  return true;
};
const getStore = (channelId: ChannelId): ChannelRuntimeStore => {
const existing = channelStores.get(channelId);
if (existing) {
@ -453,5 +496,6 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
markChannelLoggedOut,
isManuallyStopped: isManuallyStopped_,
resetRestartAttempts: resetRestartAttempts_,
isHealthMonitorEnabled,
};
}

View File

@ -50,7 +50,11 @@ export function createGatewayReloadHandlers(params: {
logChannels: { info: (msg: string) => void; error: (msg: string) => void };
logCron: { error: (msg: string) => void };
logReload: { info: (msg: string) => void; warn: (msg: string) => void };
createHealthMonitor: (checkIntervalMs: number) => ChannelHealthMonitor;
createHealthMonitor: (opts: {
checkIntervalMs: number;
staleEventThresholdMs?: number;
maxRestartsPerHour?: number;
}) => ChannelHealthMonitor;
}) {
const applyHotReload = async (
plan: GatewayReloadPlan,
@ -101,8 +105,17 @@ export function createGatewayReloadHandlers(params: {
if (plan.restartHealthMonitor) {
state.channelHealthMonitor?.stop();
const minutes = nextConfig.gateway?.channelHealthCheckMinutes;
const staleMinutes = nextConfig.gateway?.channelStaleEventThresholdMinutes;
nextState.channelHealthMonitor =
minutes === 0 ? null : params.createHealthMonitor((minutes ?? 5) * 60_000);
minutes === 0
? null
: params.createHealthMonitor({
checkIntervalMs: (minutes ?? 5) * 60_000,
...(staleMinutes != null && { staleEventThresholdMs: staleMinutes * 60_000 }),
...(nextConfig.gateway?.channelMaxRestartsPerHour != null && {
maxRestartsPerHour: nextConfig.gateway.channelMaxRestartsPerHour,
}),
});
}
if (plan.restartGmailWatcher) {

View File

@ -757,11 +757,17 @@ export async function startGatewayServer(
const healthCheckMinutes = cfgAtStart.gateway?.channelHealthCheckMinutes;
const healthCheckDisabled = healthCheckMinutes === 0;
const staleEventThresholdMinutes = cfgAtStart.gateway?.channelStaleEventThresholdMinutes;
const maxRestartsPerHour = cfgAtStart.gateway?.channelMaxRestartsPerHour;
let channelHealthMonitor = healthCheckDisabled
? null
: startChannelHealthMonitor({
channelManager,
checkIntervalMs: (healthCheckMinutes ?? 5) * 60_000,
...(staleEventThresholdMinutes != null && {
staleEventThresholdMs: staleEventThresholdMinutes * 60_000,
}),
...(maxRestartsPerHour != null && { maxRestartsPerHour }),
});
if (!minimalTestGateway) {
@ -980,8 +986,21 @@ export async function startGatewayServer(
logChannels,
logCron,
logReload,
createHealthMonitor: (checkIntervalMs: number) =>
startChannelHealthMonitor({ channelManager, checkIntervalMs }),
createHealthMonitor: (opts: {
checkIntervalMs: number;
staleEventThresholdMs?: number;
maxRestartsPerHour?: number;
}) =>
startChannelHealthMonitor({
channelManager,
checkIntervalMs: opts.checkIntervalMs,
...(opts.staleEventThresholdMs != null && {
staleEventThresholdMs: opts.staleEventThresholdMs,
}),
...(opts.maxRestartsPerHour != null && {
maxRestartsPerHour: opts.maxRestartsPerHour,
}),
}),
});
return startGatewayConfigReloader({

View File

@ -109,6 +109,9 @@ const hoisted = vi.hoisted(() => {
startChannel: vi.fn(async () => {}),
stopChannel: vi.fn(async () => {}),
markChannelLoggedOut: vi.fn(),
isHealthMonitorEnabled: vi.fn(() => true),
isManuallyStopped: vi.fn(() => false),
resetRestartAttempts: vi.fn(),
};
const createChannelManager = vi.fn(() => providerManager);

View File

@ -26,6 +26,7 @@ function createManager(snapshot: ChannelRuntimeSnapshot): ChannelManager {
startChannel: vi.fn(),
stopChannel: vi.fn(),
markChannelLoggedOut: vi.fn(),
isHealthMonitorEnabled: vi.fn(() => true),
isManuallyStopped: vi.fn(() => false),
resetRestartAttempts: vi.fn(),
};