feat(gateway): make health monitor stale threshold and max restarts configurable (openclaw#42107)

Verified:
- pnpm exec vitest --run src/config/config-misc.test.ts -t "gateway.channelHealthCheckMinutes"
- pnpm exec vitest --run src/gateway/server-channels.test.ts -t "health monitor"
- pnpm exec vitest --run src/gateway/channel-health-monitor.test.ts src/gateway/server/readiness.test.ts
- pnpm exec vitest --run extensions/feishu/src/outbound.test.ts
- pnpm exec tsc --noEmit

Co-authored-by: rstar327 <114364448+rstar327@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
rstar327 2026-03-14 22:21:56 -04:00 committed by GitHub
parent f00db91590
commit ba6064cc22
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 418 additions and 20 deletions

View File

@ -12314,14 +12314,14 @@
"filename": "src/config/schema.help.ts", "filename": "src/config/schema.help.ts",
"hashed_secret": "9f4cda226d3868676ac7f86f59e4190eb94bd208", "hashed_secret": "9f4cda226d3868676ac7f86f59e4190eb94bd208",
"is_verified": false, "is_verified": false,
"line_number": 653 "line_number": 657
}, },
{ {
"type": "Secret Keyword", "type": "Secret Keyword",
"filename": "src/config/schema.help.ts", "filename": "src/config/schema.help.ts",
"hashed_secret": "01822c8bbf6a8b136944b14182cb885100ec2eae", "hashed_secret": "01822c8bbf6a8b136944b14182cb885100ec2eae",
"is_verified": false, "is_verified": false,
"line_number": 686 "line_number": 690
} }
], ],
"src/config/schema.irc.ts": [ "src/config/schema.irc.ts": [
@ -12360,14 +12360,14 @@
"filename": "src/config/schema.labels.ts", "filename": "src/config/schema.labels.ts",
"hashed_secret": "e73c9fcad85cd4eecc74181ec4bdb31064d68439", "hashed_secret": "e73c9fcad85cd4eecc74181ec4bdb31064d68439",
"is_verified": false, "is_verified": false,
"line_number": 217 "line_number": 219
}, },
{ {
"type": "Secret Keyword", "type": "Secret Keyword",
"filename": "src/config/schema.labels.ts", "filename": "src/config/schema.labels.ts",
"hashed_secret": "2eda7cd978f39eebec3bf03e4410a40e14167fff", "hashed_secret": "2eda7cd978f39eebec3bf03e4410a40e14167fff",
"is_verified": false, "is_verified": false,
"line_number": 326 "line_number": 328
} }
], ],
"src/config/slack-http-config.test.ts": [ "src/config/slack-http-config.test.ts": [

View File

@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
- Refactor/channels: remove the legacy channel shim directories and point channel-specific imports directly at the extension-owned implementations. (#45967) thanks @scoootscooob. - Refactor/channels: remove the legacy channel shim directories and point channel-specific imports directly at the extension-owned implementations. (#45967) thanks @scoootscooob.
- Feishu/streaming: add `onReasoningStream` and `onReasoningEnd` support to streaming cards, so `/reasoning stream` renders thinking tokens as markdown blockquotes in the same card — matching the Telegram channel's reasoning lane behavior. - Feishu/streaming: add `onReasoningStream` and `onReasoningEnd` support to streaming cards, so `/reasoning stream` renders thinking tokens as markdown blockquotes in the same card — matching the Telegram channel's reasoning lane behavior.
- Feishu/cards: add identity-aware structured card headers and note footers for Feishu replies and direct sends, while keeping that presentation wired through the shared outbound identity path. (#29938) Thanks @nszhsl. - Feishu/cards: add identity-aware structured card headers and note footers for Feishu replies and direct sends, while keeping that presentation wired through the shared outbound identity path. (#29938) Thanks @nszhsl.
- Gateway/health monitor: add configurable stale-event thresholds and restart limits, plus per-channel and per-account `healthMonitor.enabled` overrides, while keeping the existing global disable path on `gateway.channelHealthCheckMinutes=0`. (#42107) Thanks @rstar327.
### Fixes ### Fixes

View File

@ -57,6 +57,10 @@ export type BlueBubblesAccountConfig = {
allowPrivateNetwork?: boolean; allowPrivateNetwork?: boolean;
/** Per-group configuration keyed by chat GUID or identifier. */ /** Per-group configuration keyed by chat GUID or identifier. */
groups?: Record<string, BlueBubblesGroupConfig>; groups?: Record<string, BlueBubblesGroupConfig>;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: {
enabled?: boolean;
};
}; };
export type BlueBubblesActionConfig = { export type BlueBubblesActionConfig = {

View File

@ -212,6 +212,49 @@ describe("gateway.channelHealthCheckMinutes", () => {
expect(res.issues[0]?.path).toBe("gateway.channelHealthCheckMinutes"); expect(res.issues[0]?.path).toBe("gateway.channelHealthCheckMinutes");
} }
}); });
it("rejects stale thresholds shorter than the health check interval", () => {
  // A 4-minute stale threshold sits below the explicit 5-minute check interval.
  const outcome = validateConfigObject({
    gateway: { channelHealthCheckMinutes: 5, channelStaleEventThresholdMinutes: 4 },
  });
  expect(outcome.ok).toBe(false);
  if (outcome.ok) {
    return;
  }
  // The first reported issue must point at the offending threshold field.
  expect(outcome.issues[0]?.path).toBe("gateway.channelStaleEventThresholdMinutes");
});
it("accepts stale thresholds that match or exceed the health check interval", () => {
  // With a 5-minute interval, a threshold equal to it (5) and one above it (6)
  // must both validate; they are checked in that order.
  for (const channelStaleEventThresholdMinutes of [5, 6]) {
    const outcome = validateConfigObject({
      gateway: {
        channelHealthCheckMinutes: 5,
        channelStaleEventThresholdMinutes,
      },
    });
    expect(outcome.ok).toBe(true);
  }
});
it("rejects stale thresholds shorter than the default health check interval", () => {
  // No explicit channelHealthCheckMinutes here: the threshold of 4 is compared
  // against the default interval instead.
  const outcome = validateConfigObject({
    gateway: { channelStaleEventThresholdMinutes: 4 },
  });
  expect(outcome.ok).toBe(false);
  if (outcome.ok) {
    return;
  }
  expect(outcome.issues[0]?.path).toBe("gateway.channelStaleEventThresholdMinutes");
});
}); });
describe("cron webhook schema", () => { describe("cron webhook schema", () => {

View File

@ -102,6 +102,10 @@ export const FIELD_HELP: Record<string, string> = {
"Explicit gateway-level tool denylist to block risky tools even if lower-level policies allow them. Use deny rules for emergency response and defense-in-depth hardening.", "Explicit gateway-level tool denylist to block risky tools even if lower-level policies allow them. Use deny rules for emergency response and defense-in-depth hardening.",
"gateway.channelHealthCheckMinutes": "gateway.channelHealthCheckMinutes":
"Interval in minutes for automatic channel health probing and status updates. Use lower intervals for faster detection, or higher intervals to reduce periodic probe noise.", "Interval in minutes for automatic channel health probing and status updates. Use lower intervals for faster detection, or higher intervals to reduce periodic probe noise.",
"gateway.channelStaleEventThresholdMinutes":
"How many minutes a connected channel can go without receiving any event before the health monitor treats it as a stale socket and triggers a restart. Default: 30.",
"gateway.channelMaxRestartsPerHour":
"Maximum number of health-monitor-initiated channel restarts allowed within a rolling one-hour window. Once hit, further restarts are skipped until the window expires. Default: 10.",
"gateway.tailscale": "gateway.tailscale":
"Tailscale integration settings for Serve/Funnel exposure and lifecycle handling on gateway start/exit. Keep off unless your deployment intentionally relies on Tailscale ingress.", "Tailscale integration settings for Serve/Funnel exposure and lifecycle handling on gateway start/exit. Keep off unless your deployment intentionally relies on Tailscale ingress.",
"gateway.tailscale.mode": "gateway.tailscale.mode":

View File

@ -84,6 +84,8 @@ export const FIELD_LABELS: Record<string, string> = {
"gateway.tools.allow": "Gateway Tool Allowlist", "gateway.tools.allow": "Gateway Tool Allowlist",
"gateway.tools.deny": "Gateway Tool Denylist", "gateway.tools.deny": "Gateway Tool Denylist",
"gateway.channelHealthCheckMinutes": "Gateway Channel Health Check Interval (min)", "gateway.channelHealthCheckMinutes": "Gateway Channel Health Check Interval (min)",
"gateway.channelStaleEventThresholdMinutes": "Gateway Channel Stale Event Threshold (min)",
"gateway.channelMaxRestartsPerHour": "Gateway Channel Max Restarts Per Hour",
"gateway.tailscale": "Gateway Tailscale", "gateway.tailscale": "Gateway Tailscale",
"gateway.tailscale.mode": "Gateway Tailscale Mode", "gateway.tailscale.mode": "Gateway Tailscale Mode",
"gateway.tailscale.resetOnExit": "Gateway Tailscale Reset on Exit", "gateway.tailscale.resetOnExit": "Gateway Tailscale Reset on Exit",

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy, GroupPolicy,
MarkdownConfig, MarkdownConfig,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js"; import type { DmConfig } from "./types.messages.js";
export type CommonChannelMessagingConfig = { export type CommonChannelMessagingConfig = {
@ -43,6 +46,8 @@ export type CommonChannelMessagingConfig = {
blockStreamingCoalesce?: BlockStreamingCoalesceConfig; blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
/** Heartbeat visibility settings for this channel. */ /** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */ /** Outbound response prefix override for this channel/account. */
responsePrefix?: string; responsePrefix?: string;
/** Max outbound media size in MB. */ /** Max outbound media size in MB. */

View File

@ -18,6 +18,14 @@ export type ChannelHeartbeatVisibilityConfig = {
useIndicator?: boolean; useIndicator?: boolean;
}; };
/** Per-channel or per-account overrides for the gateway channel health monitor. */
export type ChannelHealthMonitorConfig = {
/**
* Enable channel-health-monitor restarts for this channel or account.
* Inherits the global gateway setting when omitted.
*/
enabled?: boolean;
};
export type ChannelDefaultsConfig = { export type ChannelDefaultsConfig = {
groupPolicy?: GroupPolicy; groupPolicy?: GroupPolicy;
/** Default heartbeat visibility for all channels. */ /** Default heartbeat visibility for all channels. */
@ -39,6 +47,7 @@ export type ExtensionChannelConfig = {
defaultAccount?: string; defaultAccount?: string;
dmPolicy?: string; dmPolicy?: string;
groupPolicy?: GroupPolicy; groupPolicy?: GroupPolicy;
healthMonitor?: ChannelHealthMonitorConfig;
accounts?: Record<string, unknown>; accounts?: Record<string, unknown>;
[key: string]: unknown; [key: string]: unknown;
}; };

View File

@ -8,7 +8,10 @@ import type {
OutboundRetryConfig, OutboundRetryConfig,
ReplyToMode, ReplyToMode,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js"; import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { SecretInput } from "./types.secrets.js"; import type { SecretInput } from "./types.secrets.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -297,6 +300,8 @@ export type DiscordAccountConfig = {
guilds?: Record<string, DiscordGuildEntry>; guilds?: Record<string, DiscordGuildEntry>;
/** Heartbeat visibility settings for this channel. */ /** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Exec approval forwarding configuration. */ /** Exec approval forwarding configuration. */
execApprovals?: DiscordExecApprovalConfig; execApprovals?: DiscordExecApprovalConfig;
/** Agent-controlled interactive components (buttons, select menus). */ /** Agent-controlled interactive components (buttons, select menus). */

View File

@ -431,4 +431,16 @@ export type GatewayConfig = {
* Set to 0 to disable. Default: 5. * Set to 0 to disable. Default: 5.
*/ */
channelHealthCheckMinutes?: number; channelHealthCheckMinutes?: number;
/**
* Stale event threshold in minutes for the channel health monitor.
* A connected channel that receives no events for this duration is treated
* as a stale socket and restarted. Default: 30.
*/
channelStaleEventThresholdMinutes?: number;
/**
* Maximum number of health-monitor-initiated channel restarts per hour.
* Once this limit is reached, the monitor skips further restarts until
* the rolling window expires. Default: 10.
*/
channelMaxRestartsPerHour?: number;
}; };

View File

@ -4,6 +4,7 @@ import type {
GroupPolicy, GroupPolicy,
ReplyToMode, ReplyToMode,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHealthMonitorConfig } from "./types.channels.js";
import type { DmConfig } from "./types.messages.js"; import type { DmConfig } from "./types.messages.js";
import type { SecretRef } from "./types.secrets.js"; import type { SecretRef } from "./types.secrets.js";
@ -99,6 +100,8 @@ export type GoogleChatAccountConfig = {
/** Per-action tool gating (default: true for all). */ /** Per-action tool gating (default: true for all). */
actions?: GoogleChatActionConfig; actions?: GoogleChatActionConfig;
dm?: GoogleChatDmConfig; dm?: GoogleChatDmConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** /**
* Typing indicator mode (default: "message"). * Typing indicator mode (default: "message").
* - "none": No indicator * - "none": No indicator

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy, GroupPolicy,
MarkdownConfig, MarkdownConfig,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js"; import type { DmConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -77,6 +80,8 @@ export type IMessageAccountConfig = {
>; >;
/** Heartbeat visibility settings for this channel. */ /** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */ /** Outbound response prefix override for this channel/account. */
responsePrefix?: string; responsePrefix?: string;
}; };

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy, GroupPolicy,
MarkdownConfig, MarkdownConfig,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js"; import type { DmConfig } from "./types.messages.js";
import type { SecretInput } from "./types.secrets.js"; import type { SecretInput } from "./types.secrets.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -114,6 +117,8 @@ export type MSTeamsConfig = {
sharePointSiteId?: string; sharePointSiteId?: string;
/** Heartbeat visibility settings for this channel. */ /** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */ /** Outbound response prefix override for this channel/account. */
responsePrefix?: string; responsePrefix?: string;
}; };

View File

@ -5,7 +5,10 @@ import type {
MarkdownConfig, MarkdownConfig,
ReplyToMode, ReplyToMode,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js"; import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -185,6 +188,8 @@ export type SlackAccountConfig = {
channels?: Record<string, SlackChannelConfig>; channels?: Record<string, SlackChannelConfig>;
/** Heartbeat visibility settings for this channel. */ /** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Outbound response prefix override for this channel/account. */ /** Outbound response prefix override for this channel/account. */
responsePrefix?: string; responsePrefix?: string;
/** /**

View File

@ -8,7 +8,10 @@ import type {
ReplyToMode, ReplyToMode,
SessionThreadBindingsConfig, SessionThreadBindingsConfig,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js"; import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -179,6 +182,8 @@ export type TelegramAccountConfig = {
reactionLevel?: "off" | "ack" | "minimal" | "extensive"; reactionLevel?: "off" | "ack" | "minimal" | "extensive";
/** Heartbeat visibility settings for this channel. */ /** Heartbeat visibility settings for this channel. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
/** Controls whether link previews are shown in outbound messages. Default: true. */ /** Controls whether link previews are shown in outbound messages. Default: true. */
linkPreview?: boolean; linkPreview?: boolean;
/** /**

View File

@ -4,7 +4,10 @@ import type {
GroupPolicy, GroupPolicy,
MarkdownConfig, MarkdownConfig,
} from "./types.base.js"; } from "./types.base.js";
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type {
ChannelHealthMonitorConfig,
ChannelHeartbeatVisibilityConfig,
} from "./types.channels.js";
import type { DmConfig } from "./types.messages.js"; import type { DmConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
@ -78,6 +81,8 @@ type WhatsAppSharedConfig = {
debounceMs?: number; debounceMs?: number;
/** Heartbeat visibility settings. */ /** Heartbeat visibility settings. */
heartbeat?: ChannelHeartbeatVisibilityConfig; heartbeat?: ChannelHeartbeatVisibilityConfig;
/** Channel health monitor overrides for this channel/account. */
healthMonitor?: ChannelHealthMonitorConfig;
}; };
type WhatsAppConfigCore = { type WhatsAppConfigCore = {

View File

@ -8,3 +8,10 @@ export const ChannelHeartbeatVisibilitySchema = z
}) })
.strict() .strict()
.optional(); .optional();
/**
* Zod schema for the `healthMonitor` block of a channel or account config.
* The object is strict, so keys other than `enabled` are rejected, and the
* whole block may be omitted.
*/
export const ChannelHealthMonitorSchema = z
.object({
// Optional on/off override for health-monitor handling of this channel/account.
enabled: z.boolean().optional(),
})
.strict()
.optional();

View File

@ -13,7 +13,10 @@ import {
resolveTelegramCustomCommands, resolveTelegramCustomCommands,
} from "./telegram-custom-commands.js"; } from "./telegram-custom-commands.js";
import { ToolPolicySchema } from "./zod-schema.agent-runtime.js"; import { ToolPolicySchema } from "./zod-schema.agent-runtime.js";
import { ChannelHeartbeatVisibilitySchema } from "./zod-schema.channels.js"; import {
ChannelHealthMonitorSchema,
ChannelHeartbeatVisibilitySchema,
} from "./zod-schema.channels.js";
import { import {
BlockStreamingChunkSchema, BlockStreamingChunkSchema,
BlockStreamingCoalesceSchema, BlockStreamingCoalesceSchema,
@ -271,6 +274,7 @@ export const TelegramAccountSchemaBase = z
reactionNotifications: z.enum(["off", "own", "all"]).optional(), reactionNotifications: z.enum(["off", "own", "all"]).optional(),
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(), reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
linkPreview: z.boolean().optional(), linkPreview: z.boolean().optional(),
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
ackReaction: z.string().optional(), ackReaction: z.string().optional(),
@ -511,6 +515,7 @@ export const DiscordAccountSchema = z
dm: DiscordDmSchema.optional(), dm: DiscordDmSchema.optional(),
guilds: z.record(z.string(), DiscordGuildSchema.optional()).optional(), guilds: z.record(z.string(), DiscordGuildSchema.optional()).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
execApprovals: z execApprovals: z
.object({ .object({
enabled: z.boolean().optional(), enabled: z.boolean().optional(),
@ -782,6 +787,7 @@ export const GoogleChatAccountSchema = z
.strict() .strict()
.optional(), .optional(),
dm: GoogleChatDmSchema.optional(), dm: GoogleChatDmSchema.optional(),
healthMonitor: ChannelHealthMonitorSchema,
typingIndicator: z.enum(["none", "message", "reaction"]).optional(), typingIndicator: z.enum(["none", "message", "reaction"]).optional(),
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
}) })
@ -898,6 +904,7 @@ export const SlackAccountSchema = z
dm: SlackDmSchema.optional(), dm: SlackDmSchema.optional(),
channels: z.record(z.string(), SlackChannelSchema.optional()).optional(), channels: z.record(z.string(), SlackChannelSchema.optional()).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
ackReaction: z.string().optional(), ackReaction: z.string().optional(),
typingReaction: z.string().optional(), typingReaction: z.string().optional(),
@ -1032,6 +1039,7 @@ export const SignalAccountSchemaBase = z
.optional(), .optional(),
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(), reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
}) })
.strict(); .strict();
@ -1145,6 +1153,7 @@ export const IrcAccountSchemaBase = z
blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
mediaMaxMb: z.number().positive().optional(), mediaMaxMb: z.number().positive().optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
}) })
.strict(); .strict();
@ -1272,6 +1281,7 @@ export const IMessageAccountSchemaBase = z
) )
.optional(), .optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
}) })
.strict(); .strict();
@ -1383,6 +1393,7 @@ export const BlueBubblesAccountSchemaBase = z
blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
groups: z.record(z.string(), BlueBubblesGroupConfigSchema.optional()).optional(), groups: z.record(z.string(), BlueBubblesGroupConfigSchema.optional()).optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
}) })
.strict(); .strict();
@ -1499,6 +1510,7 @@ export const MSTeamsConfigSchema = z
/** SharePoint site ID for file uploads in group chats/channels (e.g., "contoso.sharepoint.com,guid1,guid2") */ /** SharePoint site ID for file uploads in group chats/channels (e.g., "contoso.sharepoint.com,guid1,guid2") */
sharePointSiteId: z.string().optional(), sharePointSiteId: z.string().optional(),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
responsePrefix: z.string().optional(), responsePrefix: z.string().optional(),
}) })
.strict() .strict()

View File

@ -1,6 +1,9 @@
import { z } from "zod"; import { z } from "zod";
import { ToolPolicySchema } from "./zod-schema.agent-runtime.js"; import { ToolPolicySchema } from "./zod-schema.agent-runtime.js";
import { ChannelHeartbeatVisibilitySchema } from "./zod-schema.channels.js"; import {
ChannelHealthMonitorSchema,
ChannelHeartbeatVisibilitySchema,
} from "./zod-schema.channels.js";
import { import {
BlockStreamingCoalesceSchema, BlockStreamingCoalesceSchema,
DmConfigSchema, DmConfigSchema,
@ -56,6 +59,7 @@ const WhatsAppSharedSchema = z.object({
ackReaction: WhatsAppAckReactionSchema, ackReaction: WhatsAppAckReactionSchema,
debounceMs: z.number().int().nonnegative().optional().default(0), debounceMs: z.number().int().nonnegative().optional().default(0),
heartbeat: ChannelHeartbeatVisibilitySchema, heartbeat: ChannelHeartbeatVisibilitySchema,
healthMonitor: ChannelHealthMonitorSchema,
}); });
function enforceOpenDmPolicyAllowFromStar(params: { function enforceOpenDmPolicyAllowFromStar(params: {

View File

@ -696,6 +696,8 @@ export const OpenClawSchema = z
.strict() .strict()
.optional(), .optional(),
channelHealthCheckMinutes: z.number().int().min(0).optional(), channelHealthCheckMinutes: z.number().int().min(0).optional(),
channelStaleEventThresholdMinutes: z.number().int().min(1).optional(),
channelMaxRestartsPerHour: z.number().int().min(1).optional(),
tailscale: z tailscale: z
.object({ .object({
mode: z.union([z.literal("off"), z.literal("serve"), z.literal("funnel")]).optional(), mode: z.union([z.literal("off"), z.literal("serve"), z.literal("funnel")]).optional(),
@ -833,6 +835,21 @@ export const OpenClawSchema = z
.optional(), .optional(),
}) })
.strict() .strict()
.superRefine((gateway, ctx) => {
// Cross-field check: the stale-event threshold must not be shorter than the
// health check interval. When the interval is omitted, compare against 5,
// the documented default for gateway.channelHealthCheckMinutes.
const effectiveHealthCheckMinutes = gateway.channelHealthCheckMinutes ?? 5;
if (
gateway.channelStaleEventThresholdMinutes != null &&
// An interval of 0 disables the health check, so there is nothing to compare against.
effectiveHealthCheckMinutes !== 0 &&
gateway.channelStaleEventThresholdMinutes < effectiveHealthCheckMinutes
) {
// Report the issue on the threshold field (path is relative to `gateway`).
ctx.addIssue({
code: z.ZodIssueCode.custom,
path: ["channelStaleEventThresholdMinutes"],
message:
"channelStaleEventThresholdMinutes should be >= channelHealthCheckMinutes to avoid delayed stale detection",
});
}
})
.optional(), .optional(),
memory: MemorySchema, memory: MemorySchema,
skills: z skills: z

View File

@ -11,6 +11,7 @@ function createMockChannelManager(overrides?: Partial<ChannelManager>): ChannelM
startChannel: vi.fn(async () => {}), startChannel: vi.fn(async () => {}),
stopChannel: vi.fn(async () => {}), stopChannel: vi.fn(async () => {}),
markChannelLoggedOut: vi.fn(), markChannelLoggedOut: vi.fn(),
isHealthMonitorEnabled: vi.fn(() => true),
isManuallyStopped: vi.fn(() => false), isManuallyStopped: vi.fn(() => false),
resetRestartAttempts: vi.fn(), resetRestartAttempts: vi.fn(),
...overrides, ...overrides,
@ -226,6 +227,53 @@ describe("channel-health-monitor", () => {
await expectNoStart(manager); await expectNoStart(manager);
}); });
it("skips channels with health monitor disabled globally for that account", async () => {
  // The manager reports the health monitor as disabled for every account, so
  // the stopped discord/default account is expected to be left alone.
  const healthMonitorOff = { isHealthMonitorEnabled: vi.fn(() => false) };
  const manager = createSnapshotManager(
    {
      discord: {
        default: { running: false, enabled: true, configured: true },
      },
    },
    healthMonitorOff,
  );
  await expectNoStart(manager);
});
it("still restarts enabled accounts when another account on the same channel is disabled", async () => {
  const lastStartAt = Date.now() - 300_000;
  // Both accounts report the same running-but-not-connected status; the factory
  // hands each account its own object.
  const disconnectedAccount = () => ({
    running: true,
    connected: false,
    enabled: true,
    configured: true,
    lastStartAt,
  });
  const manager = createSnapshotManager(
    {
      discord: {
        default: disconnectedAccount(),
        quiet: disconnectedAccount(),
      },
    },
    {
      // Health monitoring is switched off only for discord/quiet.
      isHealthMonitorEnabled: vi.fn(
        (channelId: ChannelId, accountId: string) =>
          channelId !== "discord" || accountId !== "quiet",
      ),
    },
  );
  const monitor = await startAndRunCheck(manager);
  // The monitored account is cycled (stop, then start)...
  expect(manager.stopChannel).toHaveBeenCalledWith("discord", "default");
  expect(manager.startChannel).toHaveBeenCalledWith("discord", "default");
  // ...while the opted-out account is never touched.
  expect(manager.stopChannel).not.toHaveBeenCalledWith("discord", "quiet");
  expect(manager.startChannel).not.toHaveBeenCalledWith("discord", "quiet");
  monitor.stop();
});
it("restarts a stuck channel (running but not connected)", async () => { it("restarts a stuck channel (running but not connected)", async () => {
const now = Date.now(); const now = Date.now();
const manager = createSnapshotManager({ const manager = createSnapshotManager({

View File

@ -118,6 +118,9 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
if (!status) { if (!status) {
continue; continue;
} }
if (!channelManager.isHealthMonitorEnabled(channelId as ChannelId, accountId)) {
continue;
}
if (channelManager.isManuallyStopped(channelId as ChannelId, accountId)) { if (channelManager.isManuallyStopped(channelId as ChannelId, accountId)) {
continue; continue;
} }

View File

@ -41,6 +41,16 @@ const BASE_RELOAD_RULES: ReloadRule[] = [
kind: "hot", kind: "hot",
actions: ["restart-health-monitor"], actions: ["restart-health-monitor"],
}, },
{
prefix: "gateway.channelStaleEventThresholdMinutes",
kind: "hot",
actions: ["restart-health-monitor"],
},
{
prefix: "gateway.channelMaxRestartsPerHour",
kind: "hot",
actions: ["restart-health-monitor"],
},
// Stuck-session warning threshold is read by the diagnostics heartbeat loop. // Stuck-session warning threshold is read by the diagnostics heartbeat loop.
{ prefix: "diagnostics.stuckSessionWarnMs", kind: "none" }, { prefix: "diagnostics.stuckSessionWarnMs", kind: "none" },
{ prefix: "hooks.gmail", kind: "hot", actions: ["restart-gmail-watcher"] }, { prefix: "hooks.gmail", kind: "hot", actions: ["restart-gmail-watcher"] },

View File

@ -44,12 +44,13 @@ function createTestPlugin(params?: {
account?: TestAccount; account?: TestAccount;
startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"]; startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"];
includeDescribeAccount?: boolean; includeDescribeAccount?: boolean;
resolveAccount?: ChannelPlugin<TestAccount>["config"]["resolveAccount"];
}): ChannelPlugin<TestAccount> { }): ChannelPlugin<TestAccount> {
const account = params?.account ?? { enabled: true, configured: true }; const account = params?.account ?? { enabled: true, configured: true };
const includeDescribeAccount = params?.includeDescribeAccount !== false; const includeDescribeAccount = params?.includeDescribeAccount !== false;
const config: ChannelPlugin<TestAccount>["config"] = { const config: ChannelPlugin<TestAccount>["config"] = {
listAccountIds: () => [DEFAULT_ACCOUNT_ID], listAccountIds: () => [DEFAULT_ACCOUNT_ID],
resolveAccount: () => account, resolveAccount: params?.resolveAccount ?? (() => account),
isEnabled: (resolved) => resolved.enabled !== false, isEnabled: (resolved) => resolved.enabled !== false,
}; };
if (includeDescribeAccount) { if (includeDescribeAccount) {
@ -88,13 +89,16 @@ function installTestRegistry(plugin: ChannelPlugin<TestAccount>) {
setActivePluginRegistry(registry); setActivePluginRegistry(registry);
} }
function createManager(options?: { channelRuntime?: PluginRuntime["channel"] }) { function createManager(options?: {
channelRuntime?: PluginRuntime["channel"];
loadConfig?: () => Record<string, unknown>;
}) {
const log = createSubsystemLogger("gateway/server-channels-test"); const log = createSubsystemLogger("gateway/server-channels-test");
const channelLogs = { discord: log } as Record<ChannelId, SubsystemLogger>; const channelLogs = { discord: log } as Record<ChannelId, SubsystemLogger>;
const runtime = runtimeForLogger(log); const runtime = runtimeForLogger(log);
const channelRuntimeEnvs = { discord: runtime } as Record<ChannelId, RuntimeEnv>; const channelRuntimeEnvs = { discord: runtime } as Record<ChannelId, RuntimeEnv>;
return createChannelManager({ return createChannelManager({
loadConfig: () => ({}), loadConfig: () => options?.loadConfig?.() ?? {},
channelLogs, channelLogs,
channelRuntimeEnvs, channelRuntimeEnvs,
...(options?.channelRuntime ? { channelRuntime: options.channelRuntime } : {}), ...(options?.channelRuntime ? { channelRuntime: options.channelRuntime } : {}),
@ -180,4 +184,104 @@ describe("server-channels auto restart", () => {
await manager.startChannels(); await manager.startChannels();
expect(startAccount).toHaveBeenCalledTimes(1); expect(startAccount).toHaveBeenCalledTimes(1);
}); });
// The manager asks the plugin to resolve the account, so the plugin's own key
// normalization ("router-d" -> "Router D") decides which healthMonitor override applies.
it("reuses plugin account resolution for health monitor overrides", () => {
  type DiscordAccounts = Record<string, TestAccount & { healthMonitor?: { enabled?: boolean } }>;
  const toSlug = (value: string) => value.toLowerCase().replaceAll(" ", "-");
  const buildConfig = () => ({
    channels: {
      discord: {
        accounts: {
          "Router D": {
            enabled: true,
            configured: true,
            healthMonitor: { enabled: false },
          },
        },
      },
    },
  });

  installTestRegistry(
    createTestPlugin({
      resolveAccount: (cfg, accountId) => {
        const accounts = (cfg as { channels?: { discord?: { accounts?: DiscordAccounts } } })
          .channels?.discord?.accounts;
        if (!accounts) {
          return { enabled: true, configured: true };
        }
        // An exact key match wins before any normalized comparison is attempted.
        const exact = accounts[accountId ?? DEFAULT_ACCOUNT_ID];
        if (exact) {
          return exact;
        }
        const wanted = toSlug(accountId ?? DEFAULT_ACCOUNT_ID);
        const matchKey = Object.keys(accounts).find((key) => toSlug(key) === wanted);
        if (!matchKey) {
          return {};
        }
        return accounts[matchKey] ?? { enabled: true, configured: true };
      },
    }),
  );

  const manager = createManager({ loadConfig: buildConfig });

  expect(manager.isHealthMonitorEnabled("discord", "router-d")).toBe(false);
});
// The resolved account carries no healthMonitor setting here, so the value reported
// must come from `channels.discord.healthMonitor.enabled` in the loaded config.
it("falls back to channel-level health monitor overrides when account resolution omits them", () => {
  const resolveAccountWithoutOverride = () => ({
    enabled: true,
    configured: true,
  });
  const loadChannelDisabledConfig = () => ({
    channels: {
      discord: {
        healthMonitor: { enabled: false },
      },
    },
  });

  installTestRegistry(createTestPlugin({ resolveAccount: resolveAccountWithoutOverride }));
  const manager = createManager({ loadConfig: loadChannelDisabledConfig });

  expect(manager.isHealthMonitorEnabled("discord", DEFAULT_ACCOUNT_ID)).toBe(false);
});
// The resolved account nests its settings under `config`; that nested
// healthMonitor.enabled=false must win over the channel-level enabled=true.
it("uses wrapped account config health monitor overrides", () => {
  const resolveWrappedAccount = () => ({
    enabled: true,
    configured: true,
    config: {
      healthMonitor: { enabled: false },
    },
  });
  const loadChannelEnabledConfig = () => ({
    channels: {
      discord: {
        healthMonitor: { enabled: true },
      },
    },
  });

  installTestRegistry(createTestPlugin({ resolveAccount: resolveWrappedAccount }));
  const manager = createManager({ loadConfig: loadChannelEnabledConfig });

  expect(manager.isHealthMonitorEnabled("discord", DEFAULT_ACCOUNT_ID)).toBe(false);
});
}); });

View File

@ -105,6 +105,7 @@ export type ChannelManager = {
markChannelLoggedOut: (channelId: ChannelId, cleared: boolean, accountId?: string) => void; markChannelLoggedOut: (channelId: ChannelId, cleared: boolean, accountId?: string) => void;
isManuallyStopped: (channelId: ChannelId, accountId: string) => boolean; isManuallyStopped: (channelId: ChannelId, accountId: string) => boolean;
resetRestartAttempts: (channelId: ChannelId, accountId: string) => void; resetRestartAttempts: (channelId: ChannelId, accountId: string) => void;
isHealthMonitorEnabled: (channelId: ChannelId, accountId: string) => boolean;
}; };
// Channel docking: lifecycle hooks (`plugin.gateway`) flow through this manager. // Channel docking: lifecycle hooks (`plugin.gateway`) flow through this manager.
@ -119,6 +120,48 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
const restartKey = (channelId: ChannelId, accountId: string) => `${channelId}:${accountId}`; const restartKey = (channelId: ChannelId, accountId: string) => `${channelId}:${accountId}`;
/**
 * Reports whether the health monitor is enabled for one channel account.
 *
 * The first candidate that is an actual boolean wins, checked in this order:
 *   1. `healthMonitor.enabled` on the account returned by the plugin's `resolveAccount`
 *   2. `config.healthMonitor.enabled` on that same resolved account
 *   3. `channels[channelId].healthMonitor.enabled` in the loaded config
 *
 * @param channelId - Channel whose plugin and config section are consulted.
 * @param accountId - Account id handed to the plugin's `resolveAccount`.
 * @returns The first boolean override found, or `true` when none is set.
 */
const isHealthMonitorEnabled = (channelId: ChannelId, accountId: string): boolean => {
  type HealthMonitorToggle = { healthMonitor?: { enabled?: boolean } };
  type ResolvedAccountShape = HealthMonitorToggle & { config?: HealthMonitorToggle };

  const cfg = loadConfig();
  // When no plugin is registered for the channel, `resolved` stays undefined and
  // only the channel-level override (or the default) can apply.
  const resolved = getChannelPlugin(channelId)?.config.resolveAccount(cfg, accountId) as
    | ResolvedAccountShape
    | undefined;
  const channelSection = cfg.channels?.[channelId] as HealthMonitorToggle | undefined;

  // Ordered from most specific to least specific.
  const candidates: unknown[] = [
    resolved?.healthMonitor?.enabled,
    resolved?.config?.healthMonitor?.enabled,
    channelSection?.healthMonitor?.enabled,
  ];
  for (const candidate of candidates) {
    // Only real booleans count; any other value falls through to the next level.
    if (typeof candidate === "boolean") {
      return candidate;
    }
  }
  return true;
};
const getStore = (channelId: ChannelId): ChannelRuntimeStore => { const getStore = (channelId: ChannelId): ChannelRuntimeStore => {
const existing = channelStores.get(channelId); const existing = channelStores.get(channelId);
if (existing) { if (existing) {
@ -453,5 +496,6 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
markChannelLoggedOut, markChannelLoggedOut,
isManuallyStopped: isManuallyStopped_, isManuallyStopped: isManuallyStopped_,
resetRestartAttempts: resetRestartAttempts_, resetRestartAttempts: resetRestartAttempts_,
isHealthMonitorEnabled,
}; };
} }

View File

@ -50,7 +50,11 @@ export function createGatewayReloadHandlers(params: {
logChannels: { info: (msg: string) => void; error: (msg: string) => void }; logChannels: { info: (msg: string) => void; error: (msg: string) => void };
logCron: { error: (msg: string) => void }; logCron: { error: (msg: string) => void };
logReload: { info: (msg: string) => void; warn: (msg: string) => void }; logReload: { info: (msg: string) => void; warn: (msg: string) => void };
createHealthMonitor: (checkIntervalMs: number) => ChannelHealthMonitor; createHealthMonitor: (opts: {
checkIntervalMs: number;
staleEventThresholdMs?: number;
maxRestartsPerHour?: number;
}) => ChannelHealthMonitor;
}) { }) {
const applyHotReload = async ( const applyHotReload = async (
plan: GatewayReloadPlan, plan: GatewayReloadPlan,
@ -101,8 +105,17 @@ export function createGatewayReloadHandlers(params: {
if (plan.restartHealthMonitor) { if (plan.restartHealthMonitor) {
state.channelHealthMonitor?.stop(); state.channelHealthMonitor?.stop();
const minutes = nextConfig.gateway?.channelHealthCheckMinutes; const minutes = nextConfig.gateway?.channelHealthCheckMinutes;
const staleMinutes = nextConfig.gateway?.channelStaleEventThresholdMinutes;
nextState.channelHealthMonitor = nextState.channelHealthMonitor =
minutes === 0 ? null : params.createHealthMonitor((minutes ?? 5) * 60_000); minutes === 0
? null
: params.createHealthMonitor({
checkIntervalMs: (minutes ?? 5) * 60_000,
...(staleMinutes != null && { staleEventThresholdMs: staleMinutes * 60_000 }),
...(nextConfig.gateway?.channelMaxRestartsPerHour != null && {
maxRestartsPerHour: nextConfig.gateway.channelMaxRestartsPerHour,
}),
});
} }
if (plan.restartGmailWatcher) { if (plan.restartGmailWatcher) {

View File

@ -757,11 +757,17 @@ export async function startGatewayServer(
const healthCheckMinutes = cfgAtStart.gateway?.channelHealthCheckMinutes; const healthCheckMinutes = cfgAtStart.gateway?.channelHealthCheckMinutes;
const healthCheckDisabled = healthCheckMinutes === 0; const healthCheckDisabled = healthCheckMinutes === 0;
const staleEventThresholdMinutes = cfgAtStart.gateway?.channelStaleEventThresholdMinutes;
const maxRestartsPerHour = cfgAtStart.gateway?.channelMaxRestartsPerHour;
let channelHealthMonitor = healthCheckDisabled let channelHealthMonitor = healthCheckDisabled
? null ? null
: startChannelHealthMonitor({ : startChannelHealthMonitor({
channelManager, channelManager,
checkIntervalMs: (healthCheckMinutes ?? 5) * 60_000, checkIntervalMs: (healthCheckMinutes ?? 5) * 60_000,
...(staleEventThresholdMinutes != null && {
staleEventThresholdMs: staleEventThresholdMinutes * 60_000,
}),
...(maxRestartsPerHour != null && { maxRestartsPerHour }),
}); });
if (!minimalTestGateway) { if (!minimalTestGateway) {
@ -980,8 +986,21 @@ export async function startGatewayServer(
logChannels, logChannels,
logCron, logCron,
logReload, logReload,
createHealthMonitor: (checkIntervalMs: number) => createHealthMonitor: (opts: {
startChannelHealthMonitor({ channelManager, checkIntervalMs }), checkIntervalMs: number;
staleEventThresholdMs?: number;
maxRestartsPerHour?: number;
}) =>
startChannelHealthMonitor({
channelManager,
checkIntervalMs: opts.checkIntervalMs,
...(opts.staleEventThresholdMs != null && {
staleEventThresholdMs: opts.staleEventThresholdMs,
}),
...(opts.maxRestartsPerHour != null && {
maxRestartsPerHour: opts.maxRestartsPerHour,
}),
}),
}); });
return startGatewayConfigReloader({ return startGatewayConfigReloader({

View File

@ -109,6 +109,9 @@ const hoisted = vi.hoisted(() => {
startChannel: vi.fn(async () => {}), startChannel: vi.fn(async () => {}),
stopChannel: vi.fn(async () => {}), stopChannel: vi.fn(async () => {}),
markChannelLoggedOut: vi.fn(), markChannelLoggedOut: vi.fn(),
isHealthMonitorEnabled: vi.fn(() => true),
isManuallyStopped: vi.fn(() => false),
resetRestartAttempts: vi.fn(),
}; };
const createChannelManager = vi.fn(() => providerManager); const createChannelManager = vi.fn(() => providerManager);

View File

@ -26,6 +26,7 @@ function createManager(snapshot: ChannelRuntimeSnapshot): ChannelManager {
startChannel: vi.fn(), startChannel: vi.fn(),
stopChannel: vi.fn(), stopChannel: vi.fn(),
markChannelLoggedOut: vi.fn(), markChannelLoggedOut: vi.fn(),
isHealthMonitorEnabled: vi.fn(() => true),
isManuallyStopped: vi.fn(() => false), isManuallyStopped: vi.fn(() => false),
resetRestartAttempts: vi.fn(), resetRestartAttempts: vi.fn(),
}; };