mirror of https://github.com/openclaw/openclaw.git
feat(gateway): make health monitor stale threshold and max restarts configurable (openclaw#42107)
Verified:
- pnpm exec vitest --run src/config/config-misc.test.ts -t "gateway.channelHealthCheckMinutes"
- pnpm exec vitest --run src/gateway/server-channels.test.ts -t "health monitor"
- pnpm exec vitest --run src/gateway/channel-health-monitor.test.ts src/gateway/server/readiness.test.ts
- pnpm exec vitest --run extensions/feishu/src/outbound.test.ts
- pnpm exec tsc --noEmit

Co-authored-by: rstar327 <114364448+rstar327@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
parent
f00db91590
commit
ba6064cc22
|
|
@ -12314,14 +12314,14 @@
|
|||
"filename": "src/config/schema.help.ts",
|
||||
"hashed_secret": "9f4cda226d3868676ac7f86f59e4190eb94bd208",
|
||||
"is_verified": false,
|
||||
"line_number": 653
|
||||
"line_number": 657
|
||||
},
|
||||
{
|
||||
"type": "Secret Keyword",
|
||||
"filename": "src/config/schema.help.ts",
|
||||
"hashed_secret": "01822c8bbf6a8b136944b14182cb885100ec2eae",
|
||||
"is_verified": false,
|
||||
"line_number": 686
|
||||
"line_number": 690
|
||||
}
|
||||
],
|
||||
"src/config/schema.irc.ts": [
|
||||
|
|
@ -12360,14 +12360,14 @@
|
|||
"filename": "src/config/schema.labels.ts",
|
||||
"hashed_secret": "e73c9fcad85cd4eecc74181ec4bdb31064d68439",
|
||||
"is_verified": false,
|
||||
"line_number": 217
|
||||
"line_number": 219
|
||||
},
|
||||
{
|
||||
"type": "Secret Keyword",
|
||||
"filename": "src/config/schema.labels.ts",
|
||||
"hashed_secret": "2eda7cd978f39eebec3bf03e4410a40e14167fff",
|
||||
"is_verified": false,
|
||||
"line_number": 326
|
||||
"line_number": 328
|
||||
}
|
||||
],
|
||||
"src/config/slack-http-config.test.ts": [
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
|
|||
- Refactor/channels: remove the legacy channel shim directories and point channel-specific imports directly at the extension-owned implementations. (#45967) thanks @scoootscooob.
|
||||
- Feishu/streaming: add `onReasoningStream` and `onReasoningEnd` support to streaming cards, so `/reasoning stream` renders thinking tokens as markdown blockquotes in the same card — matching the Telegram channel's reasoning lane behavior.
|
||||
- Feishu/cards: add identity-aware structured card headers and note footers for Feishu replies and direct sends, while keeping that presentation wired through the shared outbound identity path. (#29938) Thanks @nszhsl.
|
||||
- Gateway/health monitor: add configurable stale-event thresholds and restart limits, plus per-channel and per-account `healthMonitor.enabled` overrides, while keeping the existing global disable path on `gateway.channelHealthCheckMinutes=0`. (#42107) Thanks @rstar327.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ export type BlueBubblesAccountConfig = {
|
|||
allowPrivateNetwork?: boolean;
|
||||
/** Per-group configuration keyed by chat GUID or identifier. */
|
||||
groups?: Record<string, BlueBubblesGroupConfig>;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: {
|
||||
enabled?: boolean;
|
||||
};
|
||||
};
|
||||
|
||||
export type BlueBubblesActionConfig = {
|
||||
|
|
|
|||
|
|
@ -212,6 +212,49 @@ describe("gateway.channelHealthCheckMinutes", () => {
|
|||
expect(res.issues[0]?.path).toBe("gateway.channelHealthCheckMinutes");
|
||||
}
|
||||
});
|
||||
|
||||
// A stale threshold below an explicitly configured check interval must fail
// validation, and the first reported issue must point at the threshold field.
it("rejects stale thresholds shorter than the health check interval", () => {
  const result = validateConfigObject({
    gateway: { channelHealthCheckMinutes: 5, channelStaleEventThresholdMinutes: 4 },
  });

  expect(result.ok).toBe(false);
  if (result.ok) {
    return;
  }
  expect(result.issues[0]?.path).toBe("gateway.channelStaleEventThresholdMinutes");
});
|
||||
|
||||
// With a 5-minute check interval, a threshold equal to the interval and one
// above it must both validate.
it("accepts stale thresholds that match or exceed the health check interval", () => {
  const validateWithThreshold = (channelStaleEventThresholdMinutes: number) =>
    validateConfigObject({
      gateway: {
        channelHealthCheckMinutes: 5,
        channelStaleEventThresholdMinutes,
      },
    });

  const equal = validateWithThreshold(5);
  expect(equal.ok).toBe(true);

  const greater = validateWithThreshold(6);
  expect(greater.ok).toBe(true);
});
|
||||
|
||||
// No channelHealthCheckMinutes is supplied here, so the threshold of 4 is
// checked against whatever interval the schema assumes when the field is absent.
it("rejects stale thresholds shorter than the default health check interval", () => {
  const result = validateConfigObject({
    gateway: { channelStaleEventThresholdMinutes: 4 },
  });

  expect(result.ok).toBe(false);
  if (result.ok) {
    return;
  }
  expect(result.issues[0]?.path).toBe("gateway.channelStaleEventThresholdMinutes");
});
|
||||
});
|
||||
|
||||
describe("cron webhook schema", () => {
|
||||
|
|
|
|||
|
|
@ -102,6 +102,10 @@ export const FIELD_HELP: Record<string, string> = {
|
|||
"Explicit gateway-level tool denylist to block risky tools even if lower-level policies allow them. Use deny rules for emergency response and defense-in-depth hardening.",
|
||||
"gateway.channelHealthCheckMinutes":
|
||||
"Interval in minutes for automatic channel health probing and status updates. Use lower intervals for faster detection, or higher intervals to reduce periodic probe noise.",
|
||||
"gateway.channelStaleEventThresholdMinutes":
|
||||
"How many minutes a connected channel can go without receiving any event before the health monitor treats it as a stale socket and triggers a restart. Default: 30.",
|
||||
"gateway.channelMaxRestartsPerHour":
|
||||
"Maximum number of health-monitor-initiated channel restarts allowed within a rolling one-hour window. Once hit, further restarts are skipped until the window expires. Default: 10.",
|
||||
"gateway.tailscale":
|
||||
"Tailscale integration settings for Serve/Funnel exposure and lifecycle handling on gateway start/exit. Keep off unless your deployment intentionally relies on Tailscale ingress.",
|
||||
"gateway.tailscale.mode":
|
||||
|
|
|
|||
|
|
@ -84,6 +84,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
|||
"gateway.tools.allow": "Gateway Tool Allowlist",
|
||||
"gateway.tools.deny": "Gateway Tool Denylist",
|
||||
"gateway.channelHealthCheckMinutes": "Gateway Channel Health Check Interval (min)",
|
||||
"gateway.channelStaleEventThresholdMinutes": "Gateway Channel Stale Event Threshold (min)",
|
||||
"gateway.channelMaxRestartsPerHour": "Gateway Channel Max Restarts Per Hour",
|
||||
"gateway.tailscale": "Gateway Tailscale",
|
||||
"gateway.tailscale.mode": "Gateway Tailscale Mode",
|
||||
"gateway.tailscale.resetOnExit": "Gateway Tailscale Reset on Exit",
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@ import type {
|
|||
GroupPolicy,
|
||||
MarkdownConfig,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig } from "./types.messages.js";
|
||||
|
||||
export type CommonChannelMessagingConfig = {
|
||||
|
|
@ -43,6 +46,8 @@ export type CommonChannelMessagingConfig = {
|
|||
blockStreamingCoalesce?: BlockStreamingCoalesceConfig;
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/** Outbound response prefix override for this channel/account. */
|
||||
responsePrefix?: string;
|
||||
/** Max outbound media size in MB. */
|
||||
|
|
|
|||
|
|
@ -18,6 +18,14 @@ export type ChannelHeartbeatVisibilityConfig = {
|
|||
useIndicator?: boolean;
|
||||
};
|
||||
|
||||
/** Health-monitor settings that can be overridden on a channel or on a single account. */
export type ChannelHealthMonitorConfig = {
  /**
   * Whether channel-health-monitor restarts apply to this channel or account.
   * When omitted, the global gateway setting is inherited.
   */
  enabled?: boolean;
};
|
||||
|
||||
export type ChannelDefaultsConfig = {
|
||||
groupPolicy?: GroupPolicy;
|
||||
/** Default heartbeat visibility for all channels. */
|
||||
|
|
@ -39,6 +47,7 @@ export type ExtensionChannelConfig = {
|
|||
defaultAccount?: string;
|
||||
dmPolicy?: string;
|
||||
groupPolicy?: GroupPolicy;
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
accounts?: Record<string, unknown>;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -8,7 +8,10 @@ import type {
|
|||
OutboundRetryConfig,
|
||||
ReplyToMode,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
|
||||
import type { SecretInput } from "./types.secrets.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
|
|
@ -297,6 +300,8 @@ export type DiscordAccountConfig = {
|
|||
guilds?: Record<string, DiscordGuildEntry>;
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/** Exec approval forwarding configuration. */
|
||||
execApprovals?: DiscordExecApprovalConfig;
|
||||
/** Agent-controlled interactive components (buttons, select menus). */
|
||||
|
|
|
|||
|
|
@ -431,4 +431,16 @@ export type GatewayConfig = {
|
|||
* Set to 0 to disable. Default: 5.
|
||||
*/
|
||||
channelHealthCheckMinutes?: number;
|
||||
/**
|
||||
* Stale event threshold in minutes for the channel health monitor.
|
||||
* A connected channel that receives no events for this duration is treated
|
||||
* as a stale socket and restarted. Default: 30.
|
||||
*/
|
||||
channelStaleEventThresholdMinutes?: number;
|
||||
/**
|
||||
* Maximum number of health-monitor-initiated channel restarts per hour.
|
||||
* Once this limit is reached, the monitor skips further restarts until
|
||||
* the rolling window expires. Default: 10.
|
||||
*/
|
||||
channelMaxRestartsPerHour?: number;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import type {
|
|||
GroupPolicy,
|
||||
ReplyToMode,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHealthMonitorConfig } from "./types.channels.js";
|
||||
import type { DmConfig } from "./types.messages.js";
|
||||
import type { SecretRef } from "./types.secrets.js";
|
||||
|
||||
|
|
@ -99,6 +100,8 @@ export type GoogleChatAccountConfig = {
|
|||
/** Per-action tool gating (default: true for all). */
|
||||
actions?: GoogleChatActionConfig;
|
||||
dm?: GoogleChatDmConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/**
|
||||
* Typing indicator mode (default: "message").
|
||||
* - "none": No indicator
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@ import type {
|
|||
GroupPolicy,
|
||||
MarkdownConfig,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig } from "./types.messages.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
|
||||
|
|
@ -77,6 +80,8 @@ export type IMessageAccountConfig = {
|
|||
>;
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/** Outbound response prefix override for this channel/account. */
|
||||
responsePrefix?: string;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@ import type {
|
|||
GroupPolicy,
|
||||
MarkdownConfig,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig } from "./types.messages.js";
|
||||
import type { SecretInput } from "./types.secrets.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
|
|
@ -114,6 +117,8 @@ export type MSTeamsConfig = {
|
|||
sharePointSiteId?: string;
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/** Outbound response prefix override for this channel/account. */
|
||||
responsePrefix?: string;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -5,7 +5,10 @@ import type {
|
|||
MarkdownConfig,
|
||||
ReplyToMode,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
|
||||
|
|
@ -185,6 +188,8 @@ export type SlackAccountConfig = {
|
|||
channels?: Record<string, SlackChannelConfig>;
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/** Outbound response prefix override for this channel/account. */
|
||||
responsePrefix?: string;
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -8,7 +8,10 @@ import type {
|
|||
ReplyToMode,
|
||||
SessionThreadBindingsConfig,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
|
||||
|
|
@ -179,6 +182,8 @@ export type TelegramAccountConfig = {
|
|||
reactionLevel?: "off" | "ack" | "minimal" | "extensive";
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
/** Controls whether link previews are shown in outbound messages. Default: true. */
|
||||
linkPreview?: boolean;
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@ import type {
|
|||
GroupPolicy,
|
||||
MarkdownConfig,
|
||||
} from "./types.base.js";
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type {
|
||||
ChannelHealthMonitorConfig,
|
||||
ChannelHeartbeatVisibilityConfig,
|
||||
} from "./types.channels.js";
|
||||
import type { DmConfig } from "./types.messages.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
|
||||
|
|
@ -78,6 +81,8 @@ type WhatsAppSharedConfig = {
|
|||
debounceMs?: number;
|
||||
/** Heartbeat visibility settings. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Channel health monitor overrides for this channel/account. */
|
||||
healthMonitor?: ChannelHealthMonitorConfig;
|
||||
};
|
||||
|
||||
type WhatsAppConfigCore = {
|
||||
|
|
|
|||
|
|
@ -8,3 +8,10 @@ export const ChannelHeartbeatVisibilitySchema = z
|
|||
})
|
||||
.strict()
|
||||
.optional();
|
||||
|
||||
/**
 * Schema for a per-channel / per-account `healthMonitor` block.
 * The block itself is optional; when present it may only carry an optional
 * boolean `enabled`, and unknown keys are rejected by `.strict()`.
 */
export const ChannelHealthMonitorSchema = z
  .object({ enabled: z.boolean().optional() })
  .strict()
  .optional();
|
||||
|
|
|
|||
|
|
@ -13,7 +13,10 @@ import {
|
|||
resolveTelegramCustomCommands,
|
||||
} from "./telegram-custom-commands.js";
|
||||
import { ToolPolicySchema } from "./zod-schema.agent-runtime.js";
|
||||
import { ChannelHeartbeatVisibilitySchema } from "./zod-schema.channels.js";
|
||||
import {
|
||||
ChannelHealthMonitorSchema,
|
||||
ChannelHeartbeatVisibilitySchema,
|
||||
} from "./zod-schema.channels.js";
|
||||
import {
|
||||
BlockStreamingChunkSchema,
|
||||
BlockStreamingCoalesceSchema,
|
||||
|
|
@ -271,6 +274,7 @@ export const TelegramAccountSchemaBase = z
|
|||
reactionNotifications: z.enum(["off", "own", "all"]).optional(),
|
||||
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
linkPreview: z.boolean().optional(),
|
||||
responsePrefix: z.string().optional(),
|
||||
ackReaction: z.string().optional(),
|
||||
|
|
@ -511,6 +515,7 @@ export const DiscordAccountSchema = z
|
|||
dm: DiscordDmSchema.optional(),
|
||||
guilds: z.record(z.string(), DiscordGuildSchema.optional()).optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
execApprovals: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
|
|
@ -782,6 +787,7 @@ export const GoogleChatAccountSchema = z
|
|||
.strict()
|
||||
.optional(),
|
||||
dm: GoogleChatDmSchema.optional(),
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
typingIndicator: z.enum(["none", "message", "reaction"]).optional(),
|
||||
responsePrefix: z.string().optional(),
|
||||
})
|
||||
|
|
@ -898,6 +904,7 @@ export const SlackAccountSchema = z
|
|||
dm: SlackDmSchema.optional(),
|
||||
channels: z.record(z.string(), SlackChannelSchema.optional()).optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
responsePrefix: z.string().optional(),
|
||||
ackReaction: z.string().optional(),
|
||||
typingReaction: z.string().optional(),
|
||||
|
|
@ -1032,6 +1039,7 @@ export const SignalAccountSchemaBase = z
|
|||
.optional(),
|
||||
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
responsePrefix: z.string().optional(),
|
||||
})
|
||||
.strict();
|
||||
|
|
@ -1145,6 +1153,7 @@ export const IrcAccountSchemaBase = z
|
|||
blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
|
||||
mediaMaxMb: z.number().positive().optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
responsePrefix: z.string().optional(),
|
||||
})
|
||||
.strict();
|
||||
|
|
@ -1272,6 +1281,7 @@ export const IMessageAccountSchemaBase = z
|
|||
)
|
||||
.optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
responsePrefix: z.string().optional(),
|
||||
})
|
||||
.strict();
|
||||
|
|
@ -1383,6 +1393,7 @@ export const BlueBubblesAccountSchemaBase = z
|
|||
blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(),
|
||||
groups: z.record(z.string(), BlueBubblesGroupConfigSchema.optional()).optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
responsePrefix: z.string().optional(),
|
||||
})
|
||||
.strict();
|
||||
|
|
@ -1499,6 +1510,7 @@ export const MSTeamsConfigSchema = z
|
|||
/** SharePoint site ID for file uploads in group chats/channels (e.g., "contoso.sharepoint.com,guid1,guid2") */
|
||||
sharePointSiteId: z.string().optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
responsePrefix: z.string().optional(),
|
||||
})
|
||||
.strict()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
import { z } from "zod";
|
||||
import { ToolPolicySchema } from "./zod-schema.agent-runtime.js";
|
||||
import { ChannelHeartbeatVisibilitySchema } from "./zod-schema.channels.js";
|
||||
import {
|
||||
ChannelHealthMonitorSchema,
|
||||
ChannelHeartbeatVisibilitySchema,
|
||||
} from "./zod-schema.channels.js";
|
||||
import {
|
||||
BlockStreamingCoalesceSchema,
|
||||
DmConfigSchema,
|
||||
|
|
@ -56,6 +59,7 @@ const WhatsAppSharedSchema = z.object({
|
|||
ackReaction: WhatsAppAckReactionSchema,
|
||||
debounceMs: z.number().int().nonnegative().optional().default(0),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
healthMonitor: ChannelHealthMonitorSchema,
|
||||
});
|
||||
|
||||
function enforceOpenDmPolicyAllowFromStar(params: {
|
||||
|
|
|
|||
|
|
@ -696,6 +696,8 @@ export const OpenClawSchema = z
|
|||
.strict()
|
||||
.optional(),
|
||||
channelHealthCheckMinutes: z.number().int().min(0).optional(),
|
||||
channelStaleEventThresholdMinutes: z.number().int().min(1).optional(),
|
||||
channelMaxRestartsPerHour: z.number().int().min(1).optional(),
|
||||
tailscale: z
|
||||
.object({
|
||||
mode: z.union([z.literal("off"), z.literal("serve"), z.literal("funnel")]).optional(),
|
||||
|
|
@ -833,6 +835,21 @@ export const OpenClawSchema = z
|
|||
.optional(),
|
||||
})
|
||||
.strict()
|
||||
.superRefine((gateway, ctx) => {
|
||||
const effectiveHealthCheckMinutes = gateway.channelHealthCheckMinutes ?? 5;
|
||||
if (
|
||||
gateway.channelStaleEventThresholdMinutes != null &&
|
||||
effectiveHealthCheckMinutes !== 0 &&
|
||||
gateway.channelStaleEventThresholdMinutes < effectiveHealthCheckMinutes
|
||||
) {
|
||||
ctx.addIssue({
|
||||
code: z.ZodIssueCode.custom,
|
||||
path: ["channelStaleEventThresholdMinutes"],
|
||||
message:
|
||||
"channelStaleEventThresholdMinutes should be >= channelHealthCheckMinutes to avoid delayed stale detection",
|
||||
});
|
||||
}
|
||||
})
|
||||
.optional(),
|
||||
memory: MemorySchema,
|
||||
skills: z
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ function createMockChannelManager(overrides?: Partial<ChannelManager>): ChannelM
|
|||
startChannel: vi.fn(async () => {}),
|
||||
stopChannel: vi.fn(async () => {}),
|
||||
markChannelLoggedOut: vi.fn(),
|
||||
isHealthMonitorEnabled: vi.fn(() => true),
|
||||
isManuallyStopped: vi.fn(() => false),
|
||||
resetRestartAttempts: vi.fn(),
|
||||
...overrides,
|
||||
|
|
@ -226,6 +227,53 @@ describe("channel-health-monitor", () => {
|
|||
await expectNoStart(manager);
|
||||
});
|
||||
|
||||
// A stopped-but-enabled account would normally be a restart candidate; with the
// manager reporting the health monitor as disabled, no start may be attempted.
it("skips channels with health monitor disabled globally for that account", async () => {
  const isHealthMonitorEnabled = vi.fn(() => false);
  const manager = createSnapshotManager(
    { discord: { default: { running: false, enabled: true, configured: true } } },
    { isHealthMonitorEnabled },
  );

  await expectNoStart(manager);
});
|
||||
|
||||
// Two accounts on the same channel are running but not connected. Only the
// "quiet" account opts out of monitoring, so only "default" may be restarted.
it("still restarts enabled accounts when another account on the same channel is disabled", async () => {
  const now = Date.now();
  const stuckAccount = () => ({
    running: true,
    connected: false,
    enabled: true,
    configured: true,
    lastStartAt: now - 300_000,
  });
  const isHealthMonitorEnabled = vi.fn((channelId: ChannelId, accountId: string) => {
    const isQuietDiscord = channelId === "discord" && accountId === "quiet";
    return !isQuietDiscord;
  });
  const manager = createSnapshotManager(
    { discord: { default: stuckAccount(), quiet: stuckAccount() } },
    { isHealthMonitorEnabled },
  );

  const monitor = await startAndRunCheck(manager);

  expect(manager.stopChannel).toHaveBeenCalledWith("discord", "default");
  expect(manager.startChannel).toHaveBeenCalledWith("discord", "default");
  expect(manager.stopChannel).not.toHaveBeenCalledWith("discord", "quiet");
  expect(manager.startChannel).not.toHaveBeenCalledWith("discord", "quiet");
  monitor.stop();
});
|
||||
|
||||
it("restarts a stuck channel (running but not connected)", async () => {
|
||||
const now = Date.now();
|
||||
const manager = createSnapshotManager({
|
||||
|
|
|
|||
|
|
@ -118,6 +118,9 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
|
|||
if (!status) {
|
||||
continue;
|
||||
}
|
||||
if (!channelManager.isHealthMonitorEnabled(channelId as ChannelId, accountId)) {
|
||||
continue;
|
||||
}
|
||||
if (channelManager.isManuallyStopped(channelId as ChannelId, accountId)) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,16 @@ const BASE_RELOAD_RULES: ReloadRule[] = [
|
|||
kind: "hot",
|
||||
actions: ["restart-health-monitor"],
|
||||
},
|
||||
{
|
||||
prefix: "gateway.channelStaleEventThresholdMinutes",
|
||||
kind: "hot",
|
||||
actions: ["restart-health-monitor"],
|
||||
},
|
||||
{
|
||||
prefix: "gateway.channelMaxRestartsPerHour",
|
||||
kind: "hot",
|
||||
actions: ["restart-health-monitor"],
|
||||
},
|
||||
// Stuck-session warning threshold is read by the diagnostics heartbeat loop.
|
||||
{ prefix: "diagnostics.stuckSessionWarnMs", kind: "none" },
|
||||
{ prefix: "hooks.gmail", kind: "hot", actions: ["restart-gmail-watcher"] },
|
||||
|
|
|
|||
|
|
@ -44,12 +44,13 @@ function createTestPlugin(params?: {
|
|||
account?: TestAccount;
|
||||
startAccount?: NonNullable<ChannelPlugin<TestAccount>["gateway"]>["startAccount"];
|
||||
includeDescribeAccount?: boolean;
|
||||
resolveAccount?: ChannelPlugin<TestAccount>["config"]["resolveAccount"];
|
||||
}): ChannelPlugin<TestAccount> {
|
||||
const account = params?.account ?? { enabled: true, configured: true };
|
||||
const includeDescribeAccount = params?.includeDescribeAccount !== false;
|
||||
const config: ChannelPlugin<TestAccount>["config"] = {
|
||||
listAccountIds: () => [DEFAULT_ACCOUNT_ID],
|
||||
resolveAccount: () => account,
|
||||
resolveAccount: params?.resolveAccount ?? (() => account),
|
||||
isEnabled: (resolved) => resolved.enabled !== false,
|
||||
};
|
||||
if (includeDescribeAccount) {
|
||||
|
|
@ -88,13 +89,16 @@ function installTestRegistry(plugin: ChannelPlugin<TestAccount>) {
|
|||
setActivePluginRegistry(registry);
|
||||
}
|
||||
|
||||
function createManager(options?: { channelRuntime?: PluginRuntime["channel"] }) {
|
||||
function createManager(options?: {
|
||||
channelRuntime?: PluginRuntime["channel"];
|
||||
loadConfig?: () => Record<string, unknown>;
|
||||
}) {
|
||||
const log = createSubsystemLogger("gateway/server-channels-test");
|
||||
const channelLogs = { discord: log } as Record<ChannelId, SubsystemLogger>;
|
||||
const runtime = runtimeForLogger(log);
|
||||
const channelRuntimeEnvs = { discord: runtime } as Record<ChannelId, RuntimeEnv>;
|
||||
return createChannelManager({
|
||||
loadConfig: () => ({}),
|
||||
loadConfig: () => options?.loadConfig?.() ?? {},
|
||||
channelLogs,
|
||||
channelRuntimeEnvs,
|
||||
...(options?.channelRuntime ? { channelRuntime: options.channelRuntime } : {}),
|
||||
|
|
@ -180,4 +184,104 @@ describe("server-channels auto restart", () => {
|
|||
await manager.startChannels();
|
||||
expect(startAccount).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// The config keys the account as "Router D" while the lookup uses "router-d".
// The override is only found if the manager goes through the plugin's own
// resolveAccount, which normalises ids (lower-case, spaces to dashes).
it("reuses plugin account resolution for health monitor overrides", () => {
  type OverridableAccount = TestAccount & { healthMonitor?: { enabled?: boolean } };
  type DiscordAccountsShape = {
    channels?: {
      discord?: {
        accounts?: Record<string, OverridableAccount>;
      };
    };
  };
  const normalizeId = (id: string) => id.toLowerCase().replaceAll(" ", "-");

  installTestRegistry(
    createTestPlugin({
      resolveAccount: (cfg, accountId) => {
        const accounts = (cfg as DiscordAccountsShape).channels?.discord?.accounts;
        if (!accounts) {
          return { enabled: true, configured: true };
        }

        // An exact key match wins before any normalisation is attempted.
        const direct = accounts[accountId ?? DEFAULT_ACCOUNT_ID];
        if (direct) {
          return direct;
        }

        const normalized = normalizeId(accountId ?? DEFAULT_ACCOUNT_ID);
        const matchKey = Object.keys(accounts).find((key) => normalizeId(key) === normalized);
        if (!matchKey) {
          return {};
        }
        return accounts[matchKey] ?? { enabled: true, configured: true };
      },
    }),
  );

  const manager = createManager({
    loadConfig: () => ({
      channels: {
        discord: {
          accounts: {
            "Router D": {
              enabled: true,
              configured: true,
              healthMonitor: { enabled: false },
            },
          },
        },
      },
    }),
  });

  expect(manager.isHealthMonitorEnabled("discord", "router-d")).toBe(false);
});
|
||||
|
||||
// The resolved account carries no healthMonitor block at all, so the
// channel-level `healthMonitor.enabled: false` has to decide the outcome.
it("falls back to channel-level health monitor overrides when account resolution omits them", () => {
  const plugin = createTestPlugin({
    resolveAccount: () => ({ enabled: true, configured: true }),
  });
  installTestRegistry(plugin);

  const manager = createManager({
    loadConfig: () => ({
      channels: { discord: { healthMonitor: { enabled: false } } },
    }),
  });

  expect(manager.isHealthMonitorEnabled("discord", DEFAULT_ACCOUNT_ID)).toBe(false);
});
|
||||
|
||||
// The resolved account nests its override under `config.healthMonitor`. That
// nested `enabled: false` must win over the channel-level `enabled: true`.
it("uses wrapped account config health monitor overrides", () => {
  const plugin = createTestPlugin({
    resolveAccount: () => ({
      enabled: true,
      configured: true,
      config: { healthMonitor: { enabled: false } },
    }),
  });
  installTestRegistry(plugin);

  const manager = createManager({
    loadConfig: () => ({
      channels: { discord: { healthMonitor: { enabled: true } } },
    }),
  });

  expect(manager.isHealthMonitorEnabled("discord", DEFAULT_ACCOUNT_ID)).toBe(false);
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ export type ChannelManager = {
|
|||
markChannelLoggedOut: (channelId: ChannelId, cleared: boolean, accountId?: string) => void;
|
||||
isManuallyStopped: (channelId: ChannelId, accountId: string) => boolean;
|
||||
resetRestartAttempts: (channelId: ChannelId, accountId: string) => void;
|
||||
isHealthMonitorEnabled: (channelId: ChannelId, accountId: string) => boolean;
|
||||
};
|
||||
|
||||
// Channel docking: lifecycle hooks (`plugin.gateway`) flow through this manager.
|
||||
|
|
@ -119,6 +120,48 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
|||
|
||||
const restartKey = (channelId: ChannelId, accountId: string) => `${channelId}:${accountId}`;
|
||||
|
||||
/**
 * Decides whether the channel health monitor applies to one account of a channel.
 *
 * The first boolean found wins, checked in this order:
 *   1. `healthMonitor.enabled` on the account returned by the plugin's `resolveAccount`
 *   2. `config.healthMonitor.enabled` on that same resolved account
 *   3. `channels[channelId].healthMonitor.enabled` in the loaded config
 * Non-boolean values are ignored. With no boolean override anywhere the result is `true`.
 *
 * @param channelId - Channel whose plugin and config entry are consulted.
 * @param accountId - Account id handed to the plugin's `resolveAccount`.
 * @returns `true` when the monitor applies to the account, `false` when an override disables it.
 */
const isHealthMonitorEnabled = (channelId: ChannelId, accountId: string): boolean => {
  type HealthMonitorToggle = { enabled?: boolean };

  const cfg = loadConfig();
  // When no plugin is registered for the channel, the account lookup yields undefined.
  const account = getChannelPlugin(channelId)?.config.resolveAccount(cfg, accountId) as
    | {
        healthMonitor?: HealthMonitorToggle;
        config?: { healthMonitor?: HealthMonitorToggle };
      }
    | undefined;
  const channel = cfg.channels?.[channelId] as
    | { healthMonitor?: HealthMonitorToggle }
    | undefined;

  // Ordered from most specific (account) to least specific (channel).
  const overrides = [
    account?.healthMonitor?.enabled,
    account?.config?.healthMonitor?.enabled,
    channel?.healthMonitor?.enabled,
  ];
  for (const override of overrides) {
    if (typeof override === "boolean") {
      return override;
    }
  }

  return true;
};
|
||||
|
||||
const getStore = (channelId: ChannelId): ChannelRuntimeStore => {
|
||||
const existing = channelStores.get(channelId);
|
||||
if (existing) {
|
||||
|
|
@ -453,5 +496,6 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
|||
markChannelLoggedOut,
|
||||
isManuallyStopped: isManuallyStopped_,
|
||||
resetRestartAttempts: resetRestartAttempts_,
|
||||
isHealthMonitorEnabled,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,11 @@ export function createGatewayReloadHandlers(params: {
|
|||
logChannels: { info: (msg: string) => void; error: (msg: string) => void };
|
||||
logCron: { error: (msg: string) => void };
|
||||
logReload: { info: (msg: string) => void; warn: (msg: string) => void };
|
||||
createHealthMonitor: (checkIntervalMs: number) => ChannelHealthMonitor;
|
||||
createHealthMonitor: (opts: {
|
||||
checkIntervalMs: number;
|
||||
staleEventThresholdMs?: number;
|
||||
maxRestartsPerHour?: number;
|
||||
}) => ChannelHealthMonitor;
|
||||
}) {
|
||||
const applyHotReload = async (
|
||||
plan: GatewayReloadPlan,
|
||||
|
|
@ -101,8 +105,17 @@ export function createGatewayReloadHandlers(params: {
|
|||
if (plan.restartHealthMonitor) {
|
||||
state.channelHealthMonitor?.stop();
|
||||
const minutes = nextConfig.gateway?.channelHealthCheckMinutes;
|
||||
const staleMinutes = nextConfig.gateway?.channelStaleEventThresholdMinutes;
|
||||
nextState.channelHealthMonitor =
|
||||
minutes === 0 ? null : params.createHealthMonitor((minutes ?? 5) * 60_000);
|
||||
minutes === 0
|
||||
? null
|
||||
: params.createHealthMonitor({
|
||||
checkIntervalMs: (minutes ?? 5) * 60_000,
|
||||
...(staleMinutes != null && { staleEventThresholdMs: staleMinutes * 60_000 }),
|
||||
...(nextConfig.gateway?.channelMaxRestartsPerHour != null && {
|
||||
maxRestartsPerHour: nextConfig.gateway.channelMaxRestartsPerHour,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
if (plan.restartGmailWatcher) {
|
||||
|
|
|
|||
|
|
@ -757,11 +757,17 @@ export async function startGatewayServer(
|
|||
|
||||
const healthCheckMinutes = cfgAtStart.gateway?.channelHealthCheckMinutes;
|
||||
const healthCheckDisabled = healthCheckMinutes === 0;
|
||||
const staleEventThresholdMinutes = cfgAtStart.gateway?.channelStaleEventThresholdMinutes;
|
||||
const maxRestartsPerHour = cfgAtStart.gateway?.channelMaxRestartsPerHour;
|
||||
let channelHealthMonitor = healthCheckDisabled
|
||||
? null
|
||||
: startChannelHealthMonitor({
|
||||
channelManager,
|
||||
checkIntervalMs: (healthCheckMinutes ?? 5) * 60_000,
|
||||
...(staleEventThresholdMinutes != null && {
|
||||
staleEventThresholdMs: staleEventThresholdMinutes * 60_000,
|
||||
}),
|
||||
...(maxRestartsPerHour != null && { maxRestartsPerHour }),
|
||||
});
|
||||
|
||||
if (!minimalTestGateway) {
|
||||
|
|
@ -980,8 +986,21 @@ export async function startGatewayServer(
|
|||
logChannels,
|
||||
logCron,
|
||||
logReload,
|
||||
createHealthMonitor: (checkIntervalMs: number) =>
|
||||
startChannelHealthMonitor({ channelManager, checkIntervalMs }),
|
||||
createHealthMonitor: (opts: {
|
||||
checkIntervalMs: number;
|
||||
staleEventThresholdMs?: number;
|
||||
maxRestartsPerHour?: number;
|
||||
}) =>
|
||||
startChannelHealthMonitor({
|
||||
channelManager,
|
||||
checkIntervalMs: opts.checkIntervalMs,
|
||||
...(opts.staleEventThresholdMs != null && {
|
||||
staleEventThresholdMs: opts.staleEventThresholdMs,
|
||||
}),
|
||||
...(opts.maxRestartsPerHour != null && {
|
||||
maxRestartsPerHour: opts.maxRestartsPerHour,
|
||||
}),
|
||||
}),
|
||||
});
|
||||
|
||||
return startGatewayConfigReloader({
|
||||
|
|
|
|||
|
|
@ -109,6 +109,9 @@ const hoisted = vi.hoisted(() => {
|
|||
startChannel: vi.fn(async () => {}),
|
||||
stopChannel: vi.fn(async () => {}),
|
||||
markChannelLoggedOut: vi.fn(),
|
||||
isHealthMonitorEnabled: vi.fn(() => true),
|
||||
isManuallyStopped: vi.fn(() => false),
|
||||
resetRestartAttempts: vi.fn(),
|
||||
};
|
||||
|
||||
const createChannelManager = vi.fn(() => providerManager);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ function createManager(snapshot: ChannelRuntimeSnapshot): ChannelManager {
|
|||
startChannel: vi.fn(),
|
||||
stopChannel: vi.fn(),
|
||||
markChannelLoggedOut: vi.fn(),
|
||||
isHealthMonitorEnabled: vi.fn(() => true),
|
||||
isManuallyStopped: vi.fn(() => false),
|
||||
resetRestartAttempts: vi.fn(),
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue