fix(mattermost): address review feedback for DM retry logic

- Fix timeoutMs: pass AbortSignal to createMattermostDirectChannel and fetch
- Fix isRetryableError false positives: check for explicit 4xx status codes
  before falling back to network pattern matching
- Fix onRetry callback: preserve user's callback while adding logging
- Add Zod schema refinement: validate initialDelayMs <= maxDelayMs
- Add tests for 4xx false positive cases and AbortSignal propagation
This commit is contained in:
Jonathan Jing 2026-03-10 10:37:23 -07:00 committed by Muhammed Mukhthar CM
parent 1fb94b5460
commit be0e870d94
4 changed files with 109 additions and 9 deletions

View File

@ -21,6 +21,18 @@ const DmChannelRetrySchema = z
timeoutMs: z.number().int().min(5000).max(120000).optional(),
})
.strict()
.refine(
(data) => {
if (data.initialDelayMs !== undefined && data.maxDelayMs !== undefined) {
return data.initialDelayMs <= data.maxDelayMs;
}
return true;
},
{
message: "initialDelayMs must be less than or equal to maxDelayMs",
path: ["initialDelayMs"],
},
)
.optional();
const MattermostSlashCommandsSchema = z

View File

@ -278,4 +278,70 @@ describe("createMattermostDirectChannelWithRetry", () => {
expect(delay).toBeLessThanOrEqual(2500);
});
});
it("does not retry on 4xx errors even if message contains retryable keywords", async () => {
// This tests the fix for false positives where a 400 error with "timeout" in the message
// would incorrectly be retried
mockFetch.mockResolvedValueOnce({
ok: false,
status: 400,
headers: new Headers({ "content-type": "application/json" }),
json: async () => ({ message: "Request timeout: connection timed out" }),
text: async () => "Request timeout: connection timed out",
} as Response);
const client = createMockClient();
await expect(
createMattermostDirectChannelWithRetry(client, ["user-1", "user-2"], {
maxRetries: 3,
initialDelayMs: 10,
}),
).rejects.toThrow("400");
// Should not retry - only called once
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it("does not retry on 403 Forbidden even with 'abort' in message", async () => {
mockFetch.mockResolvedValueOnce({
ok: false,
status: 403,
headers: new Headers({ "content-type": "application/json" }),
json: async () => ({ message: "Request aborted: forbidden" }),
text: async () => "Request aborted: forbidden",
} as Response);
const client = createMockClient();
await expect(
createMattermostDirectChannelWithRetry(client, ["user-1", "user-2"], {
maxRetries: 3,
initialDelayMs: 10,
}),
).rejects.toThrow("403");
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it("passes AbortSignal to fetch for timeout support", async () => {
let capturedSignal: AbortSignal | undefined;
mockFetch.mockImplementationOnce((url, init) => {
capturedSignal = init?.signal;
return Promise.resolve({
ok: true,
status: 201,
headers: new Headers({ "content-type": "application/json" }),
json: async () => ({ id: "dm-channel-signal" }),
} as Response);
});
const client = createMockClient();
await createMattermostDirectChannelWithRetry(client, ["user-1", "user-2"], {
timeoutMs: 5000,
});
expect(capturedSignal).toBeDefined();
expect(capturedSignal).toBeInstanceOf(AbortSignal);
});
});

View File

@ -168,10 +168,12 @@ export async function sendMattermostTyping(
export async function createMattermostDirectChannel(
client: MattermostClient,
userIds: string[],
signal?: AbortSignal,
): Promise<MattermostChannel> {
return await client.request<MattermostChannel>("/channels/direct", {
method: "POST",
body: JSON.stringify(userIds),
signal,
});
}
@ -215,7 +217,7 @@ export async function createMattermostDirectChannelWithRetry(
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
try {
const result = await createMattermostDirectChannel(client, userIds);
const result = await createMattermostDirectChannel(client, userIds, controller.signal);
return result;
} finally {
clearTimeout(timeoutId);
@ -259,12 +261,29 @@ function isRetryableError(error: Error): boolean {
return true;
}
// Check for explicit 4xx status codes - these are client errors and should NOT be retried
// (except 429 which is handled above)
const clientErrorMatch = message.match(/\b4\d{2}\b/);
if (clientErrorMatch) {
const statusCode = parseInt(clientErrorMatch[0], 10);
if (statusCode >= 400 && statusCode < 500 && statusCode !== 429) {
return false;
}
}
// Retry on 5xx server errors
if (/\b5\d{2}\b/.test(message)) {
return true;
}
// Retry on network/transient errors
// Retry on network/transient errors only if no explicit HTTP status code is present
// This avoids false positives like "400 Bad Request: connection timed out"
const hasExplicitStatusCode = /\b\d{3}\b/.test(message);
if (hasExplicitStatusCode) {
return false;
}
// Retry on network/transient errors (no explicit HTTP status code in message)
const retryablePatterns = [
"network error",
"timeout",

View File

@ -224,13 +224,16 @@ async function resolveTargetChannelId(params: ResolveTargetChannelIdParams): Pro
const channel = await createMattermostDirectChannelWithRetry(client, [botUser.id, userId], {
...params.dmRetryOptions,
onRetry: params.logger
? (attempt, delayMs, error) => {
params.logger?.warn?.(
`DM channel creation retry ${attempt} after ${delayMs}ms: ${error.message}`,
);
}
: undefined,
onRetry: (attempt, delayMs, error) => {
// Call user's onRetry if provided
params.dmRetryOptions?.onRetry?.(attempt, delayMs, error);
// Log if verbose mode is enabled
if (params.logger) {
params.logger.warn?.(
`DM channel creation retry ${attempt} after ${delayMs}ms: ${error.message}`,
);
}
},
});
dmChannelCache.set(dmKey, channel.id);
return channel.id;