diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 68a843469cc..c2ba6b22e43 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -27,6 +27,10 @@ import { isTimeoutErrorMessage, matchesFormatErrorPattern, } from "./failover-matches.js"; +import { + classifyProviderSpecificError, + matchesProviderContextOverflow, +} from "./provider-error-patterns.js"; import type { FailoverReason } from "./types.js"; export { @@ -235,7 +239,9 @@ export function isContextOverflowError(errorMessage?: string): boolean { errorMessage.includes("上下文超出") || errorMessage.includes("上下文长度超") || errorMessage.includes("超出最大上下文") || - errorMessage.includes("请压缩上下文") + errorMessage.includes("请压缩上下文") || + // Provider-specific patterns (Bedrock, Azure, Ollama, Mistral, Cohere, etc.) + matchesProviderContextOverflow(errorMessage) ); } @@ -1090,6 +1096,11 @@ export function classifyFailoverReason(raw: string): FailoverReason | null { if (isTimeoutErrorMessage(raw)) { return "timeout"; } + // Provider-specific patterns as a final catch (Bedrock, Groq, Together AI, etc.) 
+ const providerSpecific = classifyProviderSpecificError(raw); + if (providerSpecific) { + return providerSpecific; + } return null; } diff --git a/src/agents/pi-embedded-helpers/provider-error-patterns.test.ts b/src/agents/pi-embedded-helpers/provider-error-patterns.test.ts new file mode 100644 index 00000000000..ca4db4dfd19 --- /dev/null +++ b/src/agents/pi-embedded-helpers/provider-error-patterns.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from "vitest"; +import { classifyFailoverReason, isContextOverflowError } from "./errors.js"; +import { + classifyProviderSpecificError, + matchesProviderContextOverflow, +} from "./provider-error-patterns.js"; + +describe("matchesProviderContextOverflow", () => { + it.each([ + // AWS Bedrock + "ValidationException: The input is too long for the model", + "ValidationException: Input token count exceeds the maximum number of input tokens", + "ModelStreamErrorException: Input is too long for this model", + + // Google Vertex + "INVALID_ARGUMENT: input exceeds the maximum number of tokens", + + // Ollama + "ollama error: context length exceeded, too many tokens", + + // Mistral + "mistral: input is too long for this model", + + // Cohere + "total tokens exceeds the model's maximum limit of 4096", + + // Generic + "input is too long for model gpt-5.4", + ])("matches provider-specific overflow: %s", (msg) => { + expect(matchesProviderContextOverflow(msg)).toBe(true); + }); + + it("does not match unrelated errors", () => { + expect(matchesProviderContextOverflow("rate limit exceeded")).toBe(false); + expect(matchesProviderContextOverflow("invalid api key")).toBe(false); + expect(matchesProviderContextOverflow("internal server error")).toBe(false); + }); +}); + +describe("classifyProviderSpecificError", () => { + it("classifies Bedrock ThrottlingException as rate_limit", () => { + expect(classifyProviderSpecificError("ThrottlingException: Too many requests")).toBe( + "rate_limit", + ); + }); + + it("classifies Bedrock 
ModelNotReadyException as overloaded", () => { + expect(classifyProviderSpecificError("ModelNotReadyException: model is not ready")).toBe( + "overloaded", + ); + }); + + it("classifies Groq model_deactivated as model_not_found", () => { + expect(classifyProviderSpecificError("model_is_deactivated")).toBe("model_not_found"); + }); + + it("classifies concurrency limit as rate_limit", () => { + expect(classifyProviderSpecificError("concurrency limit has been reached")).toBe("rate_limit"); + expect(classifyProviderSpecificError("concurrency limit reached")).toBe("rate_limit"); + }); + + it("does not match generic 'model is not ready' without Bedrock prefix", () => { + expect(classifyProviderSpecificError("model is not ready")).toBeNull(); + }); + + it("returns null for unmatched errors", () => { + expect(classifyProviderSpecificError("some random error")).toBeNull(); + }); +}); + +describe("isContextOverflowError with provider patterns", () => { + it("detects Bedrock ValidationException as context overflow", () => { + expect(isContextOverflowError("ValidationException: The input is too long for the model")).toBe( + true, + ); + }); + + it("detects Ollama context overflow", () => { + expect(isContextOverflowError("ollama error: context length exceeded")).toBe(true); + }); + + it("still detects standard context overflow patterns", () => { + expect(isContextOverflowError("context length exceeded")).toBe(true); + expect(isContextOverflowError("prompt is too long: 150000 tokens > 128000 maximum")).toBe(true); + }); +}); + +describe("classifyFailoverReason with provider patterns", () => { + it("classifies Bedrock ThrottlingException via provider patterns", () => { + expect(classifyFailoverReason("ThrottlingException: Too many concurrent requests")).toBe( + "rate_limit", + ); + }); + + it("classifies Groq model_deactivated via provider patterns", () => { + expect(classifyFailoverReason("model_is_deactivated: this model has been deactivated")).toBe( + "model_not_found", + ); + 
}); +}); diff --git a/src/agents/pi-embedded-helpers/provider-error-patterns.ts b/src/agents/pi-embedded-helpers/provider-error-patterns.ts new file mode 100644 index 00000000000..391d727c717 --- /dev/null +++ b/src/agents/pi-embedded-helpers/provider-error-patterns.ts @@ -0,0 +1,111 @@ +/** + * Provider-specific error patterns that improve failover classification accuracy. + * + * Many providers return errors in non-standard formats. Without these patterns, + * errors get misclassified (e.g., a context overflow classified as "format"), + * causing the failover engine to choose wrong recovery strategies. + */ + +import type { FailoverReason } from "./types.js"; + +type ProviderErrorPattern = { + /** Regex to match against the raw error message. */ + test: RegExp; + /** The failover reason this pattern maps to. */ + reason: FailoverReason; +}; + +/** + * Provider-specific context overflow patterns not covered by the generic + * `isContextOverflowError()` in errors.ts. Called from `isContextOverflowError()` + * to catch provider-specific wording that the generic regex misses. + */ +export const PROVIDER_CONTEXT_OVERFLOW_PATTERNS: readonly RegExp[] = [ + // AWS Bedrock + /ValidationException.*(?:input is too long|max input token|input token.*exceed)/i, + /ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)/i, + /ModelStreamErrorException.*(?:Input is too long|too many input tokens)/i, + + // Azure OpenAI (sometimes wraps OpenAI errors differently) + /content_filter.*(?:prompt|input).*(?:too long|exceed)/i, + + // Ollama / local models + /\bollama\b.*(?:context length|too many tokens|context window)/i, + /\btruncating input\b.*\btoo long\b/i, + + // Mistral + /\bmistral\b.*(?:input.*too long|token limit.*exceeded)/i, + + // Cohere + /\btotal tokens?.*exceeds? (?:the )?(?:model(?:'s)? 
)?(?:max|maximum|limit)/i, + + // DeepSeek + /\bdeepseek\b.*(?:input.*too long|context.*exceed)/i, + + // Google Vertex / Gemini: INVALID_ARGUMENT with token-related messages is context overflow. + /INVALID_ARGUMENT.*(?:exceeds? the (?:maximum|max)|input.*too (?:long|large))/i, + + // Generic "input too long" pattern that isn't covered by existing checks + /\binput (?:is )?too long for (?:the )?model\b/i, +]; + +/** + * Provider-specific patterns that map to specific failover reasons. + * These handle cases where the generic classifiers in failover-matches.ts + * produce wrong results for specific providers. + */ +export const PROVIDER_SPECIFIC_PATTERNS: readonly ProviderErrorPattern[] = [ + // AWS Bedrock: ThrottlingException is rate limit + { + test: /ThrottlingException|Too many concurrent requests/i, + reason: "rate_limit", + }, + + // AWS Bedrock: ModelNotReadyException (require class prefix to avoid false positives) + { + test: /ModelNotReadyException/i, + reason: "overloaded", + }, + + // Azure: content_policy_violation intentionally has NO entry here; it is a + // content-moderation rejection (not a transient error), so it must never trigger failover. + + // Groq: model_deactivated is permanent + { + test: /model(?:_is)?_deactivated|model has been deactivated/i, + reason: "model_not_found", + }, + + // Together AI / Fireworks: specific rate limit messages + { + test: /\bconcurrency limit\b.*\breached\b/i, + reason: "rate_limit", + }, + + // Cloudflare Workers AI + { + test: /\bworkers?_ai\b.*\b(?:rate|limit|quota)\b/i, + reason: "rate_limit", + }, +]; + +/** + * Check if an error message matches any provider-specific context overflow pattern. + * Called from `isContextOverflowError()` to catch provider-specific wording. + */ +export function matchesProviderContextOverflow(errorMessage: string): boolean { + return PROVIDER_CONTEXT_OVERFLOW_PATTERNS.some((pattern) => pattern.test(errorMessage)); +} + +/** + * Try to classify an error using provider-specific patterns. 
+ * Returns null if no provider-specific pattern matches (fall through to generic classification). + */ +export function classifyProviderSpecificError(errorMessage: string): FailoverReason | null { + for (const pattern of PROVIDER_SPECIFIC_PATTERNS) { + if (pattern.test.test(errorMessage)) { + return pattern.reason; + } + } + return null; +}