openclaw/extensions/guardian/guardian-client.ts

import { completeSimple } from "@mariozechner/pi-ai";
import type { Api, Model, TextContent, ThinkingContent } from "@mariozechner/pi-ai";
import type { GuardianDecision, ResolvedGuardianModel } from "./types.js";

/**
 * Optional logger interface for debug logging.
 * When provided, the guardian client will log detailed information about
 * the request, response, and timing of each guardian LLM call.
 */
export type GuardianLogger = {
  info: (msg: string) => void;
  warn: (msg: string) => void;
};

/**
 * Parameters for a guardian LLM call.
 */
export type GuardianCallParams = {
  /** Resolved model info (baseUrl, apiKey, modelId, api type) */
  model: ResolvedGuardianModel;
  /** System prompt */
  systemPrompt: string;
  /** User prompt (tool call review request) */
  userPrompt: string;
  /** Timeout in ms */
  timeoutMs: number;
  /** Fallback policy on error */
  fallbackOnError: "allow" | "block";
  /** Optional logger for debug output */
  logger?: GuardianLogger;
};

// ---------------------------------------------------------------------------
// Model conversion — ResolvedGuardianModel → pi-ai Model<Api>
// ---------------------------------------------------------------------------

/**
 * Convert a ResolvedGuardianModel to pi-ai's Model<Api> type.
 *
 * The guardian only needs short text responses, so we use sensible defaults
 * for fields like reasoning, cost, contextWindow, etc.
 */
function toModelSpec(resolved: ResolvedGuardianModel): Model<Api> {
  return {
    id: resolved.modelId,
    name: resolved.modelId,
    api: (resolved.api || "openai-completions") as Api,
    provider: resolved.provider,
    baseUrl: resolved.baseUrl ?? "",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128_000,
    maxTokens: 4096,
    headers: resolved.headers,
  };
}

// ---------------------------------------------------------------------------
// Main entry point
// ---------------------------------------------------------------------------

/**
 * Call the guardian LLM to review a tool call.
 *
 * Uses pi-ai's `completeSimple()` to call the model — the same SDK-level
 * HTTP stack that the main OpenClaw agent uses. This ensures consistent
 * behavior (User-Agent headers, auth handling, retry logic, etc.) across
 * all providers.
 *
 * On any error (network, timeout, parse), returns the configured fallback decision.
 */
export async function callGuardian(params: GuardianCallParams): Promise<GuardianDecision> {
  const { model, systemPrompt, userPrompt, timeoutMs, fallbackOnError, logger } = params;
  const fallback = makeFallbackDecision(fallbackOnError);

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  const startTime = Date.now();
  const api = model.api || "openai-completions";

  // Log the request details
  if (logger) {
    logger.info(
      `[guardian] ▶ Calling guardian LLM: provider=${model.provider}, model=${model.modelId}, ` +
        `api=${api}, baseUrl=${model.baseUrl}, timeout=${timeoutMs}ms`,
    );
    logger.info(
      `[guardian]   Prompt (user): ${userPrompt.slice(0, 500)}${userPrompt.length > 500 ? "..." : ""}`,
    );
  }

  try {
    const modelSpec = toModelSpec(model);

    const res = await completeSimple(
      modelSpec,
      {
        systemPrompt,
        messages: [
          {
            role: "user" as const,
            content: userPrompt,
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: model.apiKey,
        maxTokens: 150,
        temperature: 0,
        signal: controller.signal,
      },
    );

    // Race condition guard: the abort signal may have fired just as
    // completeSimple() returned, producing empty/truncated content instead
    // of throwing. Detect this and treat as a proper timeout.
    if (controller.signal.aborted) {
      const elapsed = Date.now() - startTime;
      const decision = {
        ...fallback,
        reason: `Guardian timed out after ${timeoutMs}ms: ${fallback.reason || "fallback"}`,
      };
      if (logger) {
        logger.warn(
          `[guardian] ◀ Guardian TIMED OUT after ${elapsed}ms (abort race) — fallback=${fallback.action}`,
        );
      }
      return decision;
    }

    // Extract text content from AssistantMessage.
    // Some reasoning models (e.g. kimi-coding) return thinking blocks
    // instead of text blocks — fall back to those if no text found.
    const content = extractResponseText(res.content, logger);

    const result = parseGuardianResponse(content, fallback);

    const elapsed = Date.now() - startTime;
    if (logger) {
      logger.info(
        `[guardian] ◀ Guardian responded in ${elapsed}ms: action=${result.action.toUpperCase()}` +
          `${result.reason ? `, reason="${result.reason}"` : ""}`,
      );
    }

    return result;
  } catch (err) {
    const elapsed = Date.now() - startTime;
    const errMsg = err instanceof Error ? err.message : String(err);

    if (errMsg.includes("abort") || controller.signal.aborted) {
      const decision = {
        ...fallback,
        reason: `Guardian timed out after ${timeoutMs}ms: ${fallback.reason || "fallback"}`,
      };
      if (logger) {
        logger.warn(
          `[guardian] ◀ Guardian TIMED OUT after ${elapsed}ms — fallback=${fallback.action}`,
        );
      }
      return decision;
    }

    const decision = {
      ...fallback,
      reason: `Guardian error: ${errMsg}: ${fallback.reason || "fallback"}`,
    };
    if (logger) {
      logger.warn(
        `[guardian] ◀ Guardian ERROR after ${elapsed}ms: ${errMsg} — fallback=${fallback.action}`,
      );
    }
    return decision;
  } finally {
    clearTimeout(timeoutId);
  }
}

// ---------------------------------------------------------------------------
// Shared helpers
// ---------------------------------------------------------------------------

/**
 * Extract text from an assistant response's content blocks.
 *
 * Primary: `text` blocks (standard response format).
 * Fallback: `thinking` blocks — some reasoning models (e.g. kimi-coding)
 * return their answer in thinking blocks instead of text blocks.
 *
 * Logs block types when the response is empty or falls back to thinking,
 * to aid debugging provider-specific behavior.
 */
function extractResponseText(
  contentBlocks: (TextContent | ThinkingContent | { type: string })[],
  logger?: GuardianLogger,
): string {
  // Try text blocks first (preferred)
  const textContent = contentBlocks
    .filter((block): block is TextContent => block.type === "text")
    .map((block) => block.text.trim())
    .filter(Boolean)
    .join(" ")
    .trim();

  if (textContent) {
    return textContent;
  }

  // Fallback: extract from thinking blocks (reasoning models)
  const thinkingContent = contentBlocks
    .filter((block): block is ThinkingContent => block.type === "thinking")
    .map((block) => block.thinking.trim())
    .filter(Boolean)
    .join(" ")
    .trim();

  if (thinkingContent) {
    if (logger) {
      logger.info(`[guardian] No text blocks in response — extracted from thinking blocks instead`);
    }
    return thinkingContent;
  }

  // Neither text nor thinking blocks had content
  if (logger) {
    const types = contentBlocks.map((b) => b.type).join(", ");
    logger.warn(`[guardian] Empty response — block types received: [${types || "none"}]`);
  }
  return "";
}

/**
 * Parse the guardian LLM's response text into a decision.
 *
 * Scans from the FIRST line forward to find the verdict. The prompt strictly
 * requires a single-line response starting with ALLOW or BLOCK, so the first
 * matching line is the intended verdict.
 *
 * Forward scanning is also more secure: if an attacker embeds "ALLOW: ..."
 * in tool arguments and the model echoes it, it would appear AFTER the
 * model's own verdict. Scanning forward ensures the model's output takes
 * priority over any attacker-injected text.
 */
function parseGuardianResponse(content: string, fallback: GuardianDecision): GuardianDecision {
  const lines = content.split("\n");

  for (const rawLine of lines) {
    const line = rawLine.trim();
    if (!line) continue;
    const upper = line.toUpperCase();

    // Require a delimiter after ALLOW/BLOCK to avoid matching words like
    // "ALLOWING" or "BLOCKED" which are not valid verdicts.
    if (upper === "ALLOW" || upper.startsWith("ALLOW:") || upper.startsWith("ALLOW ")) {
      const colonIndex = line.indexOf(":");
      const reason = colonIndex >= 0 ? line.slice(colonIndex + 1).trim() : line.slice(5).trim();
      return { action: "allow", reason: reason || undefined };
    }

    if (upper === "BLOCK" || upper.startsWith("BLOCK:") || upper.startsWith("BLOCK ")) {
      const colonIndex = line.indexOf(":");
      const reason = colonIndex >= 0 ? line.slice(colonIndex + 1).trim() : line.slice(5).trim();
      return { action: "block", reason: reason || "Blocked by guardian" };
    }
  }

  return {
    ...fallback,
    reason: `Guardian response not recognized ("${content.trim().slice(0, 60)}"): ${fallback.reason || "fallback"}`,
  };
}

/** Build the fallback decision from config. */
function makeFallbackDecision(fallbackPolicy: "allow" | "block"): GuardianDecision {
  if (fallbackPolicy === "block") {
    return { action: "block", reason: "Guardian unavailable (fallback: block)" };
  }
  return { action: "allow", reason: "Guardian unavailable (fallback: allow)" };
}

// ---------------------------------------------------------------------------
// Raw text completion — used for summary generation
// ---------------------------------------------------------------------------

/**
 * Parameters for a raw text completion call.
 */
export type TextCallParams = {
  model: ResolvedGuardianModel;
  systemPrompt: string;
  userPrompt: string;
  timeoutMs: number;
  logger?: GuardianLogger;
};

/**
 * Call the guardian's LLM and return raw text output.
 *
 * Unlike `callGuardian()`, this does NOT parse ALLOW/BLOCK — it returns
 * the raw text response. Used for summary generation.
 *
 * Returns undefined on error/timeout.
 */
export async function callForText(params: TextCallParams): Promise<string | undefined> {
  const { model, systemPrompt, userPrompt, timeoutMs, logger } = params;

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const modelSpec = toModelSpec(model);

    const res = await completeSimple(
      modelSpec,
      {
        systemPrompt,
        messages: [
          {
            role: "user" as const,
            content: userPrompt,
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: model.apiKey,
        maxTokens: 200,
        temperature: 0,
        signal: controller.signal,
      },
    );

    // Abort race guard (same as callGuardian)
    if (controller.signal.aborted) {
      if (logger) {
        logger.warn(`[guardian] Summary call timed out after ${timeoutMs}ms (abort race)`);
      }
      return undefined;
    }

    const content = extractResponseText(res.content, logger);

    if (logger) {
      logger.info(
        `[guardian] Summary response: "${content.slice(0, 200)}${content.length > 200 ? "..." : ""}"`,
      );
    }

    return content || undefined;
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    if (logger) {
      logger.warn(`[guardian] Summary call failed: ${errMsg}`);
    }
    return undefined;
  } finally {
    clearTimeout(timeoutId);
  }
}