This commit is contained in:
Joseph Krug 2026-03-15 18:51:18 -04:00 committed by GitHub
commit 32cbd365c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 2647 additions and 117 deletions

View File

@ -5,7 +5,6 @@ import pathlib
import re
import sys
INPUT_INTERPOLATION_RE = re.compile(r"\$\{\{\s*inputs\.")
RUN_LINE_RE = re.compile(r"^(\s*)run:\s*(.*)$")
USING_COMPOSITE_RE = re.compile(r"^\s*using:\s*composite\s*$", re.MULTILINE)

View File

@ -9,12 +9,12 @@ from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import date, datetime, timedelta
from typing import Any, Dict, Iterable, List, Optional, Tuple
from typing import Any
def positive_int(value: str) -> int:
@ -31,7 +31,7 @@ def eprint(msg: str) -> None:
print(msg, file=sys.stderr)
def run_codexbar_cost(provider: str) -> List[Dict[str, Any]]:
def run_codexbar_cost(provider: str) -> list[dict[str, Any]]:
cmd = ["codexbar", "cost", "--format", "json", "--provider", provider]
try:
output = subprocess.check_output(cmd, text=True)
@ -48,12 +48,12 @@ def run_codexbar_cost(provider: str) -> List[Dict[str, Any]]:
return payload
def load_payload(input_path: Optional[str], provider: str) -> Dict[str, Any]:
def load_payload(input_path: str | None, provider: str) -> dict[str, Any]:
if input_path:
if input_path == "-":
raw = sys.stdin.read()
else:
with open(input_path, "r", encoding="utf-8") as handle:
with open(input_path, encoding="utf-8") as handle:
raw = handle.read()
data = json.loads(raw)
else:
@ -77,7 +77,7 @@ class ModelCost:
cost: float
def parse_daily_entries(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
def parse_daily_entries(payload: dict[str, Any]) -> list[dict[str, Any]]:
daily = payload.get("daily")
if not daily:
return []
@ -86,18 +86,18 @@ def parse_daily_entries(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
return [entry for entry in daily if isinstance(entry, dict)]
def parse_date(value: str) -> Optional[date]:
def parse_date(value: str) -> date | None:
try:
return datetime.strptime(value, "%Y-%m-%d").date()
except Exception:
return None
def filter_by_days(entries: List[Dict[str, Any]], days: Optional[int]) -> List[Dict[str, Any]]:
def filter_by_days(entries: list[dict[str, Any]], days: int | None) -> list[dict[str, Any]]:
if not days:
return entries
cutoff = date.today() - timedelta(days=days - 1)
filtered: List[Dict[str, Any]] = []
filtered: list[dict[str, Any]] = []
for entry in entries:
day = entry.get("date")
if not isinstance(day, str):
@ -108,8 +108,8 @@ def filter_by_days(entries: List[Dict[str, Any]], days: Optional[int]) -> List[D
return filtered
def aggregate_costs(entries: Iterable[Dict[str, Any]]) -> Dict[str, float]:
totals: Dict[str, float] = {}
def aggregate_costs(entries: Iterable[dict[str, Any]]) -> dict[str, float]:
totals: dict[str, float] = {}
for entry in entries:
breakdowns = entry.get("modelBreakdowns")
if not breakdowns:
@ -129,7 +129,7 @@ def aggregate_costs(entries: Iterable[Dict[str, Any]]) -> Dict[str, float]:
return totals
def pick_current_model(entries: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
def pick_current_model(entries: list[dict[str, Any]]) -> tuple[str | None, str | None]:
if not entries:
return None, None
sorted_entries = sorted(
@ -139,7 +139,7 @@ def pick_current_model(entries: List[Dict[str, Any]]) -> Tuple[Optional[str], Op
for entry in reversed(sorted_entries):
breakdowns = entry.get("modelBreakdowns")
if isinstance(breakdowns, list) and breakdowns:
scored: List[ModelCost] = []
scored: list[ModelCost] = []
for item in breakdowns:
if not isinstance(item, dict):
continue
@ -158,13 +158,13 @@ def pick_current_model(entries: List[Dict[str, Any]]) -> Tuple[Optional[str], Op
return None, None
def usd(value: Optional[float]) -> str:
def usd(value: float | None) -> str:
if value is None:
return ""
return f"${value:,.2f}"
def latest_day_cost(entries: List[Dict[str, Any]], model: str) -> Tuple[Optional[str], Optional[float]]:
def latest_day_cost(entries: list[dict[str, Any]], model: str) -> tuple[str | None, float | None]:
if not entries:
return None, None
sorted_entries = sorted(
@ -188,10 +188,10 @@ def latest_day_cost(entries: List[Dict[str, Any]], model: str) -> Tuple[Optional
def render_text_current(
provider: str,
model: str,
latest_date: Optional[str],
total_cost: Optional[float],
latest_cost: Optional[float],
latest_cost_date: Optional[str],
latest_date: str | None,
total_cost: float | None,
latest_cost: float | None,
latest_cost_date: str | None,
entry_count: int,
) -> str:
lines = [f"Provider: {provider}", f"Current model: {model}"]
@ -204,7 +204,7 @@ def render_text_current(
return "\n".join(lines)
def render_text_all(provider: str, totals: Dict[str, float]) -> str:
def render_text_all(provider: str, totals: dict[str, float]) -> str:
lines = [f"Provider: {provider}", "Models:"]
for model, cost in sorted(totals.items(), key=lambda item: item[1], reverse=True):
lines.append(f"- {model}: {usd(cost)}")
@ -214,12 +214,12 @@ def render_text_all(provider: str, totals: Dict[str, float]) -> str:
def build_json_current(
provider: str,
model: str,
latest_date: Optional[str],
total_cost: Optional[float],
latest_cost: Optional[float],
latest_cost_date: Optional[str],
latest_date: str | None,
total_cost: float | None,
latest_cost: float | None,
latest_cost_date: str | None,
entry_count: int,
) -> Dict[str, Any]:
) -> dict[str, Any]:
return {
"provider": provider,
"mode": "current",
@ -232,7 +232,7 @@ def build_json_current(
}
def build_json_all(provider: str, totals: Dict[str, float]) -> Dict[str, Any]:
def build_json_all(provider: str, totals: dict[str, float]) -> dict[str, Any]:
return {
"provider": provider,
"mode": "all",

View File

@ -70,42 +70,32 @@ def choose_output_resolution(
def main():
parser = argparse.ArgumentParser(
description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)"
)
parser = argparse.ArgumentParser(description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)")
parser.add_argument("--prompt", "-p", required=True, help="Image description/prompt")
parser.add_argument("--filename", "-f", required=True, help="Output filename (e.g., sunset-mountains.png)")
parser.add_argument(
"--prompt", "-p",
required=True,
help="Image description/prompt"
)
parser.add_argument(
"--filename", "-f",
required=True,
help="Output filename (e.g., sunset-mountains.png)"
)
parser.add_argument(
"--input-image", "-i",
"--input-image",
"-i",
action="append",
dest="input_images",
metavar="IMAGE",
help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)."
help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images).",
)
parser.add_argument(
"--resolution", "-r",
"--resolution",
"-r",
choices=["1K", "2K", "4K"],
default=None,
help="Output resolution: 1K, 2K, or 4K. If omitted with input images, auto-detect from largest image dimension."
help="Output resolution: 1K, 2K, or 4K. If omitted with input images, auto-detect from largest image dimension.",
)
parser.add_argument(
"--aspect-ratio", "-a",
"--aspect-ratio",
"-a",
choices=SUPPORTED_ASPECT_RATIOS,
default=None,
help=f"Output aspect ratio (default: model decides). Options: {', '.join(SUPPORTED_ASPECT_RATIOS)}"
)
parser.add_argument(
"--api-key", "-k",
help="Gemini API key (overrides GEMINI_API_KEY env var)"
help=f"Output aspect ratio (default: model decides). Options: {', '.join(SUPPORTED_ASPECT_RATIOS)}",
)
parser.add_argument("--api-key", "-k", help="Gemini API key (overrides GEMINI_API_KEY env var)")
args = parser.parse_args()
@ -158,10 +148,7 @@ def main():
has_input_images=bool(input_images),
)
if auto_detected:
print(
f"Auto-detected resolution: {output_resolution} "
f"(from max input dimension {max_input_dim})"
)
print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})")
# Build contents (images first if editing, prompt only if generating)
if input_images:
@ -182,9 +169,8 @@ def main():
model="gemini-3-pro-image-preview",
contents=contents,
config=types.GenerateContentConfig(
response_modalities=["TEXT", "IMAGE"],
image_config=types.ImageConfig(**image_cfg_kwargs)
)
response_modalities=["TEXT", "IMAGE"], image_config=types.ImageConfig(**image_cfg_kwargs)
),
)
# Process response and convert to PNG
@ -201,19 +187,20 @@ def main():
if isinstance(image_data, str):
# If it's a string, it might be base64
import base64
image_data = base64.b64decode(image_data)
image = PILImage.open(BytesIO(image_data))
# Ensure RGB mode for PNG (convert RGBA to RGB with white background if needed)
if image.mode == 'RGBA':
rgb_image = PILImage.new('RGB', image.size, (255, 255, 255))
if image.mode == "RGBA":
rgb_image = PILImage.new("RGB", image.size, (255, 255, 255))
rgb_image.paste(image, mask=image.split()[3])
rgb_image.save(str(output_path), 'PNG')
elif image.mode == 'RGB':
image.save(str(output_path), 'PNG')
rgb_image.save(str(output_path), "PNG")
elif image.mode == "RGB":
image.save(str(output_path), "PNG")
else:
image.convert('RGB').save(str(output_path), 'PNG')
image.convert("RGB").save(str(output_path), "PNG")
image_saved = True
if image_saved:

View File

@ -58,9 +58,7 @@ def pick_prompts(count: int) -> list[str]:
]
prompts: list[str] = []
for _ in range(count):
prompts.append(
f"{random.choice(styles)} of {random.choice(subjects)}, {random.choice(lighting)}"
)
prompts.append(f"{random.choice(styles)} of {random.choice(subjects)}, {random.choice(lighting)}")
return prompts
@ -100,9 +98,7 @@ def normalize_optional_flag(
value = aliases.get(value, value)
if value not in allowed:
raise ValueError(
f"Invalid --{flag_name} '{raw_value}'. Allowed values: {allowed_text}."
)
raise ValueError(f"Invalid --{flag_name} '{raw_value}'. Allowed values: {allowed_text}.")
return value
@ -115,10 +111,7 @@ def normalize_background(model: str, background: str) -> str:
supported=lambda candidate: candidate.startswith("gpt-image"),
allowed={"transparent", "opaque", "auto"},
allowed_text="transparent, opaque, auto",
unsupported_message=(
"Warning: --background is only supported for gpt-image models; "
"ignoring for '{model}'."
),
unsupported_message=("Warning: --background is only supported for gpt-image models; ignoring for '{model}'."),
)
@ -131,9 +124,7 @@ def normalize_style(model: str, style: str) -> str:
supported=lambda candidate: candidate == "dall-e-3",
allowed={"vivid", "natural"},
allowed_text="vivid, natural",
unsupported_message=(
"Warning: --style is only supported for dall-e-3; ignoring for '{model}'."
),
unsupported_message=("Warning: --style is only supported for dall-e-3; ignoring for '{model}'."),
)
@ -147,8 +138,7 @@ def normalize_output_format(model: str, output_format: str) -> str:
allowed={"png", "jpeg", "webp"},
allowed_text="png, jpeg, webp",
unsupported_message=(
"Warning: --output-format is only supported for gpt-image models; "
"ignoring for '{model}'."
"Warning: --output-format is only supported for gpt-image models; ignoring for '{model}'."
),
aliases={"jpg": "jpeg"},
)
@ -245,9 +235,15 @@ def main() -> int:
ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
ap.add_argument("--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified.")
ap.add_argument("--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified.")
ap.add_argument("--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto.")
ap.add_argument(
"--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified."
)
ap.add_argument(
"--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified."
)
ap.add_argument(
"--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto."
)
ap.add_argument("--output-format", default="", help="Output format (GPT models only): png, jpeg, or webp.")
ap.add_argument("--style", default="", help="Image style (dall-e-3 only): vivid or natural.")
ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
@ -265,7 +261,10 @@ def main() -> int:
count = args.count
if args.model == "dall-e-3" and count > 1:
print(f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.", file=sys.stderr)
print(
f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.",
file=sys.stderr,
)
count = 1
out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()

View File

@ -82,6 +82,8 @@ def test_normalize_output_format_normalizes_case_for_supported_values():
def test_normalize_output_format_strips_whitespace_for_supported_values():
assert normalize_output_format("gpt-image-1", " png ") == "png"
def test_normalize_output_format_keeps_supported_values():
assert normalize_output_format("gpt-image-1", "png") == "png"
assert normalize_output_format("gpt-image-1", "jpeg") == "jpeg"

View File

@ -6,7 +6,6 @@ Quick validation script for skills - minimal version
import re
import sys
from pathlib import Path
from typing import Optional
try:
import yaml
@ -16,7 +15,7 @@ except ModuleNotFoundError:
MAX_SKILL_NAME_LENGTH = 64
def _extract_frontmatter(content: str) -> Optional[str]:
def _extract_frontmatter(content: str) -> str | None:
lines = content.splitlines()
if not lines or lines[0].strip() != "---":
return None
@ -26,13 +25,13 @@ def _extract_frontmatter(content: str) -> Optional[str]:
return None
def _parse_simple_frontmatter(frontmatter_text: str) -> Optional[dict[str, str]]:
def _parse_simple_frontmatter(frontmatter_text: str) -> dict[str, str] | None:
"""
Minimal fallback parser used when PyYAML is unavailable.
Supports simple `key: value` mappings used by SKILL.md frontmatter.
"""
parsed: dict[str, str] = {}
current_key: Optional[str] = None
current_key: str | None = None
for raw_line in frontmatter_text.splitlines():
stripped = raw_line.strip()
if not stripped or stripped.startswith("#"):
@ -43,9 +42,7 @@ def _parse_simple_frontmatter(frontmatter_text: str) -> Optional[dict[str, str]]
if current_key is None:
return None
current_value = parsed[current_key]
parsed[current_key] = (
f"{current_value}\n{stripped}" if current_value else stripped
)
parsed[current_key] = f"{current_value}\n{stripped}" if current_value else stripped
continue
if ":" not in stripped:
@ -55,9 +52,7 @@ def _parse_simple_frontmatter(frontmatter_text: str) -> Optional[dict[str, str]]
value = value.strip()
if not key:
return None
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
value = value[1:-1]
parsed[key] = value
current_key = key
@ -129,8 +124,7 @@ def validate_skill(skill_path):
if len(name) > MAX_SKILL_NAME_LENGTH:
return (
False,
f"Name is too long ({len(name)} characters). "
f"Maximum is {MAX_SKILL_NAME_LENGTH} characters.",
f"Name is too long ({len(name)} characters). Maximum is {MAX_SKILL_NAME_LENGTH} characters.",
)
description = frontmatter.get("description", "")

View File

@ -0,0 +1,121 @@
import { describe, expect, it } from "vitest";
import {
_findLastOccurrenceOutsideFileBlocks as findLastOccurrenceOutsideFileBlocks,
_normalizeUpdatedBody as normalizeUpdatedBody,
_rebuildQueuedPromptWithMediaUnderstanding as rebuildQueuedPromptWithMediaUnderstanding,
} from "./followup-media.js";
// Fixture: one complete <file …>…</file> attachment block, reused by the suites in this file.
const FILE_BLOCK = '<file name="doc.pdf" type="application/pdf">\nPDF content\n</file>';
describe("findLastOccurrenceOutsideFileBlocks", () => {
it("returns -1 for empty search", () => {
expect(findLastOccurrenceOutsideFileBlocks("hello", "")).toBe(-1);
});
it("finds last occurrence in body region before file blocks", () => {
const value = `hello world hello\n${FILE_BLOCK}`;
// "hello" appears at 0 and 12 — both before the file block
expect(findLastOccurrenceOutsideFileBlocks(value, "hello")).toBe(12);
});
it("skips matches inside file block content", () => {
// "PDF content" appears only inside the file block — no valid match outside.
const value = `some text\n${FILE_BLOCK}`;
expect(findLastOccurrenceOutsideFileBlocks(value, "PDF content")).toBe(-1);
});
it("finds trailing occurrence outside file block even when also inside one", () => {
const value = `some text\n${FILE_BLOCK}\nPDF content`;
// "PDF content" appears inside the file block AND after it — the function
// should return the trailing occurrence that is outside the block.
const expected = value.lastIndexOf("PDF content");
expect(findLastOccurrenceOutsideFileBlocks(value, "PDF content")).toBe(expected);
});
it("finds occurrence when search itself contains file blocks", () => {
const bodyWithFile = `caption\n${FILE_BLOCK}`;
const value = `previous\n${bodyWithFile}\nlater\n${bodyWithFile}`;
// Should find the *last* (trailing) occurrence
const expected = value.lastIndexOf(bodyWithFile);
expect(findLastOccurrenceOutsideFileBlocks(value, bodyWithFile)).toBe(expected);
expect(expected).toBeGreaterThan(value.indexOf(bodyWithFile));
});
it("returns index when no file blocks exist in value", () => {
expect(findLastOccurrenceOutsideFileBlocks("abc abc", "abc")).toBe(4);
});
it("finds body text after thread-history file blocks", () => {
const value = `Thread history\n${FILE_BLOCK}\n\ncheck this out`;
// The body "check this out" appears after a file block from thread history.
// The old truncation approach would miss this; the new approach finds it.
expect(findLastOccurrenceOutsideFileBlocks(value, "check this out")).toBe(
value.lastIndexOf("check this out"),
);
});
});
describe("normalizeUpdatedBody", () => {
it("returns empty string when updatedBody is empty", () => {
expect(normalizeUpdatedBody({ originalBody: "foo", updatedBody: "" })).toBe("");
});
it("returns updatedBody when originalBody is empty", () => {
expect(normalizeUpdatedBody({ updatedBody: "hello" })).toBe("hello");
});
it("strips directives when updatedBody equals originalBody", () => {
const body = "/think high tell me a joke";
const result = normalizeUpdatedBody({ originalBody: body, updatedBody: body });
expect(result).toBe("tell me a joke");
});
it("does not corrupt file block content during directive cleanup", () => {
const originalBody = "/think high tell me about this file";
// updatedBody has the original body plus a file block appended by media processing
const updatedBody = `${originalBody}\n${FILE_BLOCK}`;
const result = normalizeUpdatedBody({ originalBody, updatedBody });
// The directive should be stripped from the body portion, file block preserved
expect(result).toContain("tell me about this file");
expect(result).toContain(FILE_BLOCK);
expect(result).not.toContain("/think");
});
it("replaces in body region, not inside file blocks", () => {
const originalBody = "PDF content";
const updatedBody = `PDF content\n<file name="doc.pdf" type="application/pdf">\nPDF content\n</file>`;
// The replacement should target the body region "PDF content" before the
// file block, not the "PDF content" inside the <file> block.
const result = normalizeUpdatedBody({ originalBody, updatedBody });
// With no directives to strip, original === cleaned, updatedBody !== originalBody
// because updatedBody has the file block appended. The replacement targets the
// body-region occurrence.
expect(result).toContain('<file name="doc.pdf"');
expect(result).toContain("PDF content\n</file>");
});
});
describe("rebuildQueuedPromptWithMediaUnderstanding", () => {
it("replaces original body with updated body in prompt", () => {
const result = rebuildQueuedPromptWithMediaUnderstanding({
prompt: "thread context\nhello world",
originalBody: "hello world",
updatedBody: 'hello world\n<file name="a.pdf">data</file>',
});
expect(result).toContain('<file name="a.pdf">data</file>');
expect(result).toContain("thread context");
});
it("preserves file blocks in thread history when body is replaced", () => {
const prompt = `history\n<file name="old.pdf">old</file>\nhello world`;
const result = rebuildQueuedPromptWithMediaUnderstanding({
prompt,
originalBody: "hello world",
updatedBody: "hello world transcribed",
});
// The old file block from history should be preserved since updatedBody
// has no file blocks of its own.
expect(result).toContain('<file name="old.pdf">old</file>');
expect(result).toContain("hello world transcribed");
});
});

View File

@ -0,0 +1,423 @@
import path from "node:path";
import { logVerbose } from "../../globals.js";
import { applyMediaUnderstanding } from "../../media-understanding/apply.js";
import {
normalizeAttachments,
resolveAttachmentKind,
} from "../../media-understanding/attachments.js";
import { buildInboundMediaNote } from "../media-note.js";
import type { MsgContext } from "../templating.js";
import { parseInlineDirectives } from "./directive-handling.js";
import type { FollowupMediaContext, FollowupRun } from "./queue/types.js";
// Placeholder body used when a message carried media but no caption text.
const MEDIA_ONLY_PLACEHOLDER = "[User sent media without caption]";
// First line of the reply hint removed by stripLeadingMediaReplyHint below.
const MEDIA_REPLY_HINT_PREFIX = "To send an image back, prefer the message tool";
// One "[media attached: …]" marker line, optionally indexed "[media attached i/N: …]".
const LEADING_MEDIA_ATTACHED_LINE_RE = /^\[media attached(?: \d+\/\d+)?: [^\r\n]*\]$/;
// Cheap containment probe: does the text contain the opening of a <file name="…"> tag?
const FILE_BLOCK_RE = /<file\s+name="/i;
// Matches one complete <file …>…</file> block (non-greedy body) plus an optional trailing newline.
const FILE_BLOCK_FULL_RE = /<file\s+name="[^"]*"[^>]*>[\s\S]*?<\/file>\n?/gi;
/** Remove every complete `<file …>…</file>` block (and its trailing newline) and trim the rest. */
function stripExistingFileBlocks(text: string): string {
  const withoutFileBlocks = text.replace(FILE_BLOCK_FULL_RE, "");
  return withoutFileBlocks.trim();
}
/** Drop the run of "[media attached…]" marker lines at the top of the prompt, then trim. */
function stripLeadingMediaAttachedLines(prompt: string): string {
  const lines = prompt.split("\n");
  let firstKept = 0;
  while (
    firstKept < lines.length &&
    LEADING_MEDIA_ATTACHED_LINE_RE.test((lines[firstKept] ?? "").trim())
  ) {
    firstKept += 1;
  }
  return lines.slice(firstKept).join("\n").trim();
}
/** Remove the media-reply hint when it occupies the very first line of the prompt. */
function stripLeadingMediaReplyHint(prompt: string): string {
  const [firstLine = "", ...remainder] = prompt.split("\n");
  if (firstLine.startsWith(MEDIA_REPLY_HINT_PREFIX)) {
    return remainder.join("\n").trim();
  }
  return prompt.trim();
}
/** Collect the [start, end) ranges of every `<file …>…</file>` block in `value`. */
function collectFileBlockRanges(value: string): Array<[number, number]> {
  // Scan with a fresh RegExp so iteration never disturbs the shared pattern's lastIndex.
  const scanner = new RegExp(FILE_BLOCK_FULL_RE.source, FILE_BLOCK_FULL_RE.flags);
  const ranges: Array<[number, number]> = [];
  for (const match of value.matchAll(scanner)) {
    const start = match.index ?? 0;
    ranges.push([start, start + match[0].length]);
  }
  return ranges;
}
/** True when the match at `position` (of `length` chars) is fully contained in one range. */
function isInsideFileBlock(
  position: number,
  length: number,
  ranges: Array<[number, number]>,
): boolean {
  const matchEnd = position + length;
  return ranges.some(([start, end]) => position >= start && matchEnd <= end);
}
/**
 * Find the last occurrence of `search` in `value` that is NOT inside a
 * `<file …>…</file>` block. Searches the full string with lastIndexOf,
 * then walks backward past any matches that fall inside file blocks.
 *
 * Returns -1 when `search` is empty or every occurrence lies inside a block.
 */
function findLastOccurrenceOutsideFileBlocks(value: string, search: string): number {
  if (!search) {
    return -1;
  }
  const ranges = collectFileBlockRanges(value);
  let pos = value.lastIndexOf(search);
  while (pos >= 0 && isInsideFileBlock(pos, search.length, ranges)) {
    // Bug fix: `lastIndexOf(search, -1)` clamps the fromIndex to 0 and would
    // re-find the match at index 0 forever when the earliest occurrence starts
    // at position 0 inside a file block. Bail out explicitly instead.
    if (pos === 0) {
      return -1;
    }
    pos = value.lastIndexOf(search, pos - 1);
  }
  return pos;
}
/**
 * Splice `replacement` over the trailing occurrence of `search` that sits
 * outside any `<file …>…</file>` block. Returns undefined when `search` is
 * empty or no such occurrence exists, signalling "no safe replacement".
 */
function replaceLastOccurrenceOutsideFileBlocks(
  value: string,
  search: string,
  replacement: string,
): string | undefined {
  if (!search) {
    return undefined;
  }
  const at = findLastOccurrenceOutsideFileBlocks(value, search);
  if (at < 0) {
    return undefined;
  }
  const head = value.slice(0, at);
  const tail = value.slice(at + search.length);
  return head + replacement + tail;
}
/**
 * Of all candidate `targets` that occur outside file blocks in `value`, pick
 * the one whose (last outside-block) occurrence starts latest in the string.
 * Returns undefined when none of the targets occur outside a file block.
 */
function findTrailingReplacementTargetBeforeFileBlocks(
  value: string,
  targets: string[],
): { index: number; target: string } | undefined {
  let best: { index: number; target: string } | undefined;
  for (const candidate of targets) {
    const at = findLastOccurrenceOutsideFileBlocks(value, candidate);
    if (at >= 0 && (best === undefined || at > best.index)) {
      best = { index: at, target: candidate };
    }
  }
  return best;
}
/** Splice `replacement` over the `search.length` characters starting at `index`. */
function replaceOccurrenceAtIndex(
  value: string,
  search: string,
  replacement: string,
  index: number,
): string {
  const before = value.slice(0, index);
  const after = value.slice(index + search.length);
  return before + replacement + after;
}
/**
 * Decode the five predefined XML entities in an attribute value. `&amp;` is
 * decoded last so double-encoded input like `&amp;lt;` yields the literal
 * text `&lt;` rather than `<`.
 */
function decodeXmlAttr(value: string): string {
  const entities: Array<[RegExp, string]> = [
    [/&quot;/g, '"'],
    [/&apos;/g, "'"],
    [/&lt;/g, "<"],
    [/&gt;/g, ">"],
    [/&amp;/g, "&"],
  ];
  let decoded = value;
  for (const [pattern, replacement] of entities) {
    decoded = decoded.replace(pattern, replacement);
  }
  return decoded;
}
/**
 * Derive a bare file name from an attachment reference, which may be a URL
 * or a filesystem path (either slash style). Returns undefined when nothing
 * usable remains after trimming.
 */
function extractAttachmentFileName(value?: string): string | undefined {
  const candidate = value?.trim();
  if (!candidate) {
    return undefined;
  }
  // Anything with a URI scheme prefix is parsed as a URL first.
  const looksLikeUri = /^[a-zA-Z][a-zA-Z\d+.-]*:/.test(candidate);
  if (looksLikeUri) {
    try {
      const name = path.posix.basename(new URL(candidate).pathname);
      return name || undefined;
    } catch {
      // Not a parseable URL — treat it as a plain path below.
    }
  }
  const slashed = candidate.replace(/\\/g, "/");
  return path.posix.basename(slashed) || undefined;
}
/**
 * True when the body already embeds a `<file name="…">` block whose decoded
 * name matches the file name of one of the message's attachments — evidence
 * that file extraction has already been applied to this body.
 */
function bodyContainsMatchingFileBlock(mediaContext: FollowupMediaContext): boolean {
  const body = mediaContext.Body?.trim();
  if (!body || !FILE_BLOCK_RE.test(body)) {
    return false;
  }
  const namesInBody = new Set<string>();
  for (const match of body.matchAll(/<file\s+name="([^"]*)"[^>]*>/gi)) {
    const rawName = match[1]?.trim();
    if (rawName) {
      namesInBody.add(decodeXmlAttr(rawName));
    }
  }
  if (namesInBody.size === 0) {
    return false;
  }
  const attachments = normalizeAttachments(mediaContext as MsgContext);
  return attachments.some((attachment) => {
    const name = extractAttachmentFileName(attachment.path ?? attachment.url);
    return Boolean(name && namesInBody.has(name));
  });
}
/** Remove inline slash-directives (e.g. "/think high") from `text` and trim the remainder. */
function stripInlineDirectives(text: string | undefined): string {
  const { cleaned } = parseInlineDirectives(text ?? "");
  return cleaned.trim();
}
/**
 * Produce the final body text after media understanding: the directive-free
 * original body is substituted for the raw original inside the updated body,
 * taking care never to edit text inside `<file …>…</file>` blocks.
 * Returns "" when there is no updated body at all.
 */
function normalizeUpdatedBody(params: { originalBody?: string; updatedBody?: string }): string {
  const updated = params.updatedBody?.trim();
  if (!updated) {
    return "";
  }
  const original = params.originalBody?.trim();
  if (!original) {
    return updated;
  }
  const cleanedOriginal = stripInlineDirectives(original);
  if (!cleanedOriginal) {
    // Body was directives only — keep whatever media understanding produced.
    return updated;
  }
  if (updated === original) {
    // Nothing was appended by media processing; just drop the directives.
    return cleanedOriginal;
  }
  const replaced = replaceLastOccurrenceOutsideFileBlocks(updated, original, cleanedOriginal);
  return (replaced ?? updated).trim();
}
/**
 * Rebuild a queued prompt after media understanding ran on its attachments.
 *
 * Steps, in order: strip leading "[media attached…]" marker lines (and the
 * media-reply hint when no new media note replaces it); locate the trailing
 * occurrence of the original body (raw, directive-stripped, or the
 * media-only placeholder) that sits outside any <file> block; strip stale
 * file blocks from that body segment when the updated body carries new ones;
 * then splice the normalized updated body over the located segment, or append
 * it when no segment was found. The media note, when present, is prepended.
 */
function rebuildQueuedPromptWithMediaUnderstanding(params: {
  prompt: string;
  originalBody?: string;
  updatedBody?: string;
  mediaNote?: string;
}): string {
  let stripped = stripLeadingMediaAttachedLines(params.prompt);
  if (!params.mediaNote) {
    stripped = stripLeadingMediaReplyHint(stripped);
  }
  // Candidate strings the original body may appear as in the prompt,
  // de-duplicated and with empty values filtered out.
  const replacementTargets = [
    params.originalBody?.trim(),
    stripInlineDirectives(params.originalBody),
    MEDIA_ONLY_PLACEHOLDER,
  ].filter(
    (value, index, list): value is string => Boolean(value) && list.indexOf(value) === index,
  );
  // Strip pre-existing file blocks from the body region when the updated body
  // contains new file blocks. Mixed messages (audio + PDF) can arrive with
  // file extraction already applied in the primary path; without this strip
  // the old block stays in the prompt while the updated body adds a new one,
  // duplicating potentially large file payloads.
  // Scope stripping to the confirmed body segment so quoted/replied text,
  // thread history above the body, and prompts whose original body no longer
  // appears all retain any legitimate <file> blocks.
  if (params.updatedBody && FILE_BLOCK_RE.test(params.updatedBody)) {
    const trailingMatch = findTrailingReplacementTargetBeforeFileBlocks(
      stripped,
      replacementTargets,
    );
    if (trailingMatch) {
      stripped =
        stripped.slice(0, trailingMatch.index) +
        stripExistingFileBlocks(stripped.slice(trailingMatch.index));
    }
  }
  const updatedBody = normalizeUpdatedBody({
    originalBody: params.originalBody,
    updatedBody: params.updatedBody,
  });
  if (!updatedBody) {
    // No body to splice in — just reattach the media note (if any).
    return [params.mediaNote?.trim(), stripped].filter(Boolean).join("\n").trim();
  }
  let rebuilt = stripped;
  // Re-locate the target: the strip above may have shifted/removed text.
  const trailingMatch = findTrailingReplacementTargetBeforeFileBlocks(rebuilt, replacementTargets);
  if (trailingMatch) {
    rebuilt = replaceOccurrenceAtIndex(
      rebuilt,
      trailingMatch.target,
      updatedBody,
      trailingMatch.index,
    );
    return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();
  }
  // Original body no longer appears in the prompt — append the updated body.
  rebuilt = [rebuilt, updatedBody].filter(Boolean).join("\n\n");
  return [params.mediaNote?.trim(), rebuilt.trim()].filter(Boolean).join("\n").trim();
}
/** True when the context carries any media reference: a single path/url or a non-empty list. */
function hasMediaAttachments(mediaContext: FollowupMediaContext): boolean {
  if (mediaContext.MediaPath?.trim() || mediaContext.MediaUrl?.trim()) {
    return true;
  }
  const paths = mediaContext.MediaPaths;
  const urls = mediaContext.MediaUrls;
  return (Array.isArray(paths) && paths.length > 0) || (Array.isArray(urls) && urls.length > 0);
}
/**
 * True when there is at least one attachment and every one of them is
 * "file-like" — i.e. not audio, image, or video.
 */
function hasOnlyFileLikeAttachments(mediaContext: FollowupMediaContext): boolean {
  const attachments = normalizeAttachments(mediaContext as MsgContext);
  if (attachments.length === 0) {
    return false;
  }
  const hasPlayableMedia = attachments.some((attachment) => {
    const kind = resolveAttachmentKind(attachment);
    return kind === "audio" || kind === "image" || kind === "video";
  });
  return !hasPlayableMedia;
}
/** True when at least one attachment is not audio, image, or video. */
function hasAnyFileAttachments(mediaContext: FollowupMediaContext): boolean {
  for (const attachment of normalizeAttachments(mediaContext as MsgContext)) {
    const kind = resolveAttachmentKind(attachment);
    if (kind !== "audio" && kind !== "image" && kind !== "video") {
      return true;
    }
  }
  return false;
}
/**
 * Return a copy of the original context with the fields media understanding
 * may have changed (Body, Transcript, understanding outputs/decisions)
 * refreshed from `mediaCtx`, and the deferred-processing flags set so later
 * calls to applyDeferredMediaUnderstandingToQueuedRun return early.
 */
function snapshotUpdatedMediaContext(params: {
  original: FollowupMediaContext;
  mediaCtx: MsgContext;
  updatedBody?: string;
  appliedFile?: boolean;
}): FollowupMediaContext {
  return {
    ...params.original,
    // Only replace the body when the caller actually produced an updated one.
    Body: params.updatedBody ?? params.original.Body,
    Transcript:
      typeof params.mediaCtx.Transcript === "string"
        ? params.mediaCtx.Transcript
        : params.original.Transcript,
    // Copy arrays defensively so later mutation of mediaCtx cannot leak in.
    MediaUnderstanding: Array.isArray(params.mediaCtx.MediaUnderstanding)
      ? [...params.mediaCtx.MediaUnderstanding]
      : params.original.MediaUnderstanding,
    MediaUnderstandingDecisions: Array.isArray(params.mediaCtx.MediaUnderstandingDecisions)
      ? [...params.mediaCtx.MediaUnderstandingDecisions]
      : params.original.MediaUnderstandingDecisions,
    DeferredMediaApplied: true,
    // Sticky: once file blocks were extracted (now or earlier), keep the flag;
    // otherwise leave it undefined rather than false.
    DeferredFileBlocksExtracted:
      params.original.DeferredFileBlocksExtracted || params.appliedFile || undefined,
  };
}
// Exported for unit testing — these are pure string helpers with no side effects.
export {
findLastOccurrenceOutsideFileBlocks as _findLastOccurrenceOutsideFileBlocks,
normalizeUpdatedBody as _normalizeUpdatedBody,
rebuildQueuedPromptWithMediaUnderstanding as _rebuildQueuedPromptWithMediaUnderstanding,
};
/**
 * Applies deferred media understanding (audio/image/video/file processing) to
 * a queued followup run whose attachments were not processed — or failed — in
 * the primary reply path.
 *
 * Mutates `queued` in place: `queued.prompt` is rebuilt to include media
 * understanding output, and `queued.mediaContext` is replaced with an updated
 * snapshot. Idempotent: every exit path (including failure) leaves
 * `DeferredMediaApplied` set so the same run is never processed twice.
 *
 * @param queued - Queued followup run carrying a media-context snapshot.
 * @param params.logLabel - Prefix for verbose log lines; defaults to "followup".
 */
export async function applyDeferredMediaUnderstandingToQueuedRun(
  queued: FollowupRun,
  params: { logLabel?: string } = {},
): Promise<void> {
  // NOTE: collect-mode and overflow-summary queue drains create synthetic
  // followup runs without mediaContext — those paths are not covered here
  // and rely on their own prompt-building logic in queue/drain.ts.
  const mediaContext = queued.mediaContext;
  if (!mediaContext || mediaContext.DeferredMediaApplied) {
    return;
  }
  // No attachments at all: nothing to defer; mark done so we never re-check.
  if (!hasMediaAttachments(mediaContext)) {
    mediaContext.DeferredMediaApplied = true;
    return;
  }
  const referenceBody = mediaContext.RawBody ?? mediaContext.Body;
  // Prefer RawBody-vs-Body comparison when RawBody exists. If RawBody is
  // missing, fall back to explicit file-extraction signals instead of
  // re-running extraction just because the clean pre-extraction body is gone.
  if (!mediaContext.DeferredFileBlocksExtracted && hasAnyFileAttachments(mediaContext)) {
    const rawBodyMissing = typeof mediaContext.RawBody !== "string";
    // Body diverging from the pre-extraction reference means extraction
    // already ran and modified it.
    if (mediaContext.Body !== referenceBody) {
      mediaContext.DeferredFileBlocksExtracted = true;
    } else if (
      rawBodyMissing &&
      (Boolean(mediaContext.MediaUnderstanding?.length) ||
        bodyContainsMatchingFileBlock(mediaContext))
    ) {
      // Without RawBody we can't diff; treat prior understanding output or a
      // matching file block in the body as evidence extraction happened.
      mediaContext.DeferredFileBlocksExtracted = true;
    }
  }
  // Understanding output already present: the primary path succeeded; skip.
  if (mediaContext.MediaUnderstanding?.length) {
    mediaContext.DeferredMediaApplied = true;
    return;
  }
  // Only file-like attachments and their blocks are already extracted into
  // the body: nothing left to apply.
  if (mediaContext.DeferredFileBlocksExtracted && hasOnlyFileLikeAttachments(mediaContext)) {
    mediaContext.DeferredMediaApplied = true;
    return;
  }
  // Run understanding against the cleanest available pre-extraction text.
  const resolvedOriginalBody =
    mediaContext.CommandBody ?? mediaContext.RawBody ?? mediaContext.Body;
  try {
    // Build a MsgContext for the understanding pipeline; Provider falls back
    // from the snapshot to the run's message provider to the originating
    // channel name.
    const mediaCtx = {
      ...mediaContext,
      Body: resolvedOriginalBody,
      Provider:
        mediaContext.Provider ??
        queued.run.messageProvider ??
        (typeof mediaContext.OriginatingChannel === "string"
          ? mediaContext.OriginatingChannel
          : undefined),
      Surface: mediaContext.Surface,
    } as MsgContext;
    const muResult = await applyMediaUnderstanding({
      ctx: mediaCtx,
      cfg: queued.run.config,
      agentDir: queued.run.agentDir,
      activeModel: {
        provider: queued.run.provider,
        model: queued.run.model,
      },
    });
    // Rebuild the prompt when anything new was produced. File-only results
    // are skipped if extraction had already been folded into the body.
    const shouldRebuildPrompt =
      muResult.outputs.length > 0 ||
      muResult.appliedAudio ||
      muResult.appliedImage ||
      muResult.appliedVideo ||
      (muResult.appliedFile && !mediaContext.DeferredFileBlocksExtracted);
    if (shouldRebuildPrompt) {
      const newMediaNote = buildInboundMediaNote(mediaCtx);
      queued.prompt = rebuildQueuedPromptWithMediaUnderstanding({
        prompt: queued.prompt,
        originalBody: resolvedOriginalBody,
        updatedBody: mediaCtx.Body,
        mediaNote: newMediaNote,
      });
      logVerbose(
        `${params.logLabel ?? "followup"}: applied media understanding (audio=${muResult.appliedAudio}, image=${muResult.appliedImage}, video=${muResult.appliedVideo}, file=${muResult.appliedFile})`,
      );
    }
    // Snapshot the updated context (marks DeferredMediaApplied) so repeat
    // invocations short-circuit at the top of this function.
    queued.mediaContext = snapshotUpdatedMediaContext({
      original: mediaContext,
      mediaCtx,
      updatedBody: shouldRebuildPrompt ? mediaCtx.Body : undefined,
      appliedFile: muResult.appliedFile,
    });
  } catch (err) {
    // Best-effort: on failure, proceed with the raw content rather than
    // blocking the queued run, and mark applied so we don't retry forever.
    mediaContext.DeferredMediaApplied = true;
    logVerbose(
      `${params.logLabel ?? "followup"}: media understanding failed, proceeding with raw content: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@ import type { OriginatingChannelType } from "../templating.js";
import { isSilentReplyText, SILENT_REPLY_TOKEN } from "../tokens.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { resolveRunAuthProfile } from "./agent-runner-utils.js";
import { applyDeferredMediaUnderstandingToQueuedRun } from "./followup-media.js";
import {
resolveOriginAccountId,
resolveOriginMessageProvider,
@ -154,6 +155,8 @@ export function createFollowupRunner(params: {
let bootstrapPromptWarningSignaturesSeen = resolveBootstrapWarningSignaturesSeen(
activeSessionEntry?.systemPromptReport,
);
await applyDeferredMediaUnderstandingToQueuedRun(queued, { logLabel: "followup" });
try {
const fallbackResult = await runWithModelFallback({
cfg: queued.run.config,

View File

@ -172,6 +172,45 @@ describe("runPreparedReply media-only handling", () => {
expect(call?.followupRun.prompt).toContain("[User sent media without caption]");
});
it("snapshots URL-only attachments into followup mediaContext", async () => {
await runPreparedReply(
baseParams({
ctx: {
Body: "check this attachment",
RawBody: "check this attachment",
CommandBody: "check this attachment",
ThreadHistoryBody: "Earlier message in this thread",
OriginatingChannel: "slack",
OriginatingTo: "C123",
ChatType: "group",
MediaUrl: "https://cdn.example.com/input.png",
MediaUrls: ["https://cdn.example.com/input.png"],
MediaType: "image/png",
MediaTypes: ["image/png"],
},
sessionCtx: {
Body: "check this attachment",
BodyStripped: "check this attachment",
ThreadHistoryBody: "Earlier message in this thread",
Provider: "slack",
ChatType: "group",
OriginatingChannel: "slack",
OriginatingTo: "C123",
},
}),
);
const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
expect(call?.followupRun.mediaContext).toEqual(
expect.objectContaining({
MediaUrl: "https://cdn.example.com/input.png",
MediaUrls: ["https://cdn.example.com/input.png"],
MediaType: "image/png",
MediaTypes: ["image/png"],
}),
);
});
it("keeps thread history context on follow-up turns", async () => {
const result = await runPreparedReply(
baseParams({
@ -186,6 +225,41 @@ describe("runPreparedReply media-only handling", () => {
expect(call?.followupRun.prompt).toContain("Earlier message in this thread");
});
it("snapshots mediaContext for URL-only deferred attachments", async () => {
await runPreparedReply(
baseParams({
ctx: {
Body: "",
RawBody: "",
CommandBody: "",
MediaUrl: "https://cdn.example.com/audio.ogg",
MediaUrls: ["https://cdn.example.com/audio.ogg"],
MediaType: "audio/ogg",
MediaTypes: ["audio/ogg"],
ThreadHistoryBody: "Earlier message in this thread",
OriginatingChannel: "slack",
OriginatingTo: "C123",
ChatType: "group",
},
sessionCtx: {
Body: "",
BodyStripped: "",
ThreadHistoryBody: "Earlier message in this thread",
Provider: "slack",
ChatType: "group",
OriginatingChannel: "slack",
OriginatingTo: "C123",
},
}),
);
const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0];
expect(call?.followupRun.mediaContext?.MediaUrl).toBe("https://cdn.example.com/audio.ogg");
expect(call?.followupRun.mediaContext?.MediaUrls).toEqual([
"https://cdn.example.com/audio.ogg",
]);
});
it("returns the empty-body reply when there is no text and no media", async () => {
const result = await runPreparedReply(
baseParams({

View File

@ -307,7 +307,14 @@ export async function runPreparedReply(
: [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n");
const baseBodyTrimmed = baseBodyForPrompt.trim();
const hasMediaAttachment = Boolean(
sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0),
sessionCtx.MediaPath ||
sessionCtx.MediaUrl ||
(sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0) ||
(sessionCtx.MediaUrls && sessionCtx.MediaUrls.length > 0) ||
ctx.MediaPath?.trim() ||
ctx.MediaUrl?.trim() ||
(Array.isArray(ctx.MediaPaths) && ctx.MediaPaths.length > 0) ||
(Array.isArray(ctx.MediaUrls) && ctx.MediaUrls.length > 0),
);
if (!baseBodyTrimmed && !hasMediaAttachment) {
await typing.onReplyStart();
@ -384,7 +391,7 @@ export async function runPreparedReply(
const mediaReplyHint = mediaNote
? "To send an image back, prefer the message tool (media/path/filePath). If you must inline, use MEDIA:https://example.com/image.jpg (spaces ok, quote if needed) or a safe relative path like MEDIA:./image.jpg. Avoid absolute paths (MEDIA:/...) and ~ paths — they are blocked for security. Keep caption in the text body."
: undefined;
let prefixedCommandBody = mediaNote
const prefixedCommandBody = mediaNote
? [mediaNote, mediaReplyHint, prefixedBody ?? ""].filter(Boolean).join("\n").trim()
: prefixedBody;
if (!resolvedThinkLevel) {
@ -469,11 +476,48 @@ export async function runPreparedReply(
isNewSession,
});
const authProfileIdSource = sessionEntry?.authProfileOverrideSource;
// Snapshot media-related context for deferred media understanding in the
// followup runner. When MediaUnderstanding is already populated the runner
// knows transcription already succeeded and skips re-application.
const hasMediaAttachments = Boolean(
ctx.MediaPath?.trim() ||
ctx.MediaUrl?.trim() ||
(Array.isArray(ctx.MediaPaths) && ctx.MediaPaths.length > 0) ||
(Array.isArray(ctx.MediaUrls) && ctx.MediaUrls.length > 0),
);
const mediaContext = hasMediaAttachments
? {
Body: ctx.Body,
CommandBody: ctx.CommandBody,
RawBody: ctx.RawBody,
Provider: ctx.Provider ?? sessionCtx.Provider,
Surface: ctx.Surface ?? sessionCtx.Surface,
MediaPath: ctx.MediaPath,
MediaUrl: ctx.MediaUrl,
MediaType: ctx.MediaType,
MediaDir: ctx.MediaDir,
MediaPaths: ctx.MediaPaths ? [...ctx.MediaPaths] : undefined,
MediaUrls: ctx.MediaUrls ? [...ctx.MediaUrls] : undefined,
MediaTypes: ctx.MediaTypes ? [...ctx.MediaTypes] : undefined,
MediaRemoteHost: ctx.MediaRemoteHost,
Transcript: ctx.Transcript,
MediaUnderstanding: ctx.MediaUnderstanding ? [...ctx.MediaUnderstanding] : undefined,
MediaUnderstandingDecisions: ctx.MediaUnderstandingDecisions
? [...ctx.MediaUnderstandingDecisions]
: undefined,
OriginatingChannel: ctx.OriginatingChannel,
OriginatingTo: ctx.OriginatingTo,
AccountId: ctx.AccountId,
MessageThreadId: ctx.MessageThreadId,
}
: undefined;
const followupRun = {
prompt: queuedBody,
messageId: sessionCtx.MessageSidFull ?? sessionCtx.MessageSid,
summaryLine: baseBodyTrimmedRaw,
enqueuedAt: Date.now(),
mediaContext,
// Originating channel for reply routing.
originatingChannel: ctx.OriginatingChannel,
originatingTo: ctx.OriginatingTo,

View File

@ -1,6 +1,6 @@
export { extractQueueDirective } from "./queue/directive.js";
export { clearSessionQueues } from "./queue/cleanup.js";
export type { ClearSessionQueueResult } from "./queue/cleanup.js";
export { clearSessionQueues } from "./queue/cleanup.js";
export { extractQueueDirective } from "./queue/directive.js";
export { scheduleFollowupDrain } from "./queue/drain.js";
export {
enqueueFollowupRun,
@ -10,6 +10,7 @@ export {
export { resolveQueueSettings } from "./queue/settings.js";
export { clearFollowupQueue } from "./queue/state.js";
export type {
FollowupMediaContext,
FollowupRun,
QueueDedupeMode,
QueueDropPolicy,

View File

@ -3,15 +3,17 @@ import { resolveGlobalMap } from "../../../shared/global-singleton.js";
import {
buildCollectPrompt,
beginQueueDrain,
buildQueueSummaryLine,
buildQueueSummaryPrompt,
clearQueueSummaryState,
drainCollectQueueStep,
drainNextQueueItem,
hasCrossChannelItems,
previewQueueSummaryPrompt,
waitForQueueDebounce,
} from "../../../utils/queue-helpers.js";
import { applyDeferredMediaUnderstandingToQueuedRun } from "../followup-media.js";
import { isRoutableChannel } from "../route-reply.js";
import { FOLLOWUP_QUEUES } from "./state.js";
import { FOLLOWUP_QUEUES, type FollowupQueueState } from "./state.js";
import type { FollowupRun } from "./types.js";
// Persists the most recent runFollowup callback per queue key so that
@ -68,6 +70,60 @@ function resolveCrossChannelKey(item: FollowupRun): { cross?: true; key?: string
};
}
/** Resets the shared queue summary state and drops the retained summary items. */
function clearFollowupQueueSummaryState(state: FollowupQueueState): void {
  clearQueueSummaryState(state);
  state.summaryItems = [];
}
/**
 * Runs deferred media understanding for every queued item in parallel.
 * Uses allSettled so one failing attachment never blocks the other items.
 */
export async function applyDeferredMediaToQueuedRuns(items: FollowupRun[]): Promise<void> {
  const tasks = items.map((item) =>
    applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" }),
  );
  await Promise.allSettled(tasks);
}
/**
 * Resolves one summary line per queued item, applying deferred media
 * understanding first so transcripts are reflected in the lines.
 */
async function resolveSummaryLines(items: FollowupRun[]): Promise<string[]> {
  // Kick off all media-understanding calls upfront (same pattern as
  // applyDeferredMediaToQueuedRuns); settle them before building lines so
  // output order matches the original item order.
  const pending = items.map((item) =>
    applyDeferredMediaUnderstandingToQueuedRun(item, { logLabel: "followup queue" }),
  );
  await Promise.allSettled(pending);
  // After deferred media, prefer the updated prompt (which includes
  // transcripts) over the original summaryLine (which may just be caption text).
  const lines: string[] = [];
  for (const item of items) {
    const text = item.prompt.trim() || item.summaryLine?.trim() || "";
    lines.push(buildQueueSummaryLine(text));
  }
  return lines;
}
/**
 * Builds the dropped-items summary prompt for a "summarize" drop policy,
 * preferring media-aware summary lines (with transcripts) when the dropped
 * items themselves are still available.
 *
 * @returns The summary prompt, or undefined when no summary is needed.
 */
export async function buildMediaAwareQueueSummaryPrompt(params: {
  dropPolicy: FollowupQueueState["dropPolicy"];
  droppedCount: number;
  summaryLines: string[];
  summaryItems: FollowupRun[];
  noun: string;
}): Promise<string | undefined> {
  const { dropPolicy, droppedCount, summaryItems, noun } = params;
  if (dropPolicy !== "summarize") {
    return undefined;
  }
  if (droppedCount <= 0) {
    return undefined;
  }
  // Fall back to the pre-computed lines when the dropped items were not retained.
  const resolvedLines =
    summaryItems.length > 0 ? await resolveSummaryLines(summaryItems) : params.summaryLines;
  return buildQueueSummaryPrompt({
    state: {
      dropPolicy,
      droppedCount,
      summaryLines: [...resolvedLines],
    },
    noun,
  });
}
export function scheduleFollowupDrain(
key: string,
runFollowup: (run: FollowupRun) => Promise<void>,
@ -107,7 +163,14 @@ export function scheduleFollowupDrain(
}
const items = queue.items.slice();
const summary = previewQueueSummaryPrompt({ state: queue, noun: "message" });
await applyDeferredMediaToQueuedRuns(items);
const summary = await buildMediaAwareQueueSummaryPrompt({
dropPolicy: queue.dropPolicy,
droppedCount: queue.droppedCount,
summaryLines: queue.summaryLines,
summaryItems: queue.summaryItems,
noun: "message",
});
const run = items.at(-1)?.run ?? queue.lastRun;
if (!run) {
break;
@ -129,12 +192,18 @@ export function scheduleFollowupDrain(
});
queue.items.splice(0, items.length);
if (summary) {
clearQueueSummaryState(queue);
clearFollowupQueueSummaryState(queue);
}
continue;
}
const summaryPrompt = previewQueueSummaryPrompt({ state: queue, noun: "message" });
const summaryPrompt = await buildMediaAwareQueueSummaryPrompt({
dropPolicy: queue.dropPolicy,
droppedCount: queue.droppedCount,
summaryLines: queue.summaryLines,
summaryItems: queue.summaryItems,
noun: "message",
});
if (summaryPrompt) {
const run = queue.lastRun;
if (!run) {
@ -155,7 +224,7 @@ export function scheduleFollowupDrain(
) {
break;
}
clearQueueSummaryState(queue);
clearFollowupQueueSummaryState(queue);
continue;
}

View File

@ -1,8 +1,8 @@
import { createDedupeCache } from "../../../infra/dedupe.js";
import { resolveGlobalSingleton } from "../../../shared/global-singleton.js";
import { applyQueueDropPolicy, shouldSkipQueueItem } from "../../../utils/queue-helpers.js";
import { buildQueueSummaryLine, shouldSkipQueueItem } from "../../../utils/queue-helpers.js";
import { kickFollowupDrainIfIdle } from "./drain.js";
import { getExistingFollowupQueue, getFollowupQueue } from "./state.js";
import { getExistingFollowupQueue, getFollowupQueue, type FollowupQueueState } from "./state.js";
import type { FollowupRun, QueueDedupeMode, QueueSettings } from "./types.js";
/**
@ -57,6 +57,34 @@ function isRunAlreadyQueued(
return items.some((item) => item.prompt === run.prompt && hasSameRouting(item));
}
/**
 * Enforces the queue cap before a new run is enqueued.
 *
 * @returns true when the caller may enqueue (evicting old items if needed);
 *   false when the "new" drop policy rejects the incoming run.
 */
function applyFollowupQueueDropPolicy(queue: FollowupQueueState): boolean {
  const { cap } = queue;
  const overCap = cap > 0 && queue.items.length >= cap;
  if (!overCap) {
    return true;
  }
  // "new" policy: refuse the incoming run instead of evicting old ones.
  if (queue.dropPolicy === "new") {
    return false;
  }
  // Make room for the incoming run by evicting the oldest entries.
  const evicted = queue.items.splice(0, queue.items.length - cap + 1);
  if (queue.dropPolicy === "summarize") {
    for (const run of evicted) {
      queue.droppedCount += 1;
      queue.summaryItems.push(run);
      queue.summaryLines.push(buildQueueSummaryLine(run.summaryLine?.trim() || run.prompt.trim()));
    }
    // Bound retained summary state to the queue cap, dropping oldest first.
    const keep = Math.max(0, cap);
    while (queue.summaryLines.length > keep) {
      queue.summaryLines.shift();
      queue.summaryItems.shift();
    }
  }
  return true;
}
export function enqueueFollowupRun(
key: string,
run: FollowupRun,
@ -83,10 +111,7 @@ export function enqueueFollowupRun(
queue.lastEnqueuedAt = Date.now();
queue.lastRun = run.run;
const shouldEnqueue = applyQueueDropPolicy({
queue,
summarize: (item) => item.summaryLine?.trim() || item.prompt.trim(),
});
const shouldEnqueue = applyFollowupQueueDropPolicy(queue);
if (!shouldEnqueue) {
return false;
}

View File

@ -4,6 +4,7 @@ import type { FollowupRun, QueueDropPolicy, QueueMode, QueueSettings } from "./t
export type FollowupQueueState = {
items: FollowupRun[];
summaryItems: FollowupRun[];
draining: boolean;
lastEnqueuedAt: number;
mode: QueueMode;
@ -47,6 +48,7 @@ export function getFollowupQueue(key: string, settings: QueueSettings): Followup
const created: FollowupQueueState = {
items: [],
summaryItems: [],
draining: false,
lastEnqueuedAt: 0,
mode: settings.mode,
@ -78,6 +80,7 @@ export function clearFollowupQueue(key: string): number {
}
const cleared = queue.items.length + queue.droppedCount;
queue.items.length = 0;
queue.summaryItems.length = 0;
queue.droppedCount = 0;
queue.summaryLines = [];
queue.lastRun = undefined;

View File

@ -2,6 +2,10 @@ import type { ExecToolDefaults } from "../../../agents/bash-tools.js";
import type { SkillSnapshot } from "../../../agents/skills.js";
import type { OpenClawConfig } from "../../../config/config.js";
import type { SessionEntry } from "../../../config/sessions.js";
import type {
MediaUnderstandingDecision,
MediaUnderstandingOutput,
} from "../../../media-understanding/types.js";
import type { InputProvenance } from "../../../sessions/input-provenance.js";
import type { OriginatingChannelType } from "../../templating.js";
import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../directives.js";
@ -19,12 +23,55 @@ export type QueueSettings = {
export type QueueDedupeMode = "message-id" | "prompt" | "none";
/**
* Snapshot of media-related context fields carried on a FollowupRun so that
* the followup runner can apply media understanding (e.g. voice-note
* transcription) when it was not applied or failed in the primary path.
*/
/**
 * Snapshot of media-related context fields carried on a FollowupRun so that
 * the followup runner can apply media understanding (e.g. voice-note
 * transcription) when it was not applied or failed in the primary path.
 */
export type FollowupMediaContext = {
  // Body variants: Body may already contain extracted file blocks (see
  // DeferredFileBlocksExtracted below); RawBody/CommandBody preserve the
  // pre-extraction text when available.
  Body?: string;
  CommandBody?: string;
  RawBody?: string;
  Provider?: string;
  Surface?: string;
  // Single-attachment fields and their multi-attachment counterparts.
  MediaPath?: string;
  MediaUrl?: string;
  MediaType?: string;
  MediaDir?: string;
  MediaPaths?: string[];
  MediaUrls?: string[];
  MediaTypes?: string[];
  MediaRemoteHost?: string;
  // Media understanding results; when MediaUnderstanding is non-empty the
  // followup runner treats processing as already done and skips re-application.
  Transcript?: string;
  MediaUnderstanding?: MediaUnderstandingOutput[];
  MediaUnderstandingDecisions?: MediaUnderstandingDecision[];
  // Origin/routing metadata for replies back to the source channel.
  OriginatingChannel?: OriginatingChannelType;
  OriginatingTo?: string;
  AccountId?: string;
  MessageThreadId?: string | number;
  // Set once deferred media understanding has run (or been skipped/failed)
  // so a queued run is never processed twice.
  DeferredMediaApplied?: boolean;
  /**
   * Set when file extraction has already been applied to Body (either in the
   * primary path or by a previous deferred-media run). Checked instead of
   * scanning body text for `<file` patterns to avoid false-positives on user
   * messages that contain literal XML-like text.
   */
  DeferredFileBlocksExtracted?: boolean;
};
export type FollowupRun = {
prompt: string;
/** Provider message ID, when available (for deduplication). */
messageId?: string;
summaryLine?: string;
enqueuedAt: number;
/**
* Media context snapshot from the original inbound message.
* When present and MediaUnderstanding is empty, the followup runner will
* attempt to apply media understanding (audio transcription, etc.) before
* passing the prompt to the agent.
*/
mediaContext?: FollowupMediaContext;
/**
* Originating channel for reply routing.
* When set, replies should be routed back to this provider