# Source: aiagent/backend/app/core/compaction.py (Python, 498 lines, 20 KiB)

"""
对话自动压缩引擎 三级压缩体系
参考 Claude Code:
- src/services/compact/microCompact.ts Tier 1: 工具结果打桩
- src/services/compact/compact.ts Tier 2: LLM 摘要替换
- src/services/compact/reactiveCompact.ts Tier 3: 错误触发压缩
- src/services/compact/grouping.ts 安全分割点识别
"""
from __future__ import annotations
import logging
import time
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple
from app.core.token_counter import (
TokenCounter,
get_model_context_window,
is_context_length_error,
)
from app.core.compaction_config import CompactionConfig
logger = logging.getLogger(__name__)
# ──────────────────────────── 数据结构 ────────────────────────────
class CompactionStrategy(str, Enum):
    """Label for the compaction tier recorded on a ``CompactionResult``.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Nothing was compacted.
    NONE = "none"
    # Tier 1: old tool results replaced by stubs.
    MICRO = "micro"
    # Tier 2: older history replaced by an LLM summary.
    FULL = "full"
    # Tier 3: compaction triggered by an API error.
    REACTIVE = "reactive"
class CompactionResult:
    """Outcome of a single compaction attempt.

    Instance fields:
        messages: the message list to continue with (possibly unchanged).
        strategy: the ``CompactionStrategy`` reported for this attempt.
        tokens_before / tokens_after: token counts around the attempt.
        tokens_saved: ``tokens_before - tokens_after``.
        details: optional free-form explanation.
        timestamp: ``time.time()`` taken when the result was created.
    """

    def __init__(
        self,
        messages: List[Dict[str, Any]],
        strategy: CompactionStrategy,
        tokens_before: int,
        tokens_after: int,
        details: Optional[str] = None,
    ):
        self.timestamp = time.time()
        self.messages = messages
        self.strategy = strategy
        self.details = details
        self.tokens_before = tokens_before
        self.tokens_after = tokens_after
        # Derived once at construction; later edits to the counts do not update it.
        self.tokens_saved = tokens_before - tokens_after

    def __repr__(self) -> str:
        strategy_name = self.strategy.value
        head = f"CompactionResult(strategy={strategy_name}, "
        saved = f"saved={self.tokens_saved} tokens, "
        counts = f"before={self.tokens_before} after={self.tokens_after})"
        return head + saved + counts
# ──────────────────────────── 压缩摘要提示词 ────────────────────────────
COMPACT_SUMMARY_SYSTEM = """你是一个对话摘要专家。你需要将一段 AI 助手与用户的对话历史压缩为简洁的摘要。
规则
- 保留关键事实和决策文件路径数值结论用户偏好
- 保留未完成的任务或待处理事项
- 忽略纯粹的问候闲聊和中间工具调用细节
- 用第三人称中文描述
- 控制在你被要求的字数范围内
返回格式直接返回摘要文本不要加任何前缀或标记"""
def _build_compact_user_prompt(older_messages: List[Dict[str, Any]], max_chars: int = 2000) -> str:
"""从旧消息中构建压缩提示词。"""
parts = []
total_chars = 0
for msg in older_messages:
role = msg.get("role", "?")
content = msg.get("content", "") or ""
# 截断长内容
if len(content) > 500:
content = content[:500] + "..."
line = f"[{role}]: {content}"
if total_chars + len(line) > max_chars:
parts.append("...(更早的消息已省略)")
break
parts.append(line)
total_chars += len(line)
return "\n".join(parts)
# ──────────────────────────── CompactionEngine ────────────────────────────
class CompactionEngine:
    """Three-tier conversation compaction engine.

    ``maybe_compact`` is meant to be consulted before an LLM call. It measures
    token usage against the effective context window and selects a tier using
    the thresholds in ``CompactionConfig``:

    - usage >= ``micro_compact_threshold``: MicroCompact, old tool results are
      replaced by a short stub.
    - usage >= ``full_compact_threshold``: FullCompact, older history is
      replaced by a summary message.
    - ``reactive_compact`` is the Tier 3 entry point, called after the API has
      already rejected a request.

    The original design notes quote roughly 70% / 85% / 95% for these tiers;
    the actual values live in the config object.
    """

    # Placeholder written over the content of compacted tool results.
    _STUB_TEXT = "[Tool result compacted]"

    def __init__(
        self,
        config: CompactionConfig,
        token_counter: Optional[TokenCounter] = None,
        model: str = "deepseek-v4-flash",
    ):
        """Store the configuration and create a token counter if none is given."""
        self.config = config
        self.token_counter = token_counter or TokenCounter(model=model)
        self.model = model
        # Circuit-breaker bookkeeping.
        self._consecutive_failures = 0
        self._last_compact_time: float = 0
        self._compact_count = 0

    # ──────────────────── entry points ────────────────────

    async def maybe_compact(
        self,
        messages: List[Dict[str, Any]],
        context_window: Optional[int] = None,
        llm_client=None,
    ) -> CompactionResult:
        """Compact ``messages`` if current token usage calls for it.

        Args:
            messages: current message list.
            context_window: model context window in tokens; ``None`` looks it
                up with ``get_model_context_window``.
            llm_client: optional client forwarded to FullCompact for summary
                generation; without it the heuristic fallback summary is used.

        Returns:
            A ``CompactionResult`` whose ``messages`` are either the input list
            or the compacted replacement.
        """
        if not self.config.enabled:
            return CompactionResult(
                messages, CompactionStrategy.NONE,
                tokens_before=0, tokens_after=0,
                details="压缩已禁用",
            )

        if context_window is None:
            context_window = get_model_context_window(self.model)
        # NOTE(review): a positive config override is applied over both the
        # detected and the caller-supplied window — confirm this precedence.
        if self.config.context_window_override > 0:
            context_window = self.config.context_window_override

        # Effective window = model window minus the tokens reserved for output.
        effective_window = context_window - self.config.output_reserve_tokens
        tokens_before = self.token_counter.count_messages(messages)
        usage_ratio = tokens_before / effective_window if effective_window > 0 else 0

        # Tier 2 is checked first so that Tier 1 still applies above the full
        # threshold when FullCompact is disabled.
        if (
            self.config.full_compact_enabled
            and usage_ratio >= self.config.full_compact_threshold
        ):
            return await self._full_compact(
                messages, tokens_before, llm_client=llm_client
            )
        if (
            self.config.micro_compact_enabled
            and usage_ratio >= self.config.micro_compact_threshold
        ):
            return await self._micro_compact(messages, tokens_before)

        return CompactionResult(
            messages, CompactionStrategy.NONE,
            tokens_before=tokens_before, tokens_after=tokens_before,
            details=f"usage={usage_ratio:.1%} < threshold={self.config.micro_compact_threshold:.0%}",
        )

    async def reactive_compact(
        self,
        messages: List[Dict[str, Any]],
        error: Exception,
        context_window: Optional[int] = None,
        llm_client=None,
    ) -> CompactionResult:
        """Emergency compaction after the API reported an error (Tier 3).

        Args:
            messages: current message list.
            error: the exception that triggered the call.
            context_window: context window size; only used for logging here.
            llm_client: optional client forwarded to FullCompact.

        Returns:
            The result of a FullCompact run performed in reactive mode.

        Raises:
            Exception: ``error`` itself, when reactive compaction is disabled.
        """
        if not self.config.reactive_compact_enabled:
            raise error

        if context_window is None:
            context_window = get_model_context_window(self.model)
        tokens_before = self.token_counter.count_messages(messages)
        logger.warning(
            "ReactiveCompact 触发: %s, tokens=%d, window=%d",
            str(error)[:100], tokens_before, context_window,
        )
        return await self._full_compact(
            messages, tokens_before, is_reactive=True, llm_client=llm_client
        )

    # ──────────────────── Tier 1: MicroCompact ────────────────────

    async def _micro_compact(
        self, messages: List[Dict[str, Any]], tokens_before: int
    ) -> CompactionResult:
        """Replace old tool results with a stub marker.

        Steps:
        1. Locate the "protection line": the user message that starts the
           last ``compact_older_than_rounds`` rounds. Everything from there on
           is kept untouched.
        2. Before that line, replace the content of ``tool`` messages whose
           tool is in ``compactable_tools`` and not in ``protected_tools``.
        3. Assistant messages (including their ``tool_calls``) are never
           modified.

        Never raises: failures are logged, counted and reported through the
        result's ``details`` with the original messages returned.
        """
        try:
            compactable = set(self.config.compactable_tools)
            protected = set(self.config.protected_tools)
            stub = self._STUB_TEXT

            # User messages delimit conversation rounds.
            user_indices = [
                i for i, m in enumerate(messages) if m.get("role") == "user"
            ]
            if len(user_indices) <= self.config.min_preserve_messages // 2:
                return CompactionResult(
                    messages, CompactionStrategy.MICRO,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="对话轮次不足，跳过 MicroCompact",
                )

            first_protected = max(
                0, len(user_indices) - self.config.compact_older_than_rounds
            )
            if first_protected < len(user_indices):
                compact_before = user_indices[first_protected]
            else:
                # No round is protected, so every message is eligible.
                compact_before = len(messages)

            # Map tool_call_id -> tool name so tool results lacking a "name"
            # field can still be classified. Assumes OpenAI-style tool_calls
            # ({"id": ..., "function": {"name": ...}}) — TODO confirm.
            call_names: Dict[Any, str] = {}
            for m in messages:
                if m.get("role") != "assistant":
                    continue
                for call in m.get("tool_calls") or ():
                    call_id = call.get("id") or call.get("tool_call_id")
                    fn = call.get("function")
                    fn_name = fn.get("name") if isinstance(fn, dict) else None
                    name = fn_name or call.get("name")
                    if call_id and name:
                        call_names[call_id] = name

            stubbed_count = 0
            result: List[Dict[str, Any]] = []
            for i, msg in enumerate(messages):
                if i >= compact_before or msg.get("role") != "tool":
                    result.append(msg)
                    continue
                tc_id = msg.get("tool_call_id", "")
                tool_name = msg.get("name") or call_names.get(tc_id, "")
                content = msg.get("content")
                # Already stubbed, or too short for a stub to save anything.
                nothing_to_gain = isinstance(content, str) and len(content) <= len(stub)
                if (
                    tool_name not in compactable
                    or tool_name in protected
                    or nothing_to_gain
                ):
                    result.append(msg)
                    continue
                result.append({
                    "role": "tool",
                    "tool_call_id": tc_id or "compacted",
                    "content": stub,
                    "name": tool_name,
                })
                stubbed_count += 1

            tokens_after = self.token_counter.count_messages(result)
            if stubbed_count == 0:
                return CompactionResult(
                    result, CompactionStrategy.MICRO,
                    tokens_before=tokens_before, tokens_after=tokens_after,
                    details="没有可压缩的旧工具结果",
                )

            logger.info(
                "MicroCompact: %d 条工具结果打桩, %d → %d tokens (节省 %d)",
                stubbed_count, tokens_before, tokens_after,
                tokens_before - tokens_after,
            )
            return CompactionResult(
                result, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"{stubbed_count} 条工具结果已压缩",
            )
        except Exception as e:
            # Best effort: compaction must never break the conversation.
            self._consecutive_failures += 1
            logger.error("MicroCompact 失败 (%d/%d): %s",
                         self._consecutive_failures,
                         self.config.max_consecutive_failures, e)
            if self._consecutive_failures >= self.config.max_consecutive_failures:
                logger.warning("MicroCompact 熔断！跳过本次压缩")
                details = f"熔断 ({self._consecutive_failures}次连续失败)"
            else:
                details = f"失败: {e}"
            return CompactionResult(
                messages, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=details,
            )

    # ──────────────────── Tier 2: FullCompact ────────────────────

    async def _full_compact(
        self,
        messages: List[Dict[str, Any]],
        tokens_before: int,
        is_reactive: bool = False,
        llm_client=None,
    ) -> CompactionResult:
        """Replace the middle of the conversation with a summary message.

        Layout of the result:
        ``system messages + first exchange + summary boundary + recent tail``.

        - The first two non-system messages are kept verbatim.
        - The last ``min_preserve_messages`` non-system messages are kept.
        - Split points are moved so a ``tool`` message is never separated from
          the message preceding its run of tool results.
        - Circuit breaker: after ``max_consecutive_failures`` consecutive
          failures, proactive runs are skipped without attempting a summary;
          reactive runs still try, and a success resets the counter.

        Args:
            messages: current message list.
            tokens_before: token count of ``messages``.
            is_reactive: ``True`` when called from ``reactive_compact``.
            llm_client: optional client used to generate the summary.

        Never raises: failures are logged, counted and reported through the
        result's ``details`` with the original messages returned.
        """
        max_failures = self.config.max_consecutive_failures
        if (
            not is_reactive
            and max_failures > 0
            and self._consecutive_failures >= max_failures
        ):
            # Breaker is open: do not spend another summary attempt.
            return CompactionResult(
                messages, CompactionStrategy.FULL,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=f"熔断 ({self._consecutive_failures}次连续失败)",
            )

        try:
            preserve_count = self.config.min_preserve_messages
            if len(messages) <= preserve_count + 4:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="消息数不足，跳过 FullCompact",
                )

            system_msgs = [m for m in messages if m.get("role") == "system"]
            non_system = [m for m in messages if m.get("role") != "system"]

            # Keep the opening exchange only when there is enough history.
            head_end = 2 if len(non_system) > preserve_count + 4 else 0
            tail_start = max(head_end, len(non_system) - preserve_count)

            # Safe split points: tool results stay with what precedes them.
            while head_end < tail_start and non_system[head_end].get("role") == "tool":
                head_end += 1
            while (
                head_end < tail_start < len(non_system)
                and non_system[tail_start].get("role") == "tool"
            ):
                tail_start -= 1

            head = non_system[:head_end]
            older = non_system[head_end:tail_start]
            recent = non_system[tail_start:]

            if len(older) < 3:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="旧消息不足以压缩",
                )

            summary_text = await self._generate_summary(older, llm_client)

            compact_boundary = {
                "role": "user",
                "content": (
                    f"[对话上下文摘要 — 之前的关键信息]\n\n{summary_text}\n\n"
                    f"[以上为自动生成的对话摘要，共压缩 {len(older)} 条消息。"
                    f"以下是最近的对话延续]"
                ),
            }
            new_messages = system_msgs + head + [compact_boundary] + recent
            tokens_after = self.token_counter.count_messages(new_messages)
            strategy = CompactionStrategy.REACTIVE if is_reactive else CompactionStrategy.FULL

            logger.info(
                "FullCompact (%s): %d 条消息→摘要 (%d 字), %d → %d tokens (节省 %d)",
                "被动" if is_reactive else "主动",
                len(older), len(summary_text),
                tokens_before, tokens_after, tokens_before - tokens_after,
            )
            self._consecutive_failures = 0  # a success closes the breaker
            self._last_compact_time = time.time()
            self._compact_count += 1
            return CompactionResult(
                new_messages, strategy,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"压缩 {len(older)} 条→{len(summary_text)} 字摘要",
            )
        except Exception as e:
            # Best effort: hand back the untouched messages on any failure.
            self._consecutive_failures += 1
            logger.error("FullCompact 失败 (%d/%d): %s",
                         self._consecutive_failures,
                         self.config.max_consecutive_failures, e)
            if self._consecutive_failures >= self.config.max_consecutive_failures:
                logger.warning("FullCompact 熔断！返回原始消息")
                details = f"熔断 ({self._consecutive_failures}次连续失败)"
            else:
                details = f"失败: {e}"
            return CompactionResult(
                messages, CompactionStrategy.FULL,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=details,
            )

    async def _generate_summary(
        self,
        older_messages: List[Dict[str, Any]],
        llm_client=None,
    ) -> str:
        """Produce a summary of ``older_messages``.

        Without ``llm_client`` the heuristic ``_fallback_summary`` is used.
        With one, the summary comes from ``llm_client.chat``; an object that
        is not an ``_LLMClient`` is replaced by a temporary client built from
        the summary settings in the config. An empty or non-text reply also
        falls back to the heuristic summary. Errors raised by the client
        propagate to the caller.
        """
        if llm_client is None:
            return self._fallback_summary(older_messages)

        user_content = _build_compact_user_prompt(older_messages, max_chars=3000)

        # Imported lazily, only when an LLM client is actually in play.
        from app.agent_runtime.core import _LLMClient
        from app.agent_runtime.schemas import AgentLLMConfig

        if not isinstance(llm_client, _LLMClient):
            summary_config = AgentLLMConfig(
                provider="deepseek",
                model=self.config.summary_model,
                temperature=self.config.summary_temperature,
                max_tokens=self.config.summary_max_tokens,
                request_timeout=30.0,
            )
            llm_client = _LLMClient(summary_config)

        messages = [
            {"role": "system", "content": COMPACT_SUMMARY_SYSTEM},
            {"role": "user", "content": f"请将以下对话历史压缩为不超过{self.config.summary_max_tokens // 2}字的摘要：\n\n{user_content}"},
        ]
        response = await llm_client.chat(messages=messages, tools=None, iteration=-1)
        content = getattr(response, "content", "") or (
            response.get("content", "") if isinstance(response, dict) else ""
        )
        text = content.strip() if isinstance(content, str) else ""
        return text or self._fallback_summary(older_messages)

    @staticmethod
    def _content_text(content: Any) -> str:
        """Flatten a message ``content`` value into plain text."""
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Presumably OpenAI-style content parts; keep the textual ones.
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return "\n".join(pieces)
        return str(content)

    @staticmethod
    def _fallback_summary(older_messages: List[Dict[str, Any]]) -> str:
        """Heuristic summary used when no LLM is available.

        Lists the first 10 distinct user messages, in order of appearance,
        each cut to 60 characters and separated by "；".
        """
        # A dict keeps insertion order, giving a deterministic de-duplication.
        topics: Dict[str, None] = {}
        for msg in older_messages:
            if msg.get("role") != "user":
                continue
            text = CompactionEngine._content_text(msg.get("content"))
            if len(text) > 60:
                text = text[:60] + "..."
            if text:
                topics.setdefault(text, None)
        if not topics:
            return "此段对话为助手与用户的交互。"
        topic_list = "；".join(list(topics)[:10])
        return f"对话涉及以下话题: {topic_list}"
# ──────────────────────────── 工厂函数 ────────────────────────────
def create_compaction_engine(
    config: Optional[CompactionConfig] = None,
    model: str = "deepseek-v4-flash",
) -> CompactionEngine:
    """Build a ``CompactionEngine``, using a fresh ``CompactionConfig`` when none is given."""
    effective_config = CompactionConfig() if config is None else config
    return CompactionEngine(config=effective_config, model=model)