Files
aiagent/backend/app/core/compaction.py
renjianbo beff3fac8d fix: delete agent 500 error + dynamic personality + deployment guide
- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions,
  schedules, executions, team_members) and unbind goals/tasks before delete
- Remove hardcoded personality templates in Android, replace with dynamic
  system prompt generation from name + description
- Set promptSectionsEnabled=false to bypass PromptComposer for personality
- Add Tencent Cloud Linux deployment guide (Docker Compose)
- Accumulated backend service updates, frontend UI fixes, Android app changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-29 01:17:21 +08:00

498 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
对话自动压缩引擎 — 三级压缩体系
参考 Claude Code:
- src/services/compact/microCompact.ts — Tier 1: 工具结果打桩
- src/services/compact/compact.ts — Tier 2: LLM 摘要替换
- src/services/compact/reactiveCompact.ts — Tier 3: 错误触发压缩
- src/services/compact/grouping.ts — 安全分割点识别
"""
from __future__ import annotations
import logging
import time
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple
from app.core.token_counter import (
TokenCounter,
get_model_context_window,
is_context_length_error,
)
from app.core.compaction_config import CompactionConfig
logger = logging.getLogger(__name__)
# ──────────────────────────── 数据结构 ────────────────────────────
class CompactionStrategy(str, Enum):
    """Label for the compaction tier recorded on a ``CompactionResult``.

    The ``str`` mix-in makes every member compare equal to its plain string
    value, so members can be serialised or compared without ``.value``.
    """

    # No compaction was performed.
    NONE = "none"
    # Tier 1: old tool results replaced by a stub.
    MICRO = "micro"
    # Tier 2: older turns replaced by an LLM summary.
    FULL = "full"
    # Tier 3: compaction triggered by an API error.
    REACTIVE = "reactive"
class CompactionResult:
    """Outcome of a single compaction attempt.

    Attributes:
        messages: Message list to use from now on (possibly unchanged).
        strategy: Tier that produced this result.
        tokens_before: Token count measured before compaction.
        tokens_after: Token count measured after compaction.
        tokens_saved: ``tokens_before - tokens_after``.
        details: Optional human-readable note about what happened.
        timestamp: ``time.time()`` captured when the result was created.
    """

    def __init__(
        self,
        messages: List[Dict[str, Any]],
        strategy: CompactionStrategy,
        tokens_before: int,
        tokens_after: int,
        details: Optional[str] = None,
    ):
        self.messages, self.strategy = messages, strategy
        self.tokens_before, self.tokens_after = tokens_before, tokens_after
        # Derived once here so callers never have to recompute the delta.
        self.tokens_saved = tokens_before - tokens_after
        self.details = details
        self.timestamp = time.time()

    def __repr__(self) -> str:
        fields = (
            f"strategy={self.strategy.value}",
            f"saved={self.tokens_saved} tokens",
            f"before={self.tokens_before} after={self.tokens_after}",
        )
        return "CompactionResult(" + ", ".join(fields) + ")"
# ──────────────────────────── 压缩摘要提示词 ────────────────────────────
# System prompt for the summarisation request issued by
# CompactionEngine._generate_summary.  The text is sent to the model verbatim,
# so it is runtime data: edit it only to change the summariser's behaviour.
# In short it asks for: key facts and decisions kept, unfinished tasks kept,
# greetings/small talk/intermediate tool details dropped, third-person Chinese,
# within the requested length, and plain summary text with no prefix or markup.
COMPACT_SUMMARY_SYSTEM = """你是一个对话摘要专家。你需要将一段 AI 助手与用户的对话历史压缩为简洁的摘要。
规则:
- 保留关键事实和决策(文件路径、数值、结论、用户偏好)
- 保留未完成的任务或待处理事项
- 忽略纯粹的问候、闲聊和中间工具调用细节
- 用第三人称中文描述
- 控制在你被要求的字数范围内
返回格式:直接返回摘要文本,不要加任何前缀或标记。"""
def _build_compact_user_prompt(older_messages: List[Dict[str, Any]], max_chars: int = 2000) -> str:
"""从旧消息中构建压缩提示词。"""
parts = []
total_chars = 0
for msg in older_messages:
role = msg.get("role", "?")
content = msg.get("content", "") or ""
# 截断长内容
if len(content) > 500:
content = content[:500] + "..."
line = f"[{role}]: {content}"
if total_chars + len(line) > max_chars:
parts.append("...(更早的消息已省略)")
break
parts.append(line)
total_chars += len(line)
return "\n".join(parts)
# ──────────────────────────── CompactionEngine ────────────────────────────
class CompactionEngine:
    """Three-tier conversation compaction engine.

    Meant to be consulted before every LLM call.  Token usage is measured
    against the effective window (context window minus the configured output
    reserve) and compared with the thresholds held in ``CompactionConfig``:

    - Tier 1, MicroCompact: replace old tool results with a short stub.
    - Tier 2, FullCompact: replace older turns with one summary message.
    - Tier 3, ReactiveCompact: emergency FullCompact after the API rejected a
      request (see ``reactive_compact``).
    """

    # Placeholder written in place of a compacted tool result.
    _TOOL_STUB = "[Tool result compacted]"

    def __init__(
        self,
        config: CompactionConfig,
        token_counter: Optional[TokenCounter] = None,
        model: str = "deepseek-v4-flash",
    ):
        """Create an engine.

        Args:
            config: Thresholds, tool lists and summary settings.
            token_counter: Counter to use; a ``TokenCounter`` for ``model`` is
                created when omitted.
            model: Model name used for token counting and window lookup.
        """
        self.config = config
        self.token_counter = token_counter or TokenCounter(model=model)
        self.model = model
        # Failure bookkeeping.  NOTE(review): the counter currently only
        # changes log/details text after a failure; no attempt is ever skipped.
        self._consecutive_failures = 0
        self._last_compact_time: float = 0
        self._compact_count = 0

    # ──────────────────── entry points ────────────────────
    async def maybe_compact(
        self,
        messages: List[Dict[str, Any]],
        context_window: Optional[int] = None,
        llm_client: Any = None,
    ) -> CompactionResult:
        """Compact ``messages`` if current token usage calls for it.

        Args:
            messages: Current message list (including system prompts).
            context_window: Model context window; looked up from the model
                name when ``None``.
            llm_client: Optional client forwarded to the FullCompact
                summariser.  Without it FullCompact uses the non-LLM fallback
                summary.

        Returns:
            A ``CompactionResult`` whose ``messages`` may or may not differ
            from the input.
        """
        if not self.config.enabled:
            return CompactionResult(
                messages, CompactionStrategy.NONE,
                tokens_before=0, tokens_after=0,
                details="压缩已禁用",
            )

        if context_window is None:
            context_window = get_model_context_window(self.model)
        # NOTE(review): the original indentation was lost; the override is
        # applied unconditionally here (it also beats an explicit argument).
        # Confirm against the upstream file.
        if self.config.context_window_override > 0:
            context_window = self.config.context_window_override

        # Effective window = model window minus room reserved for the reply.
        effective_window = context_window - self.config.output_reserve_tokens
        tokens_before = self.token_counter.count_messages(messages)
        usage_ratio = tokens_before / effective_window if effective_window > 0 else 0

        # Tier 2 is checked first so that Tier 1 still applies above the full
        # threshold when FullCompact is disabled (previously neither tier ran
        # in that case).
        if (
            self.config.full_compact_enabled
            and usage_ratio >= self.config.full_compact_threshold
        ):
            return await self._full_compact(
                messages, tokens_before, llm_client=llm_client
            )
        if (
            self.config.micro_compact_enabled
            and usage_ratio >= self.config.micro_compact_threshold
        ):
            return await self._micro_compact(messages, tokens_before)

        return CompactionResult(
            messages, CompactionStrategy.NONE,
            tokens_before=tokens_before, tokens_after=tokens_before,
            details=f"usage={usage_ratio:.1%} < threshold={self.config.micro_compact_threshold:.0%}",
        )

    async def reactive_compact(
        self,
        messages: List[Dict[str, Any]],
        error: Exception,
        context_window: Optional[int] = None,
        llm_client: Any = None,
    ) -> CompactionResult:
        """Emergency compaction after an API context-overflow error (Tier 3).

        Args:
            messages: Current message list.
            error: The exception that triggered the call.
            context_window: Context window size (used for logging only).
            llm_client: Optional client forwarded to the summariser.

        Returns:
            The result of a FullCompact run flagged as reactive.

        Raises:
            Exception: ``error`` itself, when reactive compaction is disabled.
        """
        if not self.config.reactive_compact_enabled:
            raise error
        if context_window is None:
            context_window = get_model_context_window(self.model)
        tokens_before = self.token_counter.count_messages(messages)
        logger.warning(
            "ReactiveCompact 触发: %s, tokens=%d, window=%d",
            str(error)[:100], tokens_before, context_window,
        )
        return await self._full_compact(
            messages, tokens_before, is_reactive=True, llm_client=llm_client
        )

    # ──────────────────── Tier 1: MicroCompact ────────────────────
    @classmethod
    def _stub_tool_results(
        cls,
        messages: List[Dict[str, Any]],
        compact_before: int,
        compactable: set,
        protected: set,
    ) -> Tuple[List[Dict[str, Any]], int]:
        """Return a copy of ``messages`` with old tool results stubbed.

        Only ``tool`` messages at an index below ``compact_before`` are
        touched, and only when their tool name is in ``compactable`` and not
        in ``protected``.  Results that are already stubs are left alone so
        repeated passes do not report phantom work.

        Returns:
            ``(new_messages, number_of_results_stubbed)``.
        """
        # Map tool_call_id -> tool name from assistant tool_calls, so tool
        # results that carry no ``name`` field can still be classified.
        names_by_call_id: Dict[Any, str] = {}
        for msg in messages:
            if msg.get("role") == "assistant" and msg.get("tool_calls"):
                for tc in msg["tool_calls"]:
                    tc_id = tc.get("id") or tc.get("tool_call_id")
                    fn = tc.get("function")
                    name = (fn.get("name") if isinstance(fn, dict) else None) or tc.get("name")
                    if tc_id and name:
                        names_by_call_id[tc_id] = name

        result: List[Dict[str, Any]] = []
        stubbed = 0
        for idx, msg in enumerate(messages):
            if idx >= compact_before or msg.get("role") != "tool":
                result.append(msg)
                continue
            tc_id = msg.get("tool_call_id", "")
            tool_name = msg.get("name") or names_by_call_id.get(tc_id, "")
            if (
                tool_name not in compactable
                or tool_name in protected
                or msg.get("content") == cls._TOOL_STUB
            ):
                result.append(msg)
                continue
            result.append({
                "role": "tool",
                "tool_call_id": tc_id or "compacted",
                "content": cls._TOOL_STUB,
                "name": tool_name,
            })
            stubbed += 1
        return result, stubbed

    async def _micro_compact(
        self, messages: List[Dict[str, Any]], tokens_before: int
    ) -> CompactionResult:
        """MicroCompact: replace old tool results with a stub marker.

        Steps:
        1. Locate user messages; each one marks the start of a round.
        2. Keep the most recent ``compact_older_than_rounds`` rounds intact.
        3. Before that line, stub results of compactable, unprotected tools.
           Assistant ``tool_calls`` messages are never modified.

        Any exception is caught; the original messages are returned with the
        failure described in ``details``.
        """
        try:
            compactable = set(self.config.compactable_tools)
            protected = set(self.config.protected_tools)

            user_indices = [
                i for i, msg in enumerate(messages) if msg.get("role") == "user"
            ]
            if len(user_indices) <= self.config.min_preserve_messages // 2:
                # Too few rounds to be worth compacting.
                return CompactionResult(
                    messages, CompactionStrategy.MICRO,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="对话轮次不足,跳过 MicroCompact",
                )

            # Protection line: index of the Nth-from-last user message.
            rounds = self.config.compact_older_than_rounds
            first_protected = max(0, len(user_indices) - rounds)
            if first_protected < len(user_indices):
                compact_before = user_indices[first_protected]
            elif rounds <= 0:
                # Nothing is protected, so every message counts as old
                # (previously this case compacted nothing at all).
                compact_before = len(messages)
            else:
                compact_before = 0

            result, stubbed_count = self._stub_tool_results(
                messages, compact_before, compactable, protected
            )
            if stubbed_count == 0:
                # ``result`` holds the same messages, so the count is unchanged.
                return CompactionResult(
                    result, CompactionStrategy.MICRO,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="没有可压缩的旧工具结果",
                )

            tokens_after = self.token_counter.count_messages(result)
            logger.info(
                "MicroCompact: %d 条工具结果打桩, %d → %d tokens (节省 %d)",
                stubbed_count, tokens_before, tokens_after,
                tokens_before - tokens_after,
            )
            return CompactionResult(
                result, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"{stubbed_count} 条工具结果已压缩",
            )
        except Exception as e:
            self._consecutive_failures += 1
            logger.error("MicroCompact 失败 (%d/%d): %s",
                         self._consecutive_failures,
                         self.config.max_consecutive_failures, e)
            if self._consecutive_failures >= self.config.max_consecutive_failures:
                logger.warning("MicroCompact 熔断!跳过本次压缩")
                details = f"熔断 ({self._consecutive_failures}次连续失败)"
            else:
                details = f"失败: {e}"
            return CompactionResult(
                messages, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=details,
            )

    # ──────────────────── Tier 2: FullCompact ────────────────────
    @staticmethod
    def _split_for_summary(
        non_system: List[Dict[str, Any]], preserve_count: int
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Split non-system messages into ``(older, recent)``.

        ``recent`` is the tail of roughly ``preserve_count`` messages that is
        kept verbatim; ``older`` is everything before it and gets summarised.
        The split is moved forward past leading ``tool`` messages so the kept
        tail never starts with a tool result whose ``assistant(tool_calls)``
        message was summarised away.
        """
        split = max(0, len(non_system) - max(0, preserve_count))
        while split < len(non_system) and non_system[split].get("role") == "tool":
            split += 1
        return non_system[:split], non_system[split:]

    async def _full_compact(
        self,
        messages: List[Dict[str, Any]],
        tokens_before: int,
        is_reactive: bool = False,
        llm_client=None,  # optional externally supplied LLM client
    ) -> CompactionResult:
        """FullCompact: replace older conversation with a summary message.

        Steps:
        1. Keep all system messages and the most recent messages.
        2. Summarise everything in between (LLM, or fallback without client).
        3. Insert the summary as a ``user`` boundary message.

        Any exception is caught; the original messages are returned with the
        failure described in ``details``.

        Args:
            messages: Current message list.
            tokens_before: Token count of ``messages``.
            is_reactive: Tag the result as ``REACTIVE`` instead of ``FULL``.
            llm_client: Forwarded to ``_generate_summary``.
        """
        try:
            preserve_count = self.config.min_preserve_messages
            if len(messages) <= preserve_count + 4:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="消息数不足,跳过 FullCompact",
                )

            system_msgs = [m for m in messages if m.get("role") == "system"]
            non_system = [m for m in messages if m.get("role") != "system"]

            # Everything before the preserved tail is summarised.  The earlier
            # code sliced off the first two non-system messages and then
            # neither summarised nor kept them, silently losing them.
            older, recent = self._split_for_summary(non_system, preserve_count)
            if len(older) < 3:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="旧消息不足以压缩",
                )

            summary_text = await self._generate_summary(older, llm_client)

            # Result layout: system + compact_boundary + recent.
            compact_boundary = {
                "role": "user",
                "content": (
                    f"[对话上下文摘要 — 之前的关键信息]\n\n{summary_text}\n\n"
                    f"[以上为自动生成的对话摘要,共压缩 {len(older)} 条消息。"
                    f"以下是最近的对话延续]"
                ),
            }
            new_messages = system_msgs + [compact_boundary] + recent
            tokens_after = self.token_counter.count_messages(new_messages)
            strategy = CompactionStrategy.REACTIVE if is_reactive else CompactionStrategy.FULL
            logger.info(
                "FullCompact (%s): %d 条消息→摘要 (%d 字), %d → %d tokens (节省 %d)",
                "被动" if is_reactive else "主动",
                len(older), len(summary_text),
                tokens_before, tokens_after, tokens_before - tokens_after,
            )
            self._consecutive_failures = 0  # reset after a success
            self._last_compact_time = time.time()
            self._compact_count += 1
            return CompactionResult(
                new_messages, strategy,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"压缩 {len(older)} 条→{len(summary_text)} 字摘要",
            )
        except Exception as e:
            self._consecutive_failures += 1
            logger.error("FullCompact 失败 (%d/%d): %s",
                         self._consecutive_failures,
                         self.config.max_consecutive_failures, e)
            if self._consecutive_failures >= self.config.max_consecutive_failures:
                logger.warning("FullCompact 熔断!返回原始消息")
                details = f"熔断 ({self._consecutive_failures}次连续失败)"
            else:
                details = f"失败: {e}"
            return CompactionResult(
                messages, CompactionStrategy.FULL,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=details,
            )

    async def _generate_summary(
        self,
        older_messages: List[Dict[str, Any]],
        llm_client=None,
    ) -> str:
        """Produce a summary of ``older_messages``.

        With ``llm_client=None`` the non-LLM fallback is used.  Otherwise the
        client's ``chat`` coroutine is called; a value that is not an
        ``_LLMClient`` is replaced by a temporary client built from the
        summary settings in the config.  An empty model reply also falls back.
        Exceptions from the client propagate to the caller.
        """
        if llm_client is None:
            return self._fallback_summary(older_messages)

        # Imported lazily, as in the original code.
        from app.agent_runtime.core import _LLMClient
        from app.agent_runtime.schemas import AgentLLMConfig

        if not isinstance(llm_client, _LLMClient):
            # Build a temporary client dedicated to summarisation.
            summary_config = AgentLLMConfig(
                provider="deepseek",
                model=self.config.summary_model,
                temperature=self.config.summary_temperature,
                max_tokens=self.config.summary_max_tokens,
                request_timeout=30.0,
            )
            llm_client = _LLMClient(summary_config)

        user_content = _build_compact_user_prompt(older_messages, max_chars=3000)
        messages = [
            {"role": "system", "content": COMPACT_SUMMARY_SYSTEM},
            {"role": "user", "content": f"请将以下对话历史压缩为不超过{self.config.summary_max_tokens // 2}字的摘要:\n\n{user_content}"},
        ]
        response = await llm_client.chat(messages=messages, tools=None, iteration=-1)
        # Accept either an object with ``.content`` or a plain dict.
        content = getattr(response, 'content', '') or (
            response.get('content', '') if isinstance(response, dict) else ""
        )
        return content.strip() or self._fallback_summary(older_messages)

    @staticmethod
    def _fallback_summary(older_messages: List[Dict[str, Any]]) -> str:
        """Build a summary without an LLM from the user messages.

        Takes up to ten distinct user utterances (each cut to 60 characters),
        in order of first appearance, joined with ``"; "``.  ``None`` and list
        content are tolerated.
        """
        # A dict keeps insertion order, giving a deterministic de-duplicated
        # list (a set would reorder topics between runs).
        topics: Dict[str, None] = {}
        for msg in older_messages:
            if msg.get("role") != "user":
                continue
            raw = msg.get("content")
            if isinstance(raw, str):
                text = raw
            elif isinstance(raw, list):
                text = " ".join(
                    part["text"] for part in raw
                    if isinstance(part, dict) and isinstance(part.get("text"), str)
                )
            elif raw:
                text = str(raw)
            else:
                text = ""
            if len(text) > 60:
                text = text[:60] + "..."
            if text:
                topics.setdefault(text, None)
        if not topics:
            return "此段对话为助手与用户的交互。"
        topic_list = "; ".join(list(topics)[:10])
        return f"对话涉及以下话题: {topic_list}"
# ──────────────────────────── 工厂函数 ────────────────────────────
def create_compaction_engine(
    config: Optional[CompactionConfig] = None,
    model: str = "deepseek-v4-flash",
) -> CompactionEngine:
    """Convenience factory for ``CompactionEngine``.

    Args:
        config: Engine configuration; a default ``CompactionConfig()`` is
            created when omitted.
        model: Model name passed through to the engine.

    Returns:
        A new ``CompactionEngine`` built from the given (or default) config.
    """
    effective_config = CompactionConfig() if config is None else config
    return CompactionEngine(config=effective_config, model=model)