- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions, schedules, executions, team_members) and unbind goals/tasks before delete - Remove hardcoded personality templates in Android, replace with dynamic system prompt generation from name + description - Set promptSectionsEnabled=false to bypass PromptComposer for personality - Add Tencent Cloud Linux deployment guide (Docker Compose) - Accumulated backend service updates, frontend UI fixes, Android app changes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
498 lines
20 KiB
Python
498 lines
20 KiB
Python
"""
|
||
对话自动压缩引擎 — 三级压缩体系
|
||
|
||
参考 Claude Code:
|
||
- src/services/compact/microCompact.ts — Tier 1: 工具结果打桩
|
||
- src/services/compact/compact.ts — Tier 2: LLM 摘要替换
|
||
- src/services/compact/reactiveCompact.ts — Tier 3: 错误触发压缩
|
||
- src/services/compact/grouping.ts — 安全分割点识别
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import time
|
||
from enum import Enum
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
from app.core.token_counter import (
|
||
TokenCounter,
|
||
get_model_context_window,
|
||
is_context_length_error,
|
||
)
|
||
from app.core.compaction_config import CompactionConfig
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
# ──────────────────────────── 数据结构 ────────────────────────────
|
||
|
||
class CompactionStrategy(str, Enum):
    """Identifies which compaction tier produced a result.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # No compaction was performed.
    NONE = "none"
    # Tier 1: old tool results replaced by stubs.
    MICRO = "micro"
    # Tier 2: older messages replaced by a summary.
    FULL = "full"
    # Tier 3: compaction triggered by an API error.
    REACTIVE = "reactive"
|
||
|
||
|
||
class CompactionResult:
    """Outcome of a single compaction attempt.

    Attributes:
        messages: The message list to use after the attempt (may be the
            unmodified input).
        strategy: The ``CompactionStrategy`` the result is labelled with.
        tokens_before: Token count supplied for the input messages.
        tokens_after: Token count supplied for ``messages``.
        tokens_saved: ``tokens_before - tokens_after``.
        details: Optional human-readable explanation.
        timestamp: ``time.time()`` at construction.
    """

    def __init__(
        self,
        messages: List[Dict[str, Any]],
        strategy: CompactionStrategy,
        tokens_before: int,
        tokens_after: int,
        details: Optional[str] = None,
    ):
        saved = tokens_before - tokens_after
        self.messages = messages
        self.strategy = strategy
        self.tokens_before = tokens_before
        self.tokens_after = tokens_after
        self.tokens_saved = saved
        self.details = details
        self.timestamp = time.time()

    def __repr__(self) -> str:
        """Return a compact one-line description of the token accounting."""
        fields = (
            f"strategy={self.strategy.value}",
            f"saved={self.tokens_saved} tokens",
            f"before={self.tokens_before} after={self.tokens_after}",
        )
        return "CompactionResult(" + ", ".join(fields) + ")"
|
||
|
||
|
||
# ──────────────────────────── 压缩摘要提示词 ────────────────────────────
|
||
|
||
COMPACT_SUMMARY_SYSTEM = """你是一个对话摘要专家。你需要将一段 AI 助手与用户的对话历史压缩为简洁的摘要。
|
||
|
||
规则:
|
||
- 保留关键事实和决策(文件路径、数值、结论、用户偏好)
|
||
- 保留未完成的任务或待处理事项
|
||
- 忽略纯粹的问候、闲聊和中间工具调用细节
|
||
- 用第三人称中文描述
|
||
- 控制在你被要求的字数范围内
|
||
|
||
返回格式:直接返回摘要文本,不要加任何前缀或标记。"""
|
||
|
||
|
||
def _build_compact_user_prompt(older_messages: List[Dict[str, Any]], max_chars: int = 2000) -> str:
|
||
"""从旧消息中构建压缩提示词。"""
|
||
parts = []
|
||
total_chars = 0
|
||
for msg in older_messages:
|
||
role = msg.get("role", "?")
|
||
content = msg.get("content", "") or ""
|
||
# 截断长内容
|
||
if len(content) > 500:
|
||
content = content[:500] + "..."
|
||
line = f"[{role}]: {content}"
|
||
if total_chars + len(line) > max_chars:
|
||
parts.append("...(更早的消息已省略)")
|
||
break
|
||
parts.append(line)
|
||
total_chars += len(line)
|
||
return "\n".join(parts)
|
||
|
||
|
||
# ──────────────────────────── CompactionEngine ────────────────────────────
|
||
|
||
class CompactionEngine:
    """Three-tier conversation compaction engine.

    Token usage of a message list is measured against the effective context
    window (model window minus ``config.output_reserve_tokens``):

    - usage >= ``config.micro_compact_threshold``: MicroCompact — old tool
      results are replaced by short stubs.
    - usage >= ``config.full_compact_threshold``: FullCompact — older
      messages are replaced by a single summary message.
    - after the API reports a context-length error the caller invokes
      :meth:`reactive_compact`, which runs FullCompact unconditionally.

    NOTE(review): ``_consecutive_failures`` is incremented on every failed
    attempt and a warning is logged once it reaches
    ``config.max_consecutive_failures``, but no code path skips a later
    attempt based on it — confirm whether a real short-circuit is wanted.
    """

    def __init__(
        self,
        config: CompactionConfig,
        token_counter: Optional[TokenCounter] = None,
        model: str = "deepseek-v4-flash",
    ):
        """Store the configuration and set up token counting.

        Args:
            config: Compaction settings read by every method.
            token_counter: Counter to use; when falsy a ``TokenCounter`` is
                created for ``model``.
            model: Model name used for the default counter and for context
                window lookup.
        """
        self.config = config
        self.token_counter = token_counter or TokenCounter(model=model)
        self.model = model
        # Failure / usage bookkeeping.
        self._consecutive_failures = 0
        self._last_compact_time: float = 0
        self._compact_count = 0

    # ──────────────────── entry points ────────────────────

    async def maybe_compact(
        self,
        messages: List[Dict[str, Any]],
        context_window: Optional[int] = None,
        llm_client=None,
    ) -> CompactionResult:
        """Compact ``messages`` if current token usage calls for it.

        Args:
            messages: Current message list (including system prompts).
            context_window: Model context window size; ``None`` looks it up
                from the model name.
            llm_client: Optional client forwarded to FullCompact for summary
                generation. When ``None`` the summary comes from the
                non-LLM fallback.

        Returns:
            A ``CompactionResult`` holding the (possibly compacted) messages.
            The strategy is ``NONE`` when compaction is disabled or usage is
            below every applicable threshold.
        """
        if not self.config.enabled:
            return CompactionResult(
                messages, CompactionStrategy.NONE,
                tokens_before=0, tokens_after=0,
                details="压缩已禁用",
            )

        # Resolve the context window.
        if context_window is None:
            context_window = get_model_context_window(self.model)
        # NOTE(review): a positive config override is applied even when the
        # caller passes context_window explicitly — confirm this precedence.
        if self.config.context_window_override > 0:
            context_window = self.config.context_window_override

        # Effective window = model window minus the room kept for output.
        effective_window = context_window - self.config.output_reserve_tokens

        tokens_before = self.token_counter.count_messages(messages)
        # A non-positive effective window yields ratio 0, i.e. no compaction.
        usage_ratio = tokens_before / effective_window if effective_window > 0 else 0

        # Tier 2 takes precedence once its threshold is reached.
        if (
            self.config.full_compact_enabled
            and usage_ratio >= self.config.full_compact_threshold
        ):
            return await self._full_compact(
                messages, tokens_before, llm_client=llm_client
            )

        # Tier 1 covers everything from its own threshold upward that Tier 2
        # did not handle — including usage above the full threshold when
        # FullCompact is disabled.
        if (
            self.config.micro_compact_enabled
            and usage_ratio >= self.config.micro_compact_threshold
        ):
            return await self._micro_compact(messages, tokens_before)

        # Nothing to do.
        return CompactionResult(
            messages, CompactionStrategy.NONE,
            tokens_before=tokens_before, tokens_after=tokens_before,
            details=f"usage={usage_ratio:.1%} < threshold={self.config.micro_compact_threshold:.0%}",
        )

    async def reactive_compact(
        self,
        messages: List[Dict[str, Any]],
        error: Exception,
        context_window: Optional[int] = None,
        llm_client=None,
    ) -> CompactionResult:
        """Emergency compaction after a context-length API error (Tier 3).

        Args:
            messages: Current message list.
            error: The exception that triggered the call.
            context_window: Context window size; only used for logging here.
            llm_client: Optional client forwarded to FullCompact.

        Returns:
            The result of a FullCompact run flagged as reactive.

        Raises:
            Exception: ``error`` itself is re-raised when
                ``config.reactive_compact_enabled`` is false.
        """
        if not self.config.reactive_compact_enabled:
            raise error

        if context_window is None:
            context_window = get_model_context_window(self.model)

        tokens_before = self.token_counter.count_messages(messages)
        logger.warning(
            "ReactiveCompact 触发: %s, tokens=%d, window=%d",
            str(error)[:100], tokens_before, context_window,
        )

        return await self._full_compact(
            messages, tokens_before, is_reactive=True, llm_client=llm_client
        )

    # ──────────────────── Tier 1: MicroCompact ────────────────────

    async def _micro_compact(
        self, messages: List[Dict[str, Any]], tokens_before: int
    ) -> CompactionResult:
        """Replace old tool results with a short stub.

        Steps:
        1. Locate every ``user`` message; skip entirely when there are no
           more than ``config.min_preserve_messages // 2`` of them.
        2. Draw a protection line at the user message that starts the last
           ``config.compact_older_than_rounds`` rounds.
        3. Before that line, replace each ``tool`` message whose ``name`` is
           in ``config.compactable_tools`` and not in
           ``config.protected_tools`` by a stub; every other message
           (including assistant ``tool_calls`` messages) is kept as is.

        Any exception is caught: the failure counter is incremented and the
        original messages are returned.
        """
        try:
            compactable = set(self.config.compactable_tools)
            protected = set(self.config.protected_tools)

            # Positions of user messages define the conversation "rounds".
            user_indices = [
                i for i, msg in enumerate(messages) if msg.get("role") == "user"
            ]

            if len(user_indices) <= self.config.min_preserve_messages // 2:
                # Too few rounds to be worth compacting.
                return CompactionResult(
                    messages, CompactionStrategy.MICRO,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="对话轮次不足,跳过 MicroCompact",
                )

            # Protection line: index of the Nth-from-last user message.
            # With N == 0 the index falls off the end and the line is 0,
            # i.e. nothing is compacted.
            preserve_idx = max(0, len(user_indices) - self.config.compact_older_than_rounds)
            compact_before = user_indices[preserve_idx] if preserve_idx < len(user_indices) else 0

            stubbed_count = 0
            result = []
            for i, msg in enumerate(messages):
                if i >= compact_before:
                    # At or after the protection line: keep untouched.
                    result.append(msg)
                    continue

                role = msg.get("role", "")
                tool_name = msg.get("name", "")

                if role == "tool" and tool_name in compactable and tool_name not in protected:
                    tc_id = msg.get("tool_call_id", "")
                    result.append({
                        "role": "tool",
                        "tool_call_id": tc_id or "compacted",
                        "content": "[Tool result compacted]",
                        "name": tool_name,
                    })
                    stubbed_count += 1
                else:
                    result.append(msg)

            if stubbed_count == 0:
                return CompactionResult(
                    result, CompactionStrategy.MICRO,
                    tokens_before=tokens_before,
                    tokens_after=self.token_counter.count_messages(result),
                    details="没有可压缩的旧工具结果",
                )

            tokens_after = self.token_counter.count_messages(result)
            logger.info(
                "MicroCompact: %d 条工具结果打桩, %d → %d tokens (节省 %d)",
                stubbed_count, tokens_before, tokens_after,
                tokens_before - tokens_after,
            )
            return CompactionResult(
                result, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"{stubbed_count} 条工具结果已压缩",
            )

        except Exception as e:
            self._consecutive_failures += 1
            logger.error("MicroCompact 失败 (%d/%d): %s",
                         self._consecutive_failures,
                         self.config.max_consecutive_failures, e)
            details = f"失败: {e}"
            if self._consecutive_failures >= self.config.max_consecutive_failures:
                logger.warning("MicroCompact 熔断!跳过本次压缩")
                details = f"熔断 ({self._consecutive_failures}次连续失败)"
            # Either way the caller gets the original messages back.
            return CompactionResult(
                messages, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=details,
            )

    # ──────────────────── Tier 2: FullCompact ────────────────────

    async def _full_compact(
        self,
        messages: List[Dict[str, Any]],
        tokens_before: int,
        is_reactive: bool = False,
        llm_client=None,  # optional externally supplied LLM client
    ) -> CompactionResult:
        """Replace older messages with one summary message.

        Steps:
        1. Keep every ``system`` message and the most recent
           ``config.min_preserve_messages`` non-system messages.
        2. Summarize everything in between (LLM when ``llm_client`` is
           given, otherwise the fallback summary).
        3. Return ``system + [summary boundary message] + recent``.

        Skipped (original messages returned) when there are at most
        ``min_preserve_messages + 4`` messages or fewer than 3 messages
        would be summarized. Any exception is caught: the failure counter is
        incremented and the original messages are returned.

        Args:
            messages: Current message list.
            tokens_before: Token count of ``messages``.
            is_reactive: Label a successful result ``REACTIVE`` instead of
                ``FULL``.
            llm_client: Optional client used to generate the summary.
        """
        try:
            preserve_count = self.config.min_preserve_messages

            if len(messages) <= preserve_count + 4:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="消息数不足,跳过 FullCompact",
                )

            # ── partition ──
            system_msgs = [m for m in messages if m.get("role") == "system"]
            non_system = [m for m in messages if m.get("role") != "system"]

            # Everything before `split` is summarized, the rest is kept.
            # The summarized span starts at index 0 so that no message is
            # dropped without being either summarized or preserved.
            split = max(0, len(non_system) - preserve_count)
            # Do not let the preserved tail begin with tool results whose
            # originating assistant tool_calls message would be summarized
            # away: walk back until the tail starts on a non-tool message.
            while 0 < split < len(non_system) and non_system[split].get("role") == "tool":
                split -= 1
            older = non_system[:split]
            recent = non_system[split:]

            if len(older) < 3:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="旧消息不足以压缩",
                )

            # ── generate the summary ──
            summary_text = await self._generate_summary(older, llm_client)

            # ── assemble: system + compact_boundary + recent ──
            compact_boundary = {
                "role": "user",
                "content": (
                    f"[对话上下文摘要 — 之前的关键信息]\n\n{summary_text}\n\n"
                    f"[以上为自动生成的对话摘要,共压缩 {len(older)} 条消息。"
                    f"以下是最近的对话延续]"
                ),
            }

            new_messages = system_msgs + [compact_boundary] + recent
            tokens_after = self.token_counter.count_messages(new_messages)

            strategy = CompactionStrategy.REACTIVE if is_reactive else CompactionStrategy.FULL
            logger.info(
                "FullCompact (%s): %d 条消息→摘要 (%d 字), %d → %d tokens (节省 %d)",
                "被动" if is_reactive else "主动",
                len(older), len(summary_text),
                tokens_before, tokens_after, tokens_before - tokens_after,
            )

            self._consecutive_failures = 0  # reset after a success
            self._last_compact_time = time.time()
            self._compact_count += 1

            return CompactionResult(
                new_messages, strategy,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"压缩 {len(older)} 条→{len(summary_text)} 字摘要",
            )

        except Exception as e:
            self._consecutive_failures += 1
            logger.error("FullCompact 失败 (%d/%d): %s",
                         self._consecutive_failures,
                         self.config.max_consecutive_failures, e)
            details = f"失败: {e}"
            if self._consecutive_failures >= self.config.max_consecutive_failures:
                logger.warning("FullCompact 熔断!返回原始消息")
                details = f"熔断 ({self._consecutive_failures}次连续失败)"
            # Either way the caller gets the original messages back.
            return CompactionResult(
                messages, CompactionStrategy.FULL,
                tokens_before=tokens_before, tokens_after=tokens_before,
                details=details,
            )

    async def _generate_summary(
        self,
        older_messages: List[Dict[str, Any]],
        llm_client=None,
    ) -> str:
        """Produce the summary text for ``older_messages``.

        Without an ``llm_client`` the fallback summary is returned. With
        one, a chat request is sent; if the passed object is not an
        ``_LLMClient`` a temporary client is built from the summary settings
        in the config. An empty model answer also falls back.
        """
        if llm_client is None:
            # No LLM available: degrade to the local summary.
            return self._fallback_summary(older_messages)

        user_content = _build_compact_user_prompt(
            older_messages,
            max_chars=3000,
        )

        # Imported lazily, only when the LLM path is actually taken.
        from app.agent_runtime.core import _LLMClient
        from app.agent_runtime.schemas import AgentLLMConfig

        if not isinstance(llm_client, _LLMClient):
            # Build a temporary client dedicated to summarization.
            summary_config = AgentLLMConfig(
                provider="deepseek",
                model=self.config.summary_model,
                temperature=self.config.summary_temperature,
                max_tokens=self.config.summary_max_tokens,
                request_timeout=30.0,
            )
            llm_client = _LLMClient(summary_config)

        messages = [
            {"role": "system", "content": COMPACT_SUMMARY_SYSTEM},
            {"role": "user", "content": f"请将以下对话历史压缩为不超过{self.config.summary_max_tokens // 2}字的摘要:\n\n{user_content}"},
        ]

        response = await llm_client.chat(messages=messages, tools=None, iteration=-1)
        # Accept either an object with a `content` attribute or a dict.
        content = getattr(response, 'content', '') or (
            response.get('content', '') if isinstance(response, dict) else ""
        )
        return content.strip() or self._fallback_summary(older_messages)

    @staticmethod
    def _fallback_summary(older_messages: List[Dict[str, Any]]) -> str:
        """Build a summary without an LLM from the user messages.

        Each user message's content (cut to 60 characters plus ``...`` when
        longer) is one topic. Topics are de-duplicated in first-seen order
        and at most the first 10 are listed. Missing or ``None`` content is
        ignored; non-string content is converted with ``str``.
        """
        # A dict is used as an insertion-ordered set so the output is
        # deterministic and follows the conversation order.
        topics: Dict[str, None] = {}
        for msg in older_messages:
            if msg.get("role") != "user":
                continue
            content = msg.get("content") or ""
            if not isinstance(content, str):
                content = str(content)
            if len(content) > 60:
                content = content[:60] + "..."
            if content:
                topics.setdefault(content, None)

        if not topics:
            return "此段对话为助手与用户的交互。"

        topic_list = ";".join(list(topics)[:10])
        return f"对话涉及以下话题: {topic_list}"
|
||
|
||
|
||
# ──────────────────────────── 工厂函数 ────────────────────────────
|
||
|
||
def create_compaction_engine(
    config: Optional[CompactionConfig] = None,
    model: str = "deepseek-v4-flash",
) -> CompactionEngine:
    """Convenience factory for a ``CompactionEngine``.

    Args:
        config: Settings to use; a default-constructed ``CompactionConfig``
            is created when ``None``.
        model: Model name passed through to the engine.

    Returns:
        A new ``CompactionEngine`` built from ``config`` and ``model``.
    """
    effective_config = CompactionConfig() if config is None else config
    return CompactionEngine(config=effective_config, model=model)
|