498 lines
20 KiB
Python
498 lines
20 KiB
Python
|
|
"""
|
|||
|
|
对话自动压缩引擎 — 三级压缩体系
|
|||
|
|
|
|||
|
|
参考 Claude Code:
|
|||
|
|
- src/services/compact/microCompact.ts — Tier 1: 工具结果打桩
|
|||
|
|
- src/services/compact/compact.ts — Tier 2: LLM 摘要替换
|
|||
|
|
- src/services/compact/reactiveCompact.ts — Tier 3: 错误触发压缩
|
|||
|
|
- src/services/compact/grouping.ts — 安全分割点识别
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import logging
|
|||
|
|
import time
|
|||
|
|
from enum import Enum
|
|||
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|||
|
|
|
|||
|
|
from app.core.token_counter import (
|
|||
|
|
TokenCounter,
|
|||
|
|
get_model_context_window,
|
|||
|
|
is_context_length_error,
|
|||
|
|
)
|
|||
|
|
from app.core.compaction_config import CompactionConfig
|
|||
|
|
|
|||
|
|
logger = logging.getLogger(__name__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ──────────────────────────── 数据结构 ────────────────────────────
|
|||
|
|
|
|||
|
|
class CompactionStrategy(str, Enum):
    """Identifies which compaction tier produced a result.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    NONE = "none"  # no compaction tier was run
    MICRO = "micro"  # Tier 1: stub out tool results
    FULL = "full"  # Tier 2: replace history with an LLM summary
    REACTIVE = "reactive"  # Tier 3: triggered by an error
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CompactionResult:
    """Outcome of one compaction attempt.

    Attributes:
        messages: The message list to use after the attempt.
        strategy: The tier that produced this result.
        tokens_before: Token count measured before the attempt.
        tokens_after: Token count measured after the attempt.
        tokens_saved: ``tokens_before - tokens_after``, computed once at
            construction time.
        details: Optional human-readable note about what happened.
        timestamp: ``time.time()`` captured when the result was created.
    """

    def __init__(
        self,
        messages: List[Dict[str, Any]],
        strategy: CompactionStrategy,
        tokens_before: int,
        tokens_after: int,
        details: Optional[str] = None,
    ):
        self.messages, self.strategy = messages, strategy
        self.tokens_before, self.tokens_after = tokens_before, tokens_after
        # Stored eagerly so later edits to the counts do not change it.
        self.tokens_saved = tokens_before - tokens_after
        self.details = details
        self.timestamp = time.time()

    def __repr__(self) -> str:
        summary = "strategy={}, saved={} tokens, before={} after={}".format(
            self.strategy.value,
            self.tokens_saved,
            self.tokens_before,
            self.tokens_after,
        )
        return "CompactionResult(" + summary + ")"
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ──────────────────────────── 压缩摘要提示词 ────────────────────────────
|
|||
|
|
|
|||
|
|
COMPACT_SUMMARY_SYSTEM = """你是一个对话摘要专家。你需要将一段 AI 助手与用户的对话历史压缩为简洁的摘要。
|
|||
|
|
|
|||
|
|
规则:
|
|||
|
|
- 保留关键事实和决策(文件路径、数值、结论、用户偏好)
|
|||
|
|
- 保留未完成的任务或待处理事项
|
|||
|
|
- 忽略纯粹的问候、闲聊和中间工具调用细节
|
|||
|
|
- 用第三人称中文描述
|
|||
|
|
- 控制在你被要求的字数范围内
|
|||
|
|
|
|||
|
|
返回格式:直接返回摘要文本,不要加任何前缀或标记。"""
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _build_compact_user_prompt(older_messages: List[Dict[str, Any]], max_chars: int = 2000) -> str:
|
|||
|
|
"""从旧消息中构建压缩提示词。"""
|
|||
|
|
parts = []
|
|||
|
|
total_chars = 0
|
|||
|
|
for msg in older_messages:
|
|||
|
|
role = msg.get("role", "?")
|
|||
|
|
content = msg.get("content", "") or ""
|
|||
|
|
# 截断长内容
|
|||
|
|
if len(content) > 500:
|
|||
|
|
content = content[:500] + "..."
|
|||
|
|
line = f"[{role}]: {content}"
|
|||
|
|
if total_chars + len(line) > max_chars:
|
|||
|
|
parts.append("...(更早的消息已省略)")
|
|||
|
|
break
|
|||
|
|
parts.append(line)
|
|||
|
|
total_chars += len(line)
|
|||
|
|
return "\n".join(parts)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ──────────────────────────── CompactionEngine ────────────────────────────
|
|||
|
|
|
|||
|
|
class CompactionEngine:
    """Three-tier conversation compaction engine.

    ``maybe_compact`` compares the estimated token usage of a message list
    with the effective context window and picks a tier:

    - usage >= ``config.micro_compact_threshold``: MicroCompact, which
      replaces old tool results with a short stub.
    - usage >= ``config.full_compact_threshold``: FullCompact, which replaces
      the middle of the history with a summary message.
    - ``reactive_compact`` is called by the owner after the API has rejected
      a request; it runs the FullCompact path and labels the result REACTIVE.

    All thresholds and limits are read from ``config``.

    NOTE(review): the failure counter only changes the ``details`` text of a
    failed result. Nothing in this class skips later attempts once the
    counter reaches ``config.max_consecutive_failures``.
    """

    # Placeholder written in place of a compacted tool result.
    _STUB_CONTENT = "[Tool result compacted]"

    def __init__(
        self,
        config: CompactionConfig,
        token_counter: Optional[TokenCounter] = None,
        model: str = "deepseek-v4-flash",
    ):
        """Create an engine.

        Args:
            config: Compaction settings (thresholds, tool lists, limits).
            token_counter: Counter used to measure messages. A
                ``TokenCounter(model=model)`` is created when omitted.
            model: Model name, used for the default counter and for the
                context-window lookup.
        """
        self.config = config
        self.token_counter = token_counter or TokenCounter(model=model)
        self.model = model
        # Failure and bookkeeping state.
        self._consecutive_failures = 0
        self._last_compact_time: float = 0
        self._compact_count = 0

    # ──────────────────── Entry points ────────────────────

    async def maybe_compact(
        self,
        messages: List[Dict[str, Any]],
        context_window: Optional[int] = None,
    ) -> CompactionResult:
        """Compact ``messages`` if current token usage calls for it.

        Args:
            messages: Current message list, including the system prompt.
            context_window: Model context window in tokens. ``None`` means
                look it up from the model name. A positive
                ``config.context_window_override`` always takes precedence.

        Returns:
            A ``CompactionResult`` holding the message list to use next. It
            is the input list itself when no compaction was performed.
        """
        if not self.config.enabled:
            return CompactionResult(
                messages, CompactionStrategy.NONE,
                tokens_before=0, tokens_after=0,
                details="压缩已禁用",
            )

        if context_window is None:
            context_window = get_model_context_window(self.model)
        if self.config.context_window_override > 0:
            context_window = self.config.context_window_override

        # Effective window = model window minus the room reserved for output.
        effective_window = context_window - self.config.output_reserve_tokens

        tokens_before = self.token_counter.count_messages(messages)
        # A non-positive effective window yields ratio 0, so nothing is compacted.
        usage_ratio = tokens_before / effective_window if effective_window > 0 else 0

        # Tier 2 is checked first. If FullCompact is disabled, MicroCompact
        # still applies above the full threshold instead of doing nothing.
        if (
            self.config.full_compact_enabled
            and usage_ratio >= self.config.full_compact_threshold
        ):
            return await self._full_compact(messages, tokens_before)

        if (
            self.config.micro_compact_enabled
            and usage_ratio >= self.config.micro_compact_threshold
        ):
            return await self._micro_compact(messages, tokens_before)

        return CompactionResult(
            messages, CompactionStrategy.NONE,
            tokens_before=tokens_before, tokens_after=tokens_before,
            details=f"usage={usage_ratio:.1%} < threshold={self.config.micro_compact_threshold:.0%}",
        )

    async def reactive_compact(
        self,
        messages: List[Dict[str, Any]],
        error: Exception,
        context_window: Optional[int] = None,
    ) -> CompactionResult:
        """Emergency compaction after a context-length API error (Tier 3).

        Args:
            messages: Current message list.
            error: The exception that triggered this call.
            context_window: Context window in tokens. It is used only in the
                log line and is looked up from the model name when ``None``.

        Returns:
            The result of the FullCompact path, labeled REACTIVE on success.

        Raises:
            Exception: ``error`` is re-raised unchanged when
                ``config.reactive_compact_enabled`` is false.
        """
        if not self.config.reactive_compact_enabled:
            raise error

        if context_window is None:
            context_window = get_model_context_window(self.model)

        tokens_before = self.token_counter.count_messages(messages)
        logger.warning(
            "ReactiveCompact 触发: %s, tokens=%d, window=%d",
            str(error)[:100], tokens_before, context_window,
        )

        return await self._full_compact(messages, tokens_before, is_reactive=True)

    # ──────────────────── Shared helpers ────────────────────

    def _record_failure(
        self,
        tier: str,
        trip_warning: str,
        strategy: CompactionStrategy,
        messages: List[Dict[str, Any]],
        tokens_before: int,
        error: Exception,
    ) -> CompactionResult:
        """Count a failed attempt, log it, and return the untouched messages."""
        self._consecutive_failures += 1
        logger.error(
            "%s 失败 (%d/%d): %s",
            tier,
            self._consecutive_failures,
            self.config.max_consecutive_failures,
            error,
        )
        if self._consecutive_failures >= self.config.max_consecutive_failures:
            logger.warning(trip_warning)
            details = f"熔断 ({self._consecutive_failures}次连续失败)"
        else:
            details = f"失败: {error}"
        return CompactionResult(
            messages, strategy,
            tokens_before=tokens_before, tokens_after=tokens_before,
            details=details,
        )

    @staticmethod
    def _tool_names_by_call_id(messages: List[Dict[str, Any]]) -> Dict[str, str]:
        """Map each tool-call id to the tool name from assistant ``tool_calls``.

        The name is read from ``call["function"]["name"]`` and falls back to
        ``call["name"]``. Entries that are not dicts, or that lack an id or a
        name, are ignored.
        """
        names: Dict[str, str] = {}
        for msg in messages:
            if msg.get("role") != "assistant":
                continue
            for call in msg.get("tool_calls") or []:
                if not isinstance(call, dict):
                    continue
                call_id = call.get("id") or call.get("tool_call_id")
                function = call.get("function")
                name = (
                    function.get("name") if isinstance(function, dict) else None
                ) or call.get("name")
                if call_id and name:
                    names[call_id] = name
        return names

    @staticmethod
    def _safe_split_points(
        non_system: List[Dict[str, Any]],
        preserve_count: int,
        head_keep: int = 2,
    ) -> Tuple[int, int]:
        """Choose the ``(head_end, tail_start)`` indices for FullCompact.

        ``non_system[:head_end]`` is kept verbatim, ``non_system[head_end:
        tail_start]`` is summarized, and ``non_system[tail_start:]`` is kept
        verbatim. The head is ``head_keep`` messages when the list has more
        than ``preserve_count + 4`` messages, otherwise it is empty.

        Both boundaries are moved off ``tool`` messages. This stops a tool
        result from being separated from the assistant message that precedes
        it.
        """
        total = len(non_system)
        tail_start = max(0, total - max(preserve_count, 0))
        head_end = head_keep if total > preserve_count + 4 else 0
        head_end = min(head_end, tail_start)

        # Pull tool results that directly follow the head into the head.
        while head_end < tail_start and non_system[head_end].get("role") == "tool":
            head_end += 1
        # Do not let the preserved tail begin with a tool result.
        while (
            head_end < tail_start < total
            and non_system[tail_start].get("role") == "tool"
        ):
            tail_start -= 1
        return head_end, tail_start

    # ──────────────────── Tier 1: MicroCompact ────────────────────

    async def _micro_compact(
        self, messages: List[Dict[str, Any]], tokens_before: int
    ) -> CompactionResult:
        """MicroCompact: replace old tool results with a stub marker.

        Steps:
        1. Count ``user`` messages to define conversation rounds. If there
           are at most ``config.min_preserve_messages // 2`` of them, skip.
        2. Leave everything from the ``config.compact_older_than_rounds``-th
           most recent ``user`` message onward untouched.
        3. Before that line, replace the content of ``tool`` messages whose
           tool name is in ``config.compactable_tools`` and not in
           ``config.protected_tools``. The name comes from the message's own
           ``name`` field, or from the matching assistant ``tool_calls``
           entry when that field is missing.
        4. Leave all other messages as they are, including assistant
           ``tool_calls`` messages and results that are already stubbed.

        Any exception is caught and reported as a failed result that carries
        the original messages.
        """
        try:
            compactable = set(self.config.compactable_tools)
            protected = set(self.config.protected_tools)

            user_indices = [
                i for i, msg in enumerate(messages) if msg.get("role") == "user"
            ]
            if len(user_indices) <= self.config.min_preserve_messages // 2:
                # Too few rounds to be worth compacting.
                return CompactionResult(
                    messages, CompactionStrategy.MICRO,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="对话轮次不足,跳过 MicroCompact",
                )

            # Protection line: index of the N-th most recent user message.
            preserve_idx = max(0, len(user_indices) - self.config.compact_older_than_rounds)
            compact_before = user_indices[preserve_idx] if preserve_idx < len(user_indices) else 0

            tool_names = self._tool_names_by_call_id(messages)

            stubbed_count = 0
            result = []
            for i, msg in enumerate(messages):
                if i >= compact_before or msg.get("role", "") != "tool":
                    result.append(msg)
                    continue

                tc_id = msg.get("tool_call_id", "")
                tool_name = msg.get("name", "") or tool_names.get(tc_id, "")
                already_stubbed = msg.get("content") == self._STUB_CONTENT
                if (
                    tool_name not in compactable
                    or tool_name in protected
                    or already_stubbed
                ):
                    result.append(msg)
                    continue

                stub = {
                    "role": "tool",
                    "tool_call_id": tc_id or "compacted",
                    "content": self._STUB_CONTENT,
                }
                # Emit "name" only when the source message carried one.
                if "name" in msg:
                    stub["name"] = msg["name"]
                result.append(stub)
                stubbed_count += 1

            tokens_after = self.token_counter.count_messages(result)
            if stubbed_count == 0:
                return CompactionResult(
                    result, CompactionStrategy.MICRO,
                    tokens_before=tokens_before,
                    tokens_after=tokens_after,
                    details="没有可压缩的旧工具结果",
                )

            logger.info(
                "MicroCompact: %d 条工具结果打桩, %d → %d tokens (节省 %d)",
                stubbed_count, tokens_before, tokens_after,
                tokens_before - tokens_after,
            )
            return CompactionResult(
                result, CompactionStrategy.MICRO,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"{stubbed_count} 条工具结果已压缩",
            )

        except Exception as e:
            return self._record_failure(
                "MicroCompact",
                "MicroCompact 熔断!跳过本次压缩",
                CompactionStrategy.MICRO,
                messages,
                tokens_before,
                e,
            )

    # ──────────────────── Tier 2: FullCompact ────────────────────

    async def _full_compact(
        self,
        messages: List[Dict[str, Any]],
        tokens_before: int,
        is_reactive: bool = False,
        llm_client=None,  # optional externally supplied LLM client
    ) -> CompactionResult:
        """FullCompact: replace the middle of the history with a summary.

        Steps:
        1. Skip when there are at most ``config.min_preserve_messages + 4``
           messages, or fewer than 3 messages to summarize.
        2. Keep all ``system`` messages, the first two non-system messages
           (when the history is long enough), and the most recent
           ``config.min_preserve_messages`` non-system messages. Split points
           are moved so they never fall directly before a ``tool`` message.
        3. Summarize what lies in between and insert the summary as a single
           ``user`` boundary message.
        4. On success, reset the failure counter. On any exception, return
           the original messages as a failed result.

        Args:
            messages: Current message list.
            tokens_before: Token count of ``messages``.
            is_reactive: Label a successful result REACTIVE instead of FULL.
            llm_client: Optional client passed through to summary generation.
        """
        try:
            preserve_count = self.config.min_preserve_messages

            if len(messages) <= preserve_count + 4:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="消息数不足,跳过 FullCompact",
                )

            system_msgs = [m for m in messages if m.get("role") == "system"]
            non_system = [m for m in messages if m.get("role") != "system"]

            head_end, tail_start = self._safe_split_points(non_system, preserve_count)
            # The head is kept verbatim so the opening exchange is not lost.
            head = non_system[:head_end]
            older = non_system[head_end:tail_start]
            recent = non_system[tail_start:]

            if len(older) < 3:
                return CompactionResult(
                    messages, CompactionStrategy.FULL,
                    tokens_before=tokens_before, tokens_after=tokens_before,
                    details="旧消息不足以压缩",
                )

            summary_text = await self._generate_summary(older, llm_client)

            compact_boundary = {
                "role": "user",
                "content": (
                    f"[对话上下文摘要 — 之前的关键信息]\n\n{summary_text}\n\n"
                    f"[以上为自动生成的对话摘要,共压缩 {len(older)} 条消息。"
                    f"以下是最近的对话延续]"
                ),
            }

            new_messages = system_msgs + head + [compact_boundary] + recent
            tokens_after = self.token_counter.count_messages(new_messages)

            strategy = CompactionStrategy.REACTIVE if is_reactive else CompactionStrategy.FULL
            logger.info(
                "FullCompact (%s): %d 条消息→摘要 (%d 字), %d → %d tokens (节省 %d)",
                "被动" if is_reactive else "主动",
                len(older), len(summary_text),
                tokens_before, tokens_after, tokens_before - tokens_after,
            )

            self._consecutive_failures = 0  # reset after a success
            self._last_compact_time = time.time()
            self._compact_count += 1

            return CompactionResult(
                new_messages, strategy,
                tokens_before=tokens_before, tokens_after=tokens_after,
                details=f"压缩 {len(older)} 条→{len(summary_text)} 字摘要",
            )

        except Exception as e:
            return self._record_failure(
                "FullCompact",
                "FullCompact 熔断!返回原始消息",
                CompactionStrategy.FULL,
                messages,
                tokens_before,
                e,
            )

    async def _generate_summary(
        self,
        older_messages: List[Dict[str, Any]],
        llm_client=None,
    ) -> str:
        """Produce a summary of ``older_messages``.

        With no ``llm_client``, the rule-based fallback summary is returned.
        Otherwise the client's ``chat`` coroutine is awaited with the summary
        system prompt. A blank response also falls back to the rule-based
        summary.

        NOTE(review): a temporary client is built only when a non-``None``
        object that is not an ``_LLMClient`` is passed in. Confirm that this
        is the intended trigger.
        """
        if llm_client is None:
            return self._fallback_summary(older_messages)

        user_content = _build_compact_user_prompt(
            older_messages,
            max_chars=3000,
        )

        from app.agent_runtime.core import _LLMClient
        from app.agent_runtime.schemas import AgentLLMConfig

        if not isinstance(llm_client, _LLMClient):
            # Build a temporary client from the summary settings.
            summary_config = AgentLLMConfig(
                provider="deepseek",
                model=self.config.summary_model,
                temperature=self.config.summary_temperature,
                max_tokens=self.config.summary_max_tokens,
                request_timeout=30.0,
            )
            llm_client = _LLMClient(summary_config)

        messages = [
            {"role": "system", "content": COMPACT_SUMMARY_SYSTEM},
            {"role": "user", "content": f"请将以下对话历史压缩为不超过{self.config.summary_max_tokens // 2}字的摘要:\n\n{user_content}"},
        ]

        response = await llm_client.chat(messages=messages, tools=None, iteration=-1)
        # Accept either an object with ``.content`` or a plain dict.
        content = getattr(response, 'content', '') or (
            response.get('content', '') if isinstance(response, dict) else ""
        )
        return content.strip() or self._fallback_summary(older_messages)

    @staticmethod
    def _fallback_summary(older_messages: List[Dict[str, Any]]) -> str:
        """Build a summary without an LLM from the user messages.

        Collects the content of ``user`` messages, cut to 60 characters plus
        ``...``. Duplicates are removed while keeping first-seen order, and
        the first 10 are listed. The result is therefore deterministic and
        chronological. Empty or ``None`` content is skipped, and non-string
        content is converted with ``str()``.
        """
        topics: Dict[str, None] = {}
        for msg in older_messages:
            if msg.get("role") != "user":
                continue
            content = msg.get("content", "") or ""
            if not isinstance(content, str):
                content = str(content)
            if len(content) > 60:
                content = content[:60] + "..."
            if content:
                # Dict keys give ordered de-duplication.
                topics.setdefault(content, None)

        if not topics:
            return "此段对话为助手与用户的交互。"

        topic_list = ";".join(list(topics)[:10])
        return f"对话涉及以下话题: {topic_list}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ──────────────────────────── 工厂函数 ────────────────────────────
|
|||
|
|
|
|||
|
|
def create_compaction_engine(
    config: Optional[CompactionConfig] = None,
    model: str = "deepseek-v4-flash",
) -> CompactionEngine:
    """Convenience factory that builds a ``CompactionEngine``.

    Args:
        config: Compaction settings. A default-constructed
            ``CompactionConfig`` is used when omitted.
        model: Model name forwarded to the engine.

    Returns:
        A new ``CompactionEngine`` built from the resolved config and model.
    """
    effective_config = CompactionConfig() if config is None else config
    return CompactionEngine(config=effective_config, model=model)
|