- Fix 8 Feishu agent handlers to use permission_level="acceptEdits" so file_write tool works without Web UI approval popup (lingxi/renshenguo/suyao/tiantian/orange/main/schedule) - Add P5-P7 memory improvements: offline keyword fallback, team sharing, file-based memory - Add auto_dream_service for daily memory consolidation - Add 99 memory system test cases (basic 18 + advanced 43 + pytest 38) - Add platform capability assessment report and unfinished project checklist Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
250 lines
8.1 KiB
Python
250 lines
8.1 KiB
Python
"""
|
||
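Auto Dream — nightly memory-consolidation service.

Modeled on Claude Code's Auto Dream mechanism:
- Triggers every day at 03:00 (UTC+8)
- Scans the vector memories created in the past 24 hours
- Merges similar entries (cosine similarity at or above 0.85)
- Generates a consolidated summary and writes it to the vector memory pool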
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import logging
|
||
from datetime import datetime, timedelta, timezone
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.core.database import SessionLocal
|
||
from app.services.embedding_service import embedding_service
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# Cosine-similarity cutoff: two memories scoring at or above this value are
# treated as duplicates and merged.
MERGE_SIMILARITY_THRESHOLD = 0.85

# Daily trigger time, interpreted as wall-clock time in UTC+8 (03:00).
DREAM_HOUR = 3
# NOTE(review): DREAM_MINUTE is not read by any code visible in this module;
# the trigger check only compares the hour.
DREAM_MINUTE = 0

# Date (YYYY-MM-DD, UTC+8) of the last consolidation run. Module-level state,
# so it is reset whenever the process restarts.
_last_dream_date: Optional[str] = None
# Held for the duration of a consolidation run; run_auto_dream() skips its
# work when this lock is already held.
_dream_lock = asyncio.Lock()
|
||
|
||
|
||
def _should_dream_today() -> bool:
    """Report whether a consolidation run is due right now.

    A run is due when the current wall-clock hour in UTC+8 equals
    ``DREAM_HOUR`` (i.e. any time within that one-hour window) and
    ``_last_dream_date`` does not already hold today's UTC+8 date.
    """
    utc_plus_8 = timezone(timedelta(hours=8))
    local_now = datetime.now(utc_plus_8)

    already_ran_today = _last_dream_date == local_now.strftime("%Y-%m-%d")
    inside_window = local_now.hour == DREAM_HOUR

    return inside_window and not already_ran_today
|
||
|
||
|
||
async def run_auto_dream() -> dict:
    """Run one memory-consolidation pass unless one is already in progress.

    Returns:
        The result dict produced by ``_do_consolidate()``
        (``{"merged": int, "deleted": int, "dreams": int, "elapsed_s": float}``).
        When ``_dream_lock`` is already held, nothing is done and a zeroed
        result carrying ``"skipped": True`` is returned instead.
    """
    # Guard clause: never queue behind a run that is still in progress.
    if _dream_lock.locked():
        logger.info("Auto Dream:上一次整合仍在进行中,跳过")
        return {"merged": 0, "deleted": 0, "dreams": 0, "elapsed_s": 0, "skipped": True}

    async with _dream_lock:
        return await _do_consolidate()
|
||
|
||
|
||
async def _do_consolidate() -> dict:
    """Merge near-duplicate recent memories and store a daily summary.

    Steps:
      1. Load up to 200 ``AgentVectorMemory`` rows created in the last
         24 hours, newest first.
      2. Deserialize their embeddings and compare them pairwise. Within one
         scope (``scope_kind`` + ``scope_id``), whenever two rows score at or
         above ``MERGE_SIMILARITY_THRESHOLD`` the older row is marked for
         deletion and the newer one is kept.
      3. Delete all marked rows with a single statement.
      4. Ask ``_generate_dream_summary`` for a digest of the 50 newest rows
         and store it as a ``system`` / ``auto_dream`` record.

    ``_last_dream_date`` is set to today's UTC+8 date whether the run
    succeeds, is skipped for lack of data, or fails with an ``Exception``,
    so a failing run is not retried again on the same day.

    Returns:
        ``{"merged": int, "deleted": int, "dreams": int, "elapsed_s": float}``
    """
    global _last_dream_date

    # Function-scope imports, matching the style already used in this module.
    # ``time`` must be imported here: the timing below previously relied on
    # names that only existed as locals of run_auto_dream().
    import time
    from types import SimpleNamespace

    from app.models.agent_vector_memory import AgentVectorMemory

    start = time.monotonic()
    now = datetime.now(timezone.utc).astimezone(timezone(timedelta(hours=8)))
    today_str = now.strftime("%Y-%m-%d")
    # NOTE(review): cutoff is timezone-aware (UTC+8); confirm the created_at
    # column compares correctly against aware datetimes.
    cutoff = now - timedelta(hours=24)

    db: Optional[Session] = None
    merged = 0
    deleted = 0
    dreams = 0

    try:
        db = SessionLocal()

        # 1. Most recent memories from the last 24 hours (capped at 200).
        rows = (
            db.query(AgentVectorMemory)
            .filter(AgentVectorMemory.created_at >= cutoff)
            .order_by(AgentVectorMemory.created_at.desc())
            .limit(200)
            .all()
        )

        if len(rows) < 3:
            logger.info("Auto Dream:最近 24h 记忆不足(%d 条),跳过", len(rows))
            _last_dream_date = today_str
            return {
                "merged": 0,
                "deleted": 0,
                "dreams": 0,
                "elapsed_s": time.monotonic() - start,
            }

        logger.info("Auto Dream:开始整合 %d 条最近记忆", len(rows))

        # 2. Keep only rows that carry a usable embedding.
        entries: List[Tuple[Any, List[float]]] = []
        for row in rows:
            if not row.embedding:
                continue
            emb = embedding_service.deserialize_embedding(row.embedding)
            if emb:
                entries.append((row, emb))

        if len(entries) < 3:
            _last_dream_date = today_str
            return {
                "merged": 0,
                "deleted": 0,
                "dreams": 0,
                "elapsed_s": time.monotonic() - start,
            }

        # Snapshot what the summary needs *before* deleting/committing:
        # afterwards the ORM instances may be expired or point at rows that
        # no longer exist, and touching them could raise or re-query.
        summary_rows = [
            SimpleNamespace(content_text=r.content_text, metadata_=r.metadata_)
            for r in rows[:50]
        ]

        # 3. Pairwise similarity; mark the older row of each similar pair.
        to_delete_ids: set = set()
        to_merge_pairs: List[Tuple[Any, Any]] = []

        for i, (row_i, emb_i) in enumerate(entries):
            if row_i.id in to_delete_ids:
                continue
            for row_j, emb_j in entries[i + 1:]:
                if row_j.id in to_delete_ids:
                    continue
                # Never merge across scopes: deleting one scope's memory
                # because a different scope holds a similar one loses data.
                if (row_i.scope_kind, row_i.scope_id) != (row_j.scope_kind, row_j.scope_id):
                    continue
                sim = embedding_service.cosine_similarity(emb_i, emb_j)
                if sim >= MERGE_SIMILARITY_THRESHOLD:
                    # Keep the newer row, drop the older one.
                    if row_i.created_at >= row_j.created_at:
                        newer, older = row_i, row_j
                    else:
                        newer, older = row_j, row_i
                    to_delete_ids.add(older.id)
                    to_merge_pairs.append((newer, older))
                    if older is row_i:
                        # row_i itself is now marked; stop pairing it further.
                        break

        # 4. Delete all marked rows in one statement so the reported count
        #    always matches what was actually committed.
        if to_delete_ids:
            try:
                deleted = (
                    db.query(AgentVectorMemory)
                    .filter(AgentVectorMemory.id.in_(list(to_delete_ids)))
                    .delete(synchronize_session=False)
                )
                db.commit()
                merged = len(to_merge_pairs)
                logger.info("Auto Dream:合并删除 %d 条重复记忆", deleted)
            except Exception as e:
                # Best effort: a failed merge must not prevent the summary.
                db.rollback()
                deleted = 0
                merged = 0
                logger.warning("Auto Dream 合并删除失败: %s", e)

        # 5. Generate and persist the dream summary.
        dream_text = await _generate_dream_summary(summary_rows)
        if dream_text:
            try:
                emb = await embedding_service.generate_embedding(dream_text)
                embedding_json = embedding_service.serialize_embedding(emb) if emb else ""
                dream_record = AgentVectorMemory(
                    scope_kind="system",
                    scope_id="auto_dream",
                    session_key=f"dream_{today_str}",
                    content_text=dream_text[:2000],
                    embedding=embedding_json or None,
                    metadata_={
                        "type": "dream_summary",
                        "memory_type": "project",
                        "source": "auto_dream",
                        "dream_date": today_str,
                        "merged_count": merged,
                        "deleted_count": deleted,
                    },
                )
                db.add(dream_record)
                db.commit()
                dreams = 1
                logger.info("Auto Dream:已生成整合摘要 (%d 字)", len(dream_text))
            except Exception as e:
                # Leave the session clean after a failed add/commit.
                db.rollback()
                logger.warning("Auto Dream 摘要写入失败: %s", e)

    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception("Auto Dream 整合失败: %s", e)
        if db:
            db.rollback()
    finally:
        if db:
            db.close()

    _last_dream_date = today_str
    elapsed = time.monotonic() - start
    logger.info(
        "Auto Dream 完成: merged=%d deleted=%d dreams=%d elapsed=%.1fs",
        merged, deleted, dreams, elapsed,
    )
    return {"merged": merged, "deleted": deleted, "dreams": dreams, "elapsed_s": elapsed}
|
||
|
||
|
||
async def _generate_dream_summary(rows: list) -> str:
    """Ask the LLM for a short daily digest of recent memories.

    Args:
        rows: Memory records exposing ``content_text`` and ``metadata_``.
            Expected newest-first (the caller in this module queries with
            ``created_at`` descending); only the first 30 are used.

    Returns:
        The summary text, or ``""`` when fewer than three rows are given,
        no row has any content, no API key is configured, or the LLM request
        fails. Request failures are logged and not raised, so the caller's
        main flow is never blocked by this step.
    """
    if not rows or len(rows) < 3:
        return ""

    # Use the 30 most recent entries. With a newest-first list these are at
    # the head; slicing from the tail would pick the oldest ones instead.
    items = []
    for r in rows[:30]:
        content = (r.content_text or "")[:300]
        if not content.strip():
            # An empty entry adds nothing but noise to the prompt.
            continue
        meta = r.metadata_ or {}
        mem_type = meta.get("memory_type", "unknown")
        items.append(f"[{mem_type}] {content}")

    if not items:
        return ""

    prompt = (
        "你是一个记忆整合助手。请分析以下 24 小时内的 Agent 对话记忆,\n"
        "生成一份简洁的每日摘要(200字以内),包含:\n"
        "1. 用户讨论了哪些主要话题\n"
        "2. 用户表达了哪些偏好或需求\n"
        "3. 有哪些值得保留的关键信息\n\n"
        "记忆条目:\n"
    ) + "\n".join(f"- {item}" for item in items)

    try:
        from openai import AsyncOpenAI
        from app.core.config import settings

        api_key = settings.DEEPSEEK_API_KEY or ""
        base_url = settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"

        if not api_key:
            return ""

        # The context manager closes the underlying HTTP client, so a new
        # connection pool is not leaked on every nightly run.
        async with AsyncOpenAI(api_key=api_key, base_url=base_url) as client:
            resp = await client.chat.completions.create(
                # NOTE(review): confirm this model id is accepted by the
                # configured endpoint.
                model="deepseek-v4-flash",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=600,
                timeout=30,
            )
        return resp.choices[0].message.content or ""
    except Exception as e:
        logger.warning("Auto Dream 摘要生成失败: %s", e)
        return ""
|