feat: Agent间知识共享 — 全局知识池(去重+置信度+过期)

- GlobalKnowledge 模型新增 confidence 和 expires_at 字段
- save_global_knowledge 增加 MD5 去重、置信度评分、TTL过期
- _global_knowledge_search 增加过期过滤、置信度优先排序
- run()/run_stream() 所有完成路径补齐知识提取调用
- 新增 Alembic 迁移 010_add_global_knowledge

Closes #9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-06 22:04:56 +08:00
parent 1f7c136544
commit 9054f42cda
4 changed files with 175 additions and 8 deletions

View File

@@ -178,13 +178,21 @@ class AgentMemory:
async def _global_knowledge_search(self, query: str = "") -> str:
"""从 GlobalKnowledge 表检索相关的全局知识条目。"""
from datetime import datetime
from app.models.agent import GlobalKnowledge
db: Optional[Session] = None
try:
db = SessionLocal()
now = datetime.utcnow()
# 查询未过期的知识expires_at IS NULL 或 expires_at > now
rows = (
db.query(GlobalKnowledge)
.filter(
(GlobalKnowledge.expires_at.is_(None))
| (GlobalKnowledge.expires_at > now)
)
.order_by(GlobalKnowledge.created_at.desc())
.limit(50)
.all()
@@ -212,6 +220,7 @@ class AgentMemory:
"metadata": {
"source_agent_id": row.source_agent_id,
"tags": row.tags or [],
"confidence": row.confidence or "medium",
},
})
@@ -225,17 +234,22 @@ class AgentMemory:
lines = ["## 全局知识库"]
for i, m in enumerate(matched, 1):
tags = m.get("metadata", {}).get("tags", [])
conf = m.get("metadata", {}).get("confidence", "medium")
tag_str = f" [{', '.join(tags[:3])}]" if tags else ""
lines.append(f"{i}.{tag_str} {m.get('content_text', '')[:500]}")
conf_str = f" (置信度:{conf})" if conf != "medium" else ""
lines.append(f"{i}.{tag_str}{conf_str} {m.get('content_text', '')[:500]}")
return "\n".join(lines)
else:
# 无 query返回最近 5 条全局知识
recent = rows[:5]
# 无 query返回最近 5 条全局知识(优先高置信度)
recent = sorted(rows, key=lambda r: (
0 if r.confidence == "high" else 1 if r.confidence == "medium" else 2
))[:5]
if recent:
lines = ["## 全局知识库(最近)"]
for i, row in enumerate(recent, 1):
tag_str = f" [{(', '.join(row.tags[:3]))}]" if row.tags else ""
lines.append(f"{i}.{tag_str} {row.content[:500]}")
conf_str = f" (置信度:{row.confidence})" if row.confidence and row.confidence != "medium" else ""
lines.append(f"{i}.{tag_str}{conf_str} {row.content[:500]}")
return "\n".join(lines)
return ""
@@ -249,8 +263,14 @@ class AgentMemory:
async def save_global_knowledge(
self, content: str, source_agent_id: str = "",
source_user_id: str = "", tags: Optional[List[str]] = None,
confidence: str = "medium", ttl_hours: int = 0,
) -> None:
"""将知识条目写入全局知识池。"""
"""将知识条目写入全局知识池(带去重、置信度、过期时间)
去重策略:对 content 取哈希,若已有相同哈希的条目则跳过。
过期策略ttl_hours > 0 时设置 expires_at0 表示永不过期。
"""
from datetime import datetime, timedelta
from app.models.agent import GlobalKnowledge
if not content or len(content) < 20:
@@ -260,7 +280,26 @@ class AgentMemory:
try:
db = SessionLocal()
# 生成 embedding
# 去重:用 content 的 MD5 哈希检查是否已存在
import hashlib
content_hash = hashlib.md5(content[:500].encode()).hexdigest()
# 查询最近 200 条,检查是否有相同哈希的条目
recent = (
db.query(GlobalKnowledge)
.order_by(GlobalKnowledge.created_at.desc())
.limit(200)
.all()
)
for existing in recent:
existing_hash = hashlib.md5(
(existing.content or "")[:500].encode()
).hexdigest()
if existing_hash == content_hash:
logger.info("全局知识去重:已存在相同条目,跳过写入")
return
# 嵌入向量
embedding_json = ""
try:
emb = await embedding_service.generate_embedding(content)
@@ -269,18 +308,26 @@ class AgentMemory:
except Exception:
pass
# 过期时间
expires_at = None
if ttl_hours > 0:
expires_at = datetime.utcnow() + timedelta(hours=ttl_hours)
record = GlobalKnowledge(
content=content[:2000],
embedding=embedding_json or None,
source_agent_id=source_agent_id or "",
source_user_id=source_user_id or "",
tags=tags or [],
confidence=confidence or "medium",
expires_at=expires_at,
scope_kind=self.scope_kind,
scope_id=self.scope_id or "global",
)
db.add(record)
db.commit()
logger.info("已写入全局知识: agent=%s tags=%s", source_agent_id, tags)
logger.info("已写入全局知识: agent=%s tags=%s confidence=%s",
source_agent_id, tags, confidence)
except Exception as e:
logger.warning("保存全局知识失败: %s", e)
if db: