feat: Agent间知识共享 — 全局知识池(去重+置信度+过期)

- GlobalKnowledge 模型新增 confidence 和 expires_at 字段
- save_global_knowledge 增加 MD5 去重、置信度评分、TTL过期
- _global_knowledge_search 增加过期过滤、置信度优先排序
- run()/run_stream() 所有完成路径补齐知识提取调用
- 新增 Alembic 迁移 010_add_global_knowledge

Closes #9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-06 22:04:56 +08:00
parent 1f7c136544
commit 9054f42cda
4 changed files with 175 additions and 8 deletions

View File

@@ -178,13 +178,21 @@ class AgentMemory:
async def _global_knowledge_search(self, query: str = "") -> str:
"""从 GlobalKnowledge 表检索相关的全局知识条目。"""
from datetime import datetime
from app.models.agent import GlobalKnowledge
db: Optional[Session] = None
try:
db = SessionLocal()
now = datetime.utcnow()
# 查询未过期的知识expires_at IS NULL 或 expires_at > now
rows = (
db.query(GlobalKnowledge)
.filter(
(GlobalKnowledge.expires_at.is_(None))
| (GlobalKnowledge.expires_at > now)
)
.order_by(GlobalKnowledge.created_at.desc())
.limit(50)
.all()
@@ -212,6 +220,7 @@ class AgentMemory:
"metadata": {
"source_agent_id": row.source_agent_id,
"tags": row.tags or [],
"confidence": row.confidence or "medium",
},
})
@@ -225,17 +234,22 @@ class AgentMemory:
lines = ["## 全局知识库"]
for i, m in enumerate(matched, 1):
tags = m.get("metadata", {}).get("tags", [])
conf = m.get("metadata", {}).get("confidence", "medium")
tag_str = f" [{', '.join(tags[:3])}]" if tags else ""
lines.append(f"{i}.{tag_str} {m.get('content_text', '')[:500]}")
conf_str = f" (置信度:{conf})" if conf != "medium" else ""
lines.append(f"{i}.{tag_str}{conf_str} {m.get('content_text', '')[:500]}")
return "\n".join(lines)
else:
# 无 query返回最近 5 条全局知识
recent = rows[:5]
# 无 query返回最近 5 条全局知识(优先高置信度)
recent = sorted(rows, key=lambda r: (
0 if r.confidence == "high" else 1 if r.confidence == "medium" else 2
))[:5]
if recent:
lines = ["## 全局知识库(最近)"]
for i, row in enumerate(recent, 1):
tag_str = f" [{(', '.join(row.tags[:3]))}]" if row.tags else ""
lines.append(f"{i}.{tag_str} {row.content[:500]}")
conf_str = f" (置信度:{row.confidence})" if row.confidence and row.confidence != "medium" else ""
lines.append(f"{i}.{tag_str}{conf_str} {row.content[:500]}")
return "\n".join(lines)
return ""
@@ -249,8 +263,14 @@ class AgentMemory:
async def save_global_knowledge(
self, content: str, source_agent_id: str = "",
source_user_id: str = "", tags: Optional[List[str]] = None,
confidence: str = "medium", ttl_hours: int = 0,
) -> None:
"""将知识条目写入全局知识池。"""
"""将知识条目写入全局知识池(带去重、置信度、过期时间)
去重策略:对 content 取哈希,若已有相同哈希的条目则跳过。
过期策略ttl_hours > 0 时设置 expires_at0 表示永不过期。
"""
from datetime import datetime, timedelta
from app.models.agent import GlobalKnowledge
if not content or len(content) < 20:
@@ -260,7 +280,26 @@ class AgentMemory:
try:
db = SessionLocal()
# 生成 embedding
# 去重:用 content 的 MD5 哈希检查是否已存在
import hashlib
content_hash = hashlib.md5(content[:500].encode()).hexdigest()
# 查询最近 200 条,检查是否有相同哈希的条目
recent = (
db.query(GlobalKnowledge)
.order_by(GlobalKnowledge.created_at.desc())
.limit(200)
.all()
)
for existing in recent:
existing_hash = hashlib.md5(
(existing.content or "")[:500].encode()
).hexdigest()
if existing_hash == content_hash:
logger.info("全局知识去重:已存在相同条目,跳过写入")
return
# 嵌入向量
embedding_json = ""
try:
emb = await embedding_service.generate_embedding(content)
@@ -269,18 +308,26 @@ class AgentMemory:
except Exception:
pass
# 过期时间
expires_at = None
if ttl_hours > 0:
expires_at = datetime.utcnow() + timedelta(hours=ttl_hours)
record = GlobalKnowledge(
content=content[:2000],
embedding=embedding_json or None,
source_agent_id=source_agent_id or "",
source_user_id=source_user_id or "",
tags=tags or [],
confidence=confidence or "medium",
expires_at=expires_at,
scope_kind=self.scope_kind,
scope_id=self.scope_id or "global",
)
db.add(record)
db.commit()
logger.info("已写入全局知识: agent=%s tags=%s", source_agent_id, tags)
logger.info("已写入全局知识: agent=%s tags=%s confidence=%s",
source_agent_id, tags, confidence)
except Exception as e:
logger.warning("保存全局知识失败: %s", e)
if db: