feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖

- 新增 embedding_service（语义检索）、knowledge_service（RAG）、text_chunker、document_parser
- 新增 tool_registry（自定义工具注册表）并完善工具市场 API（CRUD + code/http 执行）
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系：7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 22:30:46 +08:00
parent 036f533881
commit 7b9e0826de
35 changed files with 4353 additions and 365 deletions
--- a/backend/app/models/knowledge_base.py
+++ b/backend/app/models/knowledge_base.py
@@ -0,0 +1,116 @@
+"""
+知识库 RAG 模型。
+
+KnowledgeBase — 知识库容器
+Document     — 上传的源文档
+DocumentChunk — 文档切片（含 embedding）
+"""
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+
+from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, Index
+from sqlalchemy.dialects.mysql import JSON as MySQLJSON
+from sqlalchemy.orm import relationship
+
+from app.core.database import Base
+
+
+class KnowledgeBase(Base):
+    """Knowledge base: a named container that owns uploaded documents."""
+    __tablename__ = "knowledge_bases"
+
+    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))  # UUID4 string, generated when no id is supplied
+    name = Column(String(200), nullable=False, comment="知识库名称")
+    description = Column(Text, nullable=True, comment="描述")
+    user_id = Column(String(36), nullable=True, index=True, comment="创建者 ID")  # plain indexed string; no ForeignKey is declared here
+    chunk_size = Column(Integer, default=500, comment="分块大小（字符数）")  # measured in characters, per the column comment
+    chunk_overlap = Column(Integer, default=50, comment="分块重叠（字符数）")  # measured in characters, per the column comment
+    doc_count = Column(Integer, default=0, comment="文档数量")  # stored counter; nothing in this class updates it
+    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)  # naive UTC timestamp (datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)  # onupdate refreshes it on ORM UPDATEs
+
+    documents = relationship("Document", back_populates="kb", cascade="all, delete-orphan",  # deleting a KB through the ORM also deletes its documents
+                             passive_deletes=True)  # unloaded children are left to the foreign key's ON DELETE CASCADE
+
+    def to_dict(self) -> dict:  # plain-dict snapshot of the columns; datetimes become ISO-8601 strings or None
+        return {
+            "id": self.id,
+            "name": self.name,
+            "description": self.description,
+            "user_id": self.user_id,
+            "chunk_size": self.chunk_size,
+            "chunk_overlap": self.chunk_overlap,
+            "doc_count": self.doc_count,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+        }
+
+
+class Document(Base):
+    """A source document uploaded into a knowledge base."""
+    __tablename__ = "kb_documents"
+
+    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))  # UUID4 string, generated when no id is supplied
+    kb_id = Column(String(36), ForeignKey("knowledge_bases.id", ondelete="CASCADE"),  # DB-level cascade: rows disappear with their knowledge base
+                   nullable=False, index=True, comment="所属知识库")
+    filename = Column(String(500), nullable=False, comment="原始文件名")
+    file_type = Column(String(20), nullable=False, comment="文件类型: txt/pdf/docx/md/csv")  # values are listed only in the comment; the schema does not enforce them
+    file_size = Column(Integer, default=0, comment="文件大小（字节）")  # measured in bytes, per the column comment
+    status = Column(String(20), default="pending",  # new rows start as "pending"
+                    comment="状态: pending/processing/ready/failed")  # free-form string; the listed states are not enforced by the schema
+    error_message = Column(Text, nullable=True, comment="处理失败信息")
+    chunk_count = Column(Integer, default=0, comment="分块数量")  # stored counter; nothing in this class updates it
+    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)  # naive UTC timestamp (datetime.utcnow)
+
+    kb = relationship("KnowledgeBase", back_populates="documents")
+    chunks = relationship("DocumentChunk", back_populates="document",  # deleting a document through the ORM also deletes its chunks
+                          cascade="all, delete-orphan", passive_deletes=True)  # unloaded chunks are left to the foreign key's ON DELETE CASCADE
+
+    def to_dict(self) -> dict:  # plain-dict snapshot of the columns; created_at becomes an ISO-8601 string or None
+        return {
+            "id": self.id,
+            "kb_id": self.kb_id,
+            "filename": self.filename,
+            "file_type": self.file_type,
+            "file_size": self.file_size,
+            "status": self.status,
+            "error_message": self.error_message,
+            "chunk_count": self.chunk_count,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+        }
+
+
+class DocumentChunk(Base):
+    """One text slice of a Document, stored together with its embedding vector."""
+    __tablename__ = "kb_document_chunks"
+
+    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))  # UUID4 string, generated when no id is supplied
+    document_id = Column(String(36), ForeignKey("kb_documents.id", ondelete="CASCADE"),
+                         nullable=False, comment="所属文档")  # indexed once, by the named Index in __table_args__
+    kb_id = Column(String(36), ForeignKey("knowledge_bases.id", ondelete="CASCADE"),  # repeats the parent document's kb_id; presumably to filter by KB without a join
+                   nullable=False, comment="所属知识库")  # indexed once, by the named Index in __table_args__
+    chunk_index = Column(Integer, default=0, comment="块序号")
+    content = Column(Text, nullable=False, comment="切片文本内容")
+    embedding = Column(Text, nullable=True, comment="JSON 序列化的 embedding 向量")  # JSON text per the column comment; this model does no (de)serialization
+    metadata_ = Column("metadata", MySQLJSON, nullable=True, comment="元数据")  # attribute is metadata_ (declarative reserves `metadata`); the DB column is still "metadata"
+    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)  # naive UTC timestamp (datetime.utcnow)
+
+    document = relationship("Document", back_populates="chunks")
+
+    __table_args__ = (  # sole index definitions: combining these with index=True on the columns created duplicate indexes
+        Index("ix_kb_chunks_kb_id", "kb_id"),
+        Index("ix_kb_chunks_doc_id", "document_id"),
+    )
+
+    def to_dict(self) -> dict:  # plain-dict snapshot for API output; the embedding column is not included
+        return {
+            "id": self.id,
+            "document_id": self.document_id,
+            "kb_id": self.kb_id,
+            "chunk_index": self.chunk_index,
+            "content": self.content[:200] + "..." if len(self.content) > 200 else self.content,  # preview: at most 200 chars, then "..."
+            "metadata": self.metadata_ or {},  # NULL metadata is reported as an empty dict
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+        }