feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖

- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser
- 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行)
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系:7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-01 22:30:46 +08:00
parent 036f533881
commit 7b9e0826de
35 changed files with 4353 additions and 365 deletions

View File

@@ -0,0 +1,116 @@
"""
知识库 RAG 模型。
KnowledgeBase — 知识库容器
Document — 上传的源文档
DocumentChunk — 文档切片(含 embedding)
"""
from __future__ import annotations
import uuid
from datetime import datetime
from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, Index
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
from sqlalchemy.orm import relationship
from app.core.database import Base
class KnowledgeBase(Base):
    """Knowledge base: the top-level container that owns uploaded documents.

    Documents are attached through the ``documents`` relationship, which is
    configured with ``all, delete-orphan`` cascade and ``passive_deletes``.
    """

    __tablename__ = "knowledge_bases"

    # Primary key: a UUID4 rendered as a 36-character string.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    name = Column(String(200), comment="知识库名称", nullable=False)
    description = Column(Text, comment="描述", nullable=True)
    # Creator of the knowledge base; optional and indexed.
    user_id = Column(String(36), comment="创建者 ID", nullable=True, index=True)

    # Chunking parameters, expressed in characters.
    chunk_size = Column(Integer, comment="分块大小(字符数)", default=500)
    chunk_overlap = Column(Integer, comment="分块重叠(字符数)", default=50)

    # Number of documents in this knowledge base.
    doc_count = Column(Integer, comment="文档数量", default=0)

    # Naive UTC timestamps; ``updated_at`` is refreshed on every UPDATE.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    documents = relationship(
        "Document",
        back_populates="kb",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def to_dict(self) -> dict:
        """Return a plain-dict view of this row.

        Timestamps are rendered as ISO-8601 strings, or ``None`` when unset.
        """
        plain_fields = (
            "id",
            "name",
            "description",
            "user_id",
            "chunk_size",
            "chunk_overlap",
            "doc_count",
        )
        payload = {field: getattr(self, field) for field in plain_fields}
        for stamp_field in ("created_at", "updated_at"):
            stamp = getattr(self, stamp_field)
            payload[stamp_field] = stamp.isoformat() if stamp else None
        return payload
class Document(Base):
    """A source document uploaded into a knowledge base.

    Each document belongs to exactly one knowledge base (``kb``) and owns the
    chunks produced from it (``chunks``).
    """

    __tablename__ = "kb_documents"

    # Primary key: a UUID4 rendered as a 36-character string.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    # Owning knowledge base; the foreign key is declared ON DELETE CASCADE.
    kb_id = Column(
        String(36),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        comment="所属知识库",
        nullable=False,
        index=True,
    )

    # File facts as uploaded.
    filename = Column(String(500), comment="原始文件名", nullable=False)
    file_type = Column(
        String(20),
        comment="文件类型: txt/pdf/docx/md/csv",
        nullable=False,
    )
    file_size = Column(Integer, comment="文件大小(字节)", default=0)

    # Processing state: pending / processing / ready / failed.
    status = Column(
        String(20),
        comment="状态: pending/processing/ready/failed",
        default="pending",
    )
    # Failure details, if processing failed.
    error_message = Column(Text, comment="处理失败信息", nullable=True)
    chunk_count = Column(Integer, comment="分块数量", default=0)

    # Naive UTC creation timestamp.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    kb = relationship("KnowledgeBase", back_populates="documents")
    chunks = relationship(
        "DocumentChunk",
        back_populates="document",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def to_dict(self) -> dict:
        """Return a plain-dict view of this row.

        ``created_at`` is rendered as an ISO-8601 string, or ``None`` when
        unset.
        """
        plain_fields = (
            "id",
            "kb_id",
            "filename",
            "file_type",
            "file_size",
            "status",
            "error_message",
            "chunk_count",
        )
        payload = {field: getattr(self, field) for field in plain_fields}
        created = self.created_at
        payload["created_at"] = created.isoformat() if created else None
        return payload
class DocumentChunk(Base):
    """One slice of a document's text, stored together with its embedding.

    The embedding is kept as JSON-serialized text in the ``embedding`` column.
    ``to_dict`` does not include it and truncates ``content`` to a short
    preview.
    """

    __tablename__ = "kb_document_chunks"

    # Primary key: a UUID4 rendered as a 36-character string.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # NOTE: ``index=True`` is intentionally not set on the two foreign-key
    # columns below. The explicitly named indexes in ``__table_args__``
    # already cover them; setting both created two indexes per column.
    document_id = Column(
        String(36),
        ForeignKey("kb_documents.id", ondelete="CASCADE"),
        nullable=False,
        comment="所属文档",
    )
    kb_id = Column(
        String(36),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
        comment="所属知识库",
    )

    # Position of this chunk within its document.
    chunk_index = Column(Integer, default=0, comment="块序号")
    content = Column(Text, nullable=False, comment="切片文本内容")
    embedding = Column(Text, nullable=True, comment="JSON 序列化的 embedding 向量")
    # ``metadata`` is a reserved attribute name on SQLAlchemy declarative
    # classes, so the Python attribute carries a trailing underscore while
    # the database column is still named "metadata".
    metadata_ = Column("metadata", MySQLJSON, nullable=True, comment="元数据")

    # Naive UTC creation timestamp.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    document = relationship("Document", back_populates="chunks")

    __table_args__ = (
        Index("ix_kb_chunks_kb_id", "kb_id"),
        Index("ix_kb_chunks_doc_id", "document_id"),
    )

    def to_dict(self) -> dict:
        """Return a plain-dict view of this chunk, without the embedding.

        ``content`` is cut to its first 200 characters followed by ``"..."``
        when it is longer than that; a ``None`` content (for example on an
        instance that has not been populated yet) is passed through as
        ``None`` instead of raising. ``metadata`` falls back to an empty dict,
        and ``created_at`` is an ISO-8601 string or ``None``.
        """
        preview_limit = 200
        preview = self.content
        if preview is not None and len(preview) > preview_limit:
            preview = preview[:preview_limit] + "..."

        return {
            "id": self.id,
            "document_id": self.document_id,
            "kb_id": self.kb_id,
            "chunk_index": self.chunk_index,
            "content": preview,
            "metadata": self.metadata_ or {},
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }