feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖
- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser
- 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行)
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系:7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
116
backend/app/models/knowledge_base.py
Normal file
116
backend/app/models/knowledge_base.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
知识库 RAG 模型。
|
||||
|
||||
KnowledgeBase — 知识库容器
|
||||
Document — 上传的源文档
|
||||
DocumentChunk — 文档切片(含 embedding)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, Index
|
||||
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class KnowledgeBase(Base):
    """Knowledge base: the container row that owns a set of uploaded documents."""

    __tablename__ = "knowledge_bases"

    # Primary key is a UUID4 string generated on the Python side at insert time.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    name = Column(String(200), nullable=False, comment="知识库名称")
    description = Column(Text, nullable=True, comment="描述")
    user_id = Column(
        String(36),
        nullable=True,
        index=True,
        comment="创建者 ID",
    )

    # Chunking parameters, both measured in characters per the column comments.
    chunk_size = Column(Integer, default=500, comment="分块大小(字符数)")
    chunk_overlap = Column(Integer, default=50, comment="分块重叠(字符数)")
    doc_count = Column(Integer, default=0, comment="文档数量")

    # Naive UTC timestamps; updated_at is refreshed on every UPDATE.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Documents are removed together with the knowledge base. With
    # passive_deletes=True the ORM leaves the row removal to the foreign key's
    # ON DELETE CASCADE instead of loading and deleting each child itself.
    documents = relationship(
        "Document",
        back_populates="kb",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot with timestamps as ISO-8601 strings.

        A timestamp that is not set is reported as ``None``.
        """
        payload = {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "user_id": self.user_id,
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
            "doc_count": self.doc_count,
        }
        created = self.created_at
        payload["created_at"] = created.isoformat() if created else None
        updated = self.updated_at
        payload["updated_at"] = updated.isoformat() if updated else None
        return payload
|
||||
|
||||
|
||||
class Document(Base):
    """Knowledge-base document: one uploaded source file and its processing state."""

    __tablename__ = "kb_documents"

    # Primary key is a UUID4 string generated on the Python side at insert time.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    # Owning knowledge base; the database cascades the delete to this row.
    kb_id = Column(
        String(36),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
        comment="所属知识库",
    )
    filename = Column(String(500), nullable=False, comment="原始文件名")
    file_type = Column(
        String(20),
        nullable=False,
        comment="文件类型: txt/pdf/docx/md/csv",
    )
    file_size = Column(Integer, default=0, comment="文件大小(字节)")

    # Processing lifecycle as listed in the column comment:
    # pending / processing / ready / failed.
    status = Column(
        String(20),
        default="pending",
        comment="状态: pending/processing/ready/failed",
    )
    error_message = Column(Text, nullable=True, comment="处理失败信息")
    chunk_count = Column(Integer, default=0, comment="分块数量")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    kb = relationship("KnowledgeBase", back_populates="documents")
    # Chunks are removed together with the document; passive_deletes=True
    # leaves the row removal to the foreign key's ON DELETE CASCADE.
    chunks = relationship(
        "DocumentChunk",
        back_populates="document",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot with ``created_at`` as an ISO-8601 string.

        ``created_at`` is reported as ``None`` when it is not set.
        """
        payload = {
            "id": self.id,
            "kb_id": self.kb_id,
            "filename": self.filename,
            "file_type": self.file_type,
            "file_size": self.file_size,
            "status": self.status,
            "error_message": self.error_message,
            "chunk_count": self.chunk_count,
        }
        created = self.created_at
        payload["created_at"] = created.isoformat() if created else None
        return payload
|
||||
|
||||
|
||||
class DocumentChunk(Base):
    """Document chunk: one slice of a document's text plus its embedding vector."""

    __tablename__ = "kb_document_chunks"

    # Primary key is a UUID4 string generated on the Python side at insert time.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Both foreign keys are indexed by the explicitly named indexes in
    # __table_args__ below. They deliberately do NOT also set index=True:
    # that would create a second, identical single-column index on each column.
    document_id = Column(String(36), ForeignKey("kb_documents.id", ondelete="CASCADE"),
                         nullable=False, comment="所属文档")
    kb_id = Column(String(36), ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
                   nullable=False, comment="所属知识库")
    chunk_index = Column(Integer, default=0, comment="块序号")
    content = Column(Text, nullable=False, comment="切片文本内容")
    # The embedding vector is stored as JSON-serialized text.
    embedding = Column(Text, nullable=True, comment="JSON 序列化的 embedding 向量")
    # The attribute is named metadata_ because `metadata` is reserved on
    # SQLAlchemy declarative classes; the database column is still "metadata".
    metadata_ = Column("metadata", MySQLJSON, nullable=True, comment="元数据")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    document = relationship("Document", back_populates="chunks")

    __table_args__ = (
        Index("ix_kb_chunks_kb_id", "kb_id"),
        Index("ix_kb_chunks_doc_id", "document_id"),
    )

    def to_dict(self) -> dict:
        """Return a plain-dict summary of this chunk.

        ``content`` is a preview: text longer than 200 characters is cut to
        its first 200 characters followed by ``"..."``. The embedding is not
        included. Missing metadata is reported as an empty dict and a missing
        ``created_at`` as ``None``.
        """
        preview = self.content
        # content may still be None on an instance that has not been populated;
        # pass it through instead of failing on len(None).
        if preview is not None and len(preview) > 200:
            preview = preview[:200] + "..."
        return {
            "id": self.id,
            "document_id": self.document_id,
            "kb_id": self.kb_id,
            "chunk_index": self.chunk_index,
            "content": preview,
            "metadata": self.metadata_ or {},
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }
|
||||
Reference in New Issue
Block a user