Files
aiagent/backend/app/models/knowledge_base.py
renjianbo 7b9e0826de feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖
- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser
- 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行)
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系:7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 22:30:46 +08:00

117 lines
4.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
知识库 RAG 模型。
KnowledgeBase — 知识库容器
Document — 上传的源文档
DocumentChunk — 文档切片(含 embedding)
"""
from __future__ import annotations
import uuid
from datetime import datetime
from sqlalchemy import Column, String, Text, Integer, DateTime, ForeignKey, Index
from sqlalchemy.dialects.mysql import JSON as MySQLJSON
from sqlalchemy.orm import relationship
from app.core.database import Base
class KnowledgeBase(Base):
    """Knowledge base: the container that owns a set of uploaded documents.

    Rows are stored in the ``knowledge_bases`` table. Besides descriptive
    fields, each row carries the chunking parameters (``chunk_size`` and
    ``chunk_overlap``) and a ``doc_count`` counter.
    """

    __tablename__ = "knowledge_bases"

    # Primary key: a freshly generated UUID4 rendered as a string.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    name = Column(String(200), nullable=False, comment="知识库名称")
    description = Column(Text, nullable=True, comment="描述")
    # Creator id; nullable and indexed.
    user_id = Column(
        String(36),
        nullable=True,
        index=True,
        comment="创建者 ID",
    )
    # Chunking parameters, expressed in characters per the column comments.
    chunk_size = Column(Integer, default=500, comment="分块大小(字符数)")
    chunk_overlap = Column(Integer, default=50, comment="分块重叠(字符数)")
    doc_count = Column(Integer, default=0, comment="文档数量")
    # Naive UTC timestamps; ``updated_at`` is refreshed on every UPDATE.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Child documents. ``passive_deletes=True`` leaves the removal of
    # children that are not loaded in the session to the database's
    # ON DELETE handling instead of loading them first.
    documents = relationship(
        "Document",
        back_populates="kb",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def to_dict(self) -> dict:
        """Return the knowledge base's columns as a plain dict.

        Timestamps are rendered with ``isoformat()``; a missing timestamp
        is reported as ``None``. The ``documents`` relationship is not
        included.
        """
        created = self.created_at
        updated = self.updated_at
        plain_fields = (
            "id",
            "name",
            "description",
            "user_id",
            "chunk_size",
            "chunk_overlap",
            "doc_count",
        )
        payload = {field: getattr(self, field) for field in plain_fields}
        payload["created_at"] = created.isoformat() if created else None
        payload["updated_at"] = updated.isoformat() if updated else None
        return payload
class Document(Base):
    """A source document uploaded into a knowledge base.

    Rows are stored in the ``kb_documents`` table. ``status`` tracks the
    processing state (pending/processing/ready/failed per the column
    comment) and ``error_message`` holds the failure detail, if any.
    """

    __tablename__ = "kb_documents"

    # Primary key: a freshly generated UUID4 rendered as a string.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    # Owning knowledge base; the foreign key is declared ON DELETE CASCADE.
    kb_id = Column(
        String(36),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
        comment="所属知识库",
    )
    filename = Column(String(500), nullable=False, comment="原始文件名")
    file_type = Column(
        String(20),
        nullable=False,
        comment="文件类型: txt/pdf/docx/md/csv",
    )
    file_size = Column(Integer, default=0, comment="文件大小(字节)")
    status = Column(
        String(20),
        default="pending",
        comment="状态: pending/processing/ready/failed",
    )
    error_message = Column(Text, nullable=True, comment="处理失败信息")
    chunk_count = Column(Integer, default=0, comment="分块数量")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Parent knowledge base (other side of ``KnowledgeBase.documents``).
    kb = relationship("KnowledgeBase", back_populates="documents")
    # Chunks cut from this document. ``passive_deletes=True`` leaves the
    # removal of unloaded chunks to the database's ON DELETE handling.
    chunks = relationship(
        "DocumentChunk",
        back_populates="document",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def to_dict(self) -> dict:
        """Return the document's columns as a plain dict.

        ``created_at`` is rendered with ``isoformat()``, or ``None`` when
        it is not set. Relationships are not included.
        """
        created_iso = None
        if self.created_at:
            created_iso = self.created_at.isoformat()
        return dict(
            id=self.id,
            kb_id=self.kb_id,
            filename=self.filename,
            file_type=self.file_type,
            file_size=self.file_size,
            status=self.status,
            error_message=self.error_message,
            chunk_count=self.chunk_count,
            created_at=created_iso,
        )
class DocumentChunk(Base):
    """A slice of a document's text together with its embedding vector.

    Rows are stored in the ``kb_document_chunks`` table. Each chunk points
    at both its source document and the owning knowledge base; both foreign
    keys are declared ON DELETE CASCADE.
    """

    __tablename__ = "kb_document_chunks"

    # Primary key: a freshly generated UUID4 rendered as a string.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # NOTE: no ``index=True`` on the two foreign-key columns below. They are
    # indexed by the explicitly named entries in ``__table_args__``; also
    # passing ``index=True`` would create a second, redundant index on the
    # same column.
    document_id = Column(
        String(36),
        ForeignKey("kb_documents.id", ondelete="CASCADE"),
        nullable=False,
        comment="所属文档",
    )
    kb_id = Column(
        String(36),
        ForeignKey("knowledge_bases.id", ondelete="CASCADE"),
        nullable=False,
        comment="所属知识库",
    )
    chunk_index = Column(Integer, default=0, comment="块序号")
    content = Column(Text, nullable=False, comment="切片文本内容")
    # Per the column comment, the embedding vector serialised as JSON text.
    embedding = Column(Text, nullable=True, comment="JSON 序列化的 embedding 向量")
    # The database column is called "metadata"; the Python attribute carries
    # a trailing underscore, presumably because ``metadata`` is reserved on
    # SQLAlchemy declarative classes.
    metadata_ = Column("metadata", MySQLJSON, nullable=True, comment="元数据")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Parent document (other side of ``Document.chunks``).
    document = relationship("Document", back_populates="chunks")

    # One named index per lookup column.
    __table_args__ = (
        Index("ix_kb_chunks_kb_id", "kb_id"),
        Index("ix_kb_chunks_doc_id", "document_id"),
    )

    def to_dict(self) -> dict:
        """Return a compact dict describing the chunk.

        ``content`` is cut to its first 200 characters followed by ``"..."``
        when it is longer than that. The ``embedding`` column is not
        included. ``metadata`` falls back to an empty dict and
        ``created_at`` is rendered with ``isoformat()`` (``None`` if unset).
        """
        preview = self.content
        # ``content`` can still be None on an instance that has not been
        # populated yet; report it as-is instead of failing on len(None).
        if preview is not None and len(preview) > 200:
            preview = preview[:200] + "..."
        return {
            "id": self.id,
            "document_id": self.document_id,
            "kb_id": self.kb_id,
            "chunk_index": self.chunk_index,
            "content": preview,
            "metadata": self.metadata_ or {},
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }