feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖
- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser - 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行) - 新增 agent_vector_memory / knowledge_base 模型及对应数据库表 - 实现 SSE 流式响应与 Agent 预算控制 - AgentChat.vue 集成 MainLayout 导航布局 - 完善测试体系:7 个新测试文件共 110 个测试覆盖 - 修复 conftest.py SQLite 内存数据库连接隔离问题 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
251
backend/app/api/knowledge_base.py
Normal file
251
backend/app/api/knowledge_base.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""
|
||||
知识库 RAG API。
|
||||
|
||||
提供知识库管理、文档上传、语义搜索和 RAG 查询接口。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.api.auth import get_current_user
|
||||
from app.models.user import User
|
||||
from app.services.knowledge_service import (
|
||||
create_knowledge_base,
|
||||
delete_document,
|
||||
delete_knowledge_base,
|
||||
get_knowledge_base,
|
||||
list_documents,
|
||||
list_knowledge_bases,
|
||||
rag_query,
|
||||
search,
|
||||
upload_document,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/v1/knowledge-bases", tags=["knowledge-base"])
|
||||
|
||||
|
||||
# ─── Schema ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class KBCreateRequest(BaseModel):
|
||||
name: str
|
||||
description: str = ""
|
||||
chunk_size: int = 500
|
||||
chunk_overlap: int = 50
|
||||
|
||||
|
||||
class KBResponse(BaseModel):
|
||||
id: str
|
||||
name: str
|
||||
description: Optional[str] = ""
|
||||
user_id: Optional[str] = ""
|
||||
chunk_size: int = 500
|
||||
chunk_overlap: int = 50
|
||||
doc_count: int = 0
|
||||
created_at: Optional[str] = ""
|
||||
updated_at: Optional[str] = ""
|
||||
|
||||
|
||||
class DocumentResponse(BaseModel):
|
||||
id: str
|
||||
kb_id: str
|
||||
filename: str
|
||||
file_type: str
|
||||
file_size: int = 0
|
||||
status: str = "pending"
|
||||
error_message: Optional[str] = None
|
||||
chunk_count: int = 0
|
||||
created_at: Optional[str] = None
|
||||
|
||||
|
||||
class SearchRequest(BaseModel):
|
||||
query: str
|
||||
top_k: int = 5
|
||||
min_score: float = 0.3
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
|
||||
chunk_id: str
|
||||
content: str
|
||||
score: float
|
||||
metadata: Dict[str, Any] = {}
|
||||
|
||||
|
||||
class SearchResponse(BaseModel):
|
||||
results: List[SearchResult] = []
|
||||
|
||||
|
||||
class RAGRequest(BaseModel):
|
||||
query: str
|
||||
top_k: int = 5
|
||||
min_score: float = 0.3
|
||||
|
||||
|
||||
class RAGResponse(BaseModel):
|
||||
query: str
|
||||
context: str = ""
|
||||
sources: List[Dict[str, Any]] = []
|
||||
found: bool = False
|
||||
|
||||
|
||||
# ─── 知识库 CRUD ──────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("", response_model=KBResponse)
|
||||
async def api_create_kb(
|
||||
req: KBCreateRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""创建知识库。"""
|
||||
kb = create_knowledge_base(
|
||||
db=db,
|
||||
name=req.name,
|
||||
user_id=current_user.id,
|
||||
description=req.description,
|
||||
chunk_size=req.chunk_size,
|
||||
chunk_overlap=req.chunk_overlap,
|
||||
)
|
||||
return KBResponse(**kb.to_dict())
|
||||
|
||||
|
||||
@router.get("", response_model=List[KBResponse])
|
||||
async def api_list_kb(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""列出知识库。"""
|
||||
kbs = list_knowledge_bases(db, user_id=current_user.id)
|
||||
return [KBResponse(**kb.to_dict()) for kb in kbs]
|
||||
|
||||
|
||||
@router.get("/{kb_id}", response_model=KBResponse)
|
||||
async def api_get_kb(
|
||||
kb_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""获取知识库详情。"""
|
||||
kb = get_knowledge_base(db, kb_id)
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="知识库不存在")
|
||||
return KBResponse(**kb.to_dict())
|
||||
|
||||
|
||||
@router.delete("/{kb_id}")
|
||||
async def api_delete_kb(
|
||||
kb_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""删除知识库。"""
|
||||
ok = delete_knowledge_base(db, kb_id)
|
||||
if not ok:
|
||||
raise HTTPException(status_code=404, detail="知识库不存在")
|
||||
return {"message": "知识库已删除"}
|
||||
|
||||
|
||||
# ─── 文档管理 ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/{kb_id}/documents", response_model=DocumentResponse)
|
||||
async def api_upload_document(
|
||||
kb_id: str,
|
||||
file: UploadFile = File(...),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""上传文档到知识库(自动解析、分块、生成 Embedding)。"""
|
||||
if not file.filename:
|
||||
raise HTTPException(status_code=400, detail="文件名不能为空")
|
||||
|
||||
content = await file.read()
|
||||
if not content:
|
||||
raise HTTPException(status_code=400, detail="文件内容为空")
|
||||
|
||||
try:
|
||||
doc = await upload_document(
|
||||
db=db,
|
||||
kb_id=kb_id,
|
||||
filename=file.filename,
|
||||
file_content=content,
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
|
||||
if doc.status == "failed":
|
||||
# 返回成功但告知处理失败
|
||||
return DocumentResponse(**doc.to_dict())
|
||||
|
||||
return DocumentResponse(**doc.to_dict())
|
||||
|
||||
|
||||
@router.get("/{kb_id}/documents", response_model=List[DocumentResponse])
|
||||
async def api_list_documents(
|
||||
kb_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""列出知识库中的文档。"""
|
||||
docs = list_documents(db, kb_id)
|
||||
return [DocumentResponse(**d.to_dict()) for d in docs]
|
||||
|
||||
|
||||
@router.delete("/{kb_id}/documents/{doc_id}")
|
||||
async def api_delete_document(
|
||||
kb_id: str,
|
||||
doc_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""删除文档。"""
|
||||
ok = delete_document(db, doc_id)
|
||||
if not ok:
|
||||
raise HTTPException(status_code=404, detail="文档不存在")
|
||||
return {"message": "文档已删除"}
|
||||
|
||||
|
||||
# ─── 搜索 & RAG ────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/{kb_id}/search", response_model=SearchResponse)
|
||||
async def api_search(
|
||||
kb_id: str,
|
||||
req: SearchRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""语义搜索知识库。"""
|
||||
results = await search(
|
||||
db=db,
|
||||
kb_id=kb_id,
|
||||
query=req.query,
|
||||
top_k=req.top_k,
|
||||
min_score=req.min_score,
|
||||
)
|
||||
return SearchResponse(results=[SearchResult(**r) for r in results])
|
||||
|
||||
|
||||
@router.post("/{kb_id}/rag", response_model=RAGResponse)
|
||||
async def api_rag(
|
||||
kb_id: str,
|
||||
req: RAGRequest,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""RAG 查询:搜索相关片段并格式化为上下文。"""
|
||||
result = await rag_query(
|
||||
db=db,
|
||||
kb_id=kb_id,
|
||||
query=req.query,
|
||||
top_k=req.top_k,
|
||||
min_score=req.min_score,
|
||||
)
|
||||
return RAGResponse(**result)
|
||||
Reference in New Issue
Block a user