feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖

- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser
- 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行)
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系:7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-01 22:30:46 +08:00
parent 036f533881
commit 7b9e0826de
35 changed files with 4353 additions and 365 deletions

View File

@@ -0,0 +1,101 @@
"""
文档解析器 — 支持 txt / pdf / docx / md / csv。
解析结果统一返回纯文本字符串,由调用方做分块处理。
"""
from __future__ import annotations
import csv
import io
import logging
import os
from typing import Optional
logger = logging.getLogger(__name__)
def parse_document(file_path: str, file_type: str) -> Optional[str]:
    """Parse a document on disk into plain text.

    Dispatches to a format-specific parser selected by ``file_type`` and
    returns the extracted text with leading/trailing whitespace stripped.

    Args:
        file_path: Path of the file to parse.
        file_type: One of ``txt``, ``md``, ``pdf``, ``docx`` or ``csv``.
            Matched case-insensitively; surrounding whitespace and a leading
            dot are ignored, so ``"PDF"`` and ``".pdf"`` both select the PDF
            parser.

    Returns:
        The extracted text (possibly an empty string), or ``None`` when the
        file does not exist, the type is unsupported, or the parser raised.
    """
    if not os.path.isfile(file_path):
        logger.warning("文件不存在: %s", file_path)
        return None

    # Callers often derive the type from a filename extension, which may be
    # upper-case or still carry its dot. Non-string values are left untouched
    # so they fall through to the "unsupported type" branch as before.
    if isinstance(file_type, str):
        type_key = file_type.strip().lower().lstrip(".")
    else:
        type_key = file_type

    parsers = {
        "txt": _parse_text,
        "md": _parse_text,
        "pdf": _parse_pdf,
        "docx": _parse_docx,
        "csv": _parse_csv,
    }
    parser = parsers.get(type_key)
    if not parser:
        logger.warning("不支持的文件类型: %s", file_type)
        return None

    # Deliberately broad: this is the boundary where any failure inside a
    # parser is logged with its traceback and reported to the caller as None.
    try:
        text = parser(file_path)
        if text:
            text = text.strip()
        logger.info("文档解析完成: %s (%s, %d 字符)", file_path, type_key, len(text or ""))
        return text
    except Exception as e:
        logger.error("文档解析失败: %s (%s)", file_path, e, exc_info=True)
        return None
def _parse_text(file_path: str) -> str:
    """Read a plain-text or Markdown file and return its full contents.

    The file is decoded as UTF-8 and undecodable bytes are replaced with
    U+FFFD instead of raising. The ``utf-8-sig`` codec is used so that a
    leading byte-order mark is dropped rather than leaking into the text as
    an invisible U+FEFF character; files without a BOM decode exactly as
    they would with plain ``utf-8``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file contents.
    """
    with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
        return f.read()
def _parse_pdf(file_path: str) -> str:
    """Extract the text of a PDF, one chunk per page.

    Each page's ``extract_text()`` result is collected in page order; pages
    that yield no text are skipped, and the remaining chunks are joined with
    a blank line between them.

    Args:
        file_path: Path of the PDF file.

    Returns:
        The concatenated page texts (empty string if no page yields text).
    """
    # Imported lazily so the dependency is only needed when a PDF is parsed.
    from pypdf import PdfReader

    page_texts = (page.extract_text() for page in PdfReader(file_path).pages)
    return "\n\n".join(chunk for chunk in page_texts if chunk)
def _parse_docx(file_path: str) -> str:
    """Extract the text of a Word document.

    Non-blank paragraphs come first, each stripped of surrounding
    whitespace. They are followed by one line per table row, built from the
    row's non-empty cell texts joined with ``" | "``. All pieces are
    separated by a blank line.

    Args:
        file_path: Path of the ``.docx`` file.

    Returns:
        The concatenated paragraph and table text.
    """
    # Imported lazily so the dependency is only needed when a docx is parsed.
    from docx import Document as DocxDocument

    document = DocxDocument(file_path)

    # Paragraph text, skipping paragraphs that are empty after stripping.
    stripped_paragraphs = ((p.text or "").strip() for p in document.paragraphs)
    blocks = [content for content in stripped_paragraphs if content]

    # Table content is appended after the paragraphs, one entry per row.
    for table in document.tables:
        for row in table.rows:
            cell_texts = (cell.text.strip() for cell in row.cells)
            filled = [value for value in cell_texts if value]
            if filled:
                blocks.append(" | ".join(filled))

    return "\n\n".join(blocks)
def _parse_csv(file_path: str) -> str:
    """Render a CSV file as a plain-text table.

    Every record becomes one output line whose cells are stripped of
    surrounding whitespace and joined with ``" | "``; a blank input line
    becomes an empty output line.

    The file is opened with ``encoding="utf-8-sig"`` so that a leading
    byte-order mark is discarded. Otherwise the BOM would be glued to the
    first cell, and a quoted first field would no longer be recognised as
    quoted. It is opened with ``newline=""`` as the ``csv`` module requires,
    so the reader does its own line-ending handling and line breaks inside
    quoted fields are passed through untouched. Undecodable bytes are
    replaced with U+FFFD.

    Args:
        file_path: Path of the CSV file.

    Returns:
        The rendered rows joined with ``"\\n"``.
    """
    with open(
        file_path, "r", encoding="utf-8-sig", errors="replace", newline=""
    ) as f:
        return "\n".join(
            " | ".join(cell.strip() for cell in row) for row in csv.reader(f)
        )