feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖

- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser
- 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行)
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系:7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-01 22:30:46 +08:00
parent 036f533881
commit 7b9e0826de
35 changed files with 4353 additions and 365 deletions

View File

@@ -13,7 +13,7 @@ from __future__ import annotations
import json
import logging
import time
from typing import Any, Callable, Dict, List, Optional, Protocol, TypedDict
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict
from app.agent_runtime.schemas import (
AgentConfig,
@@ -23,6 +23,7 @@ from app.agent_runtime.schemas import (
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
from app.core.exceptions import WorkflowExecutionError
logger = logging.getLogger(__name__)
@@ -95,6 +96,11 @@ class AgentRuntime:
self.on_tool_executed = on_tool_executed
self.on_llm_call = on_llm_call
self._memory_context_loaded = False
self._llm_invocations = 0
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
self.on_llm_invocation: Optional[Callable[[], Any]] = None
async def run(self, user_input: str) -> AgentResult:
"""
@@ -108,7 +114,7 @@ class AgentRuntime:
# 1. 首次运行时加载长期记忆到 system prompt
if not self._memory_context_loaded:
await self._inject_memory_context()
await self._inject_memory_context(user_input)
self._memory_context_loaded = True
# 2. 追加用户消息
@@ -139,6 +145,32 @@ class AgentRuntime:
# 裁剪过长历史
messages = self.memory.trim_messages(self.context.messages)
# 预算检查LLM 调用次数(在调用 LLM 之前检查,避免浪费额度)
budget = self.config.budget
if self._llm_invocations >= budget.max_llm_invocations:
err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
logger.warning(err)
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
await self.memory.save_context(user_input, err, self.context.messages)
return AgentResult(success=False, content=err, truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=err)
# 调用外部 LLM 预算回调WorkflowEngine 注入,将 Agent 的 LLM 计入工作流预算)
if self.on_llm_invocation:
try:
self.on_llm_invocation()
except Exception as e:
err = f"LLM 调用超出工作流预算: {e}"
logger.warning(err)
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
await self.memory.save_context(user_input, err, self.context.messages)
return AgentResult(success=False, content=err, truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=str(e))
# 调用 LLM
try:
response = await llm.chat(
@@ -166,6 +198,9 @@ class AgentRuntime:
error=err_str,
)
# 记录 LLM 调用次数(内部计数)
self._llm_invocations += 1
# 解析工具调用
tool_calls = self._extract_tool_calls(response)
content = self._extract_content(response)
@@ -247,9 +282,22 @@ class AgentRuntime:
self.context.add_tool_result(tcid, tname, result)
self.context.tool_calls_made += 1
# 预算检查:工具调用次数
if self.context.tool_calls_made > budget.max_tool_calls:
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
logger.warning(err)
steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
content=err, tool_name=tname))
return AgentResult(success=False, content=err, truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=err)
if self.on_tool_executed:
try:
await self.on_tool_executed(tname)
except WorkflowExecutionError:
raise
except Exception:
pass
@@ -284,9 +332,240 @@ class AgentRuntime:
steps=steps,
)
async def _inject_memory_context(self) -> None:
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
    """Execute a single Agent conversation turn, streaming SSE events.

    Mirrors ``run()`` step by step, but instead of returning an
    ``AgentResult`` it yields a dict event at each key point:

    - ``think``: the LLM decided to call tools
    - ``tool_call``: a tool is about to execute
    - ``tool_result``: a tool finished executing
    - ``final``: the final answer (or the max-iteration fallback)
    - ``error``: an LLM failure or a budget limit was hit

    Args:
        user_input: The user's message for this turn.

    Yields:
        dict: Events as described above. The generator terminates after
        a ``final`` event or a non-retryable ``error`` event.
    """
    max_iter = max(1, self.config.llm.max_iterations)
    self.context.iteration = 0
    self.context.tool_calls_made = 0

    # 1. On the first turn, load long-term memory into the system prompt.
    if not self._memory_context_loaded:
        await self._inject_memory_context(user_input)
        self._memory_context_loaded = True

    # 2. Append the user message.
    self.context.add_user_message(user_input)

    # 3. ReAct loop.
    llm = _LLMClient(self.config.llm)
    tool_schemas = self.tool_manager.get_tool_schemas()
    has_tools = self.tool_manager.has_tools()
    # Step bookkeeping kept in parallel with run(); not emitted as events.
    steps: List[AgentStep] = []

    # Mutable context so the metrics callback can report which step/tool
    # the current LLM call belongs to.
    llm_callback_ctx = {"step_type": "think", "tool_name": None}

    def _llm_callback(metrics: Dict[str, Any]):
        # Enrich LLM metrics with session/step context before forwarding.
        if self.on_llm_call:
            metrics.update({
                "session_id": self.context.session_id,
                "user_id": self.config.user_id,
                "step_type": llm_callback_ctx["step_type"],
                "tool_name": llm_callback_ctx["tool_name"],
            })
            self.on_llm_call(metrics)

    def _clip(text: str, limit: int) -> str:
        # Truncate long tool output for event payloads and log previews.
        return text[:limit] + "..." if len(text) > limit else text

    while self.context.iteration < max_iter:
        self.context.iteration += 1

        # Trim overly long history before each LLM call.
        messages = self.memory.trim_messages(self.context.messages)

        # Budget check: LLM invocation count (checked *before* calling the
        # LLM to avoid wasting quota).
        budget = self.config.budget
        if self._llm_invocations >= budget.max_llm_invocations:
            err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
            logger.warning(err)
            yield {"type": "error", "content": err, "iteration": self.context.iteration,
                   "truncated": True}
            await self.memory.save_context(user_input, err, self.context.messages)
            return

        # External LLM budget callback (injected by WorkflowEngine so the
        # Agent's LLM calls count against the workflow budget).
        if self.on_llm_invocation:
            try:
                self.on_llm_invocation()
            except Exception as e:
                err = f"LLM 调用超出工作流预算: {e}"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                # Persist the turn — consistent with the internal budget
                # path above and with run()'s workflow-budget path.
                await self.memory.save_context(user_input, err, self.context.messages)
                return

        # Call the LLM.
        try:
            response = await llm.chat(
                messages=messages,
                # Simplified: the original redundant ternary
                # (has_tools and iteration == 1 branch) reduced to this
                # expression in every case.
                tools=tool_schemas if has_tools else None,
                iteration=self.context.iteration,
                on_completion=_llm_callback,
            )
        except Exception as e:
            err_str = str(e)
            logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
            # Retry transient failures while iterations remain.
            if self.context.iteration < max_iter and self._is_retryable(err_str):
                yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}",
                       "iteration": self.context.iteration}
                continue
            yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
                   "iteration": self.context.iteration}
            return

        # Count the LLM invocation (internal counter).
        self._llm_invocations += 1

        # Parse tool calls and text content from the response.
        tool_calls = self._extract_tool_calls(response)
        content = self._extract_content(response)
        reasoning = getattr(response, "reasoning_content", None) or (
            response.get("reasoning_content") if isinstance(response, dict) else None
        )

        if not tool_calls:
            # The LLM answered directly with text -> finish the turn.
            self.context.add_assistant_message(content)
            final_text = content or "(模型未返回有效内容)"
            yield {
                "type": "final",
                "content": final_text,
                "reasoning": reasoning,
                "iteration": self.context.iteration,
                "iterations_used": self.context.iteration,
                "tool_calls_made": self.context.tool_calls_made,
                "session_id": self.context.session_id,
            }
            await self.memory.save_context(user_input, final_text, self.context.messages)
            return

        # Tool calls present -> record the assistant message first so the
        # tool results can reference it.
        self.context.add_assistant_message(content or "", tool_calls, reasoning)

        # Emit the "think" event describing the planned tool calls.
        tc_names = [tc["function"]["name"] for tc in tool_calls]
        tc_args_list = []
        for tc in tool_calls:
            try:
                tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
            except (json.JSONDecodeError, TypeError):
                # Malformed arguments degrade to an empty dict.
                tc_args_list.append({})
        yield {
            "type": "think",
            "content": content or f"调用工具: {', '.join(tc_names)}",
            "reasoning": reasoning,
            "tool_names": tc_names,
            "iteration": self.context.iteration,
        }
        steps.append(AgentStep(
            iteration=self.context.iteration,
            type="think",
            content=content or f"调用工具: {', '.join(tc_names)}",
            reasoning=reasoning,
            tool_name=tc_names[0] if len(tc_names) == 1 else None,
            tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
        ))
        if self.execution_logger:
            self.execution_logger.info(
                f"Agent 调用 {len(tool_calls)} 个工具",
                data={"tool_calls": tc_names, "iteration": self.context.iteration},
            )

        # Execute each requested tool in order.
        for tc in tool_calls:
            tfn = tc.get("function", {})
            tname = tfn.get("name", "unknown")
            tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
            try:
                targs = json.loads(tfn.get("arguments", "{}"))
            except (json.JSONDecodeError, TypeError):
                targs = {}

            # Emit the tool_call event before execution.
            yield {
                "type": "tool_call",
                "name": tname,
                "input": targs,
                "iteration": self.context.iteration,
            }

            logger.info("Agent 执行工具 [%s]: %s", tname, targs)
            result = await self.tool_manager.execute(tname, targs)

            # Emit the tool_result event (payload truncated to 500 chars).
            yield {
                "type": "tool_result",
                "name": tname,
                "result": _clip(result, 500),
                "iteration": self.context.iteration,
            }
            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="tool_result",
                content=f"工具 {tname} 返回结果",
                tool_name=tname,
                tool_input=targs,
                tool_result=_clip(result, 500),
            ))
            self.context.add_tool_result(tcid, tname, result)
            self.context.tool_calls_made += 1

            # Budget check: tool call count.
            if self.context.tool_calls_made > budget.max_tool_calls:
                err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                return

            if self.on_tool_executed:
                try:
                    await self.on_tool_executed(tname)
                except WorkflowExecutionError:
                    # Workflow budget violations must propagate to the engine.
                    raise
                except Exception:
                    # Observability callbacks are best-effort; never break the run.
                    pass

            if self.execution_logger:
                preview = _clip(result, 300)
                self.execution_logger.info(
                    f"工具 {tname} 执行完成",
                    data={"tool_name": tname, "result_preview": preview},
                )

    # Max iterations reached: fall back to the last assistant text, if any.
    last_content = ""
    for m in reversed(self.context.messages):
        if m.get("role") == "assistant" and m.get("content"):
            last_content = m["content"]
            break
    logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
    await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
    yield {
        "type": "final",
        "content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
        "iteration": self.context.iteration,
        "iterations_used": self.context.iteration,
        "tool_calls_made": self.context.tool_calls_made,
        "truncated": True,
        "session_id": self.context.session_id,
    }
async def _inject_memory_context(self, query: str = "") -> None:
"""加载长期记忆并注入 system prompt。"""
mem_text = await self.memory.initialize()
mem_text = await self.memory.initialize(query=query)
if mem_text:
enriched = (
self.config.system_prompt.rstrip("\n")