feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖
- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser - 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行) - 新增 agent_vector_memory / knowledge_base 模型及对应数据库表 - 实现 SSE 流式响应与 Agent 预算控制 - AgentChat.vue 集成 MainLayout 导航布局 - 完善测试体系:7 个新测试文件共 110 个测试覆盖 - 修复 conftest.py SQLite 内存数据库连接隔离问题 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,7 +13,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Callable, Dict, List, Optional, Protocol, TypedDict
|
||||
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict
|
||||
|
||||
from app.agent_runtime.schemas import (
|
||||
AgentConfig,
|
||||
@@ -23,6 +23,7 @@ from app.agent_runtime.schemas import (
|
||||
from app.agent_runtime.context import AgentContext
|
||||
from app.agent_runtime.memory import AgentMemory
|
||||
from app.agent_runtime.tool_manager import AgentToolManager
|
||||
from app.core.exceptions import WorkflowExecutionError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -95,6 +96,11 @@ class AgentRuntime:
|
||||
self.on_tool_executed = on_tool_executed
|
||||
self.on_llm_call = on_llm_call
|
||||
self._memory_context_loaded = False
|
||||
self._llm_invocations = 0
|
||||
|
||||
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
|
||||
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
|
||||
self.on_llm_invocation: Optional[Callable[[], Any]] = None
|
||||
|
||||
async def run(self, user_input: str) -> AgentResult:
|
||||
"""
|
||||
@@ -108,7 +114,7 @@ class AgentRuntime:
|
||||
|
||||
# 1. 首次运行时加载长期记忆到 system prompt
|
||||
if not self._memory_context_loaded:
|
||||
await self._inject_memory_context()
|
||||
await self._inject_memory_context(user_input)
|
||||
self._memory_context_loaded = True
|
||||
|
||||
# 2. 追加用户消息
|
||||
@@ -139,6 +145,32 @@ class AgentRuntime:
|
||||
# 裁剪过长历史
|
||||
messages = self.memory.trim_messages(self.context.messages)
|
||||
|
||||
# 预算检查:LLM 调用次数(在调用 LLM 之前检查,避免浪费额度)
|
||||
budget = self.config.budget
|
||||
if self._llm_invocations >= budget.max_llm_invocations:
|
||||
err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
|
||||
logger.warning(err)
|
||||
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
|
||||
await self.memory.save_context(user_input, err, self.context.messages)
|
||||
return AgentResult(success=False, content=err, truncated=True,
|
||||
iterations_used=self.context.iteration,
|
||||
tool_calls_made=self.context.tool_calls_made,
|
||||
steps=steps, error=err)
|
||||
|
||||
# 调用外部 LLM 预算回调(WorkflowEngine 注入,将 Agent 的 LLM 计入工作流预算)
|
||||
if self.on_llm_invocation:
|
||||
try:
|
||||
self.on_llm_invocation()
|
||||
except Exception as e:
|
||||
err = f"LLM 调用超出工作流预算: {e}"
|
||||
logger.warning(err)
|
||||
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
|
||||
await self.memory.save_context(user_input, err, self.context.messages)
|
||||
return AgentResult(success=False, content=err, truncated=True,
|
||||
iterations_used=self.context.iteration,
|
||||
tool_calls_made=self.context.tool_calls_made,
|
||||
steps=steps, error=str(e))
|
||||
|
||||
# 调用 LLM
|
||||
try:
|
||||
response = await llm.chat(
|
||||
@@ -166,6 +198,9 @@ class AgentRuntime:
|
||||
error=err_str,
|
||||
)
|
||||
|
||||
# 记录 LLM 调用次数(内部计数)
|
||||
self._llm_invocations += 1
|
||||
|
||||
# 解析工具调用
|
||||
tool_calls = self._extract_tool_calls(response)
|
||||
content = self._extract_content(response)
|
||||
@@ -247,9 +282,22 @@ class AgentRuntime:
|
||||
self.context.add_tool_result(tcid, tname, result)
|
||||
self.context.tool_calls_made += 1
|
||||
|
||||
# 预算检查:工具调用次数
|
||||
if self.context.tool_calls_made > budget.max_tool_calls:
|
||||
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
|
||||
logger.warning(err)
|
||||
steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
|
||||
content=err, tool_name=tname))
|
||||
return AgentResult(success=False, content=err, truncated=True,
|
||||
iterations_used=self.context.iteration,
|
||||
tool_calls_made=self.context.tool_calls_made,
|
||||
steps=steps, error=err)
|
||||
|
||||
if self.on_tool_executed:
|
||||
try:
|
||||
await self.on_tool_executed(tname)
|
||||
except WorkflowExecutionError:
|
||||
raise
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -284,9 +332,240 @@ class AgentRuntime:
|
||||
steps=steps,
|
||||
)
|
||||
|
||||
async def _inject_memory_context(self) -> None:
|
||||
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
    """
    Execute one agent conversation turn as a stream of SSE-style events.

    Mirrors the logic of run(), but yields a dict event at each key step
    instead of returning a single AgentResult:
    - think: the LLM responded and is about to call tools
    - tool_call: a tool is about to be executed
    - tool_result: a tool finished executing
    - final: the final answer (or max-iteration fallback)
    - error: failure / budget exceeded (carries "truncated": True for budgets)

    Args:
        user_input: The user's message for this turn.

    Yields:
        dict: Event payloads as described above. The generator returns
        (stops) after a terminal "final" or "error" event.
    """
    max_iter = max(1, self.config.llm.max_iterations)
    self.context.iteration = 0
    self.context.tool_calls_made = 0

    # 1. On first run, load long-term memory into the system prompt.
    if not self._memory_context_loaded:
        await self._inject_memory_context(user_input)
        self._memory_context_loaded = True

    # 2. Append the user message to the conversation context.
    self.context.add_user_message(user_input)

    # 3. ReAct loop setup.
    llm = _LLMClient(self.config.llm)
    tool_schemas = self.tool_manager.get_tool_schemas()
    has_tools = self.tool_manager.has_tools()
    # NOTE(review): steps is accumulated for parity with run() but is never
    # emitted or returned from the streaming path — kept to preserve any
    # AgentStep construction side effects; confirm whether it can be dropped.
    steps: List[AgentStep] = []

    # Mutable context shared with the LLM metrics callback below, so the
    # callback can report which step type / tool the call belonged to.
    llm_callback_ctx = {"step_type": "think", "tool_name": None}

    def _llm_callback(metrics: Dict[str, Any]):
        # Enrich LLM call metrics with session/step info and forward them
        # to the externally injected observer (if any).
        if self.on_llm_call:
            metrics.update({
                "session_id": self.context.session_id,
                "user_id": self.config.user_id,
                "step_type": llm_callback_ctx["step_type"],
                "tool_name": llm_callback_ctx["tool_name"],
            })
            self.on_llm_call(metrics)

    while self.context.iteration < max_iter:
        self.context.iteration += 1
        # Trim overly long history before each LLM call.
        messages = self.memory.trim_messages(self.context.messages)

        # Budget check: LLM invocation count (checked BEFORE calling the
        # LLM so we don't waste quota on a call we'd discard).
        budget = self.config.budget
        if self._llm_invocations >= budget.max_llm_invocations:
            err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
            logger.warning(err)
            yield {"type": "error", "content": err, "iteration": self.context.iteration,
                   "truncated": True}
            await self.memory.save_context(user_input, err, self.context.messages)
            return

        # External LLM budget callback (injected by WorkflowEngine so the
        # agent's LLM calls count against the workflow-level budget).
        if self.on_llm_invocation:
            try:
                self.on_llm_invocation()
            except Exception as e:
                err = f"LLM 调用超出工作流预算: {e}"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                # Persist the truncated turn, matching run()'s behavior on
                # this same budget-exceeded path.
                await self.memory.save_context(user_input, err, self.context.messages)
                return

        # Call the LLM.
        try:
            response = await llm.chat(
                messages=messages,
                # Expose tool schemas whenever tools exist; otherwise None.
                tools=tool_schemas if has_tools else None,
                iteration=self.context.iteration,
                on_completion=_llm_callback,
            )
        except Exception as e:
            err_str = str(e)
            logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
            # Retryable failure with iterations left: report and retry.
            if self.context.iteration < max_iter and self._is_retryable(err_str):
                yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}",
                       "iteration": self.context.iteration}
                continue
            yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
                   "iteration": self.context.iteration}
            return

        # Record the LLM invocation (internal counter used by the budget
        # check at the top of the loop).
        self._llm_invocations += 1

        # Parse tool calls / content / optional reasoning from the response,
        # which may be either an object or a plain dict.
        tool_calls = self._extract_tool_calls(response)
        content = self._extract_content(response)
        reasoning = getattr(response, "reasoning_content", None) or (
            response.get("reasoning_content") if isinstance(response, dict) else None
        )

        if not tool_calls:
            # The LLM returned plain text → this turn is finished.
            self.context.add_assistant_message(content)
            final_text = content or "(模型未返回有效内容)"
            yield {
                "type": "final",
                "content": final_text,
                "reasoning": reasoning,
                "iteration": self.context.iteration,
                "iterations_used": self.context.iteration,
                "tool_calls_made": self.context.tool_calls_made,
                "session_id": self.context.session_id,
            }
            await self.memory.save_context(user_input, final_text, self.context.messages)
            return

        # Tool calls requested → record the assistant message first.
        self.context.add_assistant_message(content or "", tool_calls, reasoning)

        # Pre-parse tool names and arguments for the "think" event.
        tc_names = [tc["function"]["name"] for tc in tool_calls]
        tc_args_list = []
        for tc in tool_calls:
            try:
                tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
            except (json.JSONDecodeError, TypeError):
                # Malformed / missing arguments → treat as empty input.
                tc_args_list.append({})

        # Emit the "think" event before executing any tool.
        yield {
            "type": "think",
            "content": content or f"调用工具: {', '.join(tc_names)}",
            "reasoning": reasoning,
            "tool_names": tc_names,
            "iteration": self.context.iteration,
        }

        steps.append(AgentStep(
            iteration=self.context.iteration,
            type="think",
            content=content or f"调用工具: {', '.join(tc_names)}",
            reasoning=reasoning,
            # Only attach tool name/input when there is exactly one call.
            tool_name=tc_names[0] if len(tc_names) == 1 else None,
            tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
        ))

        if self.execution_logger:
            self.execution_logger.info(
                f"Agent 调用 {len(tool_calls)} 个工具",
                data={"tool_calls": tc_names, "iteration": self.context.iteration},
            )

        # Execute the requested tools one by one.
        for tc in tool_calls:
            tfn = tc.get("function", {})
            tname = tfn.get("name", "unknown")
            # Fall back to a synthetic id when the LLM omitted one.
            tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")

            try:
                targs = json.loads(tfn.get("arguments", "{}"))
            except (json.JSONDecodeError, TypeError):
                targs = {}

            # Emit the "tool_call" event before execution.
            yield {
                "type": "tool_call",
                "name": tname,
                "input": targs,
                "iteration": self.context.iteration,
            }

            logger.info("Agent 执行工具 [%s]: %s", tname, targs)
            result = await self.tool_manager.execute(tname, targs)

            # Emit the "tool_result" event (result truncated to 500 chars
            # for the event payload; the full result still goes into the
            # conversation context below).
            yield {
                "type": "tool_result",
                "name": tname,
                "result": result[:500] + "..." if len(result) > 500 else result,
                "iteration": self.context.iteration,
            }

            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="tool_result",
                content=f"工具 {tname} 返回结果",
                tool_name=tname,
                tool_input=targs,
                tool_result=result[:500] + "..." if len(result) > 500 else result,
            ))

            self.context.add_tool_result(tcid, tname, result)
            self.context.tool_calls_made += 1

            # Budget check: tool call count.
            if self.context.tool_calls_made > budget.max_tool_calls:
                err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                return

            # Notify the external observer; workflow-level budget errors
            # propagate, anything else is deliberately best-effort.
            if self.on_tool_executed:
                try:
                    await self.on_tool_executed(tname)
                except WorkflowExecutionError:
                    raise
                except Exception:
                    pass

            if self.execution_logger:
                preview = result[:300] + "..." if len(result) > 300 else result
                self.execution_logger.info(
                    f"工具 {tname} 执行完成",
                    data={"tool_name": tname, "result_preview": preview},
                )

    # Max iterations reached without a final answer: fall back to the last
    # assistant message with content, if any.
    last_content = ""
    for m in reversed(self.context.messages):
        if m.get("role") == "assistant" and m.get("content"):
            last_content = m["content"]
            break

    logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
    await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
    yield {
        "type": "final",
        "content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
        "iteration": self.context.iteration,
        "iterations_used": self.context.iteration,
        "tool_calls_made": self.context.tool_calls_made,
        "truncated": True,
        "session_id": self.context.session_id,
    }
|
||||
|
||||
async def _inject_memory_context(self, query: str = "") -> None:
|
||||
"""加载长期记忆并注入 system prompt。"""
|
||||
mem_text = await self.memory.initialize()
|
||||
mem_text = await self.memory.initialize(query=query)
|
||||
if mem_text:
|
||||
enriched = (
|
||||
self.config.system_prompt.rstrip("\n")
|
||||
|
||||
Reference in New Issue
Block a user