feat: 向量记忆 RAG、工具市场、SSE 流式响应、前端集成与测试覆盖

- 新增 embedding_service(语义检索)、knowledge_service(RAG)、text_chunker、document_parser
- 新增 tool_registry(自定义工具注册表)并完善工具市场 API(CRUD + code/http 执行)
- 新增 agent_vector_memory / knowledge_base 模型及对应数据库表
- 实现 SSE 流式响应与 Agent 预算控制
- AgentChat.vue 集成 MainLayout 导航布局
- 完善测试体系:7 个新测试文件共 110 个测试覆盖
- 修复 conftest.py SQLite 内存数据库连接隔离问题

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-01 22:30:46 +08:00
parent 036f533881
commit 7b9e0826de
35 changed files with 4353 additions and 365 deletions

View File

@@ -13,7 +13,7 @@ from __future__ import annotations
import json
import logging
import time
from typing import Any, Callable, Dict, List, Optional, Protocol, TypedDict
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict
from app.agent_runtime.schemas import (
AgentConfig,
@@ -23,6 +23,7 @@ from app.agent_runtime.schemas import (
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
from app.core.exceptions import WorkflowExecutionError
logger = logging.getLogger(__name__)
@@ -95,6 +96,11 @@ class AgentRuntime:
self.on_tool_executed = on_tool_executed
self.on_llm_call = on_llm_call
self._memory_context_loaded = False
self._llm_invocations = 0
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
self.on_llm_invocation: Optional[Callable[[], Any]] = None
async def run(self, user_input: str) -> AgentResult:
"""
@@ -108,7 +114,7 @@ class AgentRuntime:
# 1. 首次运行时加载长期记忆到 system prompt
if not self._memory_context_loaded:
await self._inject_memory_context()
await self._inject_memory_context(user_input)
self._memory_context_loaded = True
# 2. 追加用户消息
@@ -139,6 +145,32 @@ class AgentRuntime:
# 裁剪过长历史
messages = self.memory.trim_messages(self.context.messages)
# 预算检查LLM 调用次数(在调用 LLM 之前检查,避免浪费额度)
budget = self.config.budget
if self._llm_invocations >= budget.max_llm_invocations:
err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
logger.warning(err)
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
await self.memory.save_context(user_input, err, self.context.messages)
return AgentResult(success=False, content=err, truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=err)
# 调用外部 LLM 预算回调WorkflowEngine 注入,将 Agent 的 LLM 计入工作流预算)
if self.on_llm_invocation:
try:
self.on_llm_invocation()
except Exception as e:
err = f"LLM 调用超出工作流预算: {e}"
logger.warning(err)
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
await self.memory.save_context(user_input, err, self.context.messages)
return AgentResult(success=False, content=err, truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=str(e))
# 调用 LLM
try:
response = await llm.chat(
@@ -166,6 +198,9 @@ class AgentRuntime:
error=err_str,
)
# 记录 LLM 调用次数(内部计数)
self._llm_invocations += 1
# 解析工具调用
tool_calls = self._extract_tool_calls(response)
content = self._extract_content(response)
@@ -247,9 +282,22 @@ class AgentRuntime:
self.context.add_tool_result(tcid, tname, result)
self.context.tool_calls_made += 1
# 预算检查:工具调用次数
if self.context.tool_calls_made > budget.max_tool_calls:
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
logger.warning(err)
steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
content=err, tool_name=tname))
return AgentResult(success=False, content=err, truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=err)
if self.on_tool_executed:
try:
await self.on_tool_executed(tname)
except WorkflowExecutionError:
raise
except Exception:
pass
@@ -284,9 +332,240 @@ class AgentRuntime:
steps=steps,
)
async def _inject_memory_context(self) -> None:
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
    """Execute a single Agent conversation turn, streaming SSE events.

    Mirrors ``run()`` step by step, but instead of returning an
    ``AgentResult`` it yields a dict event at each key point:

    - ``think``: the LLM decided to call tools
    - ``tool_call``: a tool is about to execute
    - ``tool_result``: a tool finished executing
    - ``final``: the final answer (or the max-iteration fallback)
    - ``error``: an LLM failure or a budget limit was hit

    Args:
        user_input: The user's message for this turn.

    Yields:
        dict: Events as described above. The generator terminates after
        a ``final`` event or a non-retryable ``error`` event.
    """
    max_iter = max(1, self.config.llm.max_iterations)
    self.context.iteration = 0
    self.context.tool_calls_made = 0

    # 1. On the first turn, load long-term memory into the system prompt.
    if not self._memory_context_loaded:
        await self._inject_memory_context(user_input)
        self._memory_context_loaded = True

    # 2. Append the user message.
    self.context.add_user_message(user_input)

    # 3. ReAct loop.
    llm = _LLMClient(self.config.llm)
    tool_schemas = self.tool_manager.get_tool_schemas()
    has_tools = self.tool_manager.has_tools()
    # Step bookkeeping kept in parallel with run(); not emitted as events.
    steps: List[AgentStep] = []

    # Mutable context so the metrics callback can report which step/tool
    # the current LLM call belongs to.
    llm_callback_ctx = {"step_type": "think", "tool_name": None}

    def _llm_callback(metrics: Dict[str, Any]):
        # Enrich LLM metrics with session/step context before forwarding.
        if self.on_llm_call:
            metrics.update({
                "session_id": self.context.session_id,
                "user_id": self.config.user_id,
                "step_type": llm_callback_ctx["step_type"],
                "tool_name": llm_callback_ctx["tool_name"],
            })
            self.on_llm_call(metrics)

    def _clip(text: str, limit: int) -> str:
        # Truncate long tool output for event payloads and log previews.
        return text[:limit] + "..." if len(text) > limit else text

    while self.context.iteration < max_iter:
        self.context.iteration += 1

        # Trim overly long history before each LLM call.
        messages = self.memory.trim_messages(self.context.messages)

        # Budget check: LLM invocation count (checked *before* calling the
        # LLM to avoid wasting quota).
        budget = self.config.budget
        if self._llm_invocations >= budget.max_llm_invocations:
            err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
            logger.warning(err)
            yield {"type": "error", "content": err, "iteration": self.context.iteration,
                   "truncated": True}
            await self.memory.save_context(user_input, err, self.context.messages)
            return

        # External LLM budget callback (injected by WorkflowEngine so the
        # Agent's LLM calls count against the workflow budget).
        if self.on_llm_invocation:
            try:
                self.on_llm_invocation()
            except Exception as e:
                err = f"LLM 调用超出工作流预算: {e}"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                # Persist the turn — consistent with the internal budget
                # path above and with run()'s workflow-budget path.
                await self.memory.save_context(user_input, err, self.context.messages)
                return

        # Call the LLM.
        try:
            response = await llm.chat(
                messages=messages,
                # Simplified: the original redundant ternary
                # (has_tools and iteration == 1 branch) reduced to this
                # expression in every case.
                tools=tool_schemas if has_tools else None,
                iteration=self.context.iteration,
                on_completion=_llm_callback,
            )
        except Exception as e:
            err_str = str(e)
            logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
            # Retry transient failures while iterations remain.
            if self.context.iteration < max_iter and self._is_retryable(err_str):
                yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}",
                       "iteration": self.context.iteration}
                continue
            yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
                   "iteration": self.context.iteration}
            return

        # Count the LLM invocation (internal counter).
        self._llm_invocations += 1

        # Parse tool calls and text content from the response.
        tool_calls = self._extract_tool_calls(response)
        content = self._extract_content(response)
        reasoning = getattr(response, "reasoning_content", None) or (
            response.get("reasoning_content") if isinstance(response, dict) else None
        )

        if not tool_calls:
            # The LLM answered directly with text -> finish the turn.
            self.context.add_assistant_message(content)
            final_text = content or "(模型未返回有效内容)"
            yield {
                "type": "final",
                "content": final_text,
                "reasoning": reasoning,
                "iteration": self.context.iteration,
                "iterations_used": self.context.iteration,
                "tool_calls_made": self.context.tool_calls_made,
                "session_id": self.context.session_id,
            }
            await self.memory.save_context(user_input, final_text, self.context.messages)
            return

        # Tool calls present -> record the assistant message first so the
        # tool results can reference it.
        self.context.add_assistant_message(content or "", tool_calls, reasoning)

        # Emit the "think" event describing the planned tool calls.
        tc_names = [tc["function"]["name"] for tc in tool_calls]
        tc_args_list = []
        for tc in tool_calls:
            try:
                tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
            except (json.JSONDecodeError, TypeError):
                # Malformed arguments degrade to an empty dict.
                tc_args_list.append({})
        yield {
            "type": "think",
            "content": content or f"调用工具: {', '.join(tc_names)}",
            "reasoning": reasoning,
            "tool_names": tc_names,
            "iteration": self.context.iteration,
        }
        steps.append(AgentStep(
            iteration=self.context.iteration,
            type="think",
            content=content or f"调用工具: {', '.join(tc_names)}",
            reasoning=reasoning,
            tool_name=tc_names[0] if len(tc_names) == 1 else None,
            tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
        ))
        if self.execution_logger:
            self.execution_logger.info(
                f"Agent 调用 {len(tool_calls)} 个工具",
                data={"tool_calls": tc_names, "iteration": self.context.iteration},
            )

        # Execute each requested tool in order.
        for tc in tool_calls:
            tfn = tc.get("function", {})
            tname = tfn.get("name", "unknown")
            tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
            try:
                targs = json.loads(tfn.get("arguments", "{}"))
            except (json.JSONDecodeError, TypeError):
                targs = {}

            # Emit the tool_call event before execution.
            yield {
                "type": "tool_call",
                "name": tname,
                "input": targs,
                "iteration": self.context.iteration,
            }

            logger.info("Agent 执行工具 [%s]: %s", tname, targs)
            result = await self.tool_manager.execute(tname, targs)

            # Emit the tool_result event (payload truncated to 500 chars).
            yield {
                "type": "tool_result",
                "name": tname,
                "result": _clip(result, 500),
                "iteration": self.context.iteration,
            }
            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="tool_result",
                content=f"工具 {tname} 返回结果",
                tool_name=tname,
                tool_input=targs,
                tool_result=_clip(result, 500),
            ))
            self.context.add_tool_result(tcid, tname, result)
            self.context.tool_calls_made += 1

            # Budget check: tool call count.
            if self.context.tool_calls_made > budget.max_tool_calls:
                err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                return

            if self.on_tool_executed:
                try:
                    await self.on_tool_executed(tname)
                except WorkflowExecutionError:
                    # Workflow budget violations must propagate to the engine.
                    raise
                except Exception:
                    # Observability callbacks are best-effort; never break the run.
                    pass

            if self.execution_logger:
                preview = _clip(result, 300)
                self.execution_logger.info(
                    f"工具 {tname} 执行完成",
                    data={"tool_name": tname, "result_preview": preview},
                )

    # Max iterations reached: fall back to the last assistant text, if any.
    last_content = ""
    for m in reversed(self.context.messages):
        if m.get("role") == "assistant" and m.get("content"):
            last_content = m["content"]
            break
    logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
    await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
    yield {
        "type": "final",
        "content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
        "iteration": self.context.iteration,
        "iterations_used": self.context.iteration,
        "tool_calls_made": self.context.tool_calls_made,
        "truncated": True,
        "session_id": self.context.session_id,
    }
async def _inject_memory_context(self, query: str = "") -> None:
"""加载长期记忆并注入 system prompt。"""
mem_text = await self.memory.initialize()
mem_text = await self.memory.initialize(query=query)
if mem_text:
enriched = (
self.config.system_prompt.rstrip("\n")