feat: Phase 4 - LLM/Agent fallback chain, cross-agent knowledge sharing, async agent execution

- 4.1 Fallback chain: fallback_llm in AgentLLMConfig retries a failed API call on an alternate model (config shape sketched below); DAG nodes gain a fallback_agent for agent-level failover
- 4.2 Knowledge sharing: GlobalKnowledge model with embedding-based semantic search; tool names are auto-extracted as tags after each execution
- 4.3 Async execution: execute_agent_task fully implemented on top of AgentRuntime; the scheduler takes a dual path for workflow vs. non-workflow agents
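
A minimal sketch of the fallback_llm shape these changes read; only the model / api_key / base_url keys are confirmed by the diff below, the surrounding fields are illustrative:

    # Illustrative config; only fallback_llm's model/api_key/base_url keys
    # are confirmed by the diff.
    llm_config = {
        "model": "gpt-4o",                        # primary model (example)
        "fallback_llm": {
            "model": "deepseek-chat",             # retried on primary failure
            "api_key": "sk-fallback-...",
            "base_url": "https://api.deepseek.com",
        },
    }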

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: renjianbo
Date: 2026-05-05 00:27:54 +08:00
Parent: 7e00b027d4
Commit: 592bca4f39
7 changed files with 461 additions and 70 deletions


@@ -268,6 +268,8 @@ class AgentRuntime:
             iterations_used=self.context.iteration,
             tool_calls_made=self.context.tool_calls_made,
         )
+        # Extract knowledge into the global knowledge pool (cross-agent sharing)
+        await self._extract_global_knowledge(user_input, final_text, steps)
         return AgentResult(
             success=True,
             content=final_text,
@@ -774,6 +776,35 @@ class AgentRuntime:
         if db:
             db.close()

+    async def _extract_global_knowledge(
+        self, user_input: str, final_answer: str, steps: List[AgentStep],
+    ) -> None:
+        """Extract knowledge from an agent run into the global knowledge pool (shared across agents)."""
+        # Use the names of invoked tools as tags (deduplicated, first-seen order)
+        tool_names = list(dict.fromkeys(
+            s.tool_name for s in (steps or [])
+            if s.tool_name and s.type == "tool_result"
+        ))
+        tags = tool_names[:5] if tool_names else ["对话"]  # default tag: "conversation"
+        # Key information: user-question summary plus answer highlights (first 500 chars)
+        content = (
+            f"问题: {user_input[:300]}\n"  # "Question: ..."
+            f"回答要点: {final_answer[:500]}"  # "Answer highlights: ..."
+        )
+        if tool_names:
+            content += f"\n使用工具: {', '.join(tool_names[:5])}"  # "Tools used: ..."
+        source_agent_id = self.config.name if self.config.name != "default_agent" else ""
+        source_user_id = self.config.user_id or ""
+        await self.memory.save_global_knowledge(
+            content=content,
+            source_agent_id=source_agent_id,
+            source_user_id=source_user_id,
+            tags=tags,
+        )

     async def _self_review(self, content: str, task_context: str = "") -> dict:
         """Output-quality self-check: have a lightweight LLM judge the output; returns {score, passed, issues, suggestions}."""
         criteria = (
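
The dict.fromkeys idiom in _extract_global_knowledge deduplicates tool names while keeping first-seen order, which a plain set would lose:

    tool_names = ["web_search", "calculator", "web_search", "code_runner"]
    print(list(dict.fromkeys(tool_names)))  # ['web_search', 'calculator', 'code_runner']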
@@ -957,15 +988,7 @@ class _LLMClient:
         iteration: int = 1,
         on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
     ) -> Any:
-        """
-        Call the LLM.
-        Prefers llm_service.call_openai_with_tools (supports ReAct-style repeated tool calls),
-        but to keep the outer ReAct loop from clashing with an inner one:
-        - Round 1: plain chat (no inner ReAct); the outer AgentRuntime drives the loop
-        - Later rounds: plain chat as well, only appending tool results
-        """
-        # Call the OpenAI/DeepSeek SDK directly; AgentRuntime drives the loop
+        """Call the LLM; if the primary model fails, automatically retry on fallback_llm."""
         from openai import AsyncOpenAI
         from app.core.config import settings
@@ -974,17 +997,36 @@ class _LLMClient:
         base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
         if not api_key or api_key == "your-openai-api-key":
             # Try DeepSeek credentials instead
             api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
             base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
         if not api_key:
             raise ValueError("未配置 API Key")  # "API key not configured"
+        return await self._do_chat(
+            api_key=api_key, base_url=base_url, model=self._config.model,
+            messages=messages, tools=tools, iteration=iteration,
+            on_completion=on_completion,
+        )
+
+    async def _do_chat(
+        self,
+        api_key: str,
+        base_url: str,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        iteration: int = 1,
+        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
+        _is_fallback: bool = False,
+    ) -> Any:
+        from openai import AsyncOpenAI
+        from app.core.config import settings
+
         client = AsyncOpenAI(api_key=api_key, base_url=base_url)
         kwargs: Dict[str, Any] = {
-            "model": self._config.model,
+            "model": model,
             "messages": messages,
             "temperature": self._config.temperature,
             "timeout": self._config.request_timeout,
@@ -1015,60 +1057,77 @@ class _LLMClient:
kwargs["tool_choice"] = "auto"
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
if self._config.cache_enabled and not tools:
if self._config.cache_enabled and not tools and not _is_fallback:
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
cached = await _llm_cache_get(cache_key)
if cached is not None:
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
# 构造简易 message 对象(含 content 字段即可)
class _CachedMsg:
content = cached
tool_calls = None
return _CachedMsg()
start_time = time.perf_counter()
last_error = None
try:
response = await client.chat.completions.create(**kwargs)
-            latency_ms = int((time.perf_counter() - start_time) * 1000)
-            message = response.choices[0].message
-            # Write to cache (only when no tools are used)
-            if self._config.cache_enabled and not tools and message.content:
-                ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
-                await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
-            # Extract token usage
-            usage = getattr(response, "usage", None)
-            prompt_tokens = usage.prompt_tokens if usage else 0
-            completion_tokens = usage.completion_tokens if usage else 0
-            total_tokens = usage.total_tokens if usage else 0
-            # Invoke the completion callback
-            if on_completion:
-                on_completion({
-                    "model": self._config.model,
-                    "provider": self._config.provider,
-                    "prompt_tokens": prompt_tokens or 0,
-                    "completion_tokens": completion_tokens or 0,
-                    "total_tokens": total_tokens or 0,
-                    "latency_ms": latency_ms,
-                    "iteration_number": iteration,
-                    "status": "success",
-                })
-            return message
         except Exception as e:
-            latency_ms = int((time.perf_counter() - start_time) * 1000)
-            if on_completion:
-                on_completion({
-                    "model": self._config.model,
-                    "provider": self._config.provider,
-                    "prompt_tokens": 0,
-                    "completion_tokens": 0,
-                    "total_tokens": 0,
-                    "latency_ms": latency_ms,
-                    "iteration_number": iteration,
-                    "status": "error",
-                    "error_message": str(e),
-                })
+            last_error = e
+            # Graceful degradation: try fallback_llm when the primary model fails
+            fallback = self._config.fallback_llm
+            if fallback and isinstance(fallback, dict) and not _is_fallback:
+                fb_model = fallback.get("model")
+                fb_api_key = fallback.get("api_key")
+                fb_base_url = fallback.get("base_url")
+                if fb_model and (fb_api_key or fb_base_url):
+                    logger.warning(
+                        "主模型 %s 调用失败,降级到 %s: %s",  # "primary model %s failed, falling back to %s: %s"
+                        model, fb_model, str(e)[:200],
+                    )
+                    # Report the primary model's failure first
+                    latency_ms = int((time.perf_counter() - start_time) * 1000)
+                    if on_completion:
+                        on_completion({
+                            "model": model, "provider": self._config.provider,
+                            "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
+                            "latency_ms": latency_ms, "iteration_number": iteration,
+                            "status": "fallback", "error_message": str(e),
+                        })
+                    return await self._do_chat(
+                        api_key=fb_api_key or api_key,
+                        base_url=fb_base_url or base_url,
+                        model=fb_model,
+                        messages=messages, tools=tools,
+                        iteration=iteration, on_completion=on_completion,
+                        _is_fallback=True,
+                    )
             raise
+        latency_ms = int((time.perf_counter() - start_time) * 1000)
+        message = response.choices[0].message
+        # Write to cache (only when no tools are used)
+        if self._config.cache_enabled and not tools and message.content:
+            ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
+            await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
+        # Extract token usage
+        usage = getattr(response, "usage", None)
+        prompt_tokens = usage.prompt_tokens if usage else 0
+        completion_tokens = usage.completion_tokens if usage else 0
+        total_tokens = usage.total_tokens if usage else 0
+        # Invoke the completion callback
+        if on_completion:
+            on_completion({
+                "model": model,
+                "provider": self._config.provider,
+                "prompt_tokens": prompt_tokens or 0,
+                "completion_tokens": completion_tokens or 0,
+                "total_tokens": total_tokens or 0,
+                "latency_ms": latency_ms,
+                "iteration_number": iteration,
+                "status": "success",
+            })
+        return message
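
Stripped of SDK details, the retry path is a one-shot recursive fallback; a self-contained toy version (hypothetical names, not the repository's classes) showing how _is_fallback=True caps the chain at a single retry:

    import asyncio

    async def do_chat(model, on_completion, fallback=None, _is_fallback=False):
        try:
            if model == "gpt-4o":  # simulate a primary-model outage
                raise RuntimeError("503 upstream error")
        except Exception as e:
            if fallback and not _is_fallback:
                on_completion({"model": model, "status": "fallback", "error_message": str(e)})
                return await do_chat(fallback, on_completion, None, _is_fallback=True)
            raise
        on_completion({"model": model, "status": "success"})
        return f"answer from {model}"

    events = []
    print(asyncio.run(do_chat("gpt-4o", events.append, fallback="deepseek-chat")))
    # -> "answer from deepseek-chat"; events hold one "fallback" record, then one "success"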