feat: Phase 4 - LLM/Agent fallback chain, cross-agent knowledge sharing, async agent execution

- 4.1 Fallback chain: LLM fallback_llm config in AgentLLMConfig, retry with alternate model on API failure; Agent fallback_agent in DAG nodes
- 4.2 Knowledge sharing: GlobalKnowledge model with embedding-based semantic search, auto-extraction of tool names as tags after execution
- 4.3 Async execution: execute_agent_task fully implemented with AgentRuntime, scheduler dual-path for workflow/non-workflow agents

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-05 00:27:54 +08:00
parent 7e00b027d4
commit 592bca4f39
7 changed files with 461 additions and 70 deletions

View File

@@ -268,6 +268,8 @@ class AgentRuntime:
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
)
# 提取知识到全局知识池(Agent 间知识共享)
await self._extract_global_knowledge(user_input, final_text, steps)
return AgentResult(
success=True,
content=final_text,
@@ -774,6 +776,35 @@ class AgentRuntime:
if db:
db.close()
async def _extract_global_knowledge(
    self, user_input: str, final_answer: str, steps: List[AgentStep],
) -> None:
    """Distill this run's outcome into the global knowledge pool shared across agents.

    Builds a short summary (truncated question + truncated answer, plus the
    tool list when tools were used) and persists it via the memory layer.
    Tool names double as tags; a generic tag is used for tool-free runs.
    """
    # Unique tool names in first-seen order, taken from tool_result steps.
    seen_tools: List[str] = []
    for step in steps or []:
        if step.type == "tool_result" and step.tool_name and step.tool_name not in seen_tools:
            seen_tools.append(step.tool_name)
    tags = seen_tools[:5] if seen_tools else ["对话"]
    # Key facts: question summary + answer highlights (truncated).
    pieces = [f"问题: {user_input[:300]}", f"回答要点: {final_answer[:500]}"]
    content = "\n".join(pieces)
    if seen_tools:
        content += f"\n使用工具: {', '.join(seen_tools[:5])}"
    # The placeholder agent name is not a meaningful source attribution.
    agent_id = "" if self.config.name == "default_agent" else self.config.name
    user_id = self.config.user_id or ""
    await self.memory.save_global_knowledge(
        content=content,
        source_agent_id=agent_id,
        source_user_id=user_id,
        tags=tags,
    )
async def _self_review(self, content: str, task_context: str = "") -> dict:
"""输出质量自检:用轻量 LLM 评判输出,返回 {score, passed, issues, suggestions}。"""
criteria = (
@@ -957,15 +988,7 @@ class _LLMClient:
iteration: int = 1,
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
) -> Any:
"""
调用 LLM。
优先使用 llm_service.call_openai_with_tools支持 ReAct 的多次工具调用)。
但为避免外层 ReAct 与内部 ReAct 冲突:
- 第 1 轮:使用标准 chat无内部 ReAct由外层 AgentRuntime 控制循环
- 后续轮次:也使用标准 chat仅追加工具结果
"""
# 直接用 OpenAI/DeepSeek SDK 调用,由 AgentRuntime 控制循环
"""调用 LLM主模型失败时自动切换 fallback_llm 重试。"""
from openai import AsyncOpenAI
from app.core.config import settings
@@ -974,17 +997,36 @@ class _LLMClient:
base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
if not api_key or api_key == "your-openai-api-key":
# 尝试 DeepSeek
api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
if not api_key:
raise ValueError("未配置 API Key")
return await self._do_chat(
api_key=api_key, base_url=base_url, model=self._config.model,
messages=messages, tools=tools, iteration=iteration,
on_completion=on_completion,
)
async def _do_chat(
self,
api_key: str,
base_url: str,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
iteration: int = 1,
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
_is_fallback: bool = False,
) -> Any:
from openai import AsyncOpenAI
from app.core.config import settings
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
kwargs: Dict[str, Any] = {
"model": self._config.model,
"model": model,
"messages": messages,
"temperature": self._config.temperature,
"timeout": self._config.request_timeout,
@@ -1015,60 +1057,77 @@ class _LLMClient:
kwargs["tool_choice"] = "auto"
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
if self._config.cache_enabled and not tools:
if self._config.cache_enabled and not tools and not _is_fallback:
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
cached = await _llm_cache_get(cache_key)
if cached is not None:
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
# 构造简易 message 对象(含 content 字段即可)
class _CachedMsg:
content = cached
tool_calls = None
return _CachedMsg()
start_time = time.perf_counter()
last_error = None
try:
response = await client.chat.completions.create(**kwargs)
latency_ms = int((time.perf_counter() - start_time) * 1000)
message = response.choices[0].message
# 缓存写入(仅不用工具时)
if self._config.cache_enabled and not tools and message.content:
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
# 提取 token 用量
usage = getattr(response, "usage", None)
prompt_tokens = usage.prompt_tokens if usage else 0
completion_tokens = usage.completion_tokens if usage else 0
total_tokens = usage.total_tokens if usage else 0
# 调用完成回调
if on_completion:
on_completion({
"model": self._config.model,
"provider": self._config.provider,
"prompt_tokens": prompt_tokens or 0,
"completion_tokens": completion_tokens or 0,
"total_tokens": total_tokens or 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "success",
})
return message
except Exception as e:
latency_ms = int((time.perf_counter() - start_time) * 1000)
if on_completion:
on_completion({
"model": self._config.model,
"provider": self._config.provider,
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "error",
"error_message": str(e),
})
last_error = e
# 降级回退:主模型失败时尝试 fallback_llm
fallback = self._config.fallback_llm
if fallback and isinstance(fallback, dict) and not _is_fallback:
fb_model = fallback.get("model")
fb_api_key = fallback.get("api_key")
fb_base_url = fallback.get("base_url")
if fb_model and (fb_api_key or fb_base_url):
logger.warning(
"主模型 %s 调用失败,降级到 %s: %s",
model, fb_model, str(e)[:200],
)
# 先报告主模型失败
latency_ms = int((time.perf_counter() - start_time) * 1000)
if on_completion:
on_completion({
"model": model, "provider": self._config.provider,
"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
"latency_ms": latency_ms, "iteration_number": iteration,
"status": "fallback", "error_message": str(e),
})
return await self._do_chat(
api_key=fb_api_key or api_key,
base_url=fb_base_url or base_url,
model=fb_model,
messages=messages, tools=tools,
iteration=iteration, on_completion=on_completion,
_is_fallback=True,
)
raise
latency_ms = int((time.perf_counter() - start_time) * 1000)
message = response.choices[0].message
# 缓存写入(仅不用工具时)
if self._config.cache_enabled and not tools and message.content:
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
# 提取 token 用量
usage = getattr(response, "usage", None)
prompt_tokens = usage.prompt_tokens if usage else 0
completion_tokens = usage.completion_tokens if usage else 0
total_tokens = usage.total_tokens if usage else 0
# 调用完成回调
if on_completion:
on_completion({
"model": model,
"provider": self._config.provider,
"prompt_tokens": prompt_tokens or 0,
"completion_tokens": completion_tokens or 0,
"total_tokens": total_tokens or 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "success",
})
return message

View File

@@ -95,6 +95,11 @@ class AgentMemory:
if vector_text:
parts.append(vector_text)
# 3. 全局知识检索:从 GlobalKnowledge 表加载相关条目
global_text = await self._global_knowledge_search(query)
if global_text:
parts.append(global_text)
return "\n\n".join(parts) if parts else ""
async def _vector_search(self, query: str = "") -> str:
@@ -171,6 +176,119 @@ class AgentMemory:
if db:
db.close()
async def _global_knowledge_search(self, query: str = "") -> str:
    """Retrieve related entries from the GlobalKnowledge table.

    With a non-blank query, runs embedding similarity search over the 50 most
    recent entries that carry an embedding; otherwise formats the 5 newest
    entries. Returns a formatted text section, or "" when nothing matches or
    on any error (best-effort: failures are logged, never raised).
    """
    from app.models.agent import GlobalKnowledge
    session: Optional[Session] = None
    try:
        session = SessionLocal()
        recent_rows = (
            session.query(GlobalKnowledge)
            .order_by(GlobalKnowledge.created_at.desc())
            .limit(50)
            .all()
        )
        if not recent_rows:
            return ""
        if not (query and query.strip()):
            # No query: surface the 5 most recent global knowledge entries.
            lines = ["## 全局知识库(最近)"]
            for idx, row in enumerate(recent_rows[:5], 1):
                suffix = f" [{(', '.join(row.tags[:3]))}]" if row.tags else ""
                lines.append(f"{idx}.{suffix} {row.content[:500]}")
            return "\n".join(lines)
        # Query path: build vector entries from rows that have a usable embedding.
        candidates: List[VectorEntry] = []
        for row in recent_rows:
            if not row.embedding:
                continue
            try:
                vec = embedding_service.deserialize_embedding(row.embedding)
            except Exception:
                vec = []  # corrupt embedding → skip the row, keep searching
            if not vec:
                continue
            candidates.append({
                "id": row.id,
                "scope_kind": "global",
                "scope_id": "global",
                "content_text": row.content,
                "embedding": vec,
                "metadata": {
                    "source_agent_id": row.source_agent_id,
                    "tags": row.tags or [],
                },
            })
        if not candidates:
            return ""
        query_vec = await embedding_service.generate_embedding(query)
        if not query_vec:
            return ""
        hits = await embedding_service.similarity_search(
            query_vec, candidates, top_k=min(5, len(candidates)),
        )
        if not hits:
            return ""
        lines = ["## 全局知识库"]
        for idx, hit in enumerate(hits, 1):
            hit_tags = hit.get("metadata", {}).get("tags", [])
            suffix = f" [{', '.join(hit_tags[:3])}]" if hit_tags else ""
            lines.append(f"{idx}.{suffix} {hit.get('content_text', '')[:500]}")
        return "\n".join(lines)
    except Exception as e:
        logger.warning("全局知识检索失败: %s", e)
        return ""
    finally:
        if session:
            session.close()
async def save_global_knowledge(
    self, content: str, source_agent_id: str = "",
    source_user_id: str = "", tags: Optional[List[str]] = None,
) -> None:
    """Persist one knowledge entry into the global knowledge pool.

    Entries shorter than 20 characters are skipped. Embedding generation is
    best-effort — on failure the entry is stored without a vector. DB errors
    are logged and rolled back rather than raised.
    """
    from app.models.agent import GlobalKnowledge
    if not content or len(content) < 20:
        return
    # Best-effort embedding; an entry without a vector is still retrievable.
    serialized = ""
    try:
        vector = await embedding_service.generate_embedding(content)
        if vector:
            serialized = embedding_service.serialize_embedding(vector) or ""
    except Exception:
        pass
    session: Optional[Session] = None
    try:
        session = SessionLocal()
        entry = GlobalKnowledge(
            content=content[:2000],
            embedding=serialized or None,
            source_agent_id=source_agent_id or "",
            source_user_id=source_user_id or "",
            tags=tags or [],
            scope_kind=self.scope_kind,
            scope_id=self.scope_id or "global",
        )
        session.add(entry)
        session.commit()
        logger.info("已写入全局知识: agent=%s tags=%s", source_agent_id, tags)
    except Exception as e:
        logger.warning("保存全局知识失败: %s", e)
        if session:
            session.rollback()
    finally:
        if session:
            session.close()
async def save_context(
self, user_message: str, assistant_reply: str,
messages: Optional[List[Dict[str, Any]]] = None,

View File

@@ -46,6 +46,7 @@ class AgentLLMConfig(BaseModel):
self_review_threshold: float = 0.6 # self-review pass threshold (0-1)
cache_enabled: bool = False # LLM response cache (off by default; semantic caching is risky)
cache_ttl_ms: int = 300000 # LLM cache TTL, default 5 minutes
fallback_llm: Optional[Dict[str, Any]] = None # fallback model config: {provider, model, api_key, base_url}
class AgentBudgetConfig(BaseModel):