feat: Phase 4 - LLM/Agent fallback chain, cross-agent knowledge sharing, async agent execution

- 4.1 Fallback chain: fallback_llm in AgentLLMConfig retries a failed API call on an alternate model (config shape sketched below); DAG nodes gain a fallback_agent for agent-level failover
- 4.2 Knowledge sharing: GlobalKnowledge model with embedding-based semantic search; tool names are auto-extracted as tags after each execution
- 4.3 Async execution: execute_agent_task fully implemented on top of AgentRuntime; the scheduler takes a dual path for workflow vs. non-workflow agents
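
A minimal sketch of the fallback_llm shape these changes read; only the model / api_key / base_url keys are confirmed by the diff below, the surrounding fields are illustrative:

    # Illustrative config; only fallback_llm's model/api_key/base_url keys
    # are confirmed by the diff.
    llm_config = {
        "model": "gpt-4o",                        # primary model (example)
        "fallback_llm": {
            "model": "deepseek-chat",             # retried on primary failure
            "api_key": "sk-fallback-...",
            "base_url": "https://api.deepseek.com",
        },
    }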

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: renjianbo
Date: 2026-05-05 00:27:54 +08:00
Parent: 7e00b027d4
Commit: 592bca4f39
7 changed files with 461 additions and 70 deletions


@@ -268,6 +268,8 @@ class AgentRuntime:
             iterations_used=self.context.iteration,
             tool_calls_made=self.context.tool_calls_made,
         )
+        # Extract knowledge into the global knowledge pool (cross-agent sharing)
+        await self._extract_global_knowledge(user_input, final_text, steps)
         return AgentResult(
             success=True,
             content=final_text,
@@ -774,6 +776,35 @@ class AgentRuntime:
         if db:
             db.close()

+    async def _extract_global_knowledge(
+        self, user_input: str, final_answer: str, steps: List[AgentStep],
+    ) -> None:
+        """Extract knowledge from an agent run into the global knowledge pool (shared across agents)."""
+        # Use the names of invoked tools as tags (deduplicated, first-seen order)
+        tool_names = list(dict.fromkeys(
+            s.tool_name for s in (steps or [])
+            if s.tool_name and s.type == "tool_result"
+        ))
+        tags = tool_names[:5] if tool_names else ["对话"]  # default tag: "conversation"
+        # Key information: user-question summary plus answer highlights (first 500 chars)
+        content = (
+            f"问题: {user_input[:300]}\n"  # "Question: ..."
+            f"回答要点: {final_answer[:500]}"  # "Answer highlights: ..."
+        )
+        if tool_names:
+            content += f"\n使用工具: {', '.join(tool_names[:5])}"  # "Tools used: ..."
+        source_agent_id = self.config.name if self.config.name != "default_agent" else ""
+        source_user_id = self.config.user_id or ""
+        await self.memory.save_global_knowledge(
+            content=content,
+            source_agent_id=source_agent_id,
+            source_user_id=source_user_id,
+            tags=tags,
+        )

     async def _self_review(self, content: str, task_context: str = "") -> dict:
         """Output-quality self-check: have a lightweight LLM judge the output; returns {score, passed, issues, suggestions}."""
         criteria = (
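
The dict.fromkeys idiom in _extract_global_knowledge deduplicates tool names while keeping first-seen order, which a plain set would lose:

    tool_names = ["web_search", "calculator", "web_search", "code_runner"]
    print(list(dict.fromkeys(tool_names)))  # ['web_search', 'calculator', 'code_runner']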
@@ -957,15 +988,7 @@ class _LLMClient:
         iteration: int = 1,
         on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
     ) -> Any:
-        """
-        Call the LLM.
-        Prefers llm_service.call_openai_with_tools (supports ReAct-style repeated tool calls),
-        but to keep the outer ReAct loop from clashing with an inner one:
-        - Round 1: plain chat (no inner ReAct); the outer AgentRuntime drives the loop
-        - Later rounds: plain chat as well, only appending tool results
-        """
-        # Call the OpenAI/DeepSeek SDK directly; AgentRuntime drives the loop
+        """Call the LLM; if the primary model fails, automatically retry on fallback_llm."""
         from openai import AsyncOpenAI
         from app.core.config import settings
@@ -974,17 +997,36 @@ class _LLMClient:
         base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
         if not api_key or api_key == "your-openai-api-key":
             # Try DeepSeek credentials instead
             api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
             base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
         if not api_key:
             raise ValueError("未配置 API Key")  # "API key not configured"
+        return await self._do_chat(
+            api_key=api_key, base_url=base_url, model=self._config.model,
+            messages=messages, tools=tools, iteration=iteration,
+            on_completion=on_completion,
+        )
+
+    async def _do_chat(
+        self,
+        api_key: str,
+        base_url: str,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        iteration: int = 1,
+        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
+        _is_fallback: bool = False,
+    ) -> Any:
+        from openai import AsyncOpenAI
+        from app.core.config import settings
+
         client = AsyncOpenAI(api_key=api_key, base_url=base_url)
         kwargs: Dict[str, Any] = {
-            "model": self._config.model,
+            "model": model,
             "messages": messages,
             "temperature": self._config.temperature,
             "timeout": self._config.request_timeout,
@@ -1015,60 +1057,77 @@ class _LLMClient:
kwargs["tool_choice"] = "auto"
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
if self._config.cache_enabled and not tools:
if self._config.cache_enabled and not tools and not _is_fallback:
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
cached = await _llm_cache_get(cache_key)
if cached is not None:
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
# 构造简易 message 对象(含 content 字段即可)
class _CachedMsg:
content = cached
tool_calls = None
return _CachedMsg()
start_time = time.perf_counter()
last_error = None
try:
response = await client.chat.completions.create(**kwargs)
-            latency_ms = int((time.perf_counter() - start_time) * 1000)
-            message = response.choices[0].message
-            # Write to cache (only when no tools are used)
-            if self._config.cache_enabled and not tools and message.content:
-                ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
-                await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
-            # Extract token usage
-            usage = getattr(response, "usage", None)
-            prompt_tokens = usage.prompt_tokens if usage else 0
-            completion_tokens = usage.completion_tokens if usage else 0
-            total_tokens = usage.total_tokens if usage else 0
-            # Invoke the completion callback
-            if on_completion:
-                on_completion({
-                    "model": self._config.model,
-                    "provider": self._config.provider,
-                    "prompt_tokens": prompt_tokens or 0,
-                    "completion_tokens": completion_tokens or 0,
-                    "total_tokens": total_tokens or 0,
-                    "latency_ms": latency_ms,
-                    "iteration_number": iteration,
-                    "status": "success",
-                })
-            return message
         except Exception as e:
-            latency_ms = int((time.perf_counter() - start_time) * 1000)
-            if on_completion:
-                on_completion({
-                    "model": self._config.model,
-                    "provider": self._config.provider,
-                    "prompt_tokens": 0,
-                    "completion_tokens": 0,
-                    "total_tokens": 0,
-                    "latency_ms": latency_ms,
-                    "iteration_number": iteration,
-                    "status": "error",
-                    "error_message": str(e),
-                })
+            last_error = e
+            # Graceful degradation: try fallback_llm when the primary model fails
+            fallback = self._config.fallback_llm
+            if fallback and isinstance(fallback, dict) and not _is_fallback:
+                fb_model = fallback.get("model")
+                fb_api_key = fallback.get("api_key")
+                fb_base_url = fallback.get("base_url")
+                if fb_model and (fb_api_key or fb_base_url):
+                    logger.warning(
+                        "主模型 %s 调用失败,降级到 %s: %s",  # "primary model %s failed, falling back to %s: %s"
+                        model, fb_model, str(e)[:200],
+                    )
+                    # Report the primary model's failure first
+                    latency_ms = int((time.perf_counter() - start_time) * 1000)
+                    if on_completion:
+                        on_completion({
+                            "model": model, "provider": self._config.provider,
+                            "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
+                            "latency_ms": latency_ms, "iteration_number": iteration,
+                            "status": "fallback", "error_message": str(e),
+                        })
+                    return await self._do_chat(
+                        api_key=fb_api_key or api_key,
+                        base_url=fb_base_url or base_url,
+                        model=fb_model,
+                        messages=messages, tools=tools,
+                        iteration=iteration, on_completion=on_completion,
+                        _is_fallback=True,
+                    )
             raise
+        latency_ms = int((time.perf_counter() - start_time) * 1000)
+        message = response.choices[0].message
+        # Write to cache (only when no tools are used)
+        if self._config.cache_enabled and not tools and message.content:
+            ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
+            await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
+        # Extract token usage
+        usage = getattr(response, "usage", None)
+        prompt_tokens = usage.prompt_tokens if usage else 0
+        completion_tokens = usage.completion_tokens if usage else 0
+        total_tokens = usage.total_tokens if usage else 0
+        # Invoke the completion callback
+        if on_completion:
+            on_completion({
+                "model": model,
+                "provider": self._config.provider,
+                "prompt_tokens": prompt_tokens or 0,
+                "completion_tokens": completion_tokens or 0,
+                "total_tokens": total_tokens or 0,
+                "latency_ms": latency_ms,
+                "iteration_number": iteration,
+                "status": "success",
+            })
+        return message
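
Stripped of SDK details, the retry path is a one-shot recursive fallback; a self-contained toy version (hypothetical names, not the repository's classes) showing how _is_fallback=True caps the chain at a single retry:

    import asyncio

    async def do_chat(model, on_completion, fallback=None, _is_fallback=False):
        try:
            if model == "gpt-4o":  # simulate a primary-model outage
                raise RuntimeError("503 upstream error")
        except Exception as e:
            if fallback and not _is_fallback:
                on_completion({"model": model, "status": "fallback", "error_message": str(e)})
                return await do_chat(fallback, on_completion, None, _is_fallback=True)
            raise
        on_completion({"model": model, "status": "success"})
        return f"answer from {model}"

    events = []
    print(asyncio.run(do_chat("gpt-4o", events.append, fallback="deepseek-chat")))
    # -> "answer from deepseek-chat"; events hold one "fallback" record, then one "success"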