feat: Phase 4 - LLM/Agent fallback chain, cross-agent knowledge sharing, async agent execution
- 4.1 Fallback chain: LLM fallback via a fallback_llm config in AgentLLMConfig (retry with an alternate model on API failure); agent fallback via fallback_agent in DAG nodes
- 4.2 Knowledge sharing: GlobalKnowledge model with embedding-based semantic search; tool names auto-extracted as tags after execution
- 4.3 Async execution: execute_agent_task fully implemented on top of AgentRuntime; the scheduler takes a dual path for workflow and non-workflow agents

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
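For 4.1, the diff below reads fallback_llm as a plain dict with model, api_key, and base_url keys off self._config. A minimal sketch of what such a config might look like, assuming a dataclass-style AgentLLMConfig whose fields are inferred from the attributes the diff touches (the exact class definition is not part of this commit):

```python
# Hypothetical sketch of an AgentLLMConfig carrying a fallback_llm dict.
# Only the fallback_llm keys (model, api_key, base_url) are confirmed by
# the diff below; the remaining fields are assumptions for illustration.
from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass
class AgentLLMConfig:
    model: str = "gpt-4o-mini"
    provider: str = "openai"
    api_key: str = ""
    base_url: str = ""
    temperature: float = 0.7
    request_timeout: float = 60.0
    cache_enabled: bool = True
    cache_ttl_ms: int = 60_000
    # Secondary model tried once when the primary call raises.
    fallback_llm: Optional[Dict[str, Any]] = None

config = AgentLLMConfig(
    model="gpt-4o",
    fallback_llm={
        "model": "deepseek-chat",
        "api_key": "sk-...",
        "base_url": "https://api.deepseek.com",
    },
)
```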
@@ -268,6 +268,8 @@ class AgentRuntime:
                 iterations_used=self.context.iteration,
                 tool_calls_made=self.context.tool_calls_made,
             )
+            # Extract knowledge into the global knowledge pool (cross-agent knowledge sharing)
+            await self._extract_global_knowledge(user_input, final_text, steps)
             return AgentResult(
                 success=True,
                 content=final_text,
@@ -774,6 +776,35 @@ class AgentRuntime:
             if db:
                 db.close()
 
+    async def _extract_global_knowledge(
+        self, user_input: str, final_answer: str, steps: List[AgentStep],
+    ) -> None:
+        """Extract knowledge from an agent run and write it to the global knowledge pool (shared across agents)."""
+        # Extract tool-call names to use as tags
+        tool_names = list(dict.fromkeys(
+            s.tool_name for s in (steps or [])
+            if s.tool_name and s.type == "tool_result"
+        ))
+        tags = tool_names[:5] if tool_names else ["conversation"]
+
+        # Key information: a user-question summary plus answer highlights (first 500 chars)
+        content = (
+            f"Question: {user_input[:300]}\n"
+            f"Answer highlights: {final_answer[:500]}"
+        )
+        if tool_names:
+            content += f"\nTools used: {', '.join(tool_names[:5])}"
+
+        source_agent_id = self.config.name if self.config.name != "default_agent" else ""
+        source_user_id = self.config.user_id or ""
+
+        await self.memory.save_global_knowledge(
+            content=content,
+            source_agent_id=source_agent_id,
+            source_user_id=source_user_id,
+            tags=tags,
+        )
+
     async def _self_review(self, content: str, task_context: str = "") -> dict:
         """Output quality self-check: judge the output with a lightweight LLM; returns {score, passed, issues, suggestions}."""
         criteria = (
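The tag extraction in _extract_global_knowledge leans on dict.fromkeys to deduplicate tool names while keeping first-seen order. A self-contained sketch of just that step; AgentStep here is a stand-in dataclass modeling only the two fields the logic reads, not the project's real type:

```python
# Stand-in for the project's AgentStep; only the two fields that the
# extraction logic reads are modeled here.
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class AgentStep:
    type: str
    tool_name: Optional[str] = None

steps: List[AgentStep] = [
    AgentStep(type="tool_result", tool_name="web_search"),
    AgentStep(type="thought"),
    AgentStep(type="tool_result", tool_name="calculator"),
    AgentStep(type="tool_result", tool_name="web_search"),  # duplicate, dropped
]

# dict preserves insertion order (guaranteed since Python 3.7), so
# dict.fromkeys deduplicates while keeping first-seen order, unlike set().
tool_names = list(dict.fromkeys(
    s.tool_name for s in steps
    if s.tool_name and s.type == "tool_result"
))
assert tool_names == ["web_search", "calculator"]
tags = tool_names[:5] if tool_names else ["conversation"]
```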
@@ -957,15 +988,7 @@ class _LLMClient:
         iteration: int = 1,
         on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
     ) -> Any:
-        """
-        Call the LLM.
-        Prefer llm_service.call_openai_with_tools (supports ReAct-style repeated tool calls).
-
-        To avoid the outer ReAct loop conflicting with the inner one:
-        - Round 1: use standard chat (no inner ReAct); the outer AgentRuntime drives the loop
-        - Later rounds: also use standard chat, only appending tool results
-        """
-        # Call the OpenAI/DeepSeek SDK directly; AgentRuntime drives the loop
+        """Call the LLM; if the primary model fails, retry automatically with fallback_llm."""
         from openai import AsyncOpenAI
         from app.core.config import settings
 
@@ -974,17 +997,36 @@ class _LLMClient:
         base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
 
         if not api_key or api_key == "your-openai-api-key":
             # Try DeepSeek
             api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
             base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
 
         if not api_key:
             raise ValueError("No API key configured")
 
+        return await self._do_chat(
+            api_key=api_key, base_url=base_url, model=self._config.model,
+            messages=messages, tools=tools, iteration=iteration,
+            on_completion=on_completion,
+        )
+
+    async def _do_chat(
+        self,
+        api_key: str,
+        base_url: str,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        iteration: int = 1,
+        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
+        _is_fallback: bool = False,
+    ) -> Any:
+        from openai import AsyncOpenAI
+        from app.core.config import settings
+
         client = AsyncOpenAI(api_key=api_key, base_url=base_url)
 
         kwargs: Dict[str, Any] = {
-            "model": self._config.model,
+            "model": model,
             "messages": messages,
             "temperature": self._config.temperature,
             "timeout": self._config.request_timeout,
@@ -1015,60 +1057,77 @@ class _LLMClient:
             kwargs["tool_choice"] = "auto"
 
         # LLM response cache (only when no tools are in use, to avoid complex serialization)
-        if self._config.cache_enabled and not tools:
+        if self._config.cache_enabled and not tools and not _is_fallback:
             cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
             cached = await _llm_cache_get(cache_key)
             if cached is not None:
                 logger.info("LLM response cache hit: model=%s", kwargs.get("model"))
                 # Build a minimal message object (a content attribute is all that's needed)
                 class _CachedMsg:
                     content = cached
                     tool_calls = None
                 return _CachedMsg()
 
         start_time = time.perf_counter()
+        last_error = None
         try:
             response = await client.chat.completions.create(**kwargs)
-            latency_ms = int((time.perf_counter() - start_time) * 1000)
-            message = response.choices[0].message
-
-            # Cache write (only when no tools are in use)
-            if self._config.cache_enabled and not tools and message.content:
-                ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
-                await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
-
-            # Extract token usage
-            usage = getattr(response, "usage", None)
-            prompt_tokens = usage.prompt_tokens if usage else 0
-            completion_tokens = usage.completion_tokens if usage else 0
-            total_tokens = usage.total_tokens if usage else 0
-
-            # Fire the completion callback
-            if on_completion:
-                on_completion({
-                    "model": self._config.model,
-                    "provider": self._config.provider,
-                    "prompt_tokens": prompt_tokens or 0,
-                    "completion_tokens": completion_tokens or 0,
-                    "total_tokens": total_tokens or 0,
-                    "latency_ms": latency_ms,
-                    "iteration_number": iteration,
-                    "status": "success",
-                })
-
-            return message
         except Exception as e:
             latency_ms = int((time.perf_counter() - start_time) * 1000)
             if on_completion:
                 on_completion({
                     "model": self._config.model,
                     "provider": self._config.provider,
                     "prompt_tokens": 0,
                     "completion_tokens": 0,
                     "total_tokens": 0,
                     "latency_ms": latency_ms,
                     "iteration_number": iteration,
                     "status": "error",
                     "error_message": str(e),
                 })
+            last_error = e
+            # Graceful fallback: try fallback_llm when the primary model fails
+            fallback = self._config.fallback_llm
+            if fallback and isinstance(fallback, dict) and not _is_fallback:
+                fb_model = fallback.get("model")
+                fb_api_key = fallback.get("api_key")
+                fb_base_url = fallback.get("base_url")
+                if fb_model and (fb_api_key or fb_base_url):
+                    logger.warning(
+                        "Primary model %s failed, falling back to %s: %s",
+                        model, fb_model, str(e)[:200],
+                    )
+                    # Report the primary model's failure first
+                    latency_ms = int((time.perf_counter() - start_time) * 1000)
+                    if on_completion:
+                        on_completion({
+                            "model": model, "provider": self._config.provider,
+                            "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
+                            "latency_ms": latency_ms, "iteration_number": iteration,
+                            "status": "fallback", "error_message": str(e),
+                        })
+                    return await self._do_chat(
+                        api_key=fb_api_key or api_key,
+                        base_url=fb_base_url or base_url,
+                        model=fb_model,
+                        messages=messages, tools=tools,
+                        iteration=iteration, on_completion=on_completion,
+                        _is_fallback=True,
+                    )
             raise
+
+        latency_ms = int((time.perf_counter() - start_time) * 1000)
+        message = response.choices[0].message
+
+        # Cache write (only when no tools are in use)
+        if self._config.cache_enabled and not tools and message.content:
+            ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
+            await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
+
+        # Extract token usage
+        usage = getattr(response, "usage", None)
+        prompt_tokens = usage.prompt_tokens if usage else 0
+        completion_tokens = usage.completion_tokens if usage else 0
+        total_tokens = usage.total_tokens if usage else 0
+
+        # Fire the completion callback
+        if on_completion:
+            on_completion({
+                "model": model,
+                "provider": self._config.provider,
+                "prompt_tokens": prompt_tokens or 0,
+                "completion_tokens": completion_tokens or 0,
+                "total_tokens": total_tokens or 0,
+                "latency_ms": latency_ms,
+                "iteration_number": iteration,
+                "status": "success",
+            })
+
+        return message
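Net effect of the hunk above: the success path moves out of the try block, and the except path gets a single fallback retry guarded by _is_fallback (which also keeps fallback responses out of the cache). A stripped-down, runnable sketch of the same single-retry pattern; call_model and chat_with_fallback are illustrative names, not the project's API:

```python
# Minimal sketch of the single-retry fallback pattern used above.
# call_model is a placeholder for the real SDK call; the fallback dict
# mirrors the fallback_llm shape (model / api_key / base_url) in the diff.
import asyncio
from typing import Any, Dict, Optional

async def call_model(model: str, prompt: str) -> str:
    # Simulate the primary provider being down.
    if model == "primary-model":
        raise RuntimeError("simulated API failure")
    return f"[{model}] answer to: {prompt}"

async def chat_with_fallback(
    prompt: str,
    model: str = "primary-model",
    fallback: Optional[Dict[str, Any]] = None,
    _is_fallback: bool = False,
) -> str:
    try:
        return await call_model(model, prompt)
    except Exception:
        # Retry exactly once on the fallback model; the _is_fallback flag
        # prevents a second level of recursion if the fallback also fails.
        if fallback and fallback.get("model") and not _is_fallback:
            return await chat_with_fallback(
                prompt, model=fallback["model"], _is_fallback=True,
            )
        raise

print(asyncio.run(chat_with_fallback(
    "ping", fallback={"model": "backup-model"},
)))  # -> "[backup-model] answer to: ping"
```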