feat: Phase 4 - LLM/Agent fallback chain, cross-agent knowledge sharing, async agent execution

- 4.1 Fallback chain: LLM fallback_llm config in AgentLLMConfig, retry with alternate model on API failure; Agent fallback_agent in DAG nodes
- 4.2 Knowledge sharing: GlobalKnowledge model with embedding-based semantic search, auto-extraction of tool names as tags after execution
- 4.3 Async execution: execute_agent_task fully implemented with AgentRuntime, scheduler dual-path for workflow/non-workflow agents

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-05 00:27:54 +08:00
parent 7e00b027d4
commit 592bca4f39
7 changed files with 461 additions and 70 deletions

View File

@@ -268,6 +268,8 @@ class AgentRuntime:
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
)
# 提取知识到全局知识池(Agent 间知识共享)
await self._extract_global_knowledge(user_input, final_text, steps)
return AgentResult(
success=True,
content=final_text,
@@ -774,6 +776,35 @@ class AgentRuntime:
if db:
db.close()
async def _extract_global_knowledge(
    self, user_input: str, final_answer: str, steps: List[AgentStep],
) -> None:
    """Distill this run's outcome into the global knowledge pool shared across agents.

    Builds a short summary (truncated question + truncated answer, plus the
    tool list when tools were used) and persists it via the memory layer.
    Tool names double as tags; a generic tag is used for tool-free runs.
    """
    # Unique tool names in first-seen order, taken from tool_result steps.
    seen_tools: List[str] = []
    for step in steps or []:
        if step.type == "tool_result" and step.tool_name and step.tool_name not in seen_tools:
            seen_tools.append(step.tool_name)
    tags = seen_tools[:5] if seen_tools else ["对话"]
    # Key facts: question summary + answer highlights (truncated).
    pieces = [f"问题: {user_input[:300]}", f"回答要点: {final_answer[:500]}"]
    content = "\n".join(pieces)
    if seen_tools:
        content += f"\n使用工具: {', '.join(seen_tools[:5])}"
    # The placeholder agent name is not a meaningful source attribution.
    agent_id = "" if self.config.name == "default_agent" else self.config.name
    user_id = self.config.user_id or ""
    await self.memory.save_global_knowledge(
        content=content,
        source_agent_id=agent_id,
        source_user_id=user_id,
        tags=tags,
    )
async def _self_review(self, content: str, task_context: str = "") -> dict:
"""输出质量自检:用轻量 LLM 评判输出,返回 {score, passed, issues, suggestions}。"""
criteria = (
@@ -957,15 +988,7 @@ class _LLMClient:
iteration: int = 1,
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
) -> Any:
"""
调用 LLM。
优先使用 llm_service.call_openai_with_tools支持 ReAct 的多次工具调用)。
但为避免外层 ReAct 与内部 ReAct 冲突:
- 第 1 轮:使用标准 chat无内部 ReAct由外层 AgentRuntime 控制循环
- 后续轮次:也使用标准 chat仅追加工具结果
"""
# 直接用 OpenAI/DeepSeek SDK 调用,由 AgentRuntime 控制循环
"""调用 LLM主模型失败时自动切换 fallback_llm 重试。"""
from openai import AsyncOpenAI
from app.core.config import settings
@@ -974,17 +997,36 @@ class _LLMClient:
base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
if not api_key or api_key == "your-openai-api-key":
# 尝试 DeepSeek
api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
if not api_key:
raise ValueError("未配置 API Key")
return await self._do_chat(
api_key=api_key, base_url=base_url, model=self._config.model,
messages=messages, tools=tools, iteration=iteration,
on_completion=on_completion,
)
async def _do_chat(
self,
api_key: str,
base_url: str,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
iteration: int = 1,
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
_is_fallback: bool = False,
) -> Any:
from openai import AsyncOpenAI
from app.core.config import settings
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
kwargs: Dict[str, Any] = {
"model": self._config.model,
"model": model,
"messages": messages,
"temperature": self._config.temperature,
"timeout": self._config.request_timeout,
@@ -1015,60 +1057,77 @@ class _LLMClient:
kwargs["tool_choice"] = "auto"
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
if self._config.cache_enabled and not tools:
if self._config.cache_enabled and not tools and not _is_fallback:
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
cached = await _llm_cache_get(cache_key)
if cached is not None:
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
# 构造简易 message 对象(含 content 字段即可)
class _CachedMsg:
content = cached
tool_calls = None
return _CachedMsg()
start_time = time.perf_counter()
last_error = None
try:
response = await client.chat.completions.create(**kwargs)
latency_ms = int((time.perf_counter() - start_time) * 1000)
message = response.choices[0].message
# 缓存写入(仅不用工具时)
if self._config.cache_enabled and not tools and message.content:
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
# 提取 token 用量
usage = getattr(response, "usage", None)
prompt_tokens = usage.prompt_tokens if usage else 0
completion_tokens = usage.completion_tokens if usage else 0
total_tokens = usage.total_tokens if usage else 0
# 调用完成回调
if on_completion:
on_completion({
"model": self._config.model,
"provider": self._config.provider,
"prompt_tokens": prompt_tokens or 0,
"completion_tokens": completion_tokens or 0,
"total_tokens": total_tokens or 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "success",
})
return message
except Exception as e:
latency_ms = int((time.perf_counter() - start_time) * 1000)
if on_completion:
on_completion({
"model": self._config.model,
"provider": self._config.provider,
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "error",
"error_message": str(e),
})
last_error = e
# 降级回退:主模型失败时尝试 fallback_llm
fallback = self._config.fallback_llm
if fallback and isinstance(fallback, dict) and not _is_fallback:
fb_model = fallback.get("model")
fb_api_key = fallback.get("api_key")
fb_base_url = fallback.get("base_url")
if fb_model and (fb_api_key or fb_base_url):
logger.warning(
"主模型 %s 调用失败,降级到 %s: %s",
model, fb_model, str(e)[:200],
)
# 先报告主模型失败
latency_ms = int((time.perf_counter() - start_time) * 1000)
if on_completion:
on_completion({
"model": model, "provider": self._config.provider,
"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
"latency_ms": latency_ms, "iteration_number": iteration,
"status": "fallback", "error_message": str(e),
})
return await self._do_chat(
api_key=fb_api_key or api_key,
base_url=fb_base_url or base_url,
model=fb_model,
messages=messages, tools=tools,
iteration=iteration, on_completion=on_completion,
_is_fallback=True,
)
raise
latency_ms = int((time.perf_counter() - start_time) * 1000)
message = response.choices[0].message
# 缓存写入(仅不用工具时)
if self._config.cache_enabled and not tools and message.content:
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
# 提取 token 用量
usage = getattr(response, "usage", None)
prompt_tokens = usage.prompt_tokens if usage else 0
completion_tokens = usage.completion_tokens if usage else 0
total_tokens = usage.total_tokens if usage else 0
# 调用完成回调
if on_completion:
on_completion({
"model": model,
"provider": self._config.provider,
"prompt_tokens": prompt_tokens or 0,
"completion_tokens": completion_tokens or 0,
"total_tokens": total_tokens or 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "success",
})
return message

View File

@@ -95,6 +95,11 @@ class AgentMemory:
if vector_text:
parts.append(vector_text)
# 3. 全局知识检索:从 GlobalKnowledge 表加载相关条目
global_text = await self._global_knowledge_search(query)
if global_text:
parts.append(global_text)
return "\n\n".join(parts) if parts else ""
async def _vector_search(self, query: str = "") -> str:
@@ -171,6 +176,119 @@ class AgentMemory:
if db:
db.close()
async def _global_knowledge_search(self, query: str = "") -> str:
    """Retrieve related entries from the GlobalKnowledge table.

    With a non-blank query, runs embedding similarity search over the 50 most
    recent entries that carry an embedding; otherwise formats the 5 newest
    entries. Returns a formatted text section, or "" when nothing matches or
    on any error (best-effort: failures are logged, never raised).
    """
    from app.models.agent import GlobalKnowledge
    session: Optional[Session] = None
    try:
        session = SessionLocal()
        recent_rows = (
            session.query(GlobalKnowledge)
            .order_by(GlobalKnowledge.created_at.desc())
            .limit(50)
            .all()
        )
        if not recent_rows:
            return ""
        if not (query and query.strip()):
            # No query: surface the 5 most recent global knowledge entries.
            lines = ["## 全局知识库(最近)"]
            for idx, row in enumerate(recent_rows[:5], 1):
                suffix = f" [{(', '.join(row.tags[:3]))}]" if row.tags else ""
                lines.append(f"{idx}.{suffix} {row.content[:500]}")
            return "\n".join(lines)
        # Query path: build vector entries from rows that have a usable embedding.
        candidates: List[VectorEntry] = []
        for row in recent_rows:
            if not row.embedding:
                continue
            try:
                vec = embedding_service.deserialize_embedding(row.embedding)
            except Exception:
                vec = []  # corrupt embedding → skip the row, keep searching
            if not vec:
                continue
            candidates.append({
                "id": row.id,
                "scope_kind": "global",
                "scope_id": "global",
                "content_text": row.content,
                "embedding": vec,
                "metadata": {
                    "source_agent_id": row.source_agent_id,
                    "tags": row.tags or [],
                },
            })
        if not candidates:
            return ""
        query_vec = await embedding_service.generate_embedding(query)
        if not query_vec:
            return ""
        hits = await embedding_service.similarity_search(
            query_vec, candidates, top_k=min(5, len(candidates)),
        )
        if not hits:
            return ""
        lines = ["## 全局知识库"]
        for idx, hit in enumerate(hits, 1):
            hit_tags = hit.get("metadata", {}).get("tags", [])
            suffix = f" [{', '.join(hit_tags[:3])}]" if hit_tags else ""
            lines.append(f"{idx}.{suffix} {hit.get('content_text', '')[:500]}")
        return "\n".join(lines)
    except Exception as e:
        logger.warning("全局知识检索失败: %s", e)
        return ""
    finally:
        if session:
            session.close()
async def save_global_knowledge(
    self, content: str, source_agent_id: str = "",
    source_user_id: str = "", tags: Optional[List[str]] = None,
) -> None:
    """Persist one knowledge entry into the global knowledge pool.

    Entries shorter than 20 characters are skipped. Embedding generation is
    best-effort — on failure the entry is stored without a vector. DB errors
    are logged and rolled back rather than raised.
    """
    from app.models.agent import GlobalKnowledge
    if not content or len(content) < 20:
        return
    # Best-effort embedding; an entry without a vector is still retrievable.
    serialized = ""
    try:
        vector = await embedding_service.generate_embedding(content)
        if vector:
            serialized = embedding_service.serialize_embedding(vector) or ""
    except Exception:
        pass
    session: Optional[Session] = None
    try:
        session = SessionLocal()
        entry = GlobalKnowledge(
            content=content[:2000],
            embedding=serialized or None,
            source_agent_id=source_agent_id or "",
            source_user_id=source_user_id or "",
            tags=tags or [],
            scope_kind=self.scope_kind,
            scope_id=self.scope_id or "global",
        )
        session.add(entry)
        session.commit()
        logger.info("已写入全局知识: agent=%s tags=%s", source_agent_id, tags)
    except Exception as e:
        logger.warning("保存全局知识失败: %s", e)
        if session:
            session.rollback()
    finally:
        if session:
            session.close()
async def save_context(
self, user_message: str, assistant_reply: str,
messages: Optional[List[Dict[str, Any]]] = None,

View File

@@ -46,6 +46,7 @@ class AgentLLMConfig(BaseModel):
self_review_threshold: float = 0.6 # self-review pass threshold (0-1)
cache_enabled: bool = False # LLM response cache (off by default; semantic caching is risky)
cache_ttl_ms: int = 300000 # LLM cache TTL, default 5 minutes
fallback_llm: Optional[Dict[str, Any]] = None # fallback model config: {provider, model, api_key, base_url}
class AgentBudgetConfig(BaseModel):