""" Agent 蜂群 (Swarm) — Leader/Teammate 并行协作引擎。 参考 Claude Code: - src/tools/AgentTool/ — 子 Agent 生成与执行 - src/tools/AgentTool/forkSubagent.ts — Fork 模式(后台并行) - buildInAgents.ts — 内置 Agent 类型(explore/plan/verify) 核心概念: - SwarmLeader: 接收用户输入 → 分解为子任务 → 分配给 Teammates → 汇总结果 - SwarmTeammate: 独立执行单个子任务的 Agent,可通过 Mailbox 与其他 Agent 通信 - SwarmMailbox: Agent 间消息传递(发布/订阅模式) - 并行执行: 无依赖的任务通过 asyncio.gather 并发运行 与现有 Orchestrator 的区别: - Orchestrator: 预定义编排模式 (route/sequential/debate/pipeline/graph) - Swarm: Leader 自主决策,动态分解任务,Agent 间可通信 """ from __future__ import annotations import asyncio import json import logging import time import uuid from dataclasses import dataclass, field from enum import Enum from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Set from pydantic import BaseModel, Field from app.agent_runtime.schemas import ( AgentConfig, AgentResult, AgentStep, AgentLLMConfig, AgentToolConfig, AgentMemoryConfig, ) logger = logging.getLogger(__name__) # ──────────────────────────── 枚举 / 配置 ──────────────────────────── class SwarmMode(str, Enum): PARALLEL = "parallel" # 所有 Teammate 并发执行 PIPELINE = "pipeline" # 流水线:A→B→C DEBATE = "debate" # 多方辩论后汇总 LEADER_ONLY = "leader_only" # Leader 自行处理(不生成 Teammate) class TaskStatus(str, Enum): PENDING = "pending" RUNNING = "running" DONE = "done" FAILED = "failed" class SwarmConfig(BaseModel): """蜂群配置""" mode: SwarmMode = SwarmMode.PARALLEL max_teammates: int = Field(default=5, ge=1, le=20, description="最大 Teammate 数量") timeout_ms: int = Field(default=300_000, description="单个 Teammate 超时(毫秒)") total_timeout_ms: int = Field(default=600_000, description="整个 Swarm 总超时") leader_model: str = Field(default="deepseek-v4-pro", description="Leader 使用的模型") teammate_model: str = Field(default="deepseek-v4-flash", description="Teammate 默认模型") mailbox_enabled: bool = Field(default=True, description="启用 Agent 间消息传递") leader_aggregation: bool = Field(default=True, description="Leader 汇总所有结果") retry_failed: bool = Field(default=True, description="失败任务是否重试") # ──────────────────────────── Mailbox ──────────────────────────── @dataclass class MailboxMessage: """Agent 间消息""" id: str from_agent: str # 发送者 agent_id to_agent: str # 接收者 agent_id("*" = 广播) content: str timestamp: float = field(default_factory=time.time) class SwarmMailbox: """Agent 间消息传递系统(发布/订阅模式)。 每个 Teammate 执行过程中可以把发现/中间结果写入 Mailbox, 其他 Teammate 可以读取相关消息来避免重复工作或协调行动。 """ def __init__(self): self._messages: List[MailboxMessage] = [] self._lock = asyncio.Lock() async def send(self, from_agent: str, to_agent: str, content: str) -> MailboxMessage: """发送消息。to_agent="*" 为广播。""" msg = MailboxMessage( id=str(uuid.uuid4())[:8], from_agent=from_agent, to_agent=to_agent, content=content, ) async with self._lock: self._messages.append(msg) logger.debug("Mailbox: %s → %s: %s", from_agent, to_agent, content[:80]) return msg async def receive(self, agent_id: str, since: float = 0) -> List[MailboxMessage]: """接收发给该 Agent 的消息(含广播)。""" async with self._lock: return [ m for m in self._messages if m.timestamp >= since and (m.to_agent == agent_id or m.to_agent == "*") ] async def broadcast(self, from_agent: str, content: str) -> MailboxMessage: """向所有 Agent 广播消息。""" return await self.send(from_agent, "*", content) def snapshot(self) -> List[Dict[str, Any]]: """返回消息列表快照(用于序列化)。""" return [ {"id": m.id, "from": m.from_agent, "to": m.to_agent, "content": m.content[:500], "timestamp": m.timestamp} for m in self._messages ] # ──────────────────────────── Task ──────────────────────────── class SwarmTask(BaseModel): """蜂群中的子任务""" id: str = Field(default_factory=lambda: str(uuid.uuid4())) description: str = Field(..., description="任务描述") assigned_agent_id: Optional[str] = Field(default=None, description="分配的 Agent ID") dependencies: List[str] = Field(default_factory=list, description="依赖的任务 ID 列表") context_hint: str = Field(default="", description="额外上下文提示") status: TaskStatus = TaskStatus.PENDING result: Optional[str] = None error: Optional[str] = None iterations_used: int = 0 tool_calls_made: int = 0 duration_ms: int = 0 # ──────────────────────────── Leader ──────────────────────────── _LEADER_DECOMPOSE_PROMPT = """你是一个任务分解专家。将用户的问题拆解为多个可并行执行的子任务。 用户问题: {user_input} {context_hint} 请返回 JSON 格式(不要 markdown 包裹): {{ "mode": "parallel", "sub_tasks": [ {{ "description": "子任务描述(清晰具体,包含所需上下文)", "dependencies": [], "context_hint": "额外说明" }} ] }} 规则: - 每个子任务应该是独立的、可单独完成的 - 标注依赖关系(如果任务 B 需要任务 A 的结果) - 子任务数量不超过 {max_tasks} 个 - 模式可选: "parallel" (全部并行) 或 "pipeline" (需要顺序执行) - 如果用户问题很简单不需要分解,返回空 sub_tasks 列表""" _LEADER_AGGREGATE_PROMPT = """你是一个信息汇总专家。将多个子任务的执行结果整合为统一回答。 用户原始问题: {user_input} 各子任务执行结果: {task_results} 请综合以上信息,给出一个完整、有条理的回答。如果结果之间有矛盾,说明矛盾所在。 直接输出最终回答,不要用 JSON 包裹。""" class SwarmLeader: """蜂群 Leader — 负责任务分解、分发和结果汇总。""" def __init__( self, config: SwarmConfig, llm_client: Any, # _LLMClient ): self.config = config self.llm = llm_client async def decompose(self, user_input: str, context_hint: str = "") -> List[SwarmTask]: """使用 LLM 将用户输入分解为子任务列表。""" prompt = _LEADER_DECOMPOSE_PROMPT.format( user_input=user_input, context_hint=context_hint or "无", max_tasks=self.config.max_teammates, ) try: response = await self.llm.chat( messages=[{"role": "user", "content": prompt}], tools=None, iteration=0, ) plan = self._parse_json(response.content or "") except Exception as e: logger.warning("Leader 任务分解失败: %s,退化为单任务模式", e) plan = {"mode": "parallel", "sub_tasks": []} if not plan or not plan.get("sub_tasks"): # 无需分解,Leader 自行处理 return [] tasks = [] for i, t in enumerate(plan.get("sub_tasks", [])): if i >= self.config.max_teammates: break task = SwarmTask( description=t.get("description", f"子任务 {i+1}"), dependencies=t.get("dependencies", []), context_hint=t.get("context_hint", ""), ) tasks.append(task) logger.info("Leader 分解完成: %d 个子任务 (mode=%s)", len(tasks), plan.get("mode", "parallel")) return tasks async def aggregate(self, user_input: str, tasks: List[SwarmTask]) -> str: """汇总所有子任务结果。""" task_results_parts = [] for t in tasks: status = "✓" if t.status == TaskStatus.DONE else "✗" result_text = t.result[:1000] if t.result else "(无输出)" task_results_parts.append( f"[{status}] {t.description}\n结果: {result_text}" ) task_results_text = "\n\n".join(task_results_parts) prompt = _LEADER_AGGREGATE_PROMPT.format( user_input=user_input, task_results=task_results_text, ) try: response = await self.llm.chat( messages=[{"role": "user", "content": prompt}], tools=None, iteration=0, ) return response.content or "汇总失败" except Exception as e: logger.error("Leader 汇总失败: %s", e) # 降级:直接拼接 return "\n\n".join( f"## {t.description}\n{t.result or '无输出'}" for t in tasks if t.status == TaskStatus.DONE ) @staticmethod def _parse_json(text: str) -> Optional[Dict[str, Any]]: """从 LLM 输出中提取 JSON。""" import re cleaned = text.strip() cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned) cleaned = re.sub(r'\s*```$', '', cleaned) try: return json.loads(cleaned) except json.JSONDecodeError: pass m = re.search(r'\{[\s\S]*\}', cleaned) if m: try: return json.loads(m.group(0)) except json.JSONDecodeError: pass return None # ──────────────────────────── Teammate ──────────────────────────── @dataclass class TeammateResult: """单个 Teammate 的执行结果""" task_id: str agent_id: str agent_name: str success: bool output: str iterations_used: int tool_calls_made: int duration_ms: int error: Optional[str] = None steps: List[Dict[str, Any]] = field(default_factory=list) class SwarmTeammate: """蜂群 Teammate — 执行单个子任务的 Agent 包装器。""" def __init__( self, agent_id: str, agent_name: str, config: AgentConfig, mailbox: Optional[SwarmMailbox] = None, ): self.agent_id = agent_id self.agent_name = agent_name self.config = config self.mailbox = mailbox async def execute( self, task: SwarmTask, swarm_context: str = "", on_llm_call: Optional[Callable] = None, ) -> TeammateResult: """执行子任务。""" from app.agent_runtime import AgentRuntime task.status = TaskStatus.RUNNING start = time.time() # 构建增强的输入:包含 Swarm 上下文 + Mailbox 消息 enhanced_input = task.description extra_context: List[str] = [] if swarm_context: extra_context.append(f"[Swarm 上下文]\n{swarm_context}") # 读取 Mailbox 中相关消息 if self.mailbox: msgs = await self.mailbox.receive(self.agent_id) if msgs: mailbox_text = "\n".join( f"[{m.from_agent}]: {m.content[:300]}" for m in msgs[-5:] # 最近 5 条 ) extra_context.append(f"[Mailbox 消息]\n{mailbox_text}") if extra_context: enhanced_input = "\n\n".join(extra_context) + "\n\n---\n任务:\n" + enhanced_input try: runtime = AgentRuntime(config=self.config, on_llm_call=on_llm_call) result = await asyncio.wait_for( runtime.run(enhanced_input), timeout=self.config.llm.request_timeout, ) task.result = result.content task.status = TaskStatus.DONE if result.success else TaskStatus.FAILED task.iterations_used = result.iterations_used task.tool_calls_made = result.tool_calls_made task.error = result.error # 将重要发现写入 Mailbox(供其他 Teammate 参考) if self.mailbox and result.success and result.content: await self._share_findings(task, result) return TeammateResult( task_id=task.id, agent_id=self.agent_id, agent_name=self.agent_name, success=result.success, output=result.content, iterations_used=result.iterations_used, tool_calls_made=result.tool_calls_made, duration_ms=int((time.time() - start) * 1000), error=result.error, steps=[s.model_dump() for s in result.steps] if result.steps else [], ) except asyncio.TimeoutError: task.status = TaskStatus.FAILED task.error = f"超时({self.config.llm.request_timeout}s)" return TeammateResult( task_id=task.id, agent_id=self.agent_id, agent_name=self.agent_name, success=False, output="", iterations_used=0, tool_calls_made=0, duration_ms=int((time.time() - start) * 1000), error=task.error, ) except Exception as e: task.status = TaskStatus.FAILED task.error = str(e) logger.error("Teammate %s 执行失败: %s", self.agent_name, e) return TeammateResult( task_id=task.id, agent_id=self.agent_id, agent_name=self.agent_name, success=False, output="", iterations_used=0, tool_calls_made=0, duration_ms=int((time.time() - start) * 1000), error=str(e), ) async def _share_findings(self, task: SwarmTask, result: AgentResult) -> None: """将完成的任务结果广播到 Mailbox。""" if not self.mailbox: return summary = ( f"完成任务: {task.description}\n" f"关键发现: {result.content[:300]}" ) await self.mailbox.broadcast(self.agent_id, summary) # ──────────────────────────── Swarm Runtime ──────────────────────────── class SwarmResult(BaseModel): """蜂群执行结果""" success: bool = True final_answer: str = "" mode: SwarmMode = SwarmMode.PARALLEL tasks: List[SwarmTask] = Field(default_factory=list) teammate_results: List[Dict[str, Any]] = Field(default_factory=list) mailbox_messages: List[Dict[str, Any]] = Field(default_factory=list) total_duration_ms: int = 0 total_iterations: int = 0 total_tool_calls: int = 0 error: Optional[str] = None class SwarmRuntime: """Agent 蜂群运行时。 完整生命周期: 1. Leader 分解用户输入 → 子任务列表 2. 并行执行无依赖的子任务(asyncio.gather) 3. 检查 Mailbox 消息,有依赖的先等依赖完成 4. Leader 汇总所有结果 → 最终回答 用法:: swarm = SwarmRuntime(config=SwarmConfig(mode=SwarmMode.PARALLEL)) result = await swarm.run("帮我做三件事: ...") """ def __init__( self, config: Optional[SwarmConfig] = None, leader_config: Optional[AgentConfig] = None, teammate_configs: Optional[List[AgentConfig]] = None, on_llm_call: Optional[Callable] = None, ): self.config = config or SwarmConfig() self.leader_config = leader_config or AgentConfig( name="SwarmLeader", llm=AgentLLMConfig(model=self.config.leader_model, temperature=0.3, max_iterations=10), ) self.teammate_configs = teammate_configs or [] self.on_llm_call = on_llm_call self.mailbox = SwarmMailbox() if self.config.mailbox_enabled else None async def run(self, user_input: str) -> SwarmResult: """运行蜂群。""" from app.agent_runtime.core import _LLMClient start_time = time.time() # 1. Leader 分解任务 leader_llm = _LLMClient(self.leader_config.llm) leader = SwarmLeader(config=self.config, llm_client=leader_llm) tasks = await leader.decompose(user_input) # 无子任务 → Leader 自行处理 if not tasks: logger.info("Swarm: 无需分解,Leader 直接处理") from app.agent_runtime import AgentRuntime runtime = AgentRuntime(config=self.leader_config, on_llm_call=self.on_llm_call) result = await runtime.run(user_input) return SwarmResult( success=result.success, final_answer=result.content, mode=SwarmMode.LEADER_ONLY, total_duration_ms=int((time.time() - start_time) * 1000), total_iterations=result.iterations_used, total_tool_calls=result.tool_calls_made, error=result.error, ) # 2. 构建 Teammates(不足则自动生成) teammates = self._build_teammates(tasks) # 3. 按依赖关系分组执行 teammate_results = await self._execute_with_deps( tasks=tasks, teammates=teammates, swarm_context=user_input, ) # 4. Leader 汇总 final_answer = await leader.aggregate(user_input, tasks) total_duration = int((time.time() - start_time) * 1000) logger.info( "Swarm 完成: %d tasks, %d success, %d fail, %dms", len(tasks), sum(1 for t in tasks if t.status == TaskStatus.DONE), sum(1 for t in tasks if t.status == TaskStatus.FAILED), total_duration, ) return SwarmResult( success=all(t.status == TaskStatus.DONE for t in tasks), final_answer=final_answer, mode=self.config.mode, tasks=tasks, teammate_results=[ { "agent_id": tr.agent_id, "agent_name": tr.agent_name, "task_id": tr.task_id, "success": tr.success, "output": tr.output[:500], "duration_ms": tr.duration_ms, "iterations_used": tr.iterations_used, "tool_calls_made": tr.tool_calls_made, "error": tr.error, } for tr in teammate_results ], mailbox_messages=self.mailbox.snapshot() if self.mailbox else [], total_duration_ms=total_duration, total_iterations=sum(tr.iterations_used for tr in teammate_results), total_tool_calls=sum(tr.tool_calls_made for tr in teammate_results), ) def _build_teammates(self, tasks: List[SwarmTask]) -> Dict[str, SwarmTeammate]: """为每个子任务构建 Teammate。 策略: - 如果有预配置的 teammate_configs,按数量分配 - 不足部分自动基于 leader_config 生成轻量配置 """ teammates: Dict[str, SwarmTeammate] = {} pre_configs = list(self.teammate_configs) for i, task in enumerate(tasks): if i < len(pre_configs): cfg = pre_configs[i] agent_id = cfg.name or f"teammate_{i}" agent_name = cfg.name or f"Teammate-{i+1}" else: # 自动生成:使用 teammate_model(轻量模型) agent_id = f"teammate_{i}" agent_name = f"Teammate-{i+1}" cfg = AgentConfig( name=agent_name, system_prompt=f"你是一个专门处理以下类型任务的 AI Agent: {task.description[:200]}", llm=AgentLLMConfig( model=self.config.teammate_model, temperature=0.7, max_iterations=10, ), tools=AgentToolConfig(), memory=AgentMemoryConfig(enabled=False), # Teammate 不需要长记忆 user_id=self.leader_config.user_id, ) task.assigned_agent_id = agent_id teammates[agent_id] = SwarmTeammate( agent_id=agent_id, agent_name=agent_name, config=cfg, mailbox=self.mailbox, ) return teammates async def _execute_with_deps( self, tasks: List[SwarmTask], teammates: Dict[str, SwarmTeammate], swarm_context: str, ) -> List[TeammateResult]: """按依赖关系分批并行执行。 算法: 1. 找出所有无依赖的"就绪"任务 → 并行执行 2. 等待它们完成 → 标记依赖已解决 3. 重复直到所有任务完成 """ results: List[TeammateResult] = [] pending = list(tasks) completed_ids: Set[str] = set() failed_ids: Set[str] = set() while pending: # 找出就绪任务(依赖全部满足) ready = [ t for t in pending if all(dep in completed_ids for dep in t.dependencies) ] if not ready: # 死锁检测:剩余任务都有未完成的依赖 stuck_ids = {t.id for t in pending} unresolved = set() for t in pending: for dep in t.dependencies: if dep in stuck_ids and dep not in completed_ids and dep not in failed_ids: unresolved.add(dep) if unresolved: logger.warning("Swarm: 检测到未解决的依赖 %s,跳过阻塞任务", unresolved) # 将阻塞任务的依赖标记为 failed(跳过) for t in pending: for dep in unresolved: if dep in t.dependencies: t.dependencies.remove(dep) continue break # 不应该到这里 # 并行执行就绪任务 batch_results = await asyncio.gather( *[ teammates[t.assigned_agent_id or f"teammate_{i}"].execute( task=t, swarm_context=swarm_context, on_llm_call=self.on_llm_call, ) for i, t in enumerate(ready) ], return_exceptions=True, ) # 处理结果 for task, tr in zip(ready, batch_results): if isinstance(tr, Exception): task.status = TaskStatus.FAILED task.error = str(tr) failed_ids.add(task.id) logger.error("Swarm teammate %s 异常: %s", task.assigned_agent_id, tr) results.append(TeammateResult( task_id=task.id, agent_id=task.assigned_agent_id or "unknown", agent_name="unknown", success=False, output="", iterations_used=0, tool_calls_made=0, duration_ms=0, error=str(tr), )) else: results.append(tr) if tr.success: completed_ids.add(task.id) else: failed_ids.add(task.id) # 失败重试 if self.config.retry_failed and task.status == TaskStatus.FAILED: logger.info("Swarm: 重试失败任务 %s", task.description[:50]) task.status = TaskStatus.PENDING # 更新 pending 列表 pending = [t for t in pending if t.id not in (completed_ids | failed_ids)] return results # ──────────────────────────── 便捷工厂 ──────────────────────────── def create_swarm( user_id: Optional[str] = None, mode: SwarmMode = SwarmMode.PARALLEL, max_teammates: int = 5, leader_model: str = "deepseek-v4-pro", teammate_model: str = "deepseek-v4-flash", mailbox_enabled: bool = True, teammate_configs: Optional[List[AgentConfig]] = None, ) -> SwarmRuntime: """创建预配置的 Swarm 运行时。""" config = SwarmConfig( mode=mode, max_teammates=max_teammates, leader_model=leader_model, teammate_model=teammate_model, mailbox_enabled=mailbox_enabled, ) leader_config = AgentConfig( name="SwarmLeader", system_prompt="你是一个AI任务协调者。你将复杂问题分解为子任务,协调多个AI Agent并行处理,并汇总结果。", llm=AgentLLMConfig(model=leader_model, temperature=0.3, max_iterations=10), user_id=user_id, ) return SwarmRuntime( config=config, leader_config=leader_config, teammate_configs=teammate_configs or [], )