Files
aiagent/backend/app/agent_runtime/swarm.py
renjianbo beff3fac8d fix: delete agent 500 error + dynamic personality + deployment guide
- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions,
  schedules, executions, team_members) and unbind goals/tasks before delete
- Remove hardcoded personality templates in Android, replace with dynamic
  system prompt generation from name + description
- Set promptSectionsEnabled=false to bypass PromptComposer for personality
- Add Tencent Cloud Linux deployment guide (Docker Compose)
- Accumulated backend service updates, frontend UI fixes, Android app changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-29 01:17:21 +08:00

691 lines
26 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent 蜂群 (Swarm) — Leader/Teammate 并行协作引擎。
参考 Claude Code:
- src/tools/AgentTool/ — 子 Agent 生成与执行
- src/tools/AgentTool/forkSubagent.ts — Fork 模式(后台并行)
- buildInAgents.ts — 内置 Agent 类型explore/plan/verify
核心概念:
- SwarmLeader: 接收用户输入 → 分解为子任务 → 分配给 Teammates → 汇总结果
- SwarmTeammate: 独立执行单个子任务的 Agent可通过 Mailbox 与其他 Agent 通信
- SwarmMailbox: Agent 间消息传递(发布/订阅模式)
- 并行执行: 无依赖的任务通过 asyncio.gather 并发运行
与现有 Orchestrator 的区别:
- Orchestrator: 预定义编排模式 (route/sequential/debate/pipeline/graph)
- Swarm: Leader 自主决策动态分解任务Agent 间可通信
"""
from __future__ import annotations
import asyncio
import json
import logging
import time
import uuid
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Set
from pydantic import BaseModel, Field
from app.agent_runtime.schemas import (
AgentConfig,
AgentResult,
AgentStep,
AgentLLMConfig,
AgentToolConfig,
AgentMemoryConfig,
)
logger = logging.getLogger(__name__)
# ──────────────────────────── 枚举 / 配置 ────────────────────────────
class SwarmMode(str, Enum):
PARALLEL = "parallel" # 所有 Teammate 并发执行
PIPELINE = "pipeline" # 流水线A→B→C
DEBATE = "debate" # 多方辩论后汇总
LEADER_ONLY = "leader_only" # Leader 自行处理(不生成 Teammate
class TaskStatus(str, Enum):
PENDING = "pending"
RUNNING = "running"
DONE = "done"
FAILED = "failed"
class SwarmConfig(BaseModel):
"""蜂群配置"""
mode: SwarmMode = SwarmMode.PARALLEL
max_teammates: int = Field(default=5, ge=1, le=20, description="最大 Teammate 数量")
timeout_ms: int = Field(default=300_000, description="单个 Teammate 超时(毫秒)")
total_timeout_ms: int = Field(default=600_000, description="整个 Swarm 总超时")
leader_model: str = Field(default="deepseek-v4-pro", description="Leader 使用的模型")
teammate_model: str = Field(default="deepseek-v4-flash", description="Teammate 默认模型")
mailbox_enabled: bool = Field(default=True, description="启用 Agent 间消息传递")
leader_aggregation: bool = Field(default=True, description="Leader 汇总所有结果")
retry_failed: bool = Field(default=True, description="失败任务是否重试")
# ──────────────────────────── Mailbox ────────────────────────────
@dataclass
class MailboxMessage:
"""Agent 间消息"""
id: str
from_agent: str # 发送者 agent_id
to_agent: str # 接收者 agent_id"*" = 广播)
content: str
timestamp: float = field(default_factory=time.time)
class SwarmMailbox:
"""Agent 间消息传递系统(发布/订阅模式)。
每个 Teammate 执行过程中可以把发现/中间结果写入 Mailbox
其他 Teammate 可以读取相关消息来避免重复工作或协调行动。
"""
def __init__(self):
self._messages: List[MailboxMessage] = []
self._lock = asyncio.Lock()
async def send(self, from_agent: str, to_agent: str, content: str) -> MailboxMessage:
"""发送消息。to_agent="*" 为广播。"""
msg = MailboxMessage(
id=str(uuid.uuid4())[:8],
from_agent=from_agent,
to_agent=to_agent,
content=content,
)
async with self._lock:
self._messages.append(msg)
logger.debug("Mailbox: %s%s: %s", from_agent, to_agent, content[:80])
return msg
async def receive(self, agent_id: str, since: float = 0) -> List[MailboxMessage]:
"""接收发给该 Agent 的消息(含广播)。"""
async with self._lock:
return [
m for m in self._messages
if m.timestamp >= since and (m.to_agent == agent_id or m.to_agent == "*")
]
async def broadcast(self, from_agent: str, content: str) -> MailboxMessage:
"""向所有 Agent 广播消息。"""
return await self.send(from_agent, "*", content)
def snapshot(self) -> List[Dict[str, Any]]:
"""返回消息列表快照(用于序列化)。"""
return [
{"id": m.id, "from": m.from_agent, "to": m.to_agent,
"content": m.content[:500], "timestamp": m.timestamp}
for m in self._messages
]
# ──────────────────────────── Task ────────────────────────────
class SwarmTask(BaseModel):
"""蜂群中的子任务"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
description: str = Field(..., description="任务描述")
assigned_agent_id: Optional[str] = Field(default=None, description="分配的 Agent ID")
dependencies: List[str] = Field(default_factory=list, description="依赖的任务 ID 列表")
context_hint: str = Field(default="", description="额外上下文提示")
status: TaskStatus = TaskStatus.PENDING
result: Optional[str] = None
error: Optional[str] = None
iterations_used: int = 0
tool_calls_made: int = 0
duration_ms: int = 0
# ──────────────────────────── Leader ────────────────────────────
_LEADER_DECOMPOSE_PROMPT = """你是一个任务分解专家。将用户的问题拆解为多个可并行执行的子任务。
用户问题: {user_input}
{context_hint}
请返回 JSON 格式(不要 markdown 包裹):
{{
"mode": "parallel",
"sub_tasks": [
{{
"description": "子任务描述(清晰具体,包含所需上下文)",
"dependencies": [],
"context_hint": "额外说明"
}}
]
}}
规则:
- 每个子任务应该是独立的、可单独完成的
- 标注依赖关系(如果任务 B 需要任务 A 的结果)
- 子任务数量不超过 {max_tasks}
- 模式可选: "parallel" (全部并行) 或 "pipeline" (需要顺序执行)
- 如果用户问题很简单不需要分解,返回空 sub_tasks 列表"""
_LEADER_AGGREGATE_PROMPT = """你是一个信息汇总专家。将多个子任务的执行结果整合为统一回答。
用户原始问题: {user_input}
各子任务执行结果:
{task_results}
请综合以上信息,给出一个完整、有条理的回答。如果结果之间有矛盾,说明矛盾所在。
直接输出最终回答,不要用 JSON 包裹。"""
class SwarmLeader:
"""蜂群 Leader — 负责任务分解、分发和结果汇总。"""
def __init__(
self,
config: SwarmConfig,
llm_client: Any, # _LLMClient
):
self.config = config
self.llm = llm_client
async def decompose(self, user_input: str, context_hint: str = "") -> List[SwarmTask]:
"""使用 LLM 将用户输入分解为子任务列表。"""
prompt = _LEADER_DECOMPOSE_PROMPT.format(
user_input=user_input,
context_hint=context_hint or "",
max_tasks=self.config.max_teammates,
)
try:
response = await self.llm.chat(
messages=[{"role": "user", "content": prompt}],
tools=None,
iteration=0,
)
plan = self._parse_json(response.content or "")
except Exception as e:
logger.warning("Leader 任务分解失败: %s,退化为单任务模式", e)
plan = {"mode": "parallel", "sub_tasks": []}
if not plan or not plan.get("sub_tasks"):
# 无需分解Leader 自行处理
return []
tasks = []
for i, t in enumerate(plan.get("sub_tasks", [])):
if i >= self.config.max_teammates:
break
task = SwarmTask(
description=t.get("description", f"子任务 {i+1}"),
dependencies=t.get("dependencies", []),
context_hint=t.get("context_hint", ""),
)
tasks.append(task)
logger.info("Leader 分解完成: %d 个子任务 (mode=%s)", len(tasks), plan.get("mode", "parallel"))
return tasks
async def aggregate(self, user_input: str, tasks: List[SwarmTask]) -> str:
"""汇总所有子任务结果。"""
task_results_parts = []
for t in tasks:
status = "" if t.status == TaskStatus.DONE else ""
result_text = t.result[:1000] if t.result else "(无输出)"
task_results_parts.append(
f"[{status}] {t.description}\n结果: {result_text}"
)
task_results_text = "\n\n".join(task_results_parts)
prompt = _LEADER_AGGREGATE_PROMPT.format(
user_input=user_input,
task_results=task_results_text,
)
try:
response = await self.llm.chat(
messages=[{"role": "user", "content": prompt}],
tools=None,
iteration=0,
)
return response.content or "汇总失败"
except Exception as e:
logger.error("Leader 汇总失败: %s", e)
# 降级:直接拼接
return "\n\n".join(
f"## {t.description}\n{t.result or '无输出'}"
for t in tasks if t.status == TaskStatus.DONE
)
@staticmethod
def _parse_json(text: str) -> Optional[Dict[str, Any]]:
"""从 LLM 输出中提取 JSON。"""
import re
cleaned = text.strip()
cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned)
cleaned = re.sub(r'\s*```$', '', cleaned)
try:
return json.loads(cleaned)
except json.JSONDecodeError:
pass
m = re.search(r'\{[\s\S]*\}', cleaned)
if m:
try:
return json.loads(m.group(0))
except json.JSONDecodeError:
pass
return None
# ──────────────────────────── Teammate ────────────────────────────
@dataclass
class TeammateResult:
"""单个 Teammate 的执行结果"""
task_id: str
agent_id: str
agent_name: str
success: bool
output: str
iterations_used: int
tool_calls_made: int
duration_ms: int
error: Optional[str] = None
steps: List[Dict[str, Any]] = field(default_factory=list)
class SwarmTeammate:
"""蜂群 Teammate — 执行单个子任务的 Agent 包装器。"""
def __init__(
self,
agent_id: str,
agent_name: str,
config: AgentConfig,
mailbox: Optional[SwarmMailbox] = None,
):
self.agent_id = agent_id
self.agent_name = agent_name
self.config = config
self.mailbox = mailbox
async def execute(
self,
task: SwarmTask,
swarm_context: str = "",
on_llm_call: Optional[Callable] = None,
) -> TeammateResult:
"""执行子任务。"""
from app.agent_runtime import AgentRuntime
task.status = TaskStatus.RUNNING
start = time.time()
# 构建增强的输入:包含 Swarm 上下文 + Mailbox 消息
enhanced_input = task.description
extra_context: List[str] = []
if swarm_context:
extra_context.append(f"[Swarm 上下文]\n{swarm_context}")
# 读取 Mailbox 中相关消息
if self.mailbox:
msgs = await self.mailbox.receive(self.agent_id)
if msgs:
mailbox_text = "\n".join(
f"[{m.from_agent}]: {m.content[:300]}" for m in msgs[-5:] # 最近 5 条
)
extra_context.append(f"[Mailbox 消息]\n{mailbox_text}")
if extra_context:
enhanced_input = "\n\n".join(extra_context) + "\n\n---\n任务:\n" + enhanced_input
try:
runtime = AgentRuntime(config=self.config, on_llm_call=on_llm_call)
result = await asyncio.wait_for(
runtime.run(enhanced_input),
timeout=self.config.llm.request_timeout,
)
task.result = result.content
task.status = TaskStatus.DONE if result.success else TaskStatus.FAILED
task.iterations_used = result.iterations_used
task.tool_calls_made = result.tool_calls_made
task.error = result.error
# 将重要发现写入 Mailbox供其他 Teammate 参考)
if self.mailbox and result.success and result.content:
await self._share_findings(task, result)
return TeammateResult(
task_id=task.id,
agent_id=self.agent_id,
agent_name=self.agent_name,
success=result.success,
output=result.content,
iterations_used=result.iterations_used,
tool_calls_made=result.tool_calls_made,
duration_ms=int((time.time() - start) * 1000),
error=result.error,
steps=[s.model_dump() for s in result.steps] if result.steps else [],
)
except asyncio.TimeoutError:
task.status = TaskStatus.FAILED
task.error = f"超时({self.config.llm.request_timeout}s"
return TeammateResult(
task_id=task.id, agent_id=self.agent_id, agent_name=self.agent_name,
success=False, output="", iterations_used=0, tool_calls_made=0,
duration_ms=int((time.time() - start) * 1000),
error=task.error,
)
except Exception as e:
task.status = TaskStatus.FAILED
task.error = str(e)
logger.error("Teammate %s 执行失败: %s", self.agent_name, e)
return TeammateResult(
task_id=task.id, agent_id=self.agent_id, agent_name=self.agent_name,
success=False, output="", iterations_used=0, tool_calls_made=0,
duration_ms=int((time.time() - start) * 1000),
error=str(e),
)
async def _share_findings(self, task: SwarmTask, result: AgentResult) -> None:
"""将完成的任务结果广播到 Mailbox。"""
if not self.mailbox:
return
summary = (
f"完成任务: {task.description}\n"
f"关键发现: {result.content[:300]}"
)
await self.mailbox.broadcast(self.agent_id, summary)
# ──────────────────────────── Swarm Runtime ────────────────────────────
class SwarmResult(BaseModel):
"""蜂群执行结果"""
success: bool = True
final_answer: str = ""
mode: SwarmMode = SwarmMode.PARALLEL
tasks: List[SwarmTask] = Field(default_factory=list)
teammate_results: List[Dict[str, Any]] = Field(default_factory=list)
mailbox_messages: List[Dict[str, Any]] = Field(default_factory=list)
total_duration_ms: int = 0
total_iterations: int = 0
total_tool_calls: int = 0
error: Optional[str] = None
class SwarmRuntime:
"""Agent 蜂群运行时。
完整生命周期:
1. Leader 分解用户输入 → 子任务列表
2. 并行执行无依赖的子任务asyncio.gather
3. 检查 Mailbox 消息,有依赖的先等依赖完成
4. Leader 汇总所有结果 → 最终回答
用法::
swarm = SwarmRuntime(config=SwarmConfig(mode=SwarmMode.PARALLEL))
result = await swarm.run("帮我做三件事: ...")
"""
def __init__(
self,
config: Optional[SwarmConfig] = None,
leader_config: Optional[AgentConfig] = None,
teammate_configs: Optional[List[AgentConfig]] = None,
on_llm_call: Optional[Callable] = None,
):
self.config = config or SwarmConfig()
self.leader_config = leader_config or AgentConfig(
name="SwarmLeader",
llm=AgentLLMConfig(model=self.config.leader_model, temperature=0.3, max_iterations=10),
)
self.teammate_configs = teammate_configs or []
self.on_llm_call = on_llm_call
self.mailbox = SwarmMailbox() if self.config.mailbox_enabled else None
async def run(self, user_input: str) -> SwarmResult:
"""运行蜂群。"""
from app.agent_runtime.core import _LLMClient
start_time = time.time()
# 1. Leader 分解任务
leader_llm = _LLMClient(self.leader_config.llm)
leader = SwarmLeader(config=self.config, llm_client=leader_llm)
tasks = await leader.decompose(user_input)
# 无子任务 → Leader 自行处理
if not tasks:
logger.info("Swarm: 无需分解Leader 直接处理")
from app.agent_runtime import AgentRuntime
runtime = AgentRuntime(config=self.leader_config, on_llm_call=self.on_llm_call)
result = await runtime.run(user_input)
return SwarmResult(
success=result.success,
final_answer=result.content,
mode=SwarmMode.LEADER_ONLY,
total_duration_ms=int((time.time() - start_time) * 1000),
total_iterations=result.iterations_used,
total_tool_calls=result.tool_calls_made,
error=result.error,
)
# 2. 构建 Teammates不足则自动生成
teammates = self._build_teammates(tasks)
# 3. 按依赖关系分组执行
teammate_results = await self._execute_with_deps(
tasks=tasks,
teammates=teammates,
swarm_context=user_input,
)
# 4. Leader 汇总
final_answer = await leader.aggregate(user_input, tasks)
total_duration = int((time.time() - start_time) * 1000)
logger.info(
"Swarm 完成: %d tasks, %d success, %d fail, %dms",
len(tasks),
sum(1 for t in tasks if t.status == TaskStatus.DONE),
sum(1 for t in tasks if t.status == TaskStatus.FAILED),
total_duration,
)
return SwarmResult(
success=all(t.status == TaskStatus.DONE for t in tasks),
final_answer=final_answer,
mode=self.config.mode,
tasks=tasks,
teammate_results=[
{
"agent_id": tr.agent_id, "agent_name": tr.agent_name,
"task_id": tr.task_id, "success": tr.success,
"output": tr.output[:500], "duration_ms": tr.duration_ms,
"iterations_used": tr.iterations_used, "tool_calls_made": tr.tool_calls_made,
"error": tr.error,
}
for tr in teammate_results
],
mailbox_messages=self.mailbox.snapshot() if self.mailbox else [],
total_duration_ms=total_duration,
total_iterations=sum(tr.iterations_used for tr in teammate_results),
total_tool_calls=sum(tr.tool_calls_made for tr in teammate_results),
)
def _build_teammates(self, tasks: List[SwarmTask]) -> Dict[str, SwarmTeammate]:
"""为每个子任务构建 Teammate。
策略:
- 如果有预配置的 teammate_configs按数量分配
- 不足部分自动基于 leader_config 生成轻量配置
"""
teammates: Dict[str, SwarmTeammate] = {}
pre_configs = list(self.teammate_configs)
for i, task in enumerate(tasks):
if i < len(pre_configs):
cfg = pre_configs[i]
agent_id = cfg.name or f"teammate_{i}"
agent_name = cfg.name or f"Teammate-{i+1}"
else:
# 自动生成:使用 teammate_model轻量模型
agent_id = f"teammate_{i}"
agent_name = f"Teammate-{i+1}"
cfg = AgentConfig(
name=agent_name,
system_prompt=f"你是一个专门处理以下类型任务的 AI Agent: {task.description[:200]}",
llm=AgentLLMConfig(
model=self.config.teammate_model,
temperature=0.7,
max_iterations=10,
),
tools=AgentToolConfig(),
memory=AgentMemoryConfig(enabled=False), # Teammate 不需要长记忆
user_id=self.leader_config.user_id,
)
task.assigned_agent_id = agent_id
teammates[agent_id] = SwarmTeammate(
agent_id=agent_id,
agent_name=agent_name,
config=cfg,
mailbox=self.mailbox,
)
return teammates
async def _execute_with_deps(
self,
tasks: List[SwarmTask],
teammates: Dict[str, SwarmTeammate],
swarm_context: str,
) -> List[TeammateResult]:
"""按依赖关系分批并行执行。
算法:
1. 找出所有无依赖的"就绪"任务 → 并行执行
2. 等待它们完成 → 标记依赖已解决
3. 重复直到所有任务完成
"""
results: List[TeammateResult] = []
pending = list(tasks)
completed_ids: Set[str] = set()
failed_ids: Set[str] = set()
while pending:
# 找出就绪任务(依赖全部满足)
ready = [
t for t in pending
if all(dep in completed_ids for dep in t.dependencies)
]
if not ready:
# 死锁检测:剩余任务都有未完成的依赖
stuck_ids = {t.id for t in pending}
unresolved = set()
for t in pending:
for dep in t.dependencies:
if dep in stuck_ids and dep not in completed_ids and dep not in failed_ids:
unresolved.add(dep)
if unresolved:
logger.warning("Swarm: 检测到未解决的依赖 %s,跳过阻塞任务", unresolved)
# 将阻塞任务的依赖标记为 failed跳过
for t in pending:
for dep in unresolved:
if dep in t.dependencies:
t.dependencies.remove(dep)
continue
break # 不应该到这里
# 并行执行就绪任务
batch_results = await asyncio.gather(
*[
teammates[t.assigned_agent_id or f"teammate_{i}"].execute(
task=t,
swarm_context=swarm_context,
on_llm_call=self.on_llm_call,
)
for i, t in enumerate(ready)
],
return_exceptions=True,
)
# 处理结果
for task, tr in zip(ready, batch_results):
if isinstance(tr, Exception):
task.status = TaskStatus.FAILED
task.error = str(tr)
failed_ids.add(task.id)
logger.error("Swarm teammate %s 异常: %s", task.assigned_agent_id, tr)
results.append(TeammateResult(
task_id=task.id, agent_id=task.assigned_agent_id or "unknown",
agent_name="unknown", success=False, output="",
iterations_used=0, tool_calls_made=0, duration_ms=0,
error=str(tr),
))
else:
results.append(tr)
if tr.success:
completed_ids.add(task.id)
else:
failed_ids.add(task.id)
# 失败重试
if self.config.retry_failed and task.status == TaskStatus.FAILED:
logger.info("Swarm: 重试失败任务 %s", task.description[:50])
task.status = TaskStatus.PENDING
# 更新 pending 列表
pending = [t for t in pending if t.id not in (completed_ids | failed_ids)]
return results
# ──────────────────────────── 便捷工厂 ────────────────────────────
def create_swarm(
user_id: Optional[str] = None,
mode: SwarmMode = SwarmMode.PARALLEL,
max_teammates: int = 5,
leader_model: str = "deepseek-v4-pro",
teammate_model: str = "deepseek-v4-flash",
mailbox_enabled: bool = True,
teammate_configs: Optional[List[AgentConfig]] = None,
) -> SwarmRuntime:
"""创建预配置的 Swarm 运行时。"""
config = SwarmConfig(
mode=mode,
max_teammates=max_teammates,
leader_model=leader_model,
teammate_model=teammate_model,
mailbox_enabled=mailbox_enabled,
)
leader_config = AgentConfig(
name="SwarmLeader",
system_prompt="你是一个AI任务协调者。你将复杂问题分解为子任务协调多个AI Agent并行处理并汇总结果。",
llm=AgentLLMConfig(model=leader_model, temperature=0.3, max_iterations=10),
user_id=user_id,
)
return SwarmRuntime(
config=config,
leader_config=leader_config,
teammate_configs=teammate_configs or [],
)