Files
aiagent/backend/app/agent_runtime/core.py
renjianbo 036f533881 feat: Agent 监控与编排、仪表盘/配置页及文档更新
- agent_runtime: orchestrator、core/memory/schemas 调整
- agent_monitoring API、service、agent_llm_log 模型与 database 注册
- 前端 AgentDashboard、AgentConfig、Agents/MainLayout/路由与 AgentChat
- 文档:(红头)项目核心文档汇总、自主AI Agent改造完成情况、AI agent改造计划

Made-with: Cursor
2026-05-01 19:32:59 +08:00

455 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent Runtime 核心 —— 自主 ReAct 循环。
流程:
1. 接收用户输入 → 追加到消息列表
2. 调用 LLM携带 tools schema
3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2
4. 如果 LLM 返回文本 → 作为最终回答返回
5. 超过 max_iterations → 强制终止
"""
from __future__ import annotations
import json
import logging
import time
from typing import Any, Callable, Dict, List, Optional, Protocol, TypedDict
from app.agent_runtime.schemas import (
AgentConfig,
AgentResult,
AgentStep,
)
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
logger = logging.getLogger(__name__)
class LLMCallMetrics(TypedDict, total=False):
"""一次 LLM 调用的度量数据"""
agent_id: Optional[str]
session_id: str
user_id: Optional[str]
model: str
provider: Optional[str]
prompt_tokens: int
completion_tokens: int
total_tokens: int
latency_ms: int
iteration_number: int
step_type: str # think / final
tool_name: Optional[str]
status: str # success / error
error_message: Optional[str]
# 可重试的 API 异常
_RETRYABLE_ERRORS = (
"timed out",
"timeout",
"connection error",
"temporarily unavailable",
"server disconnected",
"rate limit",
"too many requests",
"internal server error",
"service unavailable",
)
class AgentRuntime:
"""
自主 Agent 运行时。
用法:
runtime = AgentRuntime(config)
result = await runtime.run("帮我写个Python脚本")
"""
def __init__(
self,
config: Optional[AgentConfig] = None,
context: Optional[AgentContext] = None,
memory: Optional[AgentMemory] = None,
tool_manager: Optional[AgentToolManager] = None,
execution_logger: Optional[Any] = None,
on_tool_executed: Optional[Callable[[str], Any]] = None,
on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
):
self.config = config or AgentConfig()
self.context = context or AgentContext(
system_prompt=self.config.system_prompt,
user_id=self.config.user_id,
)
self.memory = memory or AgentMemory(
scope_id=self.config.user_id or self.config.name,
max_history=self.config.memory.max_history_messages,
persist=self.config.memory.persist_to_db,
)
self.tool_manager = tool_manager or AgentToolManager(
include_tools=self.config.tools.include_tools,
exclude_tools=self.config.tools.exclude_tools,
)
self.execution_logger = execution_logger
self.on_tool_executed = on_tool_executed
self.on_llm_call = on_llm_call
self._memory_context_loaded = False
async def run(self, user_input: str) -> AgentResult:
"""
执行 Agent 单轮对话。
流程:加载记忆 → 追加用户消息 → ReAct 循环 → 保存记忆 → 返回结果。
"""
max_iter = max(1, self.config.llm.max_iterations)
self.context.iteration = 0
self.context.tool_calls_made = 0
# 1. 首次运行时加载长期记忆到 system prompt
if not self._memory_context_loaded:
await self._inject_memory_context()
self._memory_context_loaded = True
# 2. 追加用户消息
self.context.add_user_message(user_input)
# 3. ReAct 循环
llm = _LLMClient(self.config.llm)
tool_schemas = self.tool_manager.get_tool_schemas()
has_tools = self.tool_manager.has_tools()
steps: List[AgentStep] = []
# 构建 LLM 调用回调(包装 on_llm_call补充上下文
llm_callback_ctx = {"step_type": "think", "tool_name": None}
def _llm_callback(metrics: Dict[str, Any]):
if self.on_llm_call:
metrics.update({
"session_id": self.context.session_id,
"user_id": self.config.user_id,
"step_type": llm_callback_ctx["step_type"],
"tool_name": llm_callback_ctx["tool_name"],
})
self.on_llm_call(metrics)
while self.context.iteration < max_iter:
self.context.iteration += 1
# 裁剪过长历史
messages = self.memory.trim_messages(self.context.messages)
# 调用 LLM
try:
response = await llm.chat(
messages=messages,
tools=tool_schemas if has_tools and self.context.iteration == 1 else
(tool_schemas if has_tools else None),
iteration=self.context.iteration,
on_completion=_llm_callback,
)
except Exception as e:
err_str = str(e)
logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
if self.context.iteration < max_iter and self._is_retryable(err_str):
steps.append(AgentStep(
iteration=self.context.iteration,
type="tool_result",
content=f"LLM 调用失败(可重试): {err_str}",
))
continue
return AgentResult(
success=False,
content=f"LLM 调用失败: {err_str}",
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
error=err_str,
)
# 解析工具调用
tool_calls = self._extract_tool_calls(response)
content = self._extract_content(response)
reasoning = getattr(response, "reasoning_content", None) or (
response.get("reasoning_content") if isinstance(response, dict) else None
)
if not tool_calls:
# LLM 直接返回文本 → 结束
self.context.add_assistant_message(content)
final_text = content or "(模型未返回有效内容)"
steps.append(AgentStep(
iteration=self.context.iteration,
type="final",
content=final_text,
reasoning=reasoning,
))
# 保存记忆
await self.memory.save_context(user_input, final_text, self.context.messages)
return AgentResult(
success=True,
content=final_text,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps,
)
# 有工具调用 → 先记录 assistant 消息(含 tool_calls
self.context.add_assistant_message(content or "", tool_calls, reasoning)
# 记录思考步骤(含工具调用意图)
tc_names = [tc["function"]["name"] for tc in tool_calls]
tc_args_list = []
for tc in tool_calls:
try:
tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
except (json.JSONDecodeError, TypeError):
tc_args_list.append({})
steps.append(AgentStep(
iteration=self.context.iteration,
type="think",
content=content or f"调用工具: {', '.join(tc_names)}",
reasoning=reasoning,
tool_name=tc_names[0] if len(tc_names) == 1 else None,
tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
))
if self.execution_logger:
self.execution_logger.info(
f"Agent 调用 {len(tool_calls)} 个工具",
data={"tool_calls": tc_names,
"iteration": self.context.iteration},
)
# 逐一执行工具
for tc in tool_calls:
tfn = tc.get("function", {})
tname = tfn.get("name", "unknown")
tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
try:
targs = json.loads(tfn.get("arguments", "{}"))
except (json.JSONDecodeError, TypeError):
targs = {}
logger.info("Agent 执行工具 [%s]: %s", tname, targs)
result = await self.tool_manager.execute(tname, targs)
steps.append(AgentStep(
iteration=self.context.iteration,
type="tool_result",
content=f"工具 {tname} 返回结果",
tool_name=tname,
tool_input=targs,
tool_result=result[:500] + "..." if len(result) > 500 else result,
))
self.context.add_tool_result(tcid, tname, result)
self.context.tool_calls_made += 1
if self.on_tool_executed:
try:
await self.on_tool_executed(tname)
except Exception:
pass
if self.execution_logger:
preview = result[:300] + "..." if len(result) > 300 else result
self.execution_logger.info(
f"工具 {tname} 执行完成",
data={"tool_name": tname, "result_preview": preview},
)
# 达到最大迭代次数
last_content = ""
for m in reversed(self.context.messages):
if m.get("role") == "assistant" and m.get("content"):
last_content = m["content"]
break
logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
if last_content:
steps.append(AgentStep(
iteration=self.context.iteration,
type="final",
content=last_content,
))
return AgentResult(
success=True,
content=last_content or "已达最大迭代次数,但模型未返回最终回答。",
truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
steps=steps,
)
async def _inject_memory_context(self) -> None:
"""加载长期记忆并注入 system prompt。"""
mem_text = await self.memory.initialize()
if mem_text:
enriched = (
self.config.system_prompt.rstrip("\n")
+ "\n\n"
+ mem_text
)
self.context.set_system_prompt(enriched)
logger.info("Agent 已注入长期记忆上下文")
@staticmethod
def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
"""从 LLM 响应中提取工具调用列表。"""
if response is None:
return []
# OpenAI SDK 格式
if hasattr(response, "tool_calls") and response.tool_calls:
result = []
for tc in response.tool_calls:
result.append({
"id": tc.id,
"type": tc.type,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
})
return result
# 字典格式
if isinstance(response, dict):
tc_list = response.get("tool_calls") or []
if tc_list:
return tc_list
# 检查 content 中是否嵌入了 DSML
content = response.get("content") or ""
if "invoke" in content or "function_call" in content:
from app.services.llm_service import _parse_dsml_tool_invocations
dsml = _parse_dsml_tool_invocations(content)
if dsml:
return [
{
"id": f"dsml-{i}",
"type": "function",
"function": {
"name": inv["name"],
"arguments": json.dumps(inv["arguments"], ensure_ascii=False),
},
}
for i, inv in enumerate(dsml)
]
return []
@staticmethod
def _extract_content(response: Any) -> str:
"""从 LLM 响应中提取文本内容。"""
if response is None:
return ""
if hasattr(response, "content"):
return response.content or ""
if isinstance(response, dict):
return response.get("content") or ""
return str(response)
@staticmethod
def _is_retryable(err_str: str) -> bool:
"""判断错误是否可重试。"""
err_lower = err_str.lower()
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
class _LLMClient:
"""轻量 LLM 客户端包装,复用已有 LLMService 能力。"""
def __init__(self, config: Any):
from app.services.llm_service import llm_service
self._service = llm_service
self._config = config
async def chat(
self,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
iteration: int = 1,
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
) -> Any:
"""
调用 LLM。
优先使用 llm_service.call_openai_with_tools支持 ReAct 的多次工具调用)。
但为避免外层 ReAct 与内部 ReAct 冲突:
- 第 1 轮:使用标准 chat无内部 ReAct由外层 AgentRuntime 控制循环
- 后续轮次:也使用标准 chat仅追加工具结果
"""
# 直接用 OpenAI/DeepSeek SDK 调用,由 AgentRuntime 控制循环
from openai import AsyncOpenAI
from app.core.config import settings
# 优先从配置读取,其次从 settings.env 加载),最后 os.environ
api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
if not api_key or api_key == "your-openai-api-key":
# 尝试 DeepSeek
api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
if not api_key:
raise ValueError("未配置 API Key")
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
kwargs: Dict[str, Any] = {
"model": self._config.model,
"messages": messages,
"temperature": self._config.temperature,
"timeout": self._config.request_timeout,
}
if self._config.max_tokens:
kwargs["max_tokens"] = self._config.max_tokens
if self._config.extra_body:
kwargs["extra_body"] = self._config.extra_body
if tools:
kwargs["tools"] = tools
kwargs["tool_choice"] = "auto"
start_time = time.perf_counter()
try:
response = await client.chat.completions.create(**kwargs)
latency_ms = int((time.perf_counter() - start_time) * 1000)
message = response.choices[0].message
# 提取 token 用量
usage = getattr(response, "usage", None)
prompt_tokens = usage.prompt_tokens if usage else 0
completion_tokens = usage.completion_tokens if usage else 0
total_tokens = usage.total_tokens if usage else 0
# 调用完成回调
if on_completion:
on_completion({
"model": self._config.model,
"provider": self._config.provider,
"prompt_tokens": prompt_tokens or 0,
"completion_tokens": completion_tokens or 0,
"total_tokens": total_tokens or 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "success",
})
return message
except Exception as e:
latency_ms = int((time.perf_counter() - start_time) * 1000)
if on_completion:
on_completion({
"model": self._config.model,
"provider": self._config.provider,
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
"latency_ms": latency_ms,
"iteration_number": iteration,
"status": "error",
"error_message": str(e),
})
raise