"""
Agent Runtime core: the autonomous ReAct loop.

Flow:

1. Receive user input → append it to the message list.
2. Call the LLM (with the tools schema attached).
3. If the LLM returns tool calls → execute the tools → append the results to the message list → go back to 2.
4. If the LLM returns plain text → return it as the final answer.
5. If max_iterations is exceeded → force termination.
"""

from __future__ import annotations

import json
import logging
import time
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, TypedDict

from app.agent_runtime.schemas import (
    AgentConfig,
    AgentResult,
    AgentStep,
)
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
from app.core.exceptions import WorkflowExecutionError

logger = logging.getLogger(__name__)


class LLMCallMetrics(TypedDict, total=False):
    """Metrics for a single LLM call."""

    agent_id: Optional[str]
    session_id: str
    user_id: Optional[str]
    model: str
    provider: Optional[str]
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    latency_ms: int
    iteration_number: int
    step_type: str  # think / final
    tool_name: Optional[str]
    status: str  # success / error
    error_message: Optional[str]
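
# Illustrative only: an example of the metrics dict passed to on_llm_call /
# on_completion (field names follow LLMCallMetrics; the values are made up,
# and not every optional field is present on every call):
#
#   {"session_id": "sess-42", "user_id": "u-1", "model": "deepseek-chat",
#    "provider": "deepseek", "prompt_tokens": 812, "completion_tokens": 96,
#    "total_tokens": 908, "latency_ms": 1430, "iteration_number": 2,
#    "step_type": "think", "tool_name": None, "status": "success"}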


# Retryable API errors (matched as lowercase substrings of the error message)
_RETRYABLE_ERRORS = (
    "timed out",
    "timeout",
    "connection error",
    "temporarily unavailable",
    "server disconnected",
    "rate limit",
    "too many requests",
    "internal server error",
    "service unavailable",
)


class AgentRuntime:
    """
    Autonomous agent runtime.

    Usage:
        runtime = AgentRuntime(config)
        result = await runtime.run("帮我写个Python脚本")
    """

    def __init__(
        self,
        config: Optional[AgentConfig] = None,
        context: Optional[AgentContext] = None,
        memory: Optional[AgentMemory] = None,
        tool_manager: Optional[AgentToolManager] = None,
        execution_logger: Optional[Any] = None,
        on_tool_executed: Optional[Callable[[str], Any]] = None,
        on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
    ):
        self.config = config or AgentConfig()
        self.context = context or AgentContext(
            system_prompt=self.config.system_prompt,
            user_id=self.config.user_id,
        )
        _mem_scope = self.config.memory_scope_id or self.config.user_id or self.config.name
        self.memory = memory or AgentMemory(
            scope_id=_mem_scope,
            max_history=self.config.memory.max_history_messages,
            persist=self.config.memory.persist_to_db,
        )
        self.tool_manager = tool_manager or AgentToolManager(
            include_tools=self.config.tools.include_tools,
            exclude_tools=self.config.tools.exclude_tools,
        )
        self.execution_logger = execution_logger
        self.on_tool_executed = on_tool_executed
        self.on_llm_call = on_llm_call
        self._memory_context_loaded = False
        self._llm_invocations = 0

        # Budget hook: injected by the WorkflowEngine so that LLM calls made inside
        # the agent count against the workflow-level budget. Returning normally means
        # the budget is still available; raising an exception means it is exceeded.
        self.on_llm_invocation: Optional[Callable[[], Any]] = None
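
    # Illustrative only: one way an external engine might wire the budget hook.
    # `engine.consume_llm_budget` is a hypothetical name, not part of this module:
    #
    #     runtime = AgentRuntime(config)
    #     runtime.on_llm_invocation = engine.consume_llm_budget  # raises when over budget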

    async def run(self, user_input: str) -> AgentResult:
        """
        Run a single agent conversation turn.

        Flow: load memory → append the user message → ReAct loop → save memory → return the result.
        """
        max_iter = max(1, self.config.llm.max_iterations)
        self.context.iteration = 0
        self.context.tool_calls_made = 0

        # 1. On the first run, load long-term memory into the system prompt
        if not self._memory_context_loaded:
            await self._inject_memory_context(user_input)
            self._memory_context_loaded = True

        # 2. Append the user message
        self.context.add_user_message(user_input)

        # 3. ReAct loop
        llm = _LLMClient(self.config.llm)
        tool_schemas = self.tool_manager.get_tool_schemas()
        has_tools = self.tool_manager.has_tools()
        steps: List[AgentStep] = []

        # Build the LLM-call callback (wraps on_llm_call and fills in session context)
        llm_callback_ctx = {"step_type": "think", "tool_name": None}

        def _llm_callback(metrics: Dict[str, Any]):
            if self.on_llm_call:
                metrics.update({
                    "session_id": self.context.session_id,
                    "user_id": self.config.user_id,
                    "step_type": llm_callback_ctx["step_type"],
                    "tool_name": llm_callback_ctx["tool_name"],
                })
                self.on_llm_call(metrics)

        while self.context.iteration < max_iter:
            self.context.iteration += 1

            # Trim overly long history
            messages = self.memory.trim_messages(self.context.messages)

            # Budget check: LLM invocation count (checked before calling the LLM to avoid wasting quota)
            budget = self.config.budget
            if self._llm_invocations >= budget.max_llm_invocations:
                err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
                logger.warning(err)
                steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
                await self.memory.save_context(user_input, err, self.context.messages)
                return AgentResult(success=False, content=err, truncated=True,
                                   iterations_used=self.context.iteration,
                                   tool_calls_made=self.context.tool_calls_made,
                                   steps=steps, error=err)

            # External LLM budget callback (injected by the WorkflowEngine; counts the agent's LLM calls against the workflow budget)
            if self.on_llm_invocation:
                try:
                    self.on_llm_invocation()
                except Exception as e:
                    err = f"LLM 调用超出工作流预算: {e}"
                    logger.warning(err)
                    steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
                    await self.memory.save_context(user_input, err, self.context.messages)
                    return AgentResult(success=False, content=err, truncated=True,
                                       iterations_used=self.context.iteration,
                                       tool_calls_made=self.context.tool_calls_made,
                                       steps=steps, error=str(e))

            # Call the LLM
            try:
                response = await llm.chat(
                    messages=messages,
                    tools=tool_schemas if has_tools else None,
                    iteration=self.context.iteration,
                    on_completion=_llm_callback,
                )
            except Exception as e:
                err_str = str(e)
                logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
                if self.context.iteration < max_iter and self._is_retryable(err_str):
                    steps.append(AgentStep(
                        iteration=self.context.iteration,
                        type="tool_result",
                        content=f"LLM 调用失败(可重试): {err_str}",
                    ))
                    continue
                return AgentResult(
                    success=False,
                    content=f"LLM 调用失败: {err_str}",
                    iterations_used=self.context.iteration,
                    tool_calls_made=self.context.tool_calls_made,
                    error=err_str,
                )

            # Count the LLM call (internal counter)
            self._llm_invocations += 1

            # Parse tool calls
            tool_calls = self._extract_tool_calls(response)
            content = self._extract_content(response)
            reasoning = getattr(response, "reasoning_content", None) or (
                response.get("reasoning_content") if isinstance(response, dict) else None
            )

            if not tool_calls:
                # The LLM returned plain text → finish
                self.context.add_assistant_message(content)
                final_text = content or "(模型未返回有效内容)"
                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="final",
                    content=final_text,
                    reasoning=reasoning,
                ))
                # Save memory
                await self.memory.save_context(user_input, final_text, self.context.messages)
                return AgentResult(
                    success=True,
                    content=final_text,
                    iterations_used=self.context.iteration,
                    tool_calls_made=self.context.tool_calls_made,
                    steps=steps,
                )

            # Tool calls present → record the assistant message (with tool_calls) first
            self.context.add_assistant_message(content or "", tool_calls, reasoning)

            # Record the thinking step (including the tool-call intent)
            tc_names = [tc["function"]["name"] for tc in tool_calls]
            tc_args_list = []
            for tc in tool_calls:
                try:
                    tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
                except (json.JSONDecodeError, TypeError):
                    tc_args_list.append({})

            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="think",
                content=content or f"调用工具: {', '.join(tc_names)}",
                reasoning=reasoning,
                tool_name=tc_names[0] if len(tc_names) == 1 else None,
                tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
            ))

            if self.execution_logger:
                self.execution_logger.info(
                    f"Agent 调用 {len(tool_calls)} 个工具",
                    data={"tool_calls": tc_names,
                          "iteration": self.context.iteration},
                )

            # Execute the tools one by one
            for tc in tool_calls:
                tfn = tc.get("function", {})
                tname = tfn.get("name", "unknown")
                tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")

                try:
                    targs = json.loads(tfn.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    targs = {}

                logger.info("Agent 执行工具 [%s]: %s", tname, targs)
                result = await self.tool_manager.execute(tname, targs)

                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="tool_result",
                    content=f"工具 {tname} 返回结果",
                    tool_name=tname,
                    tool_input=targs,
                    tool_result=result[:500] + "..." if len(result) > 500 else result,
                ))

                self.context.add_tool_result(tcid, tname, result)
                self.context.tool_calls_made += 1

                # Budget check: tool-call count
                if self.context.tool_calls_made > budget.max_tool_calls:
                    err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                    logger.warning(err)
                    steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
                                           content=err, tool_name=tname))
                    return AgentResult(success=False, content=err, truncated=True,
                                       iterations_used=self.context.iteration,
                                       tool_calls_made=self.context.tool_calls_made,
                                       steps=steps, error=err)

                if self.on_tool_executed:
                    try:
                        await self.on_tool_executed(tname)
                    except WorkflowExecutionError:
                        raise
                    except Exception:
                        pass

                if self.execution_logger:
                    preview = result[:300] + "..." if len(result) > 300 else result
                    self.execution_logger.info(
                        f"工具 {tname} 执行完成",
                        data={"tool_name": tname, "result_preview": preview},
                    )

        # Maximum number of iterations reached
        last_content = ""
        for m in reversed(self.context.messages):
            if m.get("role") == "assistant" and m.get("content"):
                last_content = m["content"]
                break

        logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
        await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
        if last_content:
            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="final",
                content=last_content,
            ))
        return AgentResult(
            success=True,
            content=last_content or "已达最大迭代次数,但模型未返回最终回答。",
            truncated=True,
            iterations_used=self.context.iteration,
            tool_calls_made=self.context.tool_calls_made,
            steps=steps,
        )

    async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
        """
        Run a single agent conversation turn as a stream.

        Same logic as run(), but yields an SSE event at each key step:
        - think: the LLM is thinking / about to call tools
        - tool_call: a tool is about to be executed
        - tool_result: a tool has finished executing
        - final: the final answer
        - error: a failure or budget overrun
        """
        max_iter = max(1, self.config.llm.max_iterations)
        self.context.iteration = 0
        self.context.tool_calls_made = 0

        # 1. On the first run, load long-term memory into the system prompt
        if not self._memory_context_loaded:
            await self._inject_memory_context(user_input)
            self._memory_context_loaded = True

        # 2. Append the user message
        self.context.add_user_message(user_input)

        # 3. ReAct loop
        llm = _LLMClient(self.config.llm)
        tool_schemas = self.tool_manager.get_tool_schemas()
        has_tools = self.tool_manager.has_tools()
        steps: List[AgentStep] = []

        llm_callback_ctx = {"step_type": "think", "tool_name": None}

        def _llm_callback(metrics: Dict[str, Any]):
            if self.on_llm_call:
                metrics.update({
                    "session_id": self.context.session_id,
                    "user_id": self.config.user_id,
                    "step_type": llm_callback_ctx["step_type"],
                    "tool_name": llm_callback_ctx["tool_name"],
                })
                self.on_llm_call(metrics)

        while self.context.iteration < max_iter:
            self.context.iteration += 1
            messages = self.memory.trim_messages(self.context.messages)

            # Budget check: LLM invocation count (checked before calling the LLM to avoid wasting quota)
            budget = self.config.budget
            if self._llm_invocations >= budget.max_llm_invocations:
                err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                await self.memory.save_context(user_input, err, self.context.messages)
                return

            # External LLM budget callback (injected by the WorkflowEngine)
            if self.on_llm_invocation:
                try:
                    self.on_llm_invocation()
                except Exception as e:
                    err = f"LLM 调用超出工作流预算: {e}"
                    logger.warning(err)
                    yield {"type": "error", "content": err, "iteration": self.context.iteration,
                           "truncated": True}
                    return

            # think event: tells the frontend the agent is thinking (gives the UI immediate feedback instead of appearing frozen)
            yield {"type": "think", "content": "", "reasoning": None, "iteration": self.context.iteration}

            # Call the LLM
            try:
                response = await llm.chat(
                    messages=messages,
                    tools=tool_schemas if has_tools else None,
                    iteration=self.context.iteration,
                    on_completion=_llm_callback,
                )
            except Exception as e:
                err_str = str(e)
                logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
                if self.context.iteration < max_iter and self._is_retryable(err_str):
                    yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}",
                           "iteration": self.context.iteration}
                    continue
                yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
                       "iteration": self.context.iteration}
                return

            # Count the LLM call (internal counter)
            self._llm_invocations += 1

            # Parse tool calls
            tool_calls = self._extract_tool_calls(response)
            content = self._extract_content(response)
            reasoning = getattr(response, "reasoning_content", None) or (
                response.get("reasoning_content") if isinstance(response, dict) else None
            )

            if not tool_calls:
                # The LLM returned plain text → finish
                self.context.add_assistant_message(content)
                final_text = content or "(模型未返回有效内容)"
                yield {
                    "type": "final",
                    "content": final_text,
                    "reasoning": reasoning,
                    "iteration": self.context.iteration,
                    "iterations_used": self.context.iteration,
                    "tool_calls_made": self.context.tool_calls_made,
                    "session_id": self.context.session_id,
                }
                await self.memory.save_context(user_input, final_text, self.context.messages)
                return

            # Tool calls present → record the assistant message first
            self.context.add_assistant_message(content or "", tool_calls, reasoning)

            # yield the think event
            tc_names = [tc["function"]["name"] for tc in tool_calls]
            tc_args_list = []
            for tc in tool_calls:
                try:
                    tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
                except (json.JSONDecodeError, TypeError):
                    tc_args_list.append({})

            yield {
                "type": "think",
                "content": content or f"调用工具: {', '.join(tc_names)}",
                "reasoning": reasoning,
                "tool_names": tc_names,
                "iteration": self.context.iteration,
            }

            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="think",
                content=content or f"调用工具: {', '.join(tc_names)}",
                reasoning=reasoning,
                tool_name=tc_names[0] if len(tc_names) == 1 else None,
                tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
            ))

            if self.execution_logger:
                self.execution_logger.info(
                    f"Agent 调用 {len(tool_calls)} 个工具",
                    data={"tool_calls": tc_names, "iteration": self.context.iteration},
                )

            # Execute the tools one by one
            for tc in tool_calls:
                tfn = tc.get("function", {})
                tname = tfn.get("name", "unknown")
                tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")

                try:
                    targs = json.loads(tfn.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    targs = {}

                # yield the tool_call event
                yield {
                    "type": "tool_call",
                    "name": tname,
                    "input": targs,
                    "iteration": self.context.iteration,
                }

                logger.info("Agent 执行工具 [%s]: %s", tname, targs)
                result = await self.tool_manager.execute(tname, targs)

                # yield the tool_result event
                yield {
                    "type": "tool_result",
                    "name": tname,
                    "result": result[:500] + "..." if len(result) > 500 else result,
                    "iteration": self.context.iteration,
                }

                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="tool_result",
                    content=f"工具 {tname} 返回结果",
                    tool_name=tname,
                    tool_input=targs,
                    tool_result=result[:500] + "..." if len(result) > 500 else result,
                ))

                self.context.add_tool_result(tcid, tname, result)
                self.context.tool_calls_made += 1

                # Budget check: tool-call count
                if self.context.tool_calls_made > budget.max_tool_calls:
                    err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                    logger.warning(err)
                    yield {"type": "error", "content": err, "iteration": self.context.iteration,
                           "truncated": True}
                    return

                if self.on_tool_executed:
                    try:
                        await self.on_tool_executed(tname)
                    except WorkflowExecutionError:
                        raise
                    except Exception:
                        pass

                if self.execution_logger:
                    preview = result[:300] + "..." if len(result) > 300 else result
                    self.execution_logger.info(
                        f"工具 {tname} 执行完成",
                        data={"tool_name": tname, "result_preview": preview},
                    )

        # Maximum number of iterations reached
        last_content = ""
        for m in reversed(self.context.messages):
            if m.get("role") == "assistant" and m.get("content"):
                last_content = m["content"]
                break

        logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
        await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
        yield {
            "type": "final",
            "content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
            "iteration": self.context.iteration,
            "iterations_used": self.context.iteration,
            "tool_calls_made": self.context.tool_calls_made,
            "truncated": True,
            "session_id": self.context.session_id,
        }

    async def _inject_memory_context(self, query: str = "") -> None:
        """Load long-term memory and inject it into the system prompt."""
        mem_text = await self.memory.initialize(query=query)
        if mem_text:
            enriched = (
                self.config.system_prompt.rstrip("\n")
                + "\n\n"
                + mem_text
            )
            self.context.set_system_prompt(enriched)
            logger.info("Agent 已注入长期记忆上下文")

    @staticmethod
    def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
        """Extract the list of tool calls from an LLM response."""
        if response is None:
            return []
        # OpenAI SDK format
        if hasattr(response, "tool_calls") and response.tool_calls:
            result = []
            for tc in response.tool_calls:
                result.append({
                    "id": tc.id,
                    "type": tc.type,
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                })
            return result
        # Dict format
        if isinstance(response, dict):
            tc_list = response.get("tool_calls") or []
            if tc_list:
                return tc_list
            # Check whether DSML tool invocations are embedded in the content
            content = response.get("content") or ""
            if "invoke" in content or "function_call" in content:
                from app.services.llm_service import _parse_dsml_tool_invocations
                dsml = _parse_dsml_tool_invocations(content)
                if dsml:
                    return [
                        {
                            "id": f"dsml-{i}",
                            "type": "function",
                            "function": {
                                "name": inv["name"],
                                "arguments": json.dumps(inv["arguments"], ensure_ascii=False),
                            },
                        }
                        for i, inv in enumerate(dsml)
                    ]
        return []

    @staticmethod
    def _extract_content(response: Any) -> str:
        """Extract the text content from an LLM response."""
        if response is None:
            return ""
        if hasattr(response, "content"):
            return response.content or ""
        if isinstance(response, dict):
            return response.get("content") or ""
        return str(response)

    @staticmethod
    def _is_retryable(err_str: str) -> bool:
        """Return True if the error message looks retryable."""
        err_lower = err_str.lower()
        return any(kw in err_lower for kw in _RETRYABLE_ERRORS)


class _LLMClient:
    """Thin LLM client wrapper that reuses the existing LLMService capabilities."""

    def __init__(self, config: Any):
        from app.services.llm_service import llm_service
        self._service = llm_service
        self._config = config

    async def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        iteration: int = 1,
        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
    ) -> Any:
        """
        Call the LLM.

        llm_service.call_openai_with_tools (which runs its own multi-step ReAct tool
        loop) would normally be preferred, but to avoid nesting that inner ReAct loop
        inside the outer one:
        - iteration 1: use a plain chat call (no inner ReAct); the outer AgentRuntime drives the loop
        - later iterations: also use plain chat calls, only appending tool results
        """
        # Call the OpenAI/DeepSeek SDK directly; AgentRuntime drives the loop
        from openai import AsyncOpenAI
        from app.core.config import settings

        # Prefer the agent config, then settings (loaded from .env), then os.environ
        api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
        base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""

        if not api_key or api_key == "your-openai-api-key":
            # Fall back to DeepSeek
            api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
            base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"

        if not api_key:
            raise ValueError("未配置 API Key")

        client = AsyncOpenAI(api_key=api_key, base_url=base_url)

        kwargs: Dict[str, Any] = {
            "model": self._config.model,
            "messages": messages,
            "temperature": self._config.temperature,
            "timeout": self._config.request_timeout,
        }
        if self._config.max_tokens:
            kwargs["max_tokens"] = self._config.max_tokens
        if self._config.extra_body:
            kwargs["extra_body"] = self._config.extra_body
        if tools:
            # Normalize tool schemas to OpenAI format: custom tools from the
            # marketplace may be stored as {"name":..., "parameters":...}
            # or {"function":{...}} without the required "type": "function".
            normalized = []
            for t in tools:
                if isinstance(t, dict):
                    if t.get("type") == "function":
                        # Already in correct format: {"type":"function","function":{...}}
                        normalized.append(t)
                    elif "function" in t:
                        # Has function key but missing type: {"function":{...}}
                        normalized.append({"type": "function", "function": t["function"]})
                    else:
                        # Raw schema: {"name":..., "parameters":...}
                        normalized.append({"type": "function", "function": t})
                else:
                    normalized.append(t)
            kwargs["tools"] = normalized
            kwargs["tool_choice"] = "auto"

        start_time = time.perf_counter()
        try:
            response = await client.chat.completions.create(**kwargs)
            latency_ms = int((time.perf_counter() - start_time) * 1000)
            message = response.choices[0].message

            # Extract token usage
            usage = getattr(response, "usage", None)
            prompt_tokens = usage.prompt_tokens if usage else 0
            completion_tokens = usage.completion_tokens if usage else 0
            total_tokens = usage.total_tokens if usage else 0

            # Fire the completion callback
            if on_completion:
                on_completion({
                    "model": self._config.model,
                    "provider": self._config.provider,
                    "prompt_tokens": prompt_tokens or 0,
                    "completion_tokens": completion_tokens or 0,
                    "total_tokens": total_tokens or 0,
                    "latency_ms": latency_ms,
                    "iteration_number": iteration,
                    "status": "success",
                })

            return message
        except Exception as e:
            latency_ms = int((time.perf_counter() - start_time) * 1000)
            if on_completion:
                on_completion({
                    "model": self._config.model,
                    "provider": self._config.provider,
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                    "latency_ms": latency_ms,
                    "iteration_number": iteration,
                    "status": "error",
                    "error_message": str(e),
                })
            raise