feat: Agent 运行时、对话 API、作业助手与引擎修复及前端执行超时

- agent_runtime 模块与 agent_chat API,前端 AgentChat 视图与路由对接
- workflow_engine: code 节点命名空间与 json 引用修复
- llm_service: 工具调用 extra_body(如 DeepSeek)
- create_homework_manager_agent / _3 脚本与测试脚本扩展
- frontend: WORKFLOW_EXECUTION_HTTP_TIMEOUT_MS、AgentChatPreview/MainLayout 等
- 文档:架构说明与自主 Agent 改造完成情况

Made-with: Cursor
This commit is contained in:
renjianbo
2026-05-01 11:31:48 +08:00
parent 4366312946
commit 09467568ec
23 changed files with 2798 additions and 77 deletions

View File

@@ -0,0 +1,330 @@
"""
Agent Runtime 核心 —— 自主 ReAct 循环。
流程:
1. 接收用户输入 → 追加到消息列表
2. 调用 LLM携带 tools schema
3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2
4. 如果 LLM 返回文本 → 作为最终回答返回
5. 超过 max_iterations → 强制终止
"""
from __future__ import annotations
import json
import logging
from typing import Any, Callable, Dict, List, Optional
from app.agent_runtime.schemas import (
AgentConfig,
AgentResult,
)
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
logger = logging.getLogger(__name__)
# 可重试的 API 异常
_RETRYABLE_ERRORS = (
"timed out",
"timeout",
"connection error",
"temporarily unavailable",
"server disconnected",
"rate limit",
"too many requests",
"internal server error",
"service unavailable",
)
class AgentRuntime:
"""
自主 Agent 运行时。
用法:
runtime = AgentRuntime(config)
result = await runtime.run("帮我写个Python脚本")
"""
def __init__(
self,
config: Optional[AgentConfig] = None,
context: Optional[AgentContext] = None,
memory: Optional[AgentMemory] = None,
tool_manager: Optional[AgentToolManager] = None,
execution_logger: Optional[Any] = None,
on_tool_executed: Optional[Callable[[str], Any]] = None,
):
self.config = config or AgentConfig()
self.context = context or AgentContext(
system_prompt=self.config.system_prompt,
user_id=self.config.user_id,
)
self.memory = memory or AgentMemory(
scope_id=self.config.user_id or self.config.name,
max_history=self.config.memory.max_history_messages,
persist=self.config.memory.persist_to_db,
)
self.tool_manager = tool_manager or AgentToolManager(
include_tools=self.config.tools.include_tools,
exclude_tools=self.config.tools.exclude_tools,
)
self.execution_logger = execution_logger
self.on_tool_executed = on_tool_executed
self._memory_context_loaded = False
async def run(self, user_input: str) -> AgentResult:
"""
执行 Agent 单轮对话。
流程:加载记忆 → 追加用户消息 → ReAct 循环 → 保存记忆 → 返回结果。
"""
max_iter = max(1, self.config.llm.max_iterations)
self.context.iteration = 0
self.context.tool_calls_made = 0
# 1. 首次运行时加载长期记忆到 system prompt
if not self._memory_context_loaded:
await self._inject_memory_context()
self._memory_context_loaded = True
# 2. 追加用户消息
self.context.add_user_message(user_input)
# 3. ReAct 循环
llm = _LLMClient(self.config.llm)
tool_schemas = self.tool_manager.get_tool_schemas()
has_tools = self.tool_manager.has_tools()
while self.context.iteration < max_iter:
self.context.iteration += 1
# 裁剪过长历史
messages = self.memory.trim_messages(self.context.messages)
# 调用 LLM
try:
response = await llm.chat(
messages=messages,
tools=tool_schemas if has_tools and self.context.iteration == 1 else
(tool_schemas if has_tools else None),
iteration=self.context.iteration,
)
except Exception as e:
err_str = str(e)
logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
if self.context.iteration < max_iter and self._is_retryable(err_str):
continue
return AgentResult(
success=False,
content=f"LLM 调用失败: {err_str}",
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
error=err_str,
)
# 解析工具调用
tool_calls = self._extract_tool_calls(response)
content = self._extract_content(response)
if not tool_calls:
# LLM 直接返回文本 → 结束
self.context.add_assistant_message(content)
final_text = content or "(模型未返回有效内容)"
# 保存记忆
await self.memory.save_context(user_input, final_text)
return AgentResult(
success=True,
content=final_text,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
)
# 有工具调用 → 先记录 assistant 消息(含 tool_calls + reasoning_content
reasoning = getattr(response, "reasoning_content", None) or (
response.get("reasoning_content") if isinstance(response, dict) else None
)
self.context.add_assistant_message(content or "", tool_calls, reasoning)
if self.execution_logger:
self.execution_logger.info(
f"Agent 调用 {len(tool_calls)} 个工具",
data={"tool_calls": [tc["function"]["name"] for tc in tool_calls],
"iteration": self.context.iteration},
)
# 逐一执行工具
for tc in tool_calls:
tfn = tc.get("function", {})
tname = tfn.get("name", "unknown")
tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
try:
targs = json.loads(tfn.get("arguments", "{}"))
except (json.JSONDecodeError, TypeError):
targs = {}
logger.info("Agent 执行工具 [%s]: %s", tname, targs)
result = await self.tool_manager.execute(tname, targs)
self.context.add_tool_result(tcid, tname, result)
self.context.tool_calls_made += 1
if self.on_tool_executed:
try:
await self.on_tool_executed(tname)
except Exception:
pass
if self.execution_logger:
preview = result[:300] + "..." if len(result) > 300 else result
self.execution_logger.info(
f"工具 {tname} 执行完成",
data={"tool_name": tname, "result_preview": preview},
)
# 达到最大迭代次数
last_content = ""
for m in reversed(self.context.messages):
if m.get("role") == "assistant" and m.get("content"):
last_content = m["content"]
break
logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)")
return AgentResult(
success=True,
content=last_content or "已达最大迭代次数,但模型未返回最终回答。",
truncated=True,
iterations_used=self.context.iteration,
tool_calls_made=self.context.tool_calls_made,
)
async def _inject_memory_context(self) -> None:
"""加载长期记忆并注入 system prompt。"""
mem_text = await self.memory.initialize()
if mem_text:
enriched = (
self.config.system_prompt.rstrip("\n")
+ "\n\n"
+ mem_text
)
self.context.set_system_prompt(enriched)
logger.info("Agent 已注入长期记忆上下文")
@staticmethod
def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
"""从 LLM 响应中提取工具调用列表。"""
if response is None:
return []
# OpenAI SDK 格式
if hasattr(response, "tool_calls") and response.tool_calls:
result = []
for tc in response.tool_calls:
result.append({
"id": tc.id,
"type": tc.type,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
})
return result
# 字典格式
if isinstance(response, dict):
tc_list = response.get("tool_calls") or []
if tc_list:
return tc_list
# 检查 content 中是否嵌入了 DSML
content = response.get("content") or ""
if "invoke" in content or "function_call" in content:
from app.services.llm_service import _parse_dsml_tool_invocations
dsml = _parse_dsml_tool_invocations(content)
if dsml:
return [
{
"id": f"dsml-{i}",
"type": "function",
"function": {
"name": inv["name"],
"arguments": json.dumps(inv["arguments"], ensure_ascii=False),
},
}
for i, inv in enumerate(dsml)
]
return []
@staticmethod
def _extract_content(response: Any) -> str:
"""从 LLM 响应中提取文本内容。"""
if response is None:
return ""
if hasattr(response, "content"):
return response.content or ""
if isinstance(response, dict):
return response.get("content") or ""
return str(response)
@staticmethod
def _is_retryable(err_str: str) -> bool:
"""判断错误是否可重试。"""
err_lower = err_str.lower()
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
class _LLMClient:
"""轻量 LLM 客户端包装,复用已有 LLMService 能力。"""
def __init__(self, config: Any):
from app.services.llm_service import llm_service
self._service = llm_service
self._config = config
async def chat(
self,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
iteration: int = 1,
) -> Any:
"""
调用 LLM。
优先使用 llm_service.call_openai_with_tools支持 ReAct 的多次工具调用)。
但为避免外层 ReAct 与内部 ReAct 冲突:
- 第 1 轮:使用标准 chat无内部 ReAct由外层 AgentRuntime 控制循环
- 后续轮次:也使用标准 chat仅追加工具结果
"""
# 直接用 OpenAI/DeepSeek SDK 调用,由 AgentRuntime 控制循环
from openai import AsyncOpenAI
from app.core.config import settings
# 优先从配置读取,其次从 settings.env 加载),最后 os.environ
api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
if not api_key or api_key == "your-openai-api-key":
# 尝试 DeepSeek
api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
if not api_key:
raise ValueError("未配置 API Key")
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
kwargs: Dict[str, Any] = {
"model": self._config.model,
"messages": messages,
"temperature": self._config.temperature,
"timeout": self._config.request_timeout,
}
if self._config.max_tokens:
kwargs["max_tokens"] = self._config.max_tokens
if self._config.extra_body:
kwargs["extra_body"] = self._config.extra_body
if tools:
kwargs["tools"] = tools
kwargs["tool_choice"] = "auto"
response = await client.chat.completions.create(**kwargs)
return response.choices[0].message