Files
aiagent/backend/app/agent_runtime/core.py
renjianbo 9054f42cda feat: Agent间知识共享 — 全局知识池(去重+置信度+过期)
- GlobalKnowledge 模型新增 confidence 和 expires_at 字段
- save_global_knowledge 增加 MD5 去重、置信度评分、TTL过期
- _global_knowledge_search 增加过期过滤、置信度优先排序
- run()/run_stream() 所有完成路径补齐知识提取调用
- 新增 Alembic 迁移 010_add_global_knowledge

Closes #9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-06 22:04:56 +08:00

1160 lines
50 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent Runtime 核心 —— 自主 ReAct 循环。
流程:
1. 接收用户输入 → 追加到消息列表
2. 调用 LLM(携带 tools schema)
3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2
4. 如果 LLM 返回文本 → 作为最终回答返回
5. 超过 max_iterations → 强制终止
"""
from __future__ import annotations
import hashlib
import json
import logging
import time
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict
from app.agent_runtime.schemas import (
AgentConfig,
AgentResult,
AgentStep,
)
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
from app.core.exceptions import WorkflowExecutionError
from app.services.agent_learning_service import (
extract_pattern_from_result,
format_pattern_hint,
load_relevant_patterns,
save_learning_pattern,
)
logger = logging.getLogger(__name__)
class LLMCallMetrics(TypedDict, total=False):
    """Metrics describing a single LLM call.

    Declared with ``total=False``, so every key is optional.
    """
    agent_id: Optional[str]
    session_id: str
    user_id: Optional[str]
    model: str
    provider: Optional[str]
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    latency_ms: int
    iteration_number: int
    step_type: str  # think / final
    tool_name: Optional[str]
    status: str  # success / error ("fallback" is also emitted by _LLMClient._do_chat)
    error_message: Optional[str]
# Lower-case substrings that mark an API error message as retryable.
# AgentRuntime._is_retryable lower-cases the error text and checks whether
# any of these markers occurs in it.
_RETRYABLE_ERRORS = (
    "timed out",
    "timeout",
    "connection error",
    "temporarily unavailable",
    "server disconnected",
    "rate limit",
    "too many requests",
    "internal server error",
    "service unavailable",
)
class AgentRuntime:
"""
自主 Agent 运行时。
用法:
runtime = AgentRuntime(config)
result = await runtime.run("帮我写个Python脚本")
"""
def __init__(
self,
config: Optional[AgentConfig] = None,
context: Optional[AgentContext] = None,
memory: Optional[AgentMemory] = None,
tool_manager: Optional[AgentToolManager] = None,
execution_logger: Optional[Any] = None,
on_tool_executed: Optional[Callable[[str], Any]] = None,
on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
):
self.config = config or AgentConfig()
self.context = context or AgentContext(
system_prompt=self.config.system_prompt,
user_id=self.config.user_id,
)
_mem_scope = self.config.memory_scope_id or self.config.user_id or self.config.name
self.memory = memory or AgentMemory(
scope_id=_mem_scope,
max_history=self.config.memory.max_history_messages,
persist=self.config.memory.persist_to_db,
)
self.tool_manager = tool_manager or AgentToolManager(
include_tools=self.config.tools.include_tools,
exclude_tools=self.config.tools.exclude_tools,
cache_enabled=self.config.tools.cache_enabled,
cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
cache_ttl_ms=self.config.tools.cache_ttl_ms,
)
self.execution_logger = execution_logger
self.on_tool_executed = on_tool_executed
self.on_llm_call = on_llm_call
self._memory_context_loaded = False
self._llm_invocations = 0
# 自主学习作用域bare 聊天用 "bare"Agent 用 "agent"
self._learning_scope_kind = "bare" if "bare" in str(_mem_scope) else "agent"
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
self.on_llm_invocation: Optional[Callable[[], Any]] = None
async def run(self, user_input: str) -> AgentResult:
    """
    Run one conversational turn of the agent.

    Flow: load long-term memory (first call only) -> append the user
    message -> ReAct loop (LLM call, optional tool execution) -> persist
    memory -> return the result.

    Args:
        user_input: The user's message for this turn.

    Returns:
        An ``AgentResult``. ``success`` is False when a budget is exceeded
        or the LLM call fails without being retryable; ``truncated`` is
        True when a budget or the iteration limit cut the turn short.

    Raises:
        WorkflowExecutionError: re-raised unchanged when the
            ``on_tool_executed`` callback raises it.
    """
    import inspect  # local import: only used to detect awaitable callback results

    max_iter = max(1, self.config.llm.max_iterations)
    self.context.iteration = 0
    self.context.tool_calls_made = 0

    # 1. Load long-term memory into the system prompt on the first run only.
    if not self._memory_context_loaded:
        await self._inject_memory_context(user_input)
        self._memory_context_loaded = True

    # 2. Append the user message.
    self.context.add_user_message(user_input)

    # 3. ReAct loop.
    llm = _LLMClient(self.config.llm)
    tool_schemas = self.tool_manager.get_tool_schemas()
    has_tools = self.tool_manager.has_tools()
    steps: List[AgentStep] = []
    _self_review_attempted = False  # guards against an endless correction loop

    # Wrap on_llm_call so every metrics record also carries session context.
    # NOTE(review): llm_callback_ctx is never updated in this method, so the
    # reported step_type is always "think" and tool_name is always None.
    llm_callback_ctx = {"step_type": "think", "tool_name": None}

    def _llm_callback(metrics: Dict[str, Any]):
        if self.on_llm_call:
            metrics.update({
                "session_id": self.context.session_id,
                "user_id": self.config.user_id,
                "step_type": llm_callback_ctx["step_type"],
                "tool_name": llm_callback_ctx["tool_name"],
            })
            self.on_llm_call(metrics)

    while self.context.iteration < max_iter:
        self.context.iteration += 1

        # Trim over-long history before sending it to the model.
        messages = self.memory.trim_messages(self.context.messages)

        # Budget check on the LLM call count, done before the call so no
        # quota is wasted.
        budget = self.config.budget
        if self._llm_invocations >= budget.max_llm_invocations:
            err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
            logger.warning(err)
            steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
            await self.memory.save_context(user_input, err, self.context.messages)
            return AgentResult(success=False, content=err, truncated=True,
                               iterations_used=self.context.iteration,
                               tool_calls_made=self.context.tool_calls_made,
                               steps=steps, error=err)

        # External budget hook (injected by the WorkflowEngine); an exception
        # from it means the workflow budget is exhausted.
        if self.on_llm_invocation:
            try:
                self.on_llm_invocation()
            except Exception as e:
                err = f"LLM 调用超出工作流预算: {e}"
                logger.warning(err)
                steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
                await self.memory.save_context(user_input, err, self.context.messages)
                return AgentResult(success=False, content=err, truncated=True,
                                   iterations_used=self.context.iteration,
                                   tool_calls_made=self.context.tool_calls_made,
                                   steps=steps, error=str(e))

        # Call the LLM. Tool schemas are sent on every iteration when tools exist.
        try:
            response = await llm.chat(
                messages=messages,
                tools=tool_schemas if has_tools else None,
                iteration=self.context.iteration,
                on_completion=_llm_callback,
            )
        except Exception as e:
            err_str = str(e)
            logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
            if self.context.iteration < max_iter and self._is_retryable(err_str):
                # A retry consumes one iteration of the loop.
                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="tool_result",
                    content=f"LLM 调用失败(可重试): {err_str}",
                ))
                continue
            # Include the steps gathered so far so the caller can see what
            # happened before the failure.
            return AgentResult(
                success=False,
                content=f"LLM 调用失败: {err_str}",
                iterations_used=self.context.iteration,
                tool_calls_made=self.context.tool_calls_made,
                steps=steps,
                error=err_str,
            )

        # Count the successful LLM call (internal counter).
        self._llm_invocations += 1

        # Parse the response.
        tool_calls = self._extract_tool_calls(response)
        content = self._extract_content(response)
        reasoning = getattr(response, "reasoning_content", None) or (
            response.get("reasoning_content") if isinstance(response, dict) else None
        )

        if not tool_calls:
            # Plain text answer -> this is the final answer.
            self.context.add_assistant_message(content)
            final_text = content or "(模型未返回有效内容)"
            review_score = 0.0

            # Optional quality self-review, attempted at most once per turn.
            if self.config.self_review_enabled and not _self_review_attempted:
                review = await self._self_review(final_text, task_context=user_input)
                # The review's failure fallback may not carry a threshold,
                # so never index it directly.
                threshold = review.get("threshold", 0.0)
                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="tool_result",
                    content=f"self_review: score={review['score']:.2f} passed={review['passed']}",
                    tool_name="self_review",
                    tool_input={"content": final_text[:200]},
                    tool_result=json.dumps(review, ensure_ascii=False),
                ))
                if review["passed"]:
                    review_score = review["score"]
                    logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], threshold)
                else:
                    logger.info("self_review 未通过 (%.2f < %.2f),追加修正", review["score"], threshold)
                    _self_review_attempted = True
                    # Separate the items so they do not run together in the prompt.
                    issues_text = ";".join(str(i) for i in (review.get("issues") or [])[:3])
                    suggestions_text = ";".join(str(s) for s in (review.get("suggestions") or [])[:3])
                    fix_prompt = (
                        f"你的上一个回答未通过质量检查(评分 {review['score']:.1f}/{threshold})。\n"
                        f"问题:{issues_text}\n"
                        f"改进建议:{suggestions_text}\n"
                        "请修正你的回答,确保满足上述建议。"
                    )
                    self.context.add_user_message(fix_prompt)
                    continue  # back into the loop so the model can revise

            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="final",
                content=final_text,
                reasoning=reasoning,
            ))

            # Persist memory.
            await self.memory.save_context(user_input, final_text, self.context.messages)

            # Persist the learning pattern.
            if self.config.memory.learning_enabled:
                await self._save_learning_pattern(
                    user_input, steps, success=True,
                    iterations_used=self.context.iteration,
                    tool_calls_made=self.context.tool_calls_made,
                )

            # Publish knowledge to the global pool shared between agents.
            await self._extract_global_knowledge(user_input, final_text, steps, review_score)

            return AgentResult(
                success=True,
                content=final_text,
                iterations_used=self.context.iteration,
                tool_calls_made=self.context.tool_calls_made,
                steps=steps,
            )

        # Tool calls present -> record the assistant message (with tool_calls) first.
        self.context.add_assistant_message(content or "", tool_calls, reasoning)

        # Record the "think" step describing the intended tool calls.
        tc_names = [tc["function"]["name"] for tc in tool_calls]
        tc_args_list = []
        for tc in tool_calls:
            try:
                tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
            except (json.JSONDecodeError, TypeError):
                tc_args_list.append({})
        steps.append(AgentStep(
            iteration=self.context.iteration,
            type="think",
            content=content or f"调用工具: {', '.join(tc_names)}",
            reasoning=reasoning,
            tool_name=tc_names[0] if len(tc_names) == 1 else None,
            tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
        ))
        if self.execution_logger:
            self.execution_logger.info(
                f"Agent 调用 {len(tool_calls)} 个工具",
                data={"tool_calls": tc_names,
                      "iteration": self.context.iteration},
            )

        # Execute the tools one by one.
        for tc in tool_calls:
            tfn = tc.get("function", {})
            tname = tfn.get("name", "unknown")
            tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
            try:
                targs = json.loads(tfn.get("arguments", "{}"))
            except (json.JSONDecodeError, TypeError):
                targs = {}

            # Approval gate before execution.
            if tname in self.config.tools.require_approval:
                from app.services.approval_manager import approval_manager as _am
                logger.info("Agent 工具需审批 [%s]: %s", tname, targs)
                approval_req = await _am.submit(
                    tool_name=tname, args=targs,
                    timeout_ms=self.config.tools.approval_timeout_ms,
                )
                decision = approval_req.decision
                if decision == "denied":
                    result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。"
                    self.context.add_tool_result(tcid, tname, result)
                    continue
                elif decision == "skip":
                    result = f"[审批跳过] 工具 {tname} 被跳过。"
                    self.context.add_tool_result(tcid, tname, result)
                    continue
                # Any other decision (e.g. "approved") falls through to execution.

            logger.info("Agent 执行工具 [%s]: %s", tname, targs)
            result = await self.tool_manager.execute(tname, targs)
            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="tool_result",
                content=f"工具 {tname} 返回结果",
                tool_name=tname,
                tool_input=targs,
                tool_result=result[:500] + "..." if len(result) > 500 else result,
            ))
            self.context.add_tool_result(tcid, tname, result)
            self.context.tool_calls_made += 1

            # Budget check on the tool call count.
            if self.context.tool_calls_made > budget.max_tool_calls:
                err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                logger.warning(err)
                steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
                                       content=err, tool_name=tname))
                return AgentResult(success=False, content=err, truncated=True,
                                   iterations_used=self.context.iteration,
                                   tool_calls_made=self.context.tool_calls_made,
                                   steps=steps, error=err)

            if self.on_tool_executed:
                try:
                    # The callback may be sync or async; only await when needed.
                    outcome = self.on_tool_executed(tname)
                    if inspect.isawaitable(outcome):
                        await outcome
                except WorkflowExecutionError:
                    raise
                except Exception as cb_err:
                    # Best-effort hook: a failing callback must not break the turn.
                    logger.debug("on_tool_executed callback failed (ignored): %s", cb_err)

            if self.execution_logger:
                preview = result[:300] + "..." if len(result) > 300 else result
                self.execution_logger.info(
                    f"工具 {tname} 执行完成",
                    data={"tool_name": tname, "result_preview": preview},
                )

    # Iteration limit reached: fall back to the latest non-empty assistant text.
    last_content = ""
    for m in reversed(self.context.messages):
        if m.get("role") == "assistant" and m.get("content"):
            last_content = m["content"]
            break
    logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
    await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)

    # Still save the learning pattern; the tool-call pattern is useful even
    # when the turn was truncated.
    if self.config.memory.learning_enabled:
        await self._save_learning_pattern(
            user_input, steps, success=True,
            iterations_used=self.context.iteration,
            tool_calls_made=self.context.tool_calls_made,
        )

    # Publish knowledge to the global pool when there is any answer text.
    if last_content:
        await self._extract_global_knowledge(user_input, last_content, steps)
        steps.append(AgentStep(
            iteration=self.context.iteration,
            type="final",
            content=last_content,
        ))
    return AgentResult(
        success=True,
        content=last_content or "已达最大迭代次数,但模型未返回最终回答。",
        truncated=True,
        iterations_used=self.context.iteration,
        tool_calls_made=self.context.tool_calls_made,
        steps=steps,
    )
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
    """
    Run one conversational turn of the agent as a stream of events.

    Follows the same logic as run(), but yields an event dict at each key
    step. Event ``type`` values emitted here:

    - think: the LLM is thinking / about to call tools
    - tool_call: a tool is about to be executed
    - approval_required: a tool is waiting for a human decision
    - tool_result: a tool (or the self-review) has finished
    - final: the final answer
    - error: a failure or an exceeded budget

    Args:
        user_input: The user's message for this turn.

    Yields:
        Event dicts as described above.

    Raises:
        WorkflowExecutionError: re-raised unchanged when the
            ``on_tool_executed`` callback raises it.
    """
    import inspect  # local import: only used to detect awaitable callback results

    max_iter = max(1, self.config.llm.max_iterations)
    self.context.iteration = 0
    self.context.tool_calls_made = 0

    # 1. Load long-term memory into the system prompt on the first run only.
    if not self._memory_context_loaded:
        await self._inject_memory_context(user_input)
        self._memory_context_loaded = True

    # 2. Append the user message.
    self.context.add_user_message(user_input)

    # 3. ReAct loop.
    llm = _LLMClient(self.config.llm)
    tool_schemas = self.tool_manager.get_tool_schemas()
    has_tools = self.tool_manager.has_tools()
    steps: List[AgentStep] = []
    _self_review_attempted = False  # guards against an endless correction loop

    # Wrap on_llm_call so every metrics record also carries session context.
    llm_callback_ctx = {"step_type": "think", "tool_name": None}

    def _llm_callback(metrics: Dict[str, Any]):
        if self.on_llm_call:
            metrics.update({
                "session_id": self.context.session_id,
                "user_id": self.config.user_id,
                "step_type": llm_callback_ctx["step_type"],
                "tool_name": llm_callback_ctx["tool_name"],
            })
            self.on_llm_call(metrics)

    while self.context.iteration < max_iter:
        self.context.iteration += 1
        messages = self.memory.trim_messages(self.context.messages)

        # Budget check on the LLM call count, done before the call so no
        # quota is wasted.
        budget = self.config.budget
        if self._llm_invocations >= budget.max_llm_invocations:
            err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
            logger.warning(err)
            yield {"type": "error", "content": err, "iteration": self.context.iteration,
                   "truncated": True}
            await self.memory.save_context(user_input, err, self.context.messages)
            return

        # External budget hook (injected by the WorkflowEngine).
        if self.on_llm_invocation:
            try:
                self.on_llm_invocation()
            except Exception as e:
                err = f"LLM 调用超出工作流预算: {e}"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                # Persist the turn, matching what run() does on this path.
                await self.memory.save_context(user_input, err, self.context.messages)
                return

        # Tell the front end the agent is thinking, so the UI reacts at once.
        yield {"type": "think", "content": "", "reasoning": None, "iteration": self.context.iteration}

        # Call the LLM. Tool schemas are sent on every iteration when tools exist.
        try:
            response = await llm.chat(
                messages=messages,
                tools=tool_schemas if has_tools else None,
                iteration=self.context.iteration,
                on_completion=_llm_callback,
            )
        except Exception as e:
            err_str = str(e)
            logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
            if self.context.iteration < max_iter and self._is_retryable(err_str):
                yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}",
                       "iteration": self.context.iteration}
                continue
            yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
                   "iteration": self.context.iteration}
            return

        # Count the successful LLM call (internal counter).
        self._llm_invocations += 1

        # Parse the response.
        tool_calls = self._extract_tool_calls(response)
        content = self._extract_content(response)
        reasoning = getattr(response, "reasoning_content", None) or (
            response.get("reasoning_content") if isinstance(response, dict) else None
        )

        if not tool_calls:
            # Plain text answer -> this is the final answer.
            self.context.add_assistant_message(content)
            final_text = content or "(模型未返回有效内容)"
            review_score = 0.0

            # Optional quality self-review, attempted at most once per turn.
            if self.config.self_review_enabled and not _self_review_attempted:
                review = await self._self_review(final_text, task_context=user_input)
                # The review's failure fallback may not carry a threshold,
                # so never index it directly.
                threshold = review.get("threshold", 0.0)
                yield {
                    "type": "tool_result",
                    "content": f"self_review: score={review['score']:.2f} passed={review['passed']}",
                    "tool_name": "self_review",
                    "iteration": self.context.iteration,
                    "session_id": self.context.session_id,
                }
                if review["passed"]:
                    review_score = review["score"]
                    logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], threshold)
                else:
                    logger.info("self_review 未通过 (%.2f < %.2f),追加修正", review["score"], threshold)
                    _self_review_attempted = True
                    # Separate the items so they do not run together.
                    issues = [str(i) for i in (review.get("issues") or [])]
                    suggestions = [str(s) for s in (review.get("suggestions") or [])]
                    yield {
                        "type": "think",
                        "content": f"自检未通过({review['score']:.1f}),正在修正:{';'.join(suggestions[:2])}",
                        "iteration": self.context.iteration,
                        "session_id": self.context.session_id,
                    }
                    fix_prompt = (
                        f"你的上一个回答未通过质量检查(评分 {review['score']:.1f}/{threshold})。\n"
                        f"问题:{';'.join(issues[:3])}\n"
                        f"改进建议:{';'.join(suggestions[:3])}\n"
                        "请修正你的回答,确保满足上述建议。"
                    )
                    self.context.add_user_message(fix_prompt)
                    continue  # back into the loop so the model can revise

            yield {
                "type": "final",
                "content": final_text,
                "reasoning": reasoning,
                "iteration": self.context.iteration,
                "iterations_used": self.context.iteration,
                "tool_calls_made": self.context.tool_calls_made,
                "session_id": self.context.session_id,
            }
            await self.memory.save_context(user_input, final_text, self.context.messages)

            # Persist the learning pattern.
            if self.config.memory.learning_enabled:
                await self._save_learning_pattern(
                    user_input, steps, success=True,
                    iterations_used=self.context.iteration,
                    tool_calls_made=self.context.tool_calls_made,
                )

            # Publish knowledge to the global pool shared between agents.
            await self._extract_global_knowledge(user_input, final_text, steps, review_score)
            return

        # Tool calls present -> record the assistant message first.
        self.context.add_assistant_message(content or "", tool_calls, reasoning)

        # Emit the think event describing the intended tool calls.
        tc_names = [tc["function"]["name"] for tc in tool_calls]
        tc_args_list = []
        for tc in tool_calls:
            try:
                tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
            except (json.JSONDecodeError, TypeError):
                tc_args_list.append({})
        yield {
            "type": "think",
            "content": content or f"调用工具: {', '.join(tc_names)}",
            "reasoning": reasoning,
            "tool_names": tc_names,
            "iteration": self.context.iteration,
        }
        steps.append(AgentStep(
            iteration=self.context.iteration,
            type="think",
            content=content or f"调用工具: {', '.join(tc_names)}",
            reasoning=reasoning,
            tool_name=tc_names[0] if len(tc_names) == 1 else None,
            tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
        ))
        if self.execution_logger:
            self.execution_logger.info(
                f"Agent 调用 {len(tool_calls)} 个工具",
                data={"tool_calls": tc_names, "iteration": self.context.iteration},
            )

        # Execute the tools one by one.
        for tc in tool_calls:
            tfn = tc.get("function", {})
            tname = tfn.get("name", "unknown")
            tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
            try:
                targs = json.loads(tfn.get("arguments", "{}"))
            except (json.JSONDecodeError, TypeError):
                targs = {}

            # Announce the tool call.
            yield {
                "type": "tool_call",
                "name": tname,
                "input": targs,
                "iteration": self.context.iteration,
            }

            # Approval gate (streaming flavour): create the request, yield an
            # event carrying its id, then wait for the decision.
            if tname in self.config.tools.require_approval:
                from app.services.approval_manager import approval_manager as _am
                logger.info("Agent 工具需审批 [%s]: %s", tname, targs)
                approval_req = _am.create(tool_name=tname, args=targs)
                yield {
                    "type": "approval_required",
                    "approval_id": approval_req.approval_id,
                    "tool_name": tname,
                    "args": targs,
                    "iteration": self.context.iteration,
                }
                decision = await _am.wait_for_decision(
                    approval_req.approval_id,
                    timeout_ms=self.config.tools.approval_timeout_ms,
                )
                if decision == "denied":
                    result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。"
                    yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
                    self.context.add_tool_result(tcid, tname, result)
                    continue
                elif decision == "skip":
                    result = f"[审批跳过] 工具 {tname} 被跳过。"
                    yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
                    self.context.add_tool_result(tcid, tname, result)
                    continue
                # Any other decision (e.g. "approved") falls through to execution.

            logger.info("Agent 执行工具 [%s]: %s", tname, targs)
            result = await self.tool_manager.execute(tname, targs)

            # Report the (possibly truncated) tool result.
            yield {
                "type": "tool_result",
                "name": tname,
                "result": result[:500] + "..." if len(result) > 500 else result,
                "iteration": self.context.iteration,
            }
            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="tool_result",
                content=f"工具 {tname} 返回结果",
                tool_name=tname,
                tool_input=targs,
                tool_result=result[:500] + "..." if len(result) > 500 else result,
            ))
            self.context.add_tool_result(tcid, tname, result)
            self.context.tool_calls_made += 1

            # Budget check on the tool call count.
            if self.context.tool_calls_made > budget.max_tool_calls:
                err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                return

            if self.on_tool_executed:
                try:
                    # The callback may be sync or async; only await when needed.
                    outcome = self.on_tool_executed(tname)
                    if inspect.isawaitable(outcome):
                        await outcome
                except WorkflowExecutionError:
                    raise
                except Exception as cb_err:
                    # Best-effort hook: a failing callback must not break the turn.
                    logger.debug("on_tool_executed callback failed (ignored): %s", cb_err)

            if self.execution_logger:
                preview = result[:300] + "..." if len(result) > 300 else result
                self.execution_logger.info(
                    f"工具 {tname} 执行完成",
                    data={"tool_name": tname, "result_preview": preview},
                )

    # Iteration limit reached: fall back to the latest non-empty assistant text.
    last_content = ""
    for m in reversed(self.context.messages):
        if m.get("role") == "assistant" and m.get("content"):
            last_content = m["content"]
            break
    logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
    await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)

    # Still save the learning pattern; the tool-call pattern is useful even
    # when the turn was truncated.
    if self.config.memory.learning_enabled:
        await self._save_learning_pattern(
            user_input, steps, success=True,
            iterations_used=self.context.iteration,
            tool_calls_made=self.context.tool_calls_made,
        )

    # Publish knowledge to the global pool when there is any answer text.
    if last_content:
        await self._extract_global_knowledge(user_input, last_content, steps)
    yield {
        "type": "final",
        "content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
        "iteration": self.context.iteration,
        "iterations_used": self.context.iteration,
        "tool_calls_made": self.context.tool_calls_made,
        "truncated": True,
        "session_id": self.context.session_id,
    }
async def _inject_memory_context(self, query: str = "") -> None:
"""加载长期记忆并注入 system prompt。"""
mem_text = await self.memory.initialize(query=query)
enriched = self.config.system_prompt.rstrip("\n")
if mem_text:
enriched += "\n\n" + mem_text
# 注入学习模式提示(历史工具使用建议)
if self.config.memory.learning_enabled:
pattern_hint = await self._inject_learning_patterns(query)
if pattern_hint:
enriched += "\n\n" + pattern_hint
self.context.set_system_prompt(enriched)
logger.info("Agent 已注入长期记忆上下文")
async def _inject_learning_patterns(self, query: str) -> str:
    """Look up learned patterns for *query* and return them as hint text.

    Returns an empty string when loading or formatting fails; the failure
    is logged as a warning.
    """
    from app.core.database import SessionLocal

    session = None
    hint = ""
    try:
        session = SessionLocal()
        matched = load_relevant_patterns(
            session, self._learning_scope_kind, self.memory.scope_id, query
        )
        hint = format_pattern_hint(matched, query)
    except Exception as exc:
        logger.warning("加载学习模式失败: %s", exc)
        hint = ""
    finally:
        # Close the session whether or not the lookup succeeded.
        if session:
            session.close()
    return hint
async def _save_learning_pattern(
    self, query: str, steps: List[AgentStep],
    success: bool, iterations_used: int, tool_calls_made: int,
) -> None:
    """Derive a learning pattern from this run and store it.

    Best-effort: any failure is logged as a warning and swallowed.
    """
    from app.core.database import SessionLocal

    session = None
    try:
        session = SessionLocal()
        extracted = extract_pattern_from_result(
            query=query,
            steps=steps,
            success=success,
            iterations_used=iterations_used,
            tool_calls_made=tool_calls_made,
        )
        scope_kind = self._learning_scope_kind
        scope_id = self.memory.scope_id
        save_learning_pattern(session, scope_kind, scope_id, extracted)
    except Exception as exc:
        logger.warning("保存学习模式失败: %s", exc)
    finally:
        # Close the session whether or not saving succeeded.
        if session:
            session.close()
async def _extract_global_knowledge(
self, user_input: str, final_answer: str, steps: List[AgentStep],
self_review_score: float = 0.0,
) -> None:
"""从 Agent 执行结果中提取知识写入全局知识池Agent 间共享)。"""
# 提取工具调用名称作为 tags
tool_names = list(dict.fromkeys(
s.tool_name for s in (steps or [])
if s.tool_name and s.type == "tool_result"
))
tags = tool_names[:5] if tool_names else ["对话"]
# 提取关键信息:用户问题摘要 + 回答要点(前 500 字)
content = (
f"问题: {user_input[:300]}\n"
f"回答要点: {final_answer[:500]}"
)
if tool_names:
content += f"\n使用工具: {', '.join(tool_names[:5])}"
source_agent_id = self.config.name if self.config.name != "default_agent" else ""
source_user_id = self.config.user_id or ""
# 置信度评估:基于 self_review 评分和工具执行成功数
confidence = "medium"
if self_review_score >= 0.8:
confidence = "high"
elif self_review_score > 0 and self_review_score < 0.5:
confidence = "low"
elif tool_names and len(tool_names) >= 2:
confidence = "high" # 多工具协作通常质量更高
# TTL: 高置信度知识有效期更长
ttl_hours = 720 if confidence == "high" else 168 if confidence == "medium" else 24
await self.memory.save_global_knowledge(
content=content,
source_agent_id=source_agent_id,
source_user_id=source_user_id,
tags=tags,
confidence=confidence,
ttl_hours=ttl_hours,
)
async def _self_review(self, content: str, task_context: str = "") -> dict:
"""输出质量自检:用轻量 LLM 评判输出,返回 {score, passed, issues, suggestions}。"""
criteria = (
"回答必须准确、完整、切题。"
"包含具体可执行的步骤或代码示例。"
"无明显事实错误或遗漏。"
"格式清晰,便于阅读。"
)
try:
from app.agent_runtime.core import _LLMClient
from app.agent_runtime.schemas import AgentLLMConfig
review_config = AgentLLMConfig(
provider=getattr(self.config.llm, 'provider', 'deepseek'),
model="deepseek-v4-flash",
temperature=0.1,
max_tokens=800,
request_timeout=30.0,
)
if self.config.llm.api_key:
review_config.api_key = self.config.llm.api_key
if self.config.llm.base_url:
review_config.base_url = self.config.llm.base_url
client = _LLMClient(review_config)
judge_prompt = (
"你是严格的内容质量评审专家。请根据以下标准对内容进行评分。\n\n"
f"【评判标准】\n{criteria}\n\n"
f"【待评审内容】\n{content[:8000]}\n"
)
if task_context:
judge_prompt += f"\n【任务背景】\n{task_context[:2000]}\n"
judge_prompt += (
"\n请以 JSON 格式返回评审结果(严格只返回 JSON不要任何其他文字\n"
'{"score": 0.75, "passed": true, "issues": ["问题1"], '
'"suggestions": ["建议1"], "summary": "一句话总结"}\n\n'
"评分规则1.0完美 0.8良好 0.6基本满足 0.4大部分未满足 0.2完全不满足\n"
"score >= 0.6 时 passed=true否则 passed=false\n"
)
messages = [{"role": "user", "content": judge_prompt}]
resp = await client.chat(messages=messages, tools=None, iteration=0)
judge_text = getattr(resp, 'content', '') or (
resp.get('content', '') if isinstance(resp, dict) else str(resp)
)
# 解析 JSON
try:
judge_clean = judge_text.strip()
if judge_clean.startswith("```"):
lines = judge_clean.split("\n")
judge_clean = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
result = json.loads(judge_clean)
except json.JSONDecodeError:
import re as _sr_re
m = _sr_re.search(r'\{[^{}]*"score"\s*:\s*[\d.]+[^{}]*\}', judge_text, _sr_re.DOTALL)
if m:
try:
result = json.loads(m.group())
except json.JSONDecodeError:
result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""}
else:
result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""}
score = float(result.get("score", 0.5))
threshold = self.config.llm.self_review_threshold
passed = score >= threshold
return {
"score": score,
"passed": passed,
"threshold": threshold,
"issues": result.get("issues", []),
"suggestions": result.get("suggestions", []),
"summary": result.get("summary", ""),
}
except Exception as e:
logger.warning("self_review 执行失败: %s", e)
return {"score": 0.5, "passed": True, "issues": [], "suggestions": [], "error": str(e)}
@staticmethod
def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
"""从 LLM 响应中提取工具调用列表。"""
if response is None:
return []
# OpenAI SDK 格式
if hasattr(response, "tool_calls") and response.tool_calls:
result = []
for tc in response.tool_calls:
result.append({
"id": tc.id,
"type": tc.type,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
})
return result
# 字典格式
if isinstance(response, dict):
tc_list = response.get("tool_calls") or []
if tc_list:
return tc_list
# 检查 content 中是否嵌入了 DSML
content = response.get("content") or ""
if "invoke" in content or "function_call" in content:
from app.services.llm_service import _parse_dsml_tool_invocations
dsml = _parse_dsml_tool_invocations(content)
if dsml:
return [
{
"id": f"dsml-{i}",
"type": "function",
"function": {
"name": inv["name"],
"arguments": json.dumps(inv["arguments"], ensure_ascii=False),
},
}
for i, inv in enumerate(dsml)
]
return []
@staticmethod
def _extract_content(response: Any) -> str:
"""从 LLM 响应中提取文本内容。"""
if response is None:
return ""
if hasattr(response, "content"):
return response.content or ""
if isinstance(response, dict):
return response.get("content") or ""
return str(response)
@staticmethod
def _is_retryable(err_str: str) -> bool:
"""判断错误是否可重试。"""
err_lower = err_str.lower()
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
# LLM 缓存辅助
def _llm_cache_key(messages: list, model: str) -> str:
import hashlib
raw = json.dumps({"msgs": messages[-4:], "model": model}, sort_keys=True, ensure_ascii=False)
return f"llm:{model}:{hashlib.sha256(raw.encode()).hexdigest()[:16]}"
async def _llm_cache_get(key: str) -> Optional[str]:
try:
from app.core.redis_client import get_redis_client
redis = get_redis_client()
if redis:
return await redis.get(key)
except Exception:
pass
return None
async def _llm_cache_set(key: str, value: str, ttl_ms: int):
try:
from app.core.redis_client import get_redis_client
redis = get_redis_client()
if redis:
await redis.setex(key, max(1, int(ttl_ms / 1000)), value)
except Exception:
pass
class _LLMClient:
    """Thin LLM client used by the agent runtime.

    Resolves credentials, sends chat-completion requests through the
    OpenAI-compatible SDK, optionally caches tool-less responses, and
    retries once on a configured fallback model when the primary call
    fails.
    """

    def __init__(self, config: Any):
        """Keep the LLM config and a handle to the shared ``llm_service``.

        Args:
            config: LLM configuration object; this class reads ``api_key``,
                ``base_url``, ``model``, ``temperature``, ``request_timeout``,
                ``max_tokens``, ``extra_body``, ``cache_enabled``,
                ``cache_ttl_ms``, ``fallback_llm`` and ``provider`` from it.
        """
        from app.services.llm_service import llm_service

        self._service = llm_service
        self._config = config

    async def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        iteration: int = 1,
        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
    ) -> Any:
        """Call the LLM; a failing primary model falls back to ``fallback_llm``.

        Args:
            messages: Chat messages to send.
            tools: Optional tool schemas.
            iteration: Iteration number, reported in the metrics.
            on_completion: Optional callback that receives a metrics dict.

        Returns:
            The response message object (or a cached stand-in exposing
            ``content`` and ``tool_calls``).

        Raises:
            ValueError: when no API key can be resolved.
        """
        from app.core.config import settings

        # Credentials: the config first, then the OpenAI settings; when that
        # yields nothing (or the placeholder key), try the DeepSeek settings.
        api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
        base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
        if not api_key or api_key == "your-openai-api-key":
            api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
            base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
        if not api_key:
            raise ValueError("未配置 API Key")
        return await self._do_chat(
            api_key=api_key, base_url=base_url, model=self._config.model,
            messages=messages, tools=tools, iteration=iteration,
            on_completion=on_completion,
        )

    async def _do_chat(
        self,
        api_key: str,
        base_url: str,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        iteration: int = 1,
        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
        _is_fallback: bool = False,
    ) -> Any:
        """Send one chat-completion request, with cache and fallback handling.

        Args:
            api_key: API key for this request.
            base_url: Base URL for this request.
            model: Model name for this request.
            messages: Chat messages to send.
            tools: Optional tool schemas; normalised to the OpenAI format.
            iteration: Iteration number, reported in the metrics.
            on_completion: Optional callback that receives a metrics dict
                with status "success", "fallback" or "error".
            _is_fallback: True when this call is already the fallback
                attempt, which disables caching and further fallback.

        Returns:
            The first choice's message, or a cached stand-in.

        Raises:
            RuntimeError: when the API returns no choices.
            Exception: whatever the SDK raised when the call fails and no
                fallback applies.
        """
        from openai import AsyncOpenAI

        client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        kwargs: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": self._config.temperature,
            "timeout": self._config.request_timeout,
        }
        if self._config.max_tokens:
            kwargs["max_tokens"] = self._config.max_tokens
        if self._config.extra_body:
            kwargs["extra_body"] = self._config.extra_body
        if tools:
            # Normalize tool schemas to OpenAI format: custom tools from the
            # marketplace may be stored as {"name":..., "parameters":...}
            # or {"function":{...}} without the required "type": "function".
            normalized = []
            for t in tools:
                if isinstance(t, dict):
                    if t.get("type") == "function":
                        # Already in correct format: {"type":"function","function":{...}}
                        normalized.append(t)
                    elif "function" in t:
                        # Has function key but missing type: {"function":{...}}
                        normalized.append({"type": "function", "function": t["function"]})
                    else:
                        # Raw schema: {"name":..., "parameters":...}
                        normalized.append({"type": "function", "function": t})
                else:
                    normalized.append(t)
            kwargs["tools"] = normalized
            kwargs["tool_choice"] = "auto"

        # Response cache: only for tool-less, non-fallback calls, which
        # avoids having to serialise tool calls.
        use_cache = bool(self._config.cache_enabled) and not tools and not _is_fallback
        cache_key = _llm_cache_key(messages, model) if use_cache else None
        if use_cache:
            cached = await _llm_cache_get(cache_key)
            if cached is not None:
                logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))

                class _CachedMsg:
                    content = cached
                    tool_calls = None

                return _CachedMsg()

        def _report(payload: Dict[str, Any]) -> None:
            # Metrics are best-effort: a failing callback must not mask the
            # LLM error being handled.
            if not on_completion:
                return
            try:
                on_completion(payload)
            except Exception as cb_err:
                logger.debug("on_completion callback failed (ignored): %s", cb_err)

        start_time = time.perf_counter()
        try:
            response = await client.chat.completions.create(**kwargs)
        except Exception as e:
            latency_ms = int((time.perf_counter() - start_time) * 1000)
            # Degrade to fallback_llm when the primary model fails.
            fallback = self._config.fallback_llm
            if fallback and isinstance(fallback, dict) and not _is_fallback:
                fb_model = fallback.get("model")
                fb_api_key = fallback.get("api_key")
                fb_base_url = fallback.get("base_url")
                if fb_model and (fb_api_key or fb_base_url):
                    logger.warning(
                        "主模型 %s 调用失败,降级到 %s: %s",
                        model, fb_model, str(e)[:200],
                    )
                    # Report the primary failure before retrying.
                    _report({
                        "model": model, "provider": self._config.provider,
                        "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
                        "latency_ms": latency_ms, "iteration_number": iteration,
                        "status": "fallback", "error_message": str(e),
                    })
                    return await self._do_chat(
                        api_key=fb_api_key or api_key,
                        base_url=fb_base_url or base_url,
                        model=fb_model,
                        messages=messages, tools=tools,
                        iteration=iteration, on_completion=on_completion,
                        _is_fallback=True,
                    )
            # No fallback applies: report the failure so error metrics are
            # recorded, then re-raise for the caller.
            _report({
                "model": model, "provider": self._config.provider,
                "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
                "latency_ms": latency_ms, "iteration_number": iteration,
                "status": "error", "error_message": str(e),
            })
            raise

        latency_ms = int((time.perf_counter() - start_time) * 1000)
        choices = getattr(response, "choices", None)
        if not choices:
            # Fail with a clear message instead of a bare IndexError.
            raise RuntimeError("LLM response contained no choices")
        message = choices[0].message

        # Cache write, under the same conditions as the read above;
        # fallback responses are skipped because they are never read back.
        if use_cache and message.content:
            await _llm_cache_set(cache_key, message.content, self._config.cache_ttl_ms)

        # Token usage.
        usage = getattr(response, "usage", None)
        prompt_tokens = usage.prompt_tokens if usage else 0
        completion_tokens = usage.completion_tokens if usage else 0
        total_tokens = usage.total_tokens if usage else 0

        # Completion callback.
        if on_completion:
            on_completion({
                "model": model,
                "provider": self._config.provider,
                "prompt_tokens": prompt_tokens or 0,
                "completion_tokens": completion_tokens or 0,
                "total_tokens": total_tokens or 0,
                "latency_ms": latency_ms,
                "iteration_number": iteration,
                "status": "success",
            })
        return message