- Fix 8 Feishu agent handlers to use permission_level="acceptEdits" so file_write tool works without Web UI approval popup (lingxi/renshenguo/suyao/tiantian/orange/main/schedule) - Add P5-P7 memory improvements: offline keyword fallback, team sharing, file-based memory - Add auto_dream_service for daily memory consolidation - Add 99 memory system test cases (basic 18 + advanced 43 + pytest 38) - Add platform capability assessment report and unfinished project checklist Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1882 lines
85 KiB
Python
1882 lines
85 KiB
Python
"""
|
||
Agent Runtime 核心 —— 自主 ReAct 循环。
|
||
|
||
流程:
|
||
1. 接收用户输入 → 追加到消息列表
|
||
2. 调用 LLM(携带 tools schema)
|
||
3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2
|
||
4. 如果 LLM 返回文本 → 作为最终回答返回
|
||
5. 超过 max_iterations → 强制终止
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import hashlib
|
||
import json
|
||
import logging
|
||
import time
|
||
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict
|
||
|
||
from app.agent_runtime.schemas import (
|
||
AgentConfig,
|
||
AgentResult,
|
||
AgentStep,
|
||
)
|
||
from app.agent_runtime.context import AgentContext
|
||
from app.agent_runtime.memory import AgentMemory
|
||
from app.agent_runtime.tool_manager import AgentToolManager
|
||
from app.core.exceptions import WorkflowExecutionError
|
||
from app.core.hooks import HookManager, HookEvent, HookContext, HookResult
|
||
from app.agent_runtime.plan_mode import PlanMode, Plan, PlanStatus
|
||
from app.core.error_recovery import ErrorClassifier, ErrorType, ConversationRecovery
|
||
from app.core.memdir import MemoryDir, MemoryType as MemType, MemoryManifest, parse_frontmatter
|
||
from app.core.memory_selector import memory_selector
|
||
from app.core.compaction import CompactionEngine, CompactionResult, CompactionStrategy
|
||
from app.core.compaction_config import CompactionConfig
|
||
from app.core.token_counter import is_context_length_error
|
||
from app.core.streamlined_output import (
|
||
StreamlinedTransformer,
|
||
create_streamlined_transformer,
|
||
get_tool_summary_text,
|
||
ToolCounts,
|
||
categorize_tool,
|
||
)
|
||
from app.core.prompt_sections import (
|
||
PromptComposer,
|
||
PromptSection,
|
||
create_prompt_composer,
|
||
create_default_static_sections,
|
||
create_default_dynamic_sections,
|
||
section_environment,
|
||
section_language,
|
||
)
|
||
from app.core.token_budget import (
|
||
TokenBudget,
|
||
TokenBudgetConfig,
|
||
create_token_budget,
|
||
)
|
||
from app.services.agent_learning_service import (
|
||
extract_pattern_from_result,
|
||
format_pattern_hint,
|
||
load_relevant_patterns,
|
||
save_learning_pattern,
|
||
)
|
||
from app.services.execution_logger import execution_logger as _exec_logger
|
||
from app.services.knowledge_retriever import knowledge_retriever
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class LLMCallMetrics(TypedDict, total=False):
|
||
"""一次 LLM 调用的度量数据"""
|
||
agent_id: Optional[str]
|
||
session_id: str
|
||
user_id: Optional[str]
|
||
model: str
|
||
provider: Optional[str]
|
||
prompt_tokens: int
|
||
completion_tokens: int
|
||
total_tokens: int
|
||
latency_ms: int
|
||
iteration_number: int
|
||
step_type: str # think / final
|
||
tool_name: Optional[str]
|
||
status: str # success / error
|
||
error_message: Optional[str]
|
||
|
||
# 全局错误分类器(可重试判定 + 退避策略)
|
||
_error_classifier = ErrorClassifier()
|
||
|
||
|
||
class AgentRuntime:
|
||
"""
|
||
自主 Agent 运行时。
|
||
|
||
用法:
|
||
runtime = AgentRuntime(config)
|
||
result = await runtime.run("帮我写个Python脚本")
|
||
"""
|
||
|
||
def __init__(
|
||
self,
|
||
config: Optional[AgentConfig] = None,
|
||
context: Optional[AgentContext] = None,
|
||
memory: Optional[AgentMemory] = None,
|
||
tool_manager: Optional[AgentToolManager] = None,
|
||
execution_logger: Optional[Any] = None,
|
||
on_tool_executed: Optional[Callable[[str], Any]] = None,
|
||
on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
|
||
hook_manager: Optional[HookManager] = None,
|
||
streamlined: bool = False,
|
||
):
|
||
self.config = config or AgentConfig()
|
||
self.context = context or AgentContext(
|
||
system_prompt=self.config.system_prompt,
|
||
user_id=self.config.user_id,
|
||
)
|
||
_mem_scope = self.config.memory_scope_id or self.config.user_id or self.config.name
|
||
self.memory = memory or AgentMemory(
|
||
scope_id=_mem_scope,
|
||
max_history=self.config.memory.max_history_messages,
|
||
persist=self.config.memory.persist_to_db,
|
||
vector_memory_enabled=self.config.memory.vector_memory_enabled,
|
||
vector_memory_top_k=self.config.memory.vector_memory_top_k,
|
||
vector_memory_rerank=self.config.memory.vector_memory_rerank,
|
||
memory_type_filter=self.config.memory.memory_type_filter,
|
||
team_id=self.config.memory.team_id,
|
||
team_share_enabled=self.config.memory.team_share_enabled,
|
||
memory_dir_enabled=self.config.memory.memory_dir_enabled,
|
||
memory_dir_path=self.config.memory.memory_dir_path,
|
||
)
|
||
self.tool_manager = tool_manager or AgentToolManager(
|
||
include_tools=self.config.tools.include_tools,
|
||
exclude_tools=self.config.tools.exclude_tools,
|
||
cache_enabled=self.config.tools.cache_enabled,
|
||
cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
|
||
cache_ttl_ms=self.config.tools.cache_ttl_ms,
|
||
permission_level=self.config.tools.permission_level,
|
||
auto_approve_rules=self.config.tools.auto_approve_rules,
|
||
deny_tools=self.config.tools.deny_tools,
|
||
)
|
||
self.execution_logger = execution_logger
|
||
self.on_tool_executed = on_tool_executed
|
||
self.on_llm_call = on_llm_call
|
||
self._memory_context_loaded = False
|
||
self._llm_invocations = 0
|
||
# 自主学习作用域:bare 聊天用 "bare",Agent 用 "agent"
|
||
self._learning_scope_kind = "bare" if "bare" in str(_mem_scope) else "agent"
|
||
|
||
# Hook 管理器 (P1)
|
||
self.hook_manager = hook_manager or HookManager()
|
||
|
||
# 计划模式 (P2)
|
||
self.plan_mode = PlanMode(self.config.llm) if self.config.llm.plan_mode_enabled else None
|
||
|
||
# 对话自动压缩 (参考 Claude Code compact)
|
||
self.compaction_engine: Optional[CompactionEngine] = None
|
||
compaction_cfg = getattr(self.config.memory, 'compaction', None)
|
||
if compaction_cfg is None:
|
||
compaction_cfg = CompactionConfig()
|
||
if compaction_cfg.enabled:
|
||
self.compaction_engine = CompactionEngine(
|
||
config=compaction_cfg,
|
||
model=self.config.llm.model,
|
||
)
|
||
logger.info("对话压缩引擎已启用 (model=%s, window=%d)",
|
||
self.config.llm.model, self.config.llm.context_window)
|
||
|
||
# 工具结果流式美化 (参考 Claude Code streamlinedTransform)
|
||
self.streamlined = streamlined
|
||
self._streamlined_transformer: Optional[StreamlinedTransformer] = None
|
||
if streamlined:
|
||
self._streamlined_transformer = create_streamlined_transformer(enabled=True)
|
||
logger.info("工具结果流式美化已启用")
|
||
|
||
# 系统提示词分层装配 (P2 — 参考 Claude Code systemPromptSections.ts)
|
||
self._prompt_composer: Optional[PromptComposer] = None
|
||
self._prompt_sections_enabled = self.config.prompt_sections.enabled
|
||
if self._prompt_sections_enabled:
|
||
ps_config = self.config.prompt_sections
|
||
# 构建静态段(按开关过滤)
|
||
static_sections = []
|
||
s_switches = ps_config.static_sections
|
||
if s_switches.get("persona", True):
|
||
static_sections.append(PromptSection(
|
||
"persona",
|
||
lambda cfg=self.config: f"{cfg.system_prompt}\n\n"
|
||
))
|
||
if s_switches.get("capabilities", True):
|
||
from app.core.prompt_sections import section_capabilities
|
||
static_sections.append(PromptSection("capabilities", section_capabilities))
|
||
if s_switches.get("tool_instructions", True):
|
||
from app.core.prompt_sections import section_tool_instructions
|
||
static_sections.append(PromptSection("tool_instructions", section_tool_instructions))
|
||
if s_switches.get("safety_rules", True):
|
||
from app.core.prompt_sections import section_safety_rules
|
||
static_sections.append(PromptSection("safety_rules", section_safety_rules))
|
||
if s_switches.get("output_style", True):
|
||
from app.core.prompt_sections import section_output_style
|
||
static_sections.append(PromptSection("output_style", section_output_style))
|
||
|
||
self._prompt_composer = PromptComposer()
|
||
self._prompt_composer.add_static_sections(static_sections)
|
||
logger.info("系统提示词分层装配已启用 (%d 静态段)", len(static_sections))
|
||
|
||
# Token 预算管理 (P2 — 参考 Claude Code tokenBudget.ts)
|
||
self._token_budget: Optional[TokenBudget] = None
|
||
tb_config = self.config.token_budget
|
||
if tb_config.enabled:
|
||
self._token_budget = TokenBudget(
|
||
config=TokenBudgetConfig(
|
||
enabled=True,
|
||
context_window=tb_config.context_window or self.config.llm.context_window,
|
||
output_reserve=tb_config.output_reserve,
|
||
warning_threshold_pct=tb_config.warning_threshold_pct,
|
||
compact_threshold_pct=tb_config.compact_threshold_pct,
|
||
hard_limit_pct=tb_config.hard_limit_pct,
|
||
user_budget=tb_config.user_budget,
|
||
auto_continue=tb_config.auto_continue,
|
||
compaction_after_warning=tb_config.compaction_after_warning,
|
||
max_compaction_attempts=tb_config.max_compaction_attempts,
|
||
),
|
||
model=self.config.llm.model,
|
||
)
|
||
logger.info("Token 预算管理已启用 (window=%d, compact@%d%%)",
|
||
self._token_budget.config.context_window,
|
||
int(tb_config.compact_threshold_pct * 100))
|
||
|
||
# 崩溃恢复 (P4)
|
||
self.recovery = ConversationRecovery()
|
||
self._recovery_snapshot_counter = 0
|
||
|
||
# 文件式记忆 (MEMORY.md)
|
||
self._memdir: Optional[MemoryDir] = None
|
||
self._memdir_manifest: Optional[MemoryManifest] = None
|
||
if self.config.memory.memory_dir_enabled:
|
||
mem_path = self.config.memory.memory_dir_path
|
||
if not mem_path:
|
||
# 默认路径: 项目根目录下的 .claude/memory
|
||
import os as _os
|
||
mem_path = _os.path.join(
|
||
_os.path.dirname(_os.path.dirname(_os.path.dirname(__file__))),
|
||
".claude", "memory",
|
||
)
|
||
self._memdir = MemoryDir(mem_path)
|
||
# 启动时扫描一次
|
||
self._memdir_manifest = self._memdir.scan()
|
||
memory_selector.reset()
|
||
logger.info("文件式记忆已启用: %s (%d 条)", mem_path,
|
||
self._memdir_manifest.total_files)
|
||
|
||
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
|
||
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
|
||
self.on_llm_invocation: Optional[Callable[[], Any]] = None
|
||
|
||
def _attach_token_usage(self, result: AgentResult) -> AgentResult:
|
||
"""将 TokenBudget 摘要附加到 AgentResult(若启用)。"""
|
||
if self._token_budget:
|
||
from app.agent_runtime.schemas import TokenUsageInfo
|
||
result.token_usage = TokenUsageInfo(**self._token_budget.summary())
|
||
return result
|
||
|
||
def _build_execution_log_kwargs(self, user_input: str, result: AgentResult, latency_ms: int) -> dict:
|
||
"""从 AgentResult 构建 execution_logger 所需的参数字典。"""
|
||
tool_chain = []
|
||
for s in result.steps:
|
||
if s.type == "tool_result" and s.tool_name:
|
||
tool_chain.append({
|
||
"tool_name": s.tool_name,
|
||
"tool_input": s.tool_input,
|
||
"tool_output": s.tool_result[:500] if s.tool_result else None,
|
||
})
|
||
steps_summary = [
|
||
{"iteration": s.iteration, "type": s.type, "tool_name": s.tool_name,
|
||
"content": (s.content or "")[:300]}
|
||
for s in result.steps[-20:] # 最多保留最近 20 步
|
||
]
|
||
return dict(
|
||
agent_id=None, # 由调用方设置
|
||
agent_name=self.config.name,
|
||
user_id=self.config.user_id,
|
||
session_id=self.context.session_id,
|
||
input_text=user_input,
|
||
output_text=result.content,
|
||
output_truncated=result.truncated,
|
||
success=result.success,
|
||
error_message=result.error,
|
||
latency_ms=latency_ms,
|
||
iterations_used=result.iterations_used,
|
||
tool_calls_made=result.tool_calls_made,
|
||
tool_chain=tool_chain if tool_chain else None,
|
||
steps=steps_summary if steps_summary else None,
|
||
model=self.config.llm.model,
|
||
provider=self.config.llm.provider,
|
||
)
|
||
|
||
def _fire_recovery_snapshot(self):
|
||
"""Fire-and-forget 保存崩溃恢复快照(每 5 次工具调用保存一次)。"""
|
||
self._recovery_snapshot_counter += 1
|
||
if self._recovery_snapshot_counter % 5 != 0:
|
||
return
|
||
try:
|
||
import asyncio
|
||
asyncio.ensure_future(
|
||
self.recovery.save_snapshot(
|
||
session_id=self.context.session_id,
|
||
messages=self.context.messages,
|
||
extra={
|
||
"agent_name": self.config.name,
|
||
"iteration": self.context.iteration,
|
||
"tool_calls_made": self.context.tool_calls_made,
|
||
},
|
||
)
|
||
)
|
||
except Exception:
|
||
pass
|
||
|
||
def _fire_execution_log(self, user_input: str, result: AgentResult, start_time: float):
|
||
"""Fire-and-forget 记录执行日志(非阻塞)。"""
|
||
try:
|
||
latency_ms = int((time.time() - start_time) * 1000)
|
||
kwargs = self._build_execution_log_kwargs(user_input, result, latency_ms)
|
||
_exec_logger.log_execution_fire_and_forget(**kwargs)
|
||
except Exception:
|
||
pass # 日志记录失败不影响主流程
|
||
|
||
async def run(self, user_input: str) -> AgentResult:
|
||
"""
|
||
执行 Agent 单轮对话。
|
||
|
||
流程:加载记忆 → 追加用户消息 → ReAct 循环 → 保存记忆 → 返回结果。
|
||
"""
|
||
max_iter = max(1, self.config.llm.max_iterations)
|
||
self.context.iteration = 0
|
||
self.context.tool_calls_made = 0
|
||
self._llm_invocations = 0 # 每次 run() 重置 LLM 调用计数
|
||
_run_start = time.time() # 执行开始时间,用于计算总延迟
|
||
|
||
# 1. 系统提示词分层装配(首次加载全部段,后续只刷新动态段)
|
||
if self._prompt_sections_enabled:
|
||
system_prompt = await self._compose_system_prompt(user_input)
|
||
self.context.set_system_prompt(system_prompt)
|
||
if not self._memory_context_loaded:
|
||
self._memory_context_loaded = True
|
||
logger.info("分层装配已完成(静态段 + 动态段)")
|
||
elif not self._memory_context_loaded:
|
||
await self._inject_memory_context(user_input)
|
||
self._memory_context_loaded = True
|
||
await self._inject_knowledge_context(user_input)
|
||
|
||
# 2. 追加用户消息
|
||
self.context.add_user_message(user_input)
|
||
|
||
# 2.5 计划模式 (P2) — 生成执行计划
|
||
plan: Optional[Plan] = None
|
||
if self.plan_mode and self.config.llm.plan_mode_enabled:
|
||
try:
|
||
plan = await self.plan_mode.generate_plan(
|
||
user_input=user_input,
|
||
available_tools=self.tool_manager.tool_names(),
|
||
messages_history=self.context.messages,
|
||
)
|
||
logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
|
||
if self.config.llm.plan_approval_required:
|
||
approved = await self.plan_mode.present_plan(plan)
|
||
if not approved:
|
||
logger.info("计划模式: 计划被拒绝")
|
||
result = AgentResult(
|
||
success=False,
|
||
content=f"计划已被拒绝。\n\n{plan.to_markdown()}",
|
||
iterations_used=0,
|
||
tool_calls_made=0,
|
||
error="plan_rejected",
|
||
)
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
except Exception as e:
|
||
logger.warning("计划生成失败,回退到直接执行: %s", e)
|
||
plan = None
|
||
|
||
# 3. ReAct 循环
|
||
llm = _LLMClient(self.config.llm)
|
||
tool_schemas = self.tool_manager.get_tool_schemas()
|
||
has_tools = self.tool_manager.has_tools()
|
||
steps: List[AgentStep] = []
|
||
_self_review_attempted = False # 防止无限修正循环
|
||
|
||
# 构建 LLM 调用回调(包装 on_llm_call,补充上下文)
|
||
llm_callback_ctx = {"step_type": "think", "tool_name": None}
|
||
|
||
def _llm_callback(metrics: Dict[str, Any]):
|
||
# Token 预算追踪 (P2)
|
||
if self._token_budget:
|
||
prompt_tok = metrics.get("prompt_tokens", 0)
|
||
comp_tok = metrics.get("completion_tokens", 0)
|
||
if prompt_tok <= 0:
|
||
prompt_tok = self._token_budget.input_tokens # fallback estimate
|
||
self._token_budget.record_llm_call(
|
||
prompt_tokens=prompt_tok,
|
||
completion_tokens=comp_tok,
|
||
iteration=self.context.iteration,
|
||
step_type=llm_callback_ctx["step_type"],
|
||
)
|
||
if self.on_llm_call:
|
||
metrics.update({
|
||
"session_id": self.context.session_id,
|
||
"user_id": self.config.user_id,
|
||
"step_type": llm_callback_ctx["step_type"],
|
||
"tool_name": llm_callback_ctx["tool_name"],
|
||
})
|
||
self.on_llm_call(metrics)
|
||
|
||
while self.context.iteration < max_iter:
|
||
self.context.iteration += 1
|
||
|
||
# Token 预算检查:每次迭代前更新输入 token 估计
|
||
if self._token_budget:
|
||
self._token_budget.update_from_counter(self.context.messages)
|
||
self._token_budget.reset_compaction_attempts()
|
||
|
||
# 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
|
||
_should_compact = self.compaction_engine and self.context.iteration > 1
|
||
if _should_compact and self._token_budget and self._token_budget.needs_compaction:
|
||
self._token_budget.record_compaction_attempt()
|
||
logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
|
||
if self.compaction_engine and self.context.iteration > 1:
|
||
compact_result = await self.compaction_engine.maybe_compact(
|
||
self.context.messages,
|
||
self.config.llm.context_window,
|
||
)
|
||
if compact_result.strategy != CompactionStrategy.NONE:
|
||
self.context.replace_internal_messages(
|
||
[m for m in compact_result.messages
|
||
if m.get("role") != "system"] # 去掉 system(由 context 管理)
|
||
)
|
||
logger.debug(
|
||
"压缩完成: strategy=%s saved=%d tokens",
|
||
compact_result.strategy.value, compact_result.tokens_saved,
|
||
)
|
||
|
||
# 裁剪过长历史
|
||
messages = self.memory.trim_messages(self.context.messages)
|
||
|
||
# 预算检查:LLM 调用次数(在调用 LLM 之前检查,避免浪费额度)
|
||
budget = self.config.budget
|
||
if self._llm_invocations >= budget.max_llm_invocations:
|
||
err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
|
||
logger.warning(err)
|
||
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
|
||
await self.memory.save_context(user_input, err, self.context.messages)
|
||
result = AgentResult(success=False, content=err, truncated=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
steps=steps, error=err)
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
|
||
# 调用外部 LLM 预算回调(WorkflowEngine 注入,将 Agent 的 LLM 计入工作流预算)
|
||
if self.on_llm_invocation:
|
||
try:
|
||
self.on_llm_invocation()
|
||
except Exception as e:
|
||
err = f"LLM 调用超出工作流预算: {e}"
|
||
logger.warning(err)
|
||
steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
|
||
await self.memory.save_context(user_input, err, self.context.messages)
|
||
result = AgentResult(success=False, content=err, truncated=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
steps=steps, error=str(e))
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
|
||
# 调用 LLM
|
||
try:
|
||
response = await llm.chat(
|
||
messages=messages,
|
||
tools=tool_schemas if has_tools and self.context.iteration == 1 else
|
||
(tool_schemas if has_tools else None),
|
||
iteration=self.context.iteration,
|
||
on_completion=_llm_callback,
|
||
)
|
||
except Exception as e:
|
||
err_str = str(e)
|
||
logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
|
||
if self.context.iteration < max_iter and self._is_retryable(err_str):
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="tool_result",
|
||
content=f"LLM 调用失败(可重试): {err_str}",
|
||
))
|
||
self._llm_invocations += 1 # 重试也计入 LLM 调用预算
|
||
continue
|
||
result = AgentResult(
|
||
success=False,
|
||
content=f"LLM 调用失败: {err_str}",
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
error=err_str,
|
||
)
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
|
||
# 记录 LLM 调用次数(内部计数)
|
||
self._llm_invocations += 1
|
||
|
||
# 解析工具调用
|
||
tool_calls = self._extract_tool_calls(response)
|
||
content = self._extract_content(response)
|
||
reasoning = getattr(response, "reasoning_content", None) or (
|
||
response.get("reasoning_content") if isinstance(response, dict) else None
|
||
)
|
||
|
||
if not tool_calls:
|
||
# LLM 直接返回文本 → 结束
|
||
self.context.add_assistant_message(content)
|
||
final_text = content or "(模型未返回有效内容)"
|
||
review_score = 0.0
|
||
|
||
# 输出质量自检(默认关闭,Agent 节点可开启)
|
||
if self.config.self_review_enabled and not _self_review_attempted:
|
||
review = await self._self_review(final_text, task_context=user_input)
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="tool_result",
|
||
content=f"self_review: score={review['score']:.2f} passed={review['passed']}",
|
||
tool_name="self_review",
|
||
tool_input={"content": final_text[:200]},
|
||
tool_result=json.dumps(review, ensure_ascii=False),
|
||
))
|
||
if review["passed"]:
|
||
review_score = review["score"]
|
||
logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], review["threshold"])
|
||
else:
|
||
logger.info("self_review 未通过 (%.2f < %.2f),追加修正", review["score"], review["threshold"])
|
||
_self_review_attempted = True
|
||
# 追加修正提示
|
||
fix_prompt = (
|
||
f"你的上一个回答未通过质量检查(评分 {review['score']:.1f}/{review['threshold']})。\n"
|
||
f"问题:{';'.join(review['issues'][:3])}\n"
|
||
f"改进建议:{';'.join(review['suggestions'][:3])}\n"
|
||
"请修正你的回答,确保满足上述建议。"
|
||
)
|
||
self.context.add_user_message(fix_prompt)
|
||
continue # 回到 ReAct 循环,让 LLM 修正
|
||
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="final",
|
||
content=final_text,
|
||
reasoning=reasoning,
|
||
))
|
||
# 保存记忆
|
||
await self.memory.save_context(user_input, final_text, self.context.messages)
|
||
# 保存学习模式
|
||
if self.config.memory.learning_enabled:
|
||
await self._save_learning_pattern(
|
||
user_input, steps, success=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
)
|
||
# 提取知识到全局知识池(Agent 间知识共享)
|
||
await self._extract_global_knowledge(user_input, final_text, steps, review_score)
|
||
result = AgentResult(
|
||
success=True,
|
||
content=final_text,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
steps=steps,
|
||
)
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
|
||
# 有工具调用 → 先记录 assistant 消息(含 tool_calls)
|
||
self.context.add_assistant_message(content or "", tool_calls, reasoning)
|
||
|
||
# 记录思考步骤(含工具调用意图)
|
||
tc_names = [tc["function"]["name"] for tc in tool_calls]
|
||
tc_args_list = []
|
||
for tc in tool_calls:
|
||
try:
|
||
tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
|
||
except (json.JSONDecodeError, TypeError):
|
||
raw_args = tc["function"].get("arguments", "")
|
||
logger.warning("工具参数 JSON 解析失败,使用空对象: %.200s", str(raw_args))
|
||
tc_args_list.append({})
|
||
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="think",
|
||
content=content or f"调用工具: {', '.join(tc_names)}",
|
||
reasoning=reasoning,
|
||
tool_name=tc_names[0] if len(tc_names) == 1 else None,
|
||
tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
|
||
))
|
||
|
||
if self.execution_logger:
|
||
self.execution_logger.info(
|
||
f"Agent 调用 {len(tool_calls)} 个工具",
|
||
data={"tool_calls": tc_names,
|
||
"iteration": self.context.iteration},
|
||
)
|
||
|
||
# 逐一执行工具
|
||
for tc in tool_calls:
|
||
tfn = tc.get("function", {})
|
||
tname = tfn.get("name", "unknown")
|
||
tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
|
||
|
||
try:
|
||
targs = json.loads(tfn.get("arguments", "{}"))
|
||
except (json.JSONDecodeError, TypeError):
|
||
targs = {}
|
||
|
||
# Hook: PreToolUse — 可拦截/修改工具调用
|
||
hook_ctx = HookContext(
|
||
event=HookEvent.PRE_TOOL_USE,
|
||
tool_name=tname,
|
||
tool_input=targs,
|
||
session_id=self.context.session_id,
|
||
agent_name=self.config.name,
|
||
user_id=self.config.user_id,
|
||
)
|
||
hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
|
||
if not hook_res.allowed:
|
||
result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
continue
|
||
if hook_res.modified_input:
|
||
targs = hook_res.modified_input
|
||
# 审批检查需要原始参数,所以审批在前;但如果 hook 改了参数,需要重新构建
|
||
if hook_res.modified_input and tname in self.config.tools.require_approval:
|
||
tfn["arguments"] = json.dumps(targs, ensure_ascii=False)
|
||
|
||
# 工具执行前审批检查
|
||
if tname in self.config.tools.require_approval:
|
||
from app.services.approval_manager import approval_manager as _am
|
||
logger.info("Agent 工具需审批 [%s]: %s", tname, targs)
|
||
approval_req = await _am.submit(
|
||
tool_name=tname, args=targs,
|
||
timeout_ms=self.config.tools.approval_timeout_ms,
|
||
)
|
||
decision = approval_req.decision
|
||
if decision == "denied":
|
||
result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。"
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
continue
|
||
elif decision == "skip":
|
||
result = f"[审批跳过] 工具 {tname} 被跳过。"
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
continue
|
||
# decision == "approved" → 继续执行
|
||
|
||
logger.info("Agent 执行工具 [%s]: %s", tname, targs)
|
||
try:
|
||
result = await self.tool_manager.execute(tname, targs)
|
||
except Exception as tool_err:
|
||
logger.error("工具 '%s' 执行异常: %s", tname, tool_err, exc_info=True)
|
||
result = json.dumps({
|
||
"error": f"工具 '{tname}' 执行异常: {tool_err}"
|
||
}, ensure_ascii=False)
|
||
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="tool_result",
|
||
content=f"工具 {tname} 返回结果",
|
||
tool_name=tname,
|
||
tool_input=targs,
|
||
tool_result=result[:500] + "..." if len(result) > 500 else result,
|
||
))
|
||
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
self.context.tool_calls_made += 1
|
||
|
||
# Hook: PostToolUse — 工具执行后处理
|
||
post_ctx = HookContext(
|
||
event=HookEvent.POST_TOOL_USE,
|
||
tool_name=tname,
|
||
tool_input=targs,
|
||
tool_output=result,
|
||
session_id=self.context.session_id,
|
||
agent_name=self.config.name,
|
||
user_id=self.config.user_id,
|
||
)
|
||
await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)
|
||
|
||
# 崩溃恢复快照 (P4)
|
||
self._fire_recovery_snapshot()
|
||
|
||
# 预算检查:工具调用次数
|
||
if self.context.tool_calls_made > budget.max_tool_calls:
|
||
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
|
||
logger.warning(err)
|
||
steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
|
||
content=err, tool_name=tname))
|
||
result = AgentResult(success=False, content=err, truncated=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
steps=steps, error=err)
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
|
||
if self.on_tool_executed:
|
||
try:
|
||
await self.on_tool_executed(tname)
|
||
except WorkflowExecutionError:
|
||
raise
|
||
except Exception:
|
||
pass
|
||
|
||
if self.execution_logger:
|
||
preview = result[:300] + "..." if len(result) > 300 else result
|
||
self.execution_logger.info(
|
||
f"工具 {tname} 执行完成",
|
||
data={"tool_name": tname, "result_preview": preview},
|
||
)
|
||
|
||
# 达到最大迭代次数
|
||
last_content = ""
|
||
for m in reversed(self.context.messages):
|
||
if m.get("role") == "assistant" and m.get("content"):
|
||
last_content = m["content"]
|
||
break
|
||
|
||
logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
|
||
await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
|
||
# 保存学习模式(即使截断,标记为未成功以便后续分析)
|
||
if self.config.memory.learning_enabled:
|
||
await self._save_learning_pattern(
|
||
user_input, steps, success=False,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
)
|
||
# 提取知识到全局知识池(即便截断,工具调用序列仍有参考价值)
|
||
if last_content:
|
||
await self._extract_global_knowledge(user_input, last_content, steps)
|
||
if last_content:
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="final",
|
||
content=last_content,
|
||
))
|
||
truncation_msg = f"已达最大迭代次数 ({max_iter}),任务被截断"
|
||
result = AgentResult(
|
||
success=False,
|
||
content=last_content or truncation_msg,
|
||
truncated=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
steps=steps,
|
||
error=truncation_msg,
|
||
)
|
||
self._fire_execution_log(user_input, result, _run_start)
|
||
self._attach_token_usage(result)
|
||
return result
|
||
|
||
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
|
||
"""
|
||
流式执行 Agent 单轮对话(支持 streamlined 模式)。
|
||
|
||
与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件。
|
||
当 streamlined=True 时,工具调用会被折叠为累计摘要。
|
||
"""
|
||
if self._streamlined_transformer:
|
||
self._streamlined_transformer.reset()
|
||
async for event in self._run_stream_impl(user_input):
|
||
transformed = self._streamlined_transformer.transform(event)
|
||
if transformed is not None:
|
||
yield transformed
|
||
flushed = self._streamlined_transformer.flush()
|
||
if flushed:
|
||
yield flushed
|
||
else:
|
||
async for event in self._run_stream_impl(user_input):
|
||
yield event
|
||
|
||
async def _run_stream_impl(self, user_input: str) -> AsyncGenerator[dict, None]:
|
||
"""
|
||
流式执行 Agent 单轮对话(内部实现)。
|
||
|
||
与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件:
|
||
- think: LLM 思考中,准备调用工具
|
||
- tool_call: 即将执行工具
|
||
- tool_result: 工具执行完毕
|
||
- final: 最终回答
|
||
- error: 出错/预算超限
|
||
"""
|
||
max_iter = max(1, self.config.llm.max_iterations)
|
||
self.context.iteration = 0
|
||
self.context.tool_calls_made = 0
|
||
|
||
# 1. 系统提示词分层装配
|
||
if self._prompt_sections_enabled:
|
||
system_prompt = await self._compose_system_prompt(user_input)
|
||
self.context.set_system_prompt(system_prompt)
|
||
if not self._memory_context_loaded:
|
||
self._memory_context_loaded = True
|
||
logger.info("分层装配已完成(静态段 + 动态段)")
|
||
elif not self._memory_context_loaded:
|
||
await self._inject_memory_context(user_input)
|
||
self._memory_context_loaded = True
|
||
await self._inject_knowledge_context(user_input)
|
||
|
||
# 2. 追加用户消息
|
||
self.context.add_user_message(user_input)
|
||
|
||
# 2.5 计划模式 (P2) — 流式生成执行计划
|
||
plan: Optional[Plan] = None
|
||
if self.plan_mode and self.config.llm.plan_mode_enabled:
|
||
yield {"type": "plan_generating", "content": "正在生成执行计划…", "iteration": 0}
|
||
try:
|
||
plan = await self.plan_mode.generate_plan(
|
||
user_input=user_input,
|
||
available_tools=self.tool_manager.tool_names(),
|
||
messages_history=self.context.messages,
|
||
)
|
||
logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
|
||
yield {
|
||
"type": "plan",
|
||
"content": plan.to_markdown(),
|
||
"plan_data": plan.to_dict(),
|
||
"iteration": 0,
|
||
"session_id": self.context.session_id,
|
||
}
|
||
if self.config.llm.plan_approval_required:
|
||
# 等待外部审批(通过 on_approval_required 回调)
|
||
approved = await self.plan_mode.present_plan(plan)
|
||
if not approved:
|
||
logger.info("计划模式: 计划被拒绝")
|
||
yield {
|
||
"type": "plan_rejected",
|
||
"content": "计划已被拒绝",
|
||
"plan_data": plan.to_dict(),
|
||
"iteration": 0,
|
||
"session_id": self.context.session_id,
|
||
}
|
||
return
|
||
yield {
|
||
"type": "plan_approved",
|
||
"content": "计划已批准,开始执行",
|
||
"iteration": 0,
|
||
"session_id": self.context.session_id,
|
||
}
|
||
except Exception as e:
|
||
logger.warning("计划生成失败,回退到直接执行: %s", e)
|
||
yield {"type": "plan_failed", "content": f"计划生成失败: {e}", "iteration": 0}
|
||
plan = None
|
||
|
||
# 3. ReAct 循环
|
||
llm = _LLMClient(self.config.llm)
|
||
tool_schemas = self.tool_manager.get_tool_schemas()
|
||
has_tools = self.tool_manager.has_tools()
|
||
steps: List[AgentStep] = []
|
||
_self_review_attempted = False
|
||
|
||
llm_callback_ctx = {"step_type": "think", "tool_name": None}
|
||
|
||
def _llm_callback(metrics: Dict[str, Any]):
|
||
# Token 预算追踪 (P2)
|
||
if self._token_budget:
|
||
prompt_tok = metrics.get("prompt_tokens", 0)
|
||
comp_tok = metrics.get("completion_tokens", 0)
|
||
if prompt_tok <= 0:
|
||
prompt_tok = self._token_budget.input_tokens # fallback estimate
|
||
self._token_budget.record_llm_call(
|
||
prompt_tokens=prompt_tok,
|
||
completion_tokens=comp_tok,
|
||
iteration=self.context.iteration,
|
||
step_type=llm_callback_ctx["step_type"],
|
||
)
|
||
if self.on_llm_call:
|
||
metrics.update({
|
||
"session_id": self.context.session_id,
|
||
"user_id": self.config.user_id,
|
||
"step_type": llm_callback_ctx["step_type"],
|
||
"tool_name": llm_callback_ctx["tool_name"],
|
||
})
|
||
self.on_llm_call(metrics)
|
||
|
||
while self.context.iteration < max_iter:
|
||
self.context.iteration += 1
|
||
|
||
# Token 预算检查:每次迭代前更新输入 token 估计
|
||
if self._token_budget:
|
||
self._token_budget.update_from_counter(self.context.messages)
|
||
self._token_budget.reset_compaction_attempts()
|
||
|
||
# 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
|
||
if self.compaction_engine and self.context.iteration > 1:
|
||
if self._token_budget and self._token_budget.needs_compaction:
|
||
self._token_budget.record_compaction_attempt()
|
||
logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
|
||
compact_result = await self.compaction_engine.maybe_compact(
|
||
self.context.messages,
|
||
self.config.llm.context_window,
|
||
)
|
||
if compact_result.strategy != CompactionStrategy.NONE:
|
||
self.context.replace_internal_messages(
|
||
[m for m in compact_result.messages
|
||
if m.get("role") != "system"]
|
||
)
|
||
logger.debug(
|
||
"压缩完成: strategy=%s saved=%d tokens",
|
||
compact_result.strategy.value, compact_result.tokens_saved,
|
||
)
|
||
|
||
messages = self.memory.trim_messages(self.context.messages)
|
||
|
||
# 预算检查:LLM 调用次数(在调用 LLM 之前检查,避免浪费额度)
|
||
budget = self.config.budget
|
||
if self._llm_invocations >= budget.max_llm_invocations:
|
||
err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)"
|
||
logger.warning(err)
|
||
yield {"type": "error", "content": err, "iteration": self.context.iteration,
|
||
"truncated": True}
|
||
await self.memory.save_context(user_input, err, self.context.messages)
|
||
return
|
||
|
||
# 调用外部 LLM 预算回调(WorkflowEngine 注入)
|
||
if self.on_llm_invocation:
|
||
try:
|
||
self.on_llm_invocation()
|
||
except Exception as e:
|
||
err = f"LLM 调用超出工作流预算: {e}"
|
||
logger.warning(err)
|
||
yield {"type": "error", "content": err, "iteration": self.context.iteration,
|
||
"truncated": True}
|
||
return
|
||
|
||
# think 事件:告知前端 Agent 正在思考(让 UI 即时反馈,避免假死感)
|
||
yield {"type": "think", "content": "", "reasoning": None, "iteration": self.context.iteration}
|
||
|
||
# 调用 LLM
|
||
try:
|
||
response = await llm.chat(
|
||
messages=messages,
|
||
tools=tool_schemas if has_tools and self.context.iteration == 1 else
|
||
(tool_schemas if has_tools else None),
|
||
iteration=self.context.iteration,
|
||
on_completion=_llm_callback,
|
||
)
|
||
except Exception as e:
|
||
err_str = str(e)
|
||
logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
|
||
if self.context.iteration < max_iter and self._is_retryable(err_str):
|
||
yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}",
|
||
"iteration": self.context.iteration}
|
||
continue
|
||
yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
|
||
"iteration": self.context.iteration}
|
||
return
|
||
|
||
# 记录 LLM 调用次数(内部计数)
|
||
self._llm_invocations += 1
|
||
|
||
# 解析工具调用
|
||
tool_calls = self._extract_tool_calls(response)
|
||
content = self._extract_content(response)
|
||
reasoning = getattr(response, "reasoning_content", None) or (
|
||
response.get("reasoning_content") if isinstance(response, dict) else None
|
||
)
|
||
|
||
if not tool_calls:
|
||
# LLM 直接返回文本 → 结束
|
||
self.context.add_assistant_message(content)
|
||
final_text = content or "(模型未返回有效内容)"
|
||
review_score = 0.0
|
||
|
||
# 输出质量自检(默认关闭)
|
||
if self.config.self_review_enabled and not _self_review_attempted:
|
||
review = await self._self_review(final_text, task_context=user_input)
|
||
yield {
|
||
"type": "tool_result",
|
||
"content": f"self_review: score={review['score']:.2f} passed={review['passed']}",
|
||
"tool_name": "self_review",
|
||
"iteration": self.context.iteration,
|
||
"session_id": self.context.session_id,
|
||
}
|
||
if review["passed"]:
|
||
review_score = review["score"]
|
||
logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], review["threshold"])
|
||
else:
|
||
logger.info("self_review 未通过 (%.2f < %.2f),追加修正", review["score"], review["threshold"])
|
||
_self_review_attempted = True
|
||
yield {
|
||
"type": "think",
|
||
"content": f"自检未通过({review['score']:.1f}),正在修正:{';'.join(review['suggestions'][:2])}",
|
||
"iteration": self.context.iteration,
|
||
"session_id": self.context.session_id,
|
||
}
|
||
fix_prompt = (
|
||
f"你的上一个回答未通过质量检查(评分 {review['score']:.1f}/{review['threshold']})。\n"
|
||
f"问题:{';'.join(review['issues'][:3])}\n"
|
||
f"改进建议:{';'.join(review['suggestions'][:3])}\n"
|
||
"请修正你的回答,确保满足上述建议。"
|
||
)
|
||
self.context.add_user_message(fix_prompt)
|
||
continue # 回到 ReAct 循环,让 LLM 修正
|
||
|
||
token_usage_final = self._token_budget.summary() if self._token_budget else None
|
||
yield {
|
||
"type": "final",
|
||
"content": final_text,
|
||
"reasoning": reasoning,
|
||
"iteration": self.context.iteration,
|
||
"iterations_used": self.context.iteration,
|
||
"tool_calls_made": self.context.tool_calls_made,
|
||
"session_id": self.context.session_id,
|
||
"token_usage": token_usage_final,
|
||
}
|
||
await self.memory.save_context(user_input, final_text, self.context.messages)
|
||
# 保存学习模式
|
||
if self.config.memory.learning_enabled:
|
||
await self._save_learning_pattern(
|
||
user_input, steps, success=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
)
|
||
# 提取知识到全局知识池(Agent 间知识共享)
|
||
await self._extract_global_knowledge(user_input, final_text, steps, review_score)
|
||
return
|
||
|
||
# 有工具调用 → 先记录 assistant 消息
|
||
self.context.add_assistant_message(content or "", tool_calls, reasoning)
|
||
|
||
# yield think 事件
|
||
tc_names = [tc["function"]["name"] for tc in tool_calls]
|
||
tc_args_list = []
|
||
for tc in tool_calls:
|
||
try:
|
||
tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
|
||
except (json.JSONDecodeError, TypeError):
|
||
raw_args = tc["function"].get("arguments", "")
|
||
logger.warning("工具参数 JSON 解析失败,使用空对象: %.200s", str(raw_args))
|
||
tc_args_list.append({})
|
||
|
||
yield {
|
||
"type": "think",
|
||
"content": content or f"调用工具: {', '.join(tc_names)}",
|
||
"reasoning": reasoning,
|
||
"tool_names": tc_names,
|
||
"iteration": self.context.iteration,
|
||
}
|
||
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="think",
|
||
content=content or f"调用工具: {', '.join(tc_names)}",
|
||
reasoning=reasoning,
|
||
tool_name=tc_names[0] if len(tc_names) == 1 else None,
|
||
tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
|
||
))
|
||
|
||
if self.execution_logger:
|
||
self.execution_logger.info(
|
||
f"Agent 调用 {len(tool_calls)} 个工具",
|
||
data={"tool_calls": tc_names, "iteration": self.context.iteration},
|
||
)
|
||
|
||
# 逐一执行工具
|
||
for tc in tool_calls:
|
||
tfn = tc.get("function", {})
|
||
tname = tfn.get("name", "unknown")
|
||
tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
|
||
|
||
try:
|
||
targs = json.loads(tfn.get("arguments", "{}"))
|
||
except (json.JSONDecodeError, TypeError):
|
||
targs = {}
|
||
|
||
# Hook: PreToolUse — 可拦截/修改工具调用 (流式)
|
||
hook_ctx = HookContext(
|
||
event=HookEvent.PRE_TOOL_USE,
|
||
tool_name=tname,
|
||
tool_input=targs,
|
||
session_id=self.context.session_id,
|
||
agent_name=self.config.name,
|
||
user_id=self.config.user_id,
|
||
)
|
||
hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
|
||
if not hook_res.allowed:
|
||
result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
|
||
yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
continue
|
||
if hook_res.modified_input:
|
||
targs = hook_res.modified_input
|
||
|
||
# yield tool_call 事件
|
||
yield {
|
||
"type": "tool_call",
|
||
"name": tname,
|
||
"input": targs,
|
||
"iteration": self.context.iteration,
|
||
}
|
||
|
||
# 工具执行前审批检查(流式:先 create → yield 事件带 ID → 等待决定)
|
||
if tname in self.config.tools.require_approval:
|
||
from app.services.approval_manager import approval_manager as _am
|
||
logger.info("Agent 工具需审批 [%s]: %s", tname, targs)
|
||
approval_req = _am.create(tool_name=tname, args=targs)
|
||
yield {
|
||
"type": "approval_required",
|
||
"approval_id": approval_req.approval_id,
|
||
"tool_name": tname,
|
||
"args": targs,
|
||
"iteration": self.context.iteration,
|
||
}
|
||
decision = await _am.wait_for_decision(
|
||
approval_req.approval_id,
|
||
timeout_ms=self.config.tools.approval_timeout_ms,
|
||
)
|
||
if decision == "denied":
|
||
result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。"
|
||
yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
continue
|
||
elif decision == "skip":
|
||
result = f"[审批跳过] 工具 {tname} 被跳过。"
|
||
yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
continue
|
||
# decision == "approved" → 继续执行
|
||
|
||
logger.info("Agent 执行工具 [%s]: %s", tname, targs)
|
||
try:
|
||
result = await self.tool_manager.execute(tname, targs)
|
||
except Exception as tool_err:
|
||
logger.error("工具 '%s' 执行异常: %s", tname, tool_err, exc_info=True)
|
||
result = json.dumps({
|
||
"error": f"工具 '{tname}' 执行异常: {tool_err}"
|
||
}, ensure_ascii=False)
|
||
|
||
# yield tool_result 事件
|
||
yield {
|
||
"type": "tool_result",
|
||
"name": tname,
|
||
"result": result[:500] + "..." if len(result) > 500 else result,
|
||
"iteration": self.context.iteration,
|
||
}
|
||
|
||
steps.append(AgentStep(
|
||
iteration=self.context.iteration,
|
||
type="tool_result",
|
||
content=f"工具 {tname} 返回结果",
|
||
tool_name=tname,
|
||
tool_input=targs,
|
||
tool_result=result[:500] + "..." if len(result) > 500 else result,
|
||
))
|
||
|
||
self.context.add_tool_result(tcid, tname, result)
|
||
self.context.tool_calls_made += 1
|
||
|
||
# Hook: PostToolUse — 工具执行后处理 (流式)
|
||
post_ctx = HookContext(
|
||
event=HookEvent.POST_TOOL_USE,
|
||
tool_name=tname,
|
||
tool_input=targs,
|
||
tool_output=result,
|
||
session_id=self.context.session_id,
|
||
agent_name=self.config.name,
|
||
user_id=self.config.user_id,
|
||
)
|
||
await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)
|
||
|
||
# 崩溃恢复快照 (P4)
|
||
self._fire_recovery_snapshot()
|
||
|
||
# 预算检查:工具调用次数
|
||
if self.context.tool_calls_made > budget.max_tool_calls:
|
||
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
|
||
logger.warning(err)
|
||
yield {"type": "error", "content": err, "iteration": self.context.iteration,
|
||
"truncated": True}
|
||
return
|
||
|
||
if self.on_tool_executed:
|
||
try:
|
||
await self.on_tool_executed(tname)
|
||
except WorkflowExecutionError:
|
||
raise
|
||
except Exception:
|
||
pass
|
||
|
||
if self.execution_logger:
|
||
preview = result[:300] + "..." if len(result) > 300 else result
|
||
self.execution_logger.info(
|
||
f"工具 {tname} 执行完成",
|
||
data={"tool_name": tname, "result_preview": preview},
|
||
)
|
||
|
||
# Hook: Stop — 对话完成
|
||
stop_ctx = HookContext(
|
||
event=HookEvent.STOP,
|
||
session_id=self.context.session_id,
|
||
agent_name=self.config.name,
|
||
user_id=self.config.user_id,
|
||
)
|
||
await self.hook_manager.trigger(HookEvent.STOP, stop_ctx)
|
||
|
||
# 达到最大迭代次数
|
||
last_content = ""
|
||
for m in reversed(self.context.messages):
|
||
if m.get("role") == "assistant" and m.get("content"):
|
||
last_content = m["content"]
|
||
break
|
||
|
||
logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
|
||
await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages)
|
||
# 保存学习模式(即便截断,工具调用模式仍有参考价值)
|
||
if self.config.memory.learning_enabled:
|
||
await self._save_learning_pattern(
|
||
user_input, steps, success=True,
|
||
iterations_used=self.context.iteration,
|
||
tool_calls_made=self.context.tool_calls_made,
|
||
)
|
||
# 提取知识到全局知识池(即便截断,工具调用序列仍有参考价值)
|
||
if last_content:
|
||
await self._extract_global_knowledge(user_input, last_content, steps)
|
||
token_usage_truncated = self._token_budget.summary() if self._token_budget else None
|
||
yield {
|
||
"type": "final",
|
||
"content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
|
||
"iteration": self.context.iteration,
|
||
"iterations_used": self.context.iteration,
|
||
"tool_calls_made": self.context.tool_calls_made,
|
||
"truncated": True,
|
||
"session_id": self.context.session_id,
|
||
"token_usage": token_usage_truncated,
|
||
}
|
||
|
||
async def _compose_system_prompt(self, query: str = "") -> str:
|
||
"""使用分层装配构建完整系统提示词。
|
||
|
||
将静态段 + 动态段并行解析后拼接,替代原先的字符串拼接方式。
|
||
返回最终的 system_prompt 字符串。
|
||
"""
|
||
if not self._prompt_composer:
|
||
# 降级:使用原有字符串拼接方式
|
||
enriched = self.config.system_prompt.rstrip("\n")
|
||
mem_text = await self.memory.initialize(query=query)
|
||
if mem_text:
|
||
enriched += "\n\n" + mem_text
|
||
if self.config.memory.learning_enabled:
|
||
pattern_hint = await self._inject_learning_patterns(query)
|
||
if pattern_hint:
|
||
enriched += "\n\n" + pattern_hint
|
||
if self._memdir and self._memdir_manifest:
|
||
memdir_text = await self._inject_memdir_context(query)
|
||
if memdir_text:
|
||
enriched += "\n\n" + memdir_text
|
||
try:
|
||
enriched = knowledge_retriever.inject_knowledge(enriched, query)
|
||
except Exception:
|
||
pass
|
||
return enriched
|
||
|
||
# 分层装配路径
|
||
ps_config = self.config.prompt_sections
|
||
d_switches = ps_config.dynamic_sections
|
||
|
||
# 清除上一次运行的动态段
|
||
# (静态段保留缓存,动态段每次重算)
|
||
self._prompt_composer._dynamic_sections.clear()
|
||
|
||
# 动态段:环境信息
|
||
if d_switches.get("environment", True):
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"environment",
|
||
lambda uid=self.config.user_id: section_environment(uid),
|
||
cache_break=True,
|
||
))
|
||
|
||
# 动态段:语言偏好
|
||
if d_switches.get("language", True):
|
||
lang = ps_config.language
|
||
if lang:
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"language",
|
||
lambda l=lang: section_language(l),
|
||
cache_break=False,
|
||
))
|
||
|
||
# 动态段:长期记忆上下文
|
||
if d_switches.get("memory_context", True):
|
||
mem_text = await self.memory.initialize(query=query)
|
||
if mem_text:
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"memory_context",
|
||
lambda t=mem_text: f"# Long-term Memory\n\n{t}",
|
||
cache_break=True,
|
||
))
|
||
|
||
# 动态段:学习模式提示
|
||
if self.config.memory.learning_enabled:
|
||
pattern_hint = await self._inject_learning_patterns(query)
|
||
if pattern_hint:
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"learning_patterns",
|
||
lambda p=pattern_hint: p,
|
||
cache_break=True,
|
||
))
|
||
|
||
# 动态段:文件式记忆
|
||
if self._memdir and self._memdir_manifest:
|
||
memdir_text = await self._inject_memdir_context(query)
|
||
if memdir_text:
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"memdir",
|
||
lambda t=memdir_text: t,
|
||
cache_break=True,
|
||
))
|
||
|
||
# 动态段:知识库检索
|
||
if d_switches.get("memory_context", True):
|
||
try:
|
||
base_enriched = knowledge_retriever.inject_knowledge(
|
||
self.config.system_prompt, query
|
||
)
|
||
if base_enriched != self.config.system_prompt:
|
||
# 提取增量部分
|
||
knowledge_delta = base_enriched[len(self.config.system_prompt):].strip()
|
||
if knowledge_delta:
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"knowledge_base",
|
||
lambda kd=knowledge_delta: f"# Relevant Knowledge\n\n{kd}",
|
||
cache_break=True,
|
||
))
|
||
except Exception:
|
||
pass
|
||
|
||
# 工具列表段(默认关闭,太长)
|
||
if d_switches.get("tool_list", False):
|
||
tool_names = self.tool_manager.tool_names()
|
||
if tool_names:
|
||
tool_list_text = "\n".join(f"- {n}" for n in sorted(tool_names))
|
||
self._prompt_composer.add_dynamic(PromptSection(
|
||
"tool_list",
|
||
lambda t=tool_list_text: f"# Available Tools\n\n{t}",
|
||
cache_break=False,
|
||
))
|
||
|
||
# 解析 + 装配
|
||
return await self._prompt_composer.assemble_full()
|
||
|
||
async def _inject_memory_context(self, query: str = "") -> None:
|
||
"""加载长期记忆并注入 system prompt。"""
|
||
mem_text = await self.memory.initialize(query=query)
|
||
enriched = self.config.system_prompt.rstrip("\n")
|
||
|
||
if mem_text:
|
||
enriched += "\n\n" + mem_text
|
||
|
||
# 注入学习模式提示(历史工具使用建议)
|
||
if self.config.memory.learning_enabled:
|
||
pattern_hint = await self._inject_learning_patterns(query)
|
||
if pattern_hint:
|
||
enriched += "\n\n" + pattern_hint
|
||
|
||
# 注入文件式记忆 (MEMORY.md)
|
||
if self._memdir and self._memdir_manifest:
|
||
memdir_text = await self._inject_memdir_context(query)
|
||
if memdir_text:
|
||
enriched += "\n\n" + memdir_text
|
||
|
||
self.context.set_system_prompt(enriched)
|
||
logger.info("Agent 已注入长期记忆上下文")
|
||
|
||
async def _inject_memdir_context(self, query: str) -> str:
|
||
"""加载文件式记忆并构建注入文本。"""
|
||
if not self._memdir or not self._memdir_manifest:
|
||
return ""
|
||
|
||
parts: List[str] = []
|
||
|
||
# 记忆操作指导(首次注入)
|
||
memdir_prompt = self._memdir.build_system_prompt()
|
||
parts.append(memdir_prompt)
|
||
|
||
# AI 驱动的相关性选择
|
||
if self._memdir_manifest.entries:
|
||
try:
|
||
selected = await memory_selector.select(
|
||
query=query,
|
||
manifest=self._memdir_manifest,
|
||
recent_tools=self.tool_manager.tool_names(),
|
||
)
|
||
if selected:
|
||
# 读取选中的记忆文件
|
||
parts.append("\n## 相关记忆\n")
|
||
for fn in selected:
|
||
entry = next(
|
||
(e for e in self._memdir_manifest.entries
|
||
if e.filename == fn), None
|
||
)
|
||
if entry:
|
||
# 加载完整内容
|
||
try:
|
||
with open(entry.filepath, "r", encoding="utf-8") as _f:
|
||
_, content = parse_frontmatter(_f.read())
|
||
except Exception:
|
||
content = entry.content
|
||
if not content:
|
||
content = entry.content
|
||
staleness = entry.staleness_note
|
||
parts.append(
|
||
f"<system-reminder>\n"
|
||
f"### [{entry.mem_type.value}] {entry.name}\n"
|
||
f"{content[:2000]}"
|
||
)
|
||
if staleness:
|
||
parts.append(f"\n{staleness}")
|
||
parts.append("</system-reminder>")
|
||
except Exception as e:
|
||
logger.warning("AI 记忆选择失败: %s", e)
|
||
|
||
return "\n".join(parts)
|
||
|
||
async def _inject_learning_patterns(self, query: str) -> str:
|
||
"""查询学习模式,返回格式化的提示文本。"""
|
||
from app.core.database import SessionLocal
|
||
db = None
|
||
try:
|
||
db = SessionLocal()
|
||
patterns = load_relevant_patterns(
|
||
db, self._learning_scope_kind, self.memory.scope_id, query
|
||
)
|
||
return format_pattern_hint(patterns, query)
|
||
except Exception as e:
|
||
logger.warning("加载学习模式失败: %s", e)
|
||
return ""
|
||
|
||
async def _inject_knowledge_context(self, query: str) -> None:
|
||
"""从知识进化库检索相关经验并注入 system prompt。"""
|
||
try:
|
||
enriched = knowledge_retriever.inject_knowledge(
|
||
self.context.system_prompt, query
|
||
)
|
||
if enriched != self.context.system_prompt:
|
||
self.context.set_system_prompt(enriched)
|
||
logger.info("Agent 已注入相关知识库经验")
|
||
except Exception as e:
|
||
logger.debug("知识检索注入跳过: %s", e)
|
||
|
||
async def _save_learning_pattern(
|
||
self, query: str, steps: List[AgentStep],
|
||
success: bool, iterations_used: int, tool_calls_made: int,
|
||
) -> None:
|
||
"""从执行结果中提取模式并保存。"""
|
||
from app.core.database import SessionLocal
|
||
db = None
|
||
try:
|
||
db = SessionLocal()
|
||
pattern_data = extract_pattern_from_result(
|
||
query=query,
|
||
steps=steps,
|
||
success=success,
|
||
iterations_used=iterations_used,
|
||
tool_calls_made=tool_calls_made,
|
||
)
|
||
save_learning_pattern(
|
||
db, self._learning_scope_kind,
|
||
self.memory.scope_id, pattern_data,
|
||
)
|
||
except Exception as e:
|
||
logger.warning("保存学习模式失败: %s", e)
|
||
finally:
|
||
if db:
|
||
db.close()
|
||
|
||
async def _extract_global_knowledge(
|
||
self, user_input: str, final_answer: str, steps: List[AgentStep],
|
||
self_review_score: float = 0.0,
|
||
) -> None:
|
||
"""从 Agent 执行结果中提取知识,写入全局知识池(Agent 间共享)。"""
|
||
# 提取工具调用名称作为 tags
|
||
tool_names = list(dict.fromkeys(
|
||
s.tool_name for s in (steps or [])
|
||
if s.tool_name and s.type == "tool_result"
|
||
))
|
||
tags = tool_names[:5] if tool_names else ["对话"]
|
||
|
||
# 提取关键信息:用户问题摘要 + 回答要点(前 500 字)
|
||
content = (
|
||
f"问题: {user_input[:300]}\n"
|
||
f"回答要点: {final_answer[:500]}"
|
||
)
|
||
if tool_names:
|
||
content += f"\n使用工具: {', '.join(tool_names[:5])}"
|
||
|
||
source_agent_id = self.config.name if self.config.name != "default_agent" else ""
|
||
source_user_id = self.config.user_id or ""
|
||
|
||
# 置信度评估:基于 self_review 评分和工具执行成功数
|
||
confidence = "medium"
|
||
if self_review_score >= 0.8:
|
||
confidence = "high"
|
||
elif self_review_score > 0 and self_review_score < 0.5:
|
||
confidence = "low"
|
||
elif tool_names and len(tool_names) >= 2:
|
||
confidence = "high" # 多工具协作通常质量更高
|
||
# TTL: 高置信度知识有效期更长
|
||
ttl_hours = 720 if confidence == "high" else 168 if confidence == "medium" else 24
|
||
|
||
await self.memory.save_global_knowledge(
|
||
content=content,
|
||
source_agent_id=source_agent_id,
|
||
source_user_id=source_user_id,
|
||
tags=tags,
|
||
confidence=confidence,
|
||
ttl_hours=ttl_hours,
|
||
)
|
||
|
||
async def _self_review(self, content: str, task_context: str = "") -> dict:
|
||
"""输出质量自检:用轻量 LLM 评判输出,返回 {score, passed, issues, suggestions}。"""
|
||
criteria = (
|
||
"回答必须准确、完整、切题。"
|
||
"包含具体可执行的步骤或代码示例。"
|
||
"无明显事实错误或遗漏。"
|
||
"格式清晰,便于阅读。"
|
||
)
|
||
try:
|
||
from app.agent_runtime.core import _LLMClient
|
||
from app.agent_runtime.schemas import AgentLLMConfig
|
||
|
||
review_config = AgentLLMConfig(
|
||
provider=getattr(self.config.llm, 'provider', 'deepseek'),
|
||
model="deepseek-v4-flash",
|
||
temperature=0.1,
|
||
max_tokens=800,
|
||
request_timeout=30.0,
|
||
)
|
||
if self.config.llm.api_key:
|
||
review_config.api_key = self.config.llm.api_key
|
||
if self.config.llm.base_url:
|
||
review_config.base_url = self.config.llm.base_url
|
||
|
||
client = _LLMClient(review_config)
|
||
|
||
judge_prompt = (
|
||
"你是严格的内容质量评审专家。请根据以下标准对内容进行评分。\n\n"
|
||
f"【评判标准】\n{criteria}\n\n"
|
||
f"【待评审内容】\n{content[:8000]}\n"
|
||
)
|
||
if task_context:
|
||
judge_prompt += f"\n【任务背景】\n{task_context[:2000]}\n"
|
||
|
||
judge_prompt += (
|
||
"\n请以 JSON 格式返回评审结果(严格只返回 JSON,不要任何其他文字):\n"
|
||
'{"score": 0.75, "passed": true, "issues": ["问题1"], '
|
||
'"suggestions": ["建议1"], "summary": "一句话总结"}\n\n'
|
||
"评分规则:1.0完美 0.8良好 0.6基本满足 0.4大部分未满足 0.2完全不满足\n"
|
||
"score >= 0.6 时 passed=true,否则 passed=false\n"
|
||
)
|
||
|
||
messages = [{"role": "user", "content": judge_prompt}]
|
||
resp = await client.chat(messages=messages, tools=None, iteration=0)
|
||
judge_text = getattr(resp, 'content', '') or (
|
||
resp.get('content', '') if isinstance(resp, dict) else str(resp)
|
||
)
|
||
|
||
# 解析 JSON
|
||
try:
|
||
judge_clean = judge_text.strip()
|
||
if judge_clean.startswith("```"):
|
||
lines = judge_clean.split("\n")
|
||
judge_clean = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
|
||
result = json.loads(judge_clean)
|
||
except json.JSONDecodeError:
|
||
import re as _sr_re
|
||
m = _sr_re.search(r'\{[^{}]*"score"\s*:\s*[\d.]+[^{}]*\}', judge_text, _sr_re.DOTALL)
|
||
if m:
|
||
try:
|
||
result = json.loads(m.group())
|
||
except json.JSONDecodeError:
|
||
result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""}
|
||
else:
|
||
result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""}
|
||
|
||
score = float(result.get("score", 0.5))
|
||
threshold = self.config.llm.self_review_threshold
|
||
passed = score >= threshold
|
||
|
||
return {
|
||
"score": score,
|
||
"passed": passed,
|
||
"threshold": threshold,
|
||
"issues": result.get("issues", []),
|
||
"suggestions": result.get("suggestions", []),
|
||
"summary": result.get("summary", ""),
|
||
}
|
||
except Exception as e:
|
||
logger.warning("self_review 执行失败: %s", e)
|
||
return {"score": 0.0, "passed": False, "issues": [f"self_review 执行异常: {e}"],
|
||
"suggestions": ["请检查 self_review 配置或 LLM 可用性"], "error": str(e)}
|
||
|
||
@staticmethod
|
||
def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
|
||
"""从 LLM 响应中提取工具调用列表。"""
|
||
if response is None:
|
||
return []
|
||
# OpenAI SDK 格式
|
||
if hasattr(response, "tool_calls") and response.tool_calls:
|
||
result = []
|
||
for tc in response.tool_calls:
|
||
result.append({
|
||
"id": tc.id,
|
||
"type": tc.type,
|
||
"function": {
|
||
"name": tc.function.name,
|
||
"arguments": tc.function.arguments,
|
||
},
|
||
})
|
||
return result
|
||
# 字典格式
|
||
if isinstance(response, dict):
|
||
tc_list = response.get("tool_calls") or []
|
||
if tc_list:
|
||
return tc_list
|
||
# 检查 content 中是否嵌入了 DSML
|
||
content = response.get("content") or ""
|
||
if "invoke" in content or "function_call" in content:
|
||
from app.services.llm_service import _parse_dsml_tool_invocations
|
||
dsml = _parse_dsml_tool_invocations(content)
|
||
if dsml:
|
||
return [
|
||
{
|
||
"id": f"dsml-{i}",
|
||
"type": "function",
|
||
"function": {
|
||
"name": inv["name"],
|
||
"arguments": json.dumps(inv["arguments"], ensure_ascii=False),
|
||
},
|
||
}
|
||
for i, inv in enumerate(dsml)
|
||
]
|
||
return []
|
||
|
||
@staticmethod
|
||
def _extract_content(response: Any) -> str:
|
||
"""从 LLM 响应中提取文本内容。"""
|
||
if response is None:
|
||
return ""
|
||
if hasattr(response, "content"):
|
||
return response.content or ""
|
||
if isinstance(response, dict):
|
||
return response.get("content") or ""
|
||
return str(response)
|
||
|
||
@staticmethod
|
||
def _is_retryable(err_str: str) -> bool:
|
||
"""判断错误是否可重试(使用 ErrorClassifier)。"""
|
||
try:
|
||
error_type, _ = _error_classifier.classify(Exception(err_str))
|
||
return error_type == ErrorType.RETRYABLE
|
||
except Exception:
|
||
err_lower = err_str.lower()
|
||
return any(kw in err_lower for kw in (
|
||
"timed out", "timeout", "connection error",
|
||
"rate limit", "too many requests", "internal server error",
|
||
"service unavailable", "temporarily unavailable",
|
||
))
|
||
|
||
|
||
# LLM 缓存辅助
|
||
def _llm_cache_key(messages: list, model: str) -> str:
|
||
import hashlib
|
||
raw = json.dumps({"msgs": messages[-4:], "model": model}, sort_keys=True, ensure_ascii=False)
|
||
return f"llm:{model}:{hashlib.sha256(raw.encode()).hexdigest()[:16]}"
|
||
|
||
async def _llm_cache_get(key: str) -> Optional[str]:
|
||
try:
|
||
from app.core.redis_client import get_redis_client
|
||
redis = get_redis_client()
|
||
if redis:
|
||
return await redis.get(key)
|
||
except Exception:
|
||
pass
|
||
return None
|
||
|
||
async def _llm_cache_set(key: str, value: str, ttl_ms: int):
|
||
try:
|
||
from app.core.redis_client import get_redis_client
|
||
redis = get_redis_client()
|
||
if redis:
|
||
await redis.setex(key, max(1, int(ttl_ms / 1000)), value)
|
||
except Exception:
|
||
pass
|
||
|
||
|
||
class _LLMClient:
|
||
"""轻量 LLM 客户端包装,复用已有 LLMService 能力。"""
|
||
|
||
def __init__(self, config: Any):
|
||
from app.services.llm_service import llm_service
|
||
self._service = llm_service
|
||
self._config = config
|
||
|
||
async def chat(
|
||
self,
|
||
messages: List[Dict[str, Any]],
|
||
tools: Optional[List[Dict[str, Any]]] = None,
|
||
iteration: int = 1,
|
||
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
|
||
) -> Any:
|
||
"""调用 LLM,主模型失败时自动切换 fallback_llm 重试。"""
|
||
from openai import AsyncOpenAI
|
||
from app.core.config import settings
|
||
|
||
# 优先从配置读取,其次从 settings(.env 加载),最后 os.environ
|
||
api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
|
||
base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""
|
||
|
||
if not api_key or api_key == "your-openai-api-key":
|
||
api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
|
||
base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"
|
||
|
||
if not api_key:
|
||
raise ValueError("未配置 API Key")
|
||
|
||
return await self._do_chat(
|
||
api_key=api_key, base_url=base_url, model=self._config.model,
|
||
messages=messages, tools=tools, iteration=iteration,
|
||
on_completion=on_completion,
|
||
)
|
||
|
||
async def _do_chat(
|
||
self,
|
||
api_key: str,
|
||
base_url: str,
|
||
model: str,
|
||
messages: List[Dict[str, Any]],
|
||
tools: Optional[List[Dict[str, Any]]] = None,
|
||
iteration: int = 1,
|
||
on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
|
||
_is_fallback: bool = False,
|
||
) -> Any:
|
||
from openai import AsyncOpenAI
|
||
from app.core.config import settings
|
||
|
||
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
||
|
||
kwargs: Dict[str, Any] = {
|
||
"model": model,
|
||
"messages": messages,
|
||
"temperature": self._config.temperature,
|
||
"timeout": self._config.request_timeout,
|
||
}
|
||
if self._config.max_tokens:
|
||
kwargs["max_tokens"] = self._config.max_tokens
|
||
if self._config.extra_body:
|
||
kwargs["extra_body"] = self._config.extra_body
|
||
if tools:
|
||
# Normalize tool schemas to OpenAI format: custom tools from the
|
||
# marketplace may be stored as {"name":..., "parameters":...}
|
||
# or {"function":{...}} without the required "type": "function".
|
||
normalized = []
|
||
for t in tools:
|
||
if isinstance(t, dict):
|
||
if t.get("type") == "function":
|
||
# Already in correct format: {"type":"function","function":{...}}
|
||
normalized.append(t)
|
||
elif "function" in t:
|
||
# Has function key but missing type: {"function":{...}}
|
||
normalized.append({"type": "function", "function": t["function"]})
|
||
else:
|
||
# Raw schema: {"name":..., "parameters":...}
|
||
normalized.append({"type": "function", "function": t})
|
||
else:
|
||
normalized.append(t)
|
||
kwargs["tools"] = normalized
|
||
kwargs["tool_choice"] = "auto"
|
||
|
||
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
|
||
if self._config.cache_enabled and not tools and not _is_fallback:
|
||
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
|
||
cached = await _llm_cache_get(cache_key)
|
||
if cached is not None:
|
||
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
|
||
class _CachedMsg:
|
||
content = cached
|
||
tool_calls = None
|
||
return _CachedMsg()
|
||
|
||
start_time = time.perf_counter()
|
||
last_error = None
|
||
try:
|
||
response = await client.chat.completions.create(**kwargs)
|
||
except Exception as e:
|
||
last_error = e
|
||
|
||
# Reactive Compact: 上下文超限时压缩后重试 (Tier 3)
|
||
if (
|
||
self.compaction_engine
|
||
and is_context_length_error(e)
|
||
and self.compaction_engine.config.reactive_compact_enabled
|
||
):
|
||
logger.warning("检测到上下文超限,触发 ReactiveCompact: %s", str(e)[:100])
|
||
try:
|
||
compact_result = await self.compaction_engine.reactive_compact(
|
||
messages, e, self._config.context_window,
|
||
)
|
||
if compact_result.strategy != CompactionStrategy.NONE:
|
||
logger.info(
|
||
"ReactiveCompact 完成: saved=%d tokens, 重试中...",
|
||
compact_result.tokens_saved,
|
||
)
|
||
return await self._do_chat(
|
||
api_key=api_key, base_url=base_url,
|
||
model=model,
|
||
messages=compact_result.messages,
|
||
tools=tools,
|
||
iteration=iteration,
|
||
on_completion=on_completion,
|
||
_is_fallback=_is_fallback,
|
||
)
|
||
except Exception as ce:
|
||
logger.error("ReactiveCompact 失败: %s", ce)
|
||
|
||
# 降级回退:主模型失败时尝试 fallback_llm
|
||
fallback = self._config.fallback_llm
|
||
if fallback and isinstance(fallback, dict) and not _is_fallback:
|
||
fb_model = fallback.get("model")
|
||
fb_api_key = fallback.get("api_key")
|
||
fb_base_url = fallback.get("base_url")
|
||
if fb_model and (fb_api_key or fb_base_url):
|
||
logger.warning(
|
||
"主模型 %s 调用失败,降级到 %s: %s",
|
||
model, fb_model, str(e)[:200],
|
||
)
|
||
# 先报告主模型失败
|
||
latency_ms = int((time.perf_counter() - start_time) * 1000)
|
||
if on_completion:
|
||
on_completion({
|
||
"model": model, "provider": self._config.provider,
|
||
"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
|
||
"latency_ms": latency_ms, "iteration_number": iteration,
|
||
"status": "fallback", "error_message": str(e),
|
||
})
|
||
return await self._do_chat(
|
||
api_key=fb_api_key or api_key,
|
||
base_url=fb_base_url or base_url,
|
||
model=fb_model,
|
||
messages=messages, tools=tools,
|
||
iteration=iteration, on_completion=on_completion,
|
||
_is_fallback=True,
|
||
)
|
||
raise
|
||
|
||
latency_ms = int((time.perf_counter() - start_time) * 1000)
|
||
message = response.choices[0].message
|
||
|
||
# 缓存写入(仅不用工具时)
|
||
if self._config.cache_enabled and not tools and message.content:
|
||
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
|
||
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
|
||
|
||
# 提取 token 用量
|
||
usage = getattr(response, "usage", None)
|
||
prompt_tokens = usage.prompt_tokens if usage else 0
|
||
completion_tokens = usage.completion_tokens if usage else 0
|
||
total_tokens = usage.total_tokens if usage else 0
|
||
|
||
# 调用完成回调
|
||
if on_completion:
|
||
on_completion({
|
||
"model": model,
|
||
"provider": self._config.provider,
|
||
"prompt_tokens": prompt_tokens or 0,
|
||
"completion_tokens": completion_tokens or 0,
|
||
"total_tokens": total_tokens or 0,
|
||
"latency_ms": latency_ms,
|
||
"iteration_number": iteration,
|
||
"status": "success",
|
||
})
|
||
|
||
return message
|