""" Agent Runtime 核心 —— 自主 ReAct 循环。 流程: 1. 接收用户输入 → 追加到消息列表 2. 调用 LLM(携带 tools schema) 3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2 4. 如果 LLM 返回文本 → 作为最终回答返回 5. 超过 max_iterations → 强制终止 """ from __future__ import annotations import hashlib import json import logging import time from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict from app.agent_runtime.schemas import ( AgentConfig, AgentResult, AgentStep, ) from app.agent_runtime.context import AgentContext from app.agent_runtime.memory import AgentMemory from app.agent_runtime.tool_manager import AgentToolManager from app.core.exceptions import WorkflowExecutionError from app.core.hooks import HookManager, HookEvent, HookContext, HookResult from app.agent_runtime.plan_mode import PlanMode, Plan, PlanStatus from app.core.error_recovery import ErrorClassifier, ErrorType, ConversationRecovery from app.core.memdir import MemoryDir, MemoryType as MemType, MemoryManifest, parse_frontmatter from app.core.memory_selector import memory_selector from app.core.compaction import CompactionEngine, CompactionResult, CompactionStrategy from app.core.compaction_config import CompactionConfig from app.core.token_counter import is_context_length_error from app.core.streamlined_output import ( StreamlinedTransformer, create_streamlined_transformer, get_tool_summary_text, ToolCounts, categorize_tool, ) from app.core.prompt_sections import ( PromptComposer, PromptSection, create_prompt_composer, create_default_static_sections, create_default_dynamic_sections, section_environment, section_language, ) from app.core.token_budget import ( TokenBudget, TokenBudgetConfig, create_token_budget, ) from app.services.agent_learning_service import ( extract_pattern_from_result, format_pattern_hint, load_relevant_patterns, save_learning_pattern, ) from app.services.execution_logger import execution_logger as _exec_logger from app.services.knowledge_retriever import knowledge_retriever logger = logging.getLogger(__name__) class LLMCallMetrics(TypedDict, total=False): """一次 LLM 调用的度量数据""" agent_id: Optional[str] session_id: str user_id: Optional[str] model: str provider: Optional[str] prompt_tokens: int completion_tokens: int total_tokens: int latency_ms: int iteration_number: int step_type: str # think / final tool_name: Optional[str] status: str # success / error error_message: Optional[str] # 全局错误分类器(可重试判定 + 退避策略) _error_classifier = ErrorClassifier() class AgentRuntime: """ 自主 Agent 运行时。 用法: runtime = AgentRuntime(config) result = await runtime.run("帮我写个Python脚本") """ def __init__( self, config: Optional[AgentConfig] = None, context: Optional[AgentContext] = None, memory: Optional[AgentMemory] = None, tool_manager: Optional[AgentToolManager] = None, execution_logger: Optional[Any] = None, on_tool_executed: Optional[Callable[[str], Any]] = None, on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None, hook_manager: Optional[HookManager] = None, streamlined: bool = False, ): self.config = config or AgentConfig() self.context = context or AgentContext( system_prompt=self.config.system_prompt, user_id=self.config.user_id, ) _mem_scope = self.config.memory_scope_id or self.config.user_id or self.config.name self.memory = memory or AgentMemory( scope_id=_mem_scope, max_history=self.config.memory.max_history_messages, persist=self.config.memory.persist_to_db, vector_memory_enabled=self.config.memory.vector_memory_enabled, vector_memory_top_k=self.config.memory.vector_memory_top_k, vector_memory_rerank=self.config.memory.vector_memory_rerank, memory_type_filter=self.config.memory.memory_type_filter, team_id=self.config.memory.team_id, team_share_enabled=self.config.memory.team_share_enabled, memory_dir_enabled=self.config.memory.memory_dir_enabled, memory_dir_path=self.config.memory.memory_dir_path, parent_agent_id=self.config.memory.parent_agent_id, ) self.tool_manager = tool_manager or AgentToolManager( include_tools=self.config.tools.include_tools, exclude_tools=self.config.tools.exclude_tools, cache_enabled=self.config.tools.cache_enabled, cache_tool_whitelist=self.config.tools.cache_tool_whitelist, cache_ttl_ms=self.config.tools.cache_ttl_ms, permission_level=self.config.tools.permission_level, auto_approve_rules=self.config.tools.auto_approve_rules, deny_tools=self.config.tools.deny_tools, ) self.execution_logger = execution_logger self.on_tool_executed = on_tool_executed self.on_llm_call = on_llm_call self._memory_context_loaded = False self._llm_invocations = 0 # 自主学习作用域:bare 聊天用 "bare",Agent 用 "agent" self._learning_scope_kind = "bare" if "bare" in str(_mem_scope) else "agent" # Hook 管理器 (P1) self.hook_manager = hook_manager or HookManager() # 计划模式 (P2) self.plan_mode = PlanMode(self.config.llm) if self.config.llm.plan_mode_enabled else None # 对话自动压缩 (参考 Claude Code compact) self.compaction_engine: Optional[CompactionEngine] = None compaction_cfg = getattr(self.config.memory, 'compaction', None) if compaction_cfg is None: compaction_cfg = CompactionConfig() if compaction_cfg.enabled: self.compaction_engine = CompactionEngine( config=compaction_cfg, model=self.config.llm.model, ) logger.info("对话压缩引擎已启用 (model=%s, window=%d)", self.config.llm.model, self.config.llm.context_window) # 工具结果流式美化 (参考 Claude Code streamlinedTransform) self.streamlined = streamlined self._streamlined_transformer: Optional[StreamlinedTransformer] = None if streamlined: self._streamlined_transformer = create_streamlined_transformer(enabled=True) logger.info("工具结果流式美化已启用") # 系统提示词分层装配 (P2 — 参考 Claude Code systemPromptSections.ts) self._prompt_composer: Optional[PromptComposer] = None self._prompt_sections_enabled = self.config.prompt_sections.enabled if self._prompt_sections_enabled: ps_config = self.config.prompt_sections # 构建静态段(按开关过滤) static_sections = [] s_switches = ps_config.static_sections if s_switches.get("persona", True): static_sections.append(PromptSection( "persona", lambda cfg=self.config: f"{cfg.system_prompt}\n\n" )) if s_switches.get("capabilities", True): from app.core.prompt_sections import section_capabilities static_sections.append(PromptSection("capabilities", section_capabilities)) if s_switches.get("tool_instructions", True): from app.core.prompt_sections import section_tool_instructions static_sections.append(PromptSection("tool_instructions", section_tool_instructions)) if s_switches.get("safety_rules", True): from app.core.prompt_sections import section_safety_rules static_sections.append(PromptSection("safety_rules", section_safety_rules)) if s_switches.get("output_style", True): from app.core.prompt_sections import section_output_style static_sections.append(PromptSection("output_style", section_output_style)) self._prompt_composer = PromptComposer() self._prompt_composer.add_static_sections(static_sections) logger.info("系统提示词分层装配已启用 (%d 静态段)", len(static_sections)) # Token 预算管理 (P2 — 参考 Claude Code tokenBudget.ts) self._token_budget: Optional[TokenBudget] = None tb_config = self.config.token_budget if tb_config.enabled: self._token_budget = TokenBudget( config=TokenBudgetConfig( enabled=True, context_window=tb_config.context_window or self.config.llm.context_window, output_reserve=tb_config.output_reserve, warning_threshold_pct=tb_config.warning_threshold_pct, compact_threshold_pct=tb_config.compact_threshold_pct, hard_limit_pct=tb_config.hard_limit_pct, user_budget=tb_config.user_budget, auto_continue=tb_config.auto_continue, compaction_after_warning=tb_config.compaction_after_warning, max_compaction_attempts=tb_config.max_compaction_attempts, ), model=self.config.llm.model, ) logger.info("Token 预算管理已启用 (window=%d, compact@%d%%)", self._token_budget.config.context_window, int(tb_config.compact_threshold_pct * 100)) # 崩溃恢复 (P4) self.recovery = ConversationRecovery() self._recovery_snapshot_counter = 0 # 文件式记忆 (MEMORY.md) self._memdir: Optional[MemoryDir] = None self._memdir_manifest: Optional[MemoryManifest] = None if self.config.memory.memory_dir_enabled: mem_path = self.config.memory.memory_dir_path if not mem_path: # 默认路径: 项目根目录下的 .claude/memory import os as _os mem_path = _os.path.join( _os.path.dirname(_os.path.dirname(_os.path.dirname(__file__))), ".claude", "memory", ) self._memdir = MemoryDir(mem_path) # 启动时扫描一次 self._memdir_manifest = self._memdir.scan() memory_selector.reset() logger.info("文件式记忆已启用: %s (%d 条)", mem_path, self._memdir_manifest.total_files) # 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算 # 返回 True 表示预算充足;返回 False 或抛出异常表示超限 self.on_llm_invocation: Optional[Callable[[], Any]] = None def _attach_token_usage(self, result: AgentResult) -> AgentResult: """将 TokenBudget 摘要附加到 AgentResult(若启用)。""" if self._token_budget: from app.agent_runtime.schemas import TokenUsageInfo result.token_usage = TokenUsageInfo(**self._token_budget.summary()) return result def _build_execution_log_kwargs(self, user_input: str, result: AgentResult, latency_ms: int) -> dict: """从 AgentResult 构建 execution_logger 所需的参数字典。""" tool_chain = [] for s in result.steps: if s.type == "tool_result" and s.tool_name: tool_chain.append({ "tool_name": s.tool_name, "tool_input": s.tool_input, "tool_output": s.tool_result[:500] if s.tool_result else None, }) steps_summary = [ {"iteration": s.iteration, "type": s.type, "tool_name": s.tool_name, "content": (s.content or "")[:300]} for s in result.steps[-20:] # 最多保留最近 20 步 ] return dict( agent_id=None, # 由调用方设置 agent_name=self.config.name, user_id=self.config.user_id, session_id=self.context.session_id, input_text=user_input, output_text=result.content, output_truncated=result.truncated, success=result.success, error_message=result.error, latency_ms=latency_ms, iterations_used=result.iterations_used, tool_calls_made=result.tool_calls_made, tool_chain=tool_chain if tool_chain else None, steps=steps_summary if steps_summary else None, model=self.config.llm.model, provider=self.config.llm.provider, ) def _fire_recovery_snapshot(self): """Fire-and-forget 保存崩溃恢复快照(每 5 次工具调用保存一次)。""" self._recovery_snapshot_counter += 1 if self._recovery_snapshot_counter % 5 != 0: return try: import asyncio asyncio.ensure_future( self.recovery.save_snapshot( session_id=self.context.session_id, messages=self.context.messages, extra={ "agent_name": self.config.name, "iteration": self.context.iteration, "tool_calls_made": self.context.tool_calls_made, }, ) ) except Exception: pass def _fire_execution_log(self, user_input: str, result: AgentResult, start_time: float): """Fire-and-forget 记录执行日志(非阻塞)。""" try: latency_ms = int((time.time() - start_time) * 1000) kwargs = self._build_execution_log_kwargs(user_input, result, latency_ms) _exec_logger.log_execution_fire_and_forget(**kwargs) except Exception: pass # 日志记录失败不影响主流程 async def run(self, user_input: str) -> AgentResult: """ 执行 Agent 单轮对话。 流程:加载记忆 → 追加用户消息 → ReAct 循环 → 保存记忆 → 返回结果。 """ max_iter = max(1, self.config.llm.max_iterations) self.context.iteration = 0 self.context.tool_calls_made = 0 self._llm_invocations = 0 # 每次 run() 重置 LLM 调用计数 _run_start = time.time() # 执行开始时间,用于计算总延迟 # 1. 系统提示词分层装配(首次加载全部段,后续只刷新动态段) if self._prompt_sections_enabled: system_prompt = await self._compose_system_prompt(user_input) self.context.set_system_prompt(system_prompt) if not self._memory_context_loaded: self._memory_context_loaded = True logger.info("分层装配已完成(静态段 + 动态段)") elif not self._memory_context_loaded: await self._inject_memory_context(user_input) self._memory_context_loaded = True await self._inject_knowledge_context(user_input) # 2. 追加用户消息 self.context.add_user_message(user_input) # 2.5 计划模式 (P2) — 生成执行计划 plan: Optional[Plan] = None if self.plan_mode and self.config.llm.plan_mode_enabled: try: plan = await self.plan_mode.generate_plan( user_input=user_input, available_tools=self.tool_manager.tool_names(), messages_history=self.context.messages, ) logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps)) if self.config.llm.plan_approval_required: approved = await self.plan_mode.present_plan(plan) if not approved: logger.info("计划模式: 计划被拒绝") result = AgentResult( success=False, content=f"计划已被拒绝。\n\n{plan.to_markdown()}", iterations_used=0, tool_calls_made=0, error="plan_rejected", ) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result except Exception as e: logger.warning("计划生成失败,回退到直接执行: %s", e) plan = None # 3. ReAct 循环 llm = _LLMClient(self.config.llm) tool_schemas = self.tool_manager.get_tool_schemas() has_tools = self.tool_manager.has_tools() steps: List[AgentStep] = [] _self_review_attempted = False # 防止无限修正循环 # 构建 LLM 调用回调(包装 on_llm_call,补充上下文) llm_callback_ctx = {"step_type": "think", "tool_name": None} def _llm_callback(metrics: Dict[str, Any]): # Token 预算追踪 (P2) if self._token_budget: prompt_tok = metrics.get("prompt_tokens", 0) comp_tok = metrics.get("completion_tokens", 0) if prompt_tok <= 0: prompt_tok = self._token_budget.input_tokens # fallback estimate self._token_budget.record_llm_call( prompt_tokens=prompt_tok, completion_tokens=comp_tok, iteration=self.context.iteration, step_type=llm_callback_ctx["step_type"], ) if self.on_llm_call: metrics.update({ "session_id": self.context.session_id, "user_id": self.config.user_id, "step_type": llm_callback_ctx["step_type"], "tool_name": llm_callback_ctx["tool_name"], }) self.on_llm_call(metrics) while self.context.iteration < max_iter: self.context.iteration += 1 # Token 预算检查:每次迭代前更新输入 token 估计 if self._token_budget: self._token_budget.update_from_counter(self.context.messages) self._token_budget.reset_compaction_attempts() # 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩 _should_compact = self.compaction_engine and self.context.iteration > 1 if _should_compact and self._token_budget and self._token_budget.needs_compaction: self._token_budget.record_compaction_attempt() logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line) if self.compaction_engine and self.context.iteration > 1: compact_result = await self.compaction_engine.maybe_compact( self.context.messages, self.config.llm.context_window, ) if compact_result.strategy != CompactionStrategy.NONE: self.context.replace_internal_messages( [m for m in compact_result.messages if m.get("role") != "system"] # 去掉 system(由 context 管理) ) logger.debug( "压缩完成: strategy=%s saved=%d tokens", compact_result.strategy.value, compact_result.tokens_saved, ) # 裁剪过长历史 messages = self.memory.trim_messages(self.context.messages) # 预算检查:LLM 调用次数(在调用 LLM 之前检查,避免浪费额度) budget = self.config.budget if self._llm_invocations >= budget.max_llm_invocations: err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)" logger.warning(err) steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err)) await self.memory.save_context(user_input, err, self.context.messages) result = AgentResult(success=False, content=err, truncated=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, steps=steps, error=err) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result # 调用外部 LLM 预算回调(WorkflowEngine 注入,将 Agent 的 LLM 计入工作流预算) if self.on_llm_invocation: try: self.on_llm_invocation() except Exception as e: err = f"LLM 调用超出工作流预算: {e}" logger.warning(err) steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err)) await self.memory.save_context(user_input, err, self.context.messages) result = AgentResult(success=False, content=err, truncated=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, steps=steps, error=str(e)) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result # 调用 LLM try: response = await llm.chat( messages=messages, tools=tool_schemas if has_tools and self.context.iteration == 1 else (tool_schemas if has_tools else None), iteration=self.context.iteration, on_completion=_llm_callback, ) except Exception as e: err_str = str(e) logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str) if self.context.iteration < max_iter and self._is_retryable(err_str): steps.append(AgentStep( iteration=self.context.iteration, type="tool_result", content=f"LLM 调用失败(可重试): {err_str}", )) self._llm_invocations += 1 # 重试也计入 LLM 调用预算 continue result = AgentResult( success=False, content=f"LLM 调用失败: {err_str}", iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, error=err_str, ) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result # 记录 LLM 调用次数(内部计数) self._llm_invocations += 1 # 解析工具调用 tool_calls = self._extract_tool_calls(response) content = self._extract_content(response) reasoning = getattr(response, "reasoning_content", None) or ( response.get("reasoning_content") if isinstance(response, dict) else None ) if not tool_calls: # LLM 直接返回文本 → 结束 self.context.add_assistant_message(content) final_text = content or "(模型未返回有效内容)" review_score = 0.0 # 输出质量自检(默认关闭,Agent 节点可开启) if self.config.self_review_enabled and not _self_review_attempted: review = await self._self_review(final_text, task_context=user_input) steps.append(AgentStep( iteration=self.context.iteration, type="tool_result", content=f"self_review: score={review['score']:.2f} passed={review['passed']}", tool_name="self_review", tool_input={"content": final_text[:200]}, tool_result=json.dumps(review, ensure_ascii=False), )) if review["passed"]: review_score = review["score"] logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], review["threshold"]) else: logger.info("self_review 未通过 (%.2f < %.2f),追加修正", review["score"], review["threshold"]) _self_review_attempted = True # 追加修正提示 fix_prompt = ( f"你的上一个回答未通过质量检查(评分 {review['score']:.1f}/{review['threshold']})。\n" f"问题:{';'.join(review['issues'][:3])}\n" f"改进建议:{';'.join(review['suggestions'][:3])}\n" "请修正你的回答,确保满足上述建议。" ) self.context.add_user_message(fix_prompt) continue # 回到 ReAct 循环,让 LLM 修正 steps.append(AgentStep( iteration=self.context.iteration, type="final", content=final_text, reasoning=reasoning, )) # 保存记忆 await self.memory.save_context(user_input, final_text, self.context.messages) # 保存学习模式 if self.config.memory.learning_enabled: await self._save_learning_pattern( user_input, steps, success=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, ) # 提取知识到全局知识池(Agent 间知识共享) await self._extract_global_knowledge(user_input, final_text, steps, review_score) result = AgentResult( success=True, content=final_text, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, steps=steps, ) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result # 有工具调用 → 先记录 assistant 消息(含 tool_calls) self.context.add_assistant_message(content or "", tool_calls, reasoning) # 记录思考步骤(含工具调用意图) tc_names = [tc["function"]["name"] for tc in tool_calls] tc_args_list = [] for tc in tool_calls: try: tc_args_list.append(json.loads(tc["function"].get("arguments", "{}"))) except (json.JSONDecodeError, TypeError): raw_args = tc["function"].get("arguments", "") logger.warning("工具参数 JSON 解析失败,使用空对象: %.200s", str(raw_args)) tc_args_list.append({}) steps.append(AgentStep( iteration=self.context.iteration, type="think", content=content or f"调用工具: {', '.join(tc_names)}", reasoning=reasoning, tool_name=tc_names[0] if len(tc_names) == 1 else None, tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None, )) if self.execution_logger: self.execution_logger.info( f"Agent 调用 {len(tool_calls)} 个工具", data={"tool_calls": tc_names, "iteration": self.context.iteration}, ) # 逐一执行工具 for tc in tool_calls: tfn = tc.get("function", {}) tname = tfn.get("name", "unknown") tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}") try: targs = json.loads(tfn.get("arguments", "{}")) except (json.JSONDecodeError, TypeError): targs = {} # Hook: PreToolUse — 可拦截/修改工具调用 hook_ctx = HookContext( event=HookEvent.PRE_TOOL_USE, tool_name=tname, tool_input=targs, session_id=self.context.session_id, agent_name=self.config.name, user_id=self.config.user_id, ) hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx) if not hook_res.allowed: result = json.dumps({"error": hook_res.reason}, ensure_ascii=False) self.context.add_tool_result(tcid, tname, result) continue if hook_res.modified_input: targs = hook_res.modified_input # 审批检查需要原始参数,所以审批在前;但如果 hook 改了参数,需要重新构建 if hook_res.modified_input and tname in self.config.tools.require_approval: tfn["arguments"] = json.dumps(targs, ensure_ascii=False) # 工具执行前审批检查 if tname in self.config.tools.require_approval: from app.services.approval_manager import approval_manager as _am logger.info("Agent 工具需审批 [%s]: %s", tname, targs) approval_req = await _am.submit( tool_name=tname, args=targs, timeout_ms=self.config.tools.approval_timeout_ms, ) decision = approval_req.decision if decision == "denied": result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。" self.context.add_tool_result(tcid, tname, result) continue elif decision == "skip": result = f"[审批跳过] 工具 {tname} 被跳过。" self.context.add_tool_result(tcid, tname, result) continue # decision == "approved" → 继续执行 logger.info("Agent 执行工具 [%s]: %s", tname, targs) try: result = await self.tool_manager.execute(tname, targs) except Exception as tool_err: logger.error("工具 '%s' 执行异常: %s", tname, tool_err, exc_info=True) result = json.dumps({ "error": f"工具 '{tname}' 执行异常: {tool_err}" }, ensure_ascii=False) steps.append(AgentStep( iteration=self.context.iteration, type="tool_result", content=f"工具 {tname} 返回结果", tool_name=tname, tool_input=targs, tool_result=result[:500] + "..." if len(result) > 500 else result, )) self.context.add_tool_result(tcid, tname, result) self.context.tool_calls_made += 1 # Hook: PostToolUse — 工具执行后处理 post_ctx = HookContext( event=HookEvent.POST_TOOL_USE, tool_name=tname, tool_input=targs, tool_output=result, session_id=self.context.session_id, agent_name=self.config.name, user_id=self.config.user_id, ) await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx) # 崩溃恢复快照 (P4) self._fire_recovery_snapshot() # 预算检查:工具调用次数 if self.context.tool_calls_made > budget.max_tool_calls: err = f"已超过工具调用预算({budget.max_tool_calls} 次)" logger.warning(err) steps.append(AgentStep(iteration=self.context.iteration, type="tool_result", content=err, tool_name=tname)) result = AgentResult(success=False, content=err, truncated=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, steps=steps, error=err) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result if self.on_tool_executed: try: await self.on_tool_executed(tname) except WorkflowExecutionError: raise except Exception: pass if self.execution_logger: preview = result[:300] + "..." if len(result) > 300 else result self.execution_logger.info( f"工具 {tname} 执行完成", data={"tool_name": tname, "result_preview": preview}, ) # 达到最大迭代次数 last_content = "" for m in reversed(self.context.messages): if m.get("role") == "assistant" and m.get("content"): last_content = m["content"] break logger.warning("Agent 达到最大迭代次数 (%s)", max_iter) await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages) # 保存学习模式(即使截断,标记为未成功以便后续分析) if self.config.memory.learning_enabled: await self._save_learning_pattern( user_input, steps, success=False, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, ) # 提取知识到全局知识池(即便截断,工具调用序列仍有参考价值) if last_content: await self._extract_global_knowledge(user_input, last_content, steps) if last_content: steps.append(AgentStep( iteration=self.context.iteration, type="final", content=last_content, )) truncation_msg = f"已达最大迭代次数 ({max_iter}),任务被截断" result = AgentResult( success=False, content=last_content or truncation_msg, truncated=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, steps=steps, error=truncation_msg, ) self._fire_execution_log(user_input, result, _run_start) self._attach_token_usage(result) return result async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]: """ 流式执行 Agent 单轮对话(支持 streamlined 模式)。 与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件。 当 streamlined=True 时,工具调用会被折叠为累计摘要。 """ if self._streamlined_transformer: self._streamlined_transformer.reset() async for event in self._run_stream_impl(user_input): transformed = self._streamlined_transformer.transform(event) if transformed is not None: yield transformed flushed = self._streamlined_transformer.flush() if flushed: yield flushed else: async for event in self._run_stream_impl(user_input): yield event async def _run_stream_impl(self, user_input: str) -> AsyncGenerator[dict, None]: """ 流式执行 Agent 单轮对话(内部实现)。 与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件: - think: LLM 思考中,准备调用工具 - tool_call: 即将执行工具 - tool_result: 工具执行完毕 - final: 最终回答 - error: 出错/预算超限 """ max_iter = max(1, self.config.llm.max_iterations) self.context.iteration = 0 self.context.tool_calls_made = 0 # 1. 系统提示词分层装配 if self._prompt_sections_enabled: system_prompt = await self._compose_system_prompt(user_input) self.context.set_system_prompt(system_prompt) if not self._memory_context_loaded: self._memory_context_loaded = True logger.info("分层装配已完成(静态段 + 动态段)") elif not self._memory_context_loaded: await self._inject_memory_context(user_input) self._memory_context_loaded = True await self._inject_knowledge_context(user_input) # 2. 追加用户消息 self.context.add_user_message(user_input) # 2.5 计划模式 (P2) — 流式生成执行计划 plan: Optional[Plan] = None if self.plan_mode and self.config.llm.plan_mode_enabled: yield {"type": "plan_generating", "content": "正在生成执行计划…", "iteration": 0} try: plan = await self.plan_mode.generate_plan( user_input=user_input, available_tools=self.tool_manager.tool_names(), messages_history=self.context.messages, ) logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps)) yield { "type": "plan", "content": plan.to_markdown(), "plan_data": plan.to_dict(), "iteration": 0, "session_id": self.context.session_id, } if self.config.llm.plan_approval_required: # 等待外部审批(通过 on_approval_required 回调) approved = await self.plan_mode.present_plan(plan) if not approved: logger.info("计划模式: 计划被拒绝") yield { "type": "plan_rejected", "content": "计划已被拒绝", "plan_data": plan.to_dict(), "iteration": 0, "session_id": self.context.session_id, } return yield { "type": "plan_approved", "content": "计划已批准,开始执行", "iteration": 0, "session_id": self.context.session_id, } except Exception as e: logger.warning("计划生成失败,回退到直接执行: %s", e) yield {"type": "plan_failed", "content": f"计划生成失败: {e}", "iteration": 0} plan = None # 3. ReAct 循环 llm = _LLMClient(self.config.llm) tool_schemas = self.tool_manager.get_tool_schemas() has_tools = self.tool_manager.has_tools() steps: List[AgentStep] = [] _self_review_attempted = False llm_callback_ctx = {"step_type": "think", "tool_name": None} def _llm_callback(metrics: Dict[str, Any]): # Token 预算追踪 (P2) if self._token_budget: prompt_tok = metrics.get("prompt_tokens", 0) comp_tok = metrics.get("completion_tokens", 0) if prompt_tok <= 0: prompt_tok = self._token_budget.input_tokens # fallback estimate self._token_budget.record_llm_call( prompt_tokens=prompt_tok, completion_tokens=comp_tok, iteration=self.context.iteration, step_type=llm_callback_ctx["step_type"], ) if self.on_llm_call: metrics.update({ "session_id": self.context.session_id, "user_id": self.config.user_id, "step_type": llm_callback_ctx["step_type"], "tool_name": llm_callback_ctx["tool_name"], }) self.on_llm_call(metrics) while self.context.iteration < max_iter: self.context.iteration += 1 # Token 预算检查:每次迭代前更新输入 token 估计 if self._token_budget: self._token_budget.update_from_counter(self.context.messages) self._token_budget.reset_compaction_attempts() # 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩 if self.compaction_engine and self.context.iteration > 1: if self._token_budget and self._token_budget.needs_compaction: self._token_budget.record_compaction_attempt() logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line) compact_result = await self.compaction_engine.maybe_compact( self.context.messages, self.config.llm.context_window, ) if compact_result.strategy != CompactionStrategy.NONE: self.context.replace_internal_messages( [m for m in compact_result.messages if m.get("role") != "system"] ) logger.debug( "压缩完成: strategy=%s saved=%d tokens", compact_result.strategy.value, compact_result.tokens_saved, ) messages = self.memory.trim_messages(self.context.messages) # 预算检查:LLM 调用次数(在调用 LLM 之前检查,避免浪费额度) budget = self.config.budget if self._llm_invocations >= budget.max_llm_invocations: err = f"已超过 LLM 调用预算({budget.max_llm_invocations} 次)" logger.warning(err) yield {"type": "error", "content": err, "iteration": self.context.iteration, "truncated": True} await self.memory.save_context(user_input, err, self.context.messages) return # 调用外部 LLM 预算回调(WorkflowEngine 注入) if self.on_llm_invocation: try: self.on_llm_invocation() except Exception as e: err = f"LLM 调用超出工作流预算: {e}" logger.warning(err) yield {"type": "error", "content": err, "iteration": self.context.iteration, "truncated": True} return # think 事件:告知前端 Agent 正在思考(让 UI 即时反馈,避免假死感) yield {"type": "think", "content": "", "reasoning": None, "iteration": self.context.iteration} # 调用 LLM try: response = await llm.chat( messages=messages, tools=tool_schemas if has_tools and self.context.iteration == 1 else (tool_schemas if has_tools else None), iteration=self.context.iteration, on_completion=_llm_callback, ) except Exception as e: err_str = str(e) logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str) if self.context.iteration < max_iter and self._is_retryable(err_str): yield {"type": "error", "content": f"LLM 调用失败(可重试): {err_str}", "iteration": self.context.iteration} continue yield {"type": "error", "content": f"LLM 调用失败: {err_str}", "iteration": self.context.iteration} return # 记录 LLM 调用次数(内部计数) self._llm_invocations += 1 # 解析工具调用 tool_calls = self._extract_tool_calls(response) content = self._extract_content(response) reasoning = getattr(response, "reasoning_content", None) or ( response.get("reasoning_content") if isinstance(response, dict) else None ) if not tool_calls: # LLM 直接返回文本 → 结束 self.context.add_assistant_message(content) final_text = content or "(模型未返回有效内容)" review_score = 0.0 # 输出质量自检(默认关闭) if self.config.self_review_enabled and not _self_review_attempted: review = await self._self_review(final_text, task_context=user_input) yield { "type": "tool_result", "content": f"self_review: score={review['score']:.2f} passed={review['passed']}", "tool_name": "self_review", "iteration": self.context.iteration, "session_id": self.context.session_id, } if review["passed"]: review_score = review["score"] logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], review["threshold"]) else: logger.info("self_review 未通过 (%.2f < %.2f),追加修正", review["score"], review["threshold"]) _self_review_attempted = True yield { "type": "think", "content": f"自检未通过({review['score']:.1f}),正在修正:{';'.join(review['suggestions'][:2])}", "iteration": self.context.iteration, "session_id": self.context.session_id, } fix_prompt = ( f"你的上一个回答未通过质量检查(评分 {review['score']:.1f}/{review['threshold']})。\n" f"问题:{';'.join(review['issues'][:3])}\n" f"改进建议:{';'.join(review['suggestions'][:3])}\n" "请修正你的回答,确保满足上述建议。" ) self.context.add_user_message(fix_prompt) continue # 回到 ReAct 循环,让 LLM 修正 token_usage_final = self._token_budget.summary() if self._token_budget else None yield { "type": "final", "content": final_text, "reasoning": reasoning, "iteration": self.context.iteration, "iterations_used": self.context.iteration, "tool_calls_made": self.context.tool_calls_made, "session_id": self.context.session_id, "token_usage": token_usage_final, } await self.memory.save_context(user_input, final_text, self.context.messages) # 保存学习模式 if self.config.memory.learning_enabled: await self._save_learning_pattern( user_input, steps, success=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, ) # 提取知识到全局知识池(Agent 间知识共享) await self._extract_global_knowledge(user_input, final_text, steps, review_score) return # 有工具调用 → 先记录 assistant 消息 self.context.add_assistant_message(content or "", tool_calls, reasoning) # yield think 事件 tc_names = [tc["function"]["name"] for tc in tool_calls] tc_args_list = [] for tc in tool_calls: try: tc_args_list.append(json.loads(tc["function"].get("arguments", "{}"))) except (json.JSONDecodeError, TypeError): raw_args = tc["function"].get("arguments", "") logger.warning("工具参数 JSON 解析失败,使用空对象: %.200s", str(raw_args)) tc_args_list.append({}) yield { "type": "think", "content": content or f"调用工具: {', '.join(tc_names)}", "reasoning": reasoning, "tool_names": tc_names, "iteration": self.context.iteration, } steps.append(AgentStep( iteration=self.context.iteration, type="think", content=content or f"调用工具: {', '.join(tc_names)}", reasoning=reasoning, tool_name=tc_names[0] if len(tc_names) == 1 else None, tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None, )) if self.execution_logger: self.execution_logger.info( f"Agent 调用 {len(tool_calls)} 个工具", data={"tool_calls": tc_names, "iteration": self.context.iteration}, ) # 逐一执行工具 for tc in tool_calls: tfn = tc.get("function", {}) tname = tfn.get("name", "unknown") tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}") try: targs = json.loads(tfn.get("arguments", "{}")) except (json.JSONDecodeError, TypeError): targs = {} # Hook: PreToolUse — 可拦截/修改工具调用 (流式) hook_ctx = HookContext( event=HookEvent.PRE_TOOL_USE, tool_name=tname, tool_input=targs, session_id=self.context.session_id, agent_name=self.config.name, user_id=self.config.user_id, ) hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx) if not hook_res.allowed: result = json.dumps({"error": hook_res.reason}, ensure_ascii=False) yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration} self.context.add_tool_result(tcid, tname, result) continue if hook_res.modified_input: targs = hook_res.modified_input # yield tool_call 事件 yield { "type": "tool_call", "name": tname, "input": targs, "iteration": self.context.iteration, } # 工具执行前审批检查(流式:先 create → yield 事件带 ID → 等待决定) if tname in self.config.tools.require_approval: from app.services.approval_manager import approval_manager as _am logger.info("Agent 工具需审批 [%s]: %s", tname, targs) approval_req = _am.create(tool_name=tname, args=targs) yield { "type": "approval_required", "approval_id": approval_req.approval_id, "tool_name": tname, "args": targs, "iteration": self.context.iteration, } decision = await _am.wait_for_decision( approval_req.approval_id, timeout_ms=self.config.tools.approval_timeout_ms, ) if decision == "denied": result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。" yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration} self.context.add_tool_result(tcid, tname, result) continue elif decision == "skip": result = f"[审批跳过] 工具 {tname} 被跳过。" yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration} self.context.add_tool_result(tcid, tname, result) continue # decision == "approved" → 继续执行 logger.info("Agent 执行工具 [%s]: %s", tname, targs) try: result = await self.tool_manager.execute(tname, targs) except Exception as tool_err: logger.error("工具 '%s' 执行异常: %s", tname, tool_err, exc_info=True) result = json.dumps({ "error": f"工具 '{tname}' 执行异常: {tool_err}" }, ensure_ascii=False) # yield tool_result 事件 yield { "type": "tool_result", "name": tname, "result": result[:500] + "..." if len(result) > 500 else result, "iteration": self.context.iteration, } steps.append(AgentStep( iteration=self.context.iteration, type="tool_result", content=f"工具 {tname} 返回结果", tool_name=tname, tool_input=targs, tool_result=result[:500] + "..." if len(result) > 500 else result, )) self.context.add_tool_result(tcid, tname, result) self.context.tool_calls_made += 1 # Hook: PostToolUse — 工具执行后处理 (流式) post_ctx = HookContext( event=HookEvent.POST_TOOL_USE, tool_name=tname, tool_input=targs, tool_output=result, session_id=self.context.session_id, agent_name=self.config.name, user_id=self.config.user_id, ) await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx) # 崩溃恢复快照 (P4) self._fire_recovery_snapshot() # 预算检查:工具调用次数 if self.context.tool_calls_made > budget.max_tool_calls: err = f"已超过工具调用预算({budget.max_tool_calls} 次)" logger.warning(err) yield {"type": "error", "content": err, "iteration": self.context.iteration, "truncated": True} return if self.on_tool_executed: try: await self.on_tool_executed(tname) except WorkflowExecutionError: raise except Exception: pass if self.execution_logger: preview = result[:300] + "..." if len(result) > 300 else result self.execution_logger.info( f"工具 {tname} 执行完成", data={"tool_name": tname, "result_preview": preview}, ) # Hook: Stop — 对话完成 stop_ctx = HookContext( event=HookEvent.STOP, session_id=self.context.session_id, agent_name=self.config.name, user_id=self.config.user_id, ) await self.hook_manager.trigger(HookEvent.STOP, stop_ctx) # 达到最大迭代次数 last_content = "" for m in reversed(self.context.messages): if m.get("role") == "assistant" and m.get("content"): last_content = m["content"] break logger.warning("Agent 达到最大迭代次数 (%s)", max_iter) await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)", self.context.messages) # 保存学习模式(即便截断,工具调用模式仍有参考价值) if self.config.memory.learning_enabled: await self._save_learning_pattern( user_input, steps, success=True, iterations_used=self.context.iteration, tool_calls_made=self.context.tool_calls_made, ) # 提取知识到全局知识池(即便截断,工具调用序列仍有参考价值) if last_content: await self._extract_global_knowledge(user_input, last_content, steps) token_usage_truncated = self._token_budget.summary() if self._token_budget else None yield { "type": "final", "content": last_content or "已达最大迭代次数,但模型未返回最终回答。", "iteration": self.context.iteration, "iterations_used": self.context.iteration, "tool_calls_made": self.context.tool_calls_made, "truncated": True, "session_id": self.context.session_id, "token_usage": token_usage_truncated, } async def _compose_system_prompt(self, query: str = "") -> str: """使用分层装配构建完整系统提示词。 将静态段 + 动态段并行解析后拼接,替代原先的字符串拼接方式。 返回最终的 system_prompt 字符串。 """ if not self._prompt_composer: # 降级:使用原有字符串拼接方式 enriched = self.config.system_prompt.rstrip("\n") mem_text = await self.memory.initialize(query=query) if mem_text: enriched += "\n\n" + mem_text if self.config.memory.learning_enabled: pattern_hint = await self._inject_learning_patterns(query) if pattern_hint: enriched += "\n\n" + pattern_hint if self._memdir and self._memdir_manifest: memdir_text = await self._inject_memdir_context(query) if memdir_text: enriched += "\n\n" + memdir_text try: enriched = knowledge_retriever.inject_knowledge(enriched, query) except Exception: pass return enriched # 分层装配路径 ps_config = self.config.prompt_sections d_switches = ps_config.dynamic_sections # 清除上一次运行的动态段 # (静态段保留缓存,动态段每次重算) self._prompt_composer._dynamic_sections.clear() # 动态段:环境信息 if d_switches.get("environment", True): self._prompt_composer.add_dynamic(PromptSection( "environment", lambda uid=self.config.user_id: section_environment(uid), cache_break=True, )) # 动态段:语言偏好 if d_switches.get("language", True): lang = ps_config.language if lang: self._prompt_composer.add_dynamic(PromptSection( "language", lambda l=lang: section_language(l), cache_break=False, )) # 动态段:长期记忆上下文 if d_switches.get("memory_context", True): mem_text = await self.memory.initialize(query=query) if mem_text: self._prompt_composer.add_dynamic(PromptSection( "memory_context", lambda t=mem_text: f"# Long-term Memory\n\n{t}", cache_break=True, )) # 动态段:学习模式提示 if self.config.memory.learning_enabled: pattern_hint = await self._inject_learning_patterns(query) if pattern_hint: self._prompt_composer.add_dynamic(PromptSection( "learning_patterns", lambda p=pattern_hint: p, cache_break=True, )) # 动态段:文件式记忆 if self._memdir and self._memdir_manifest: memdir_text = await self._inject_memdir_context(query) if memdir_text: self._prompt_composer.add_dynamic(PromptSection( "memdir", lambda t=memdir_text: t, cache_break=True, )) # 动态段:知识库检索 if d_switches.get("memory_context", True): try: base_enriched = knowledge_retriever.inject_knowledge( self.config.system_prompt, query ) if base_enriched != self.config.system_prompt: # 提取增量部分 knowledge_delta = base_enriched[len(self.config.system_prompt):].strip() if knowledge_delta: self._prompt_composer.add_dynamic(PromptSection( "knowledge_base", lambda kd=knowledge_delta: f"# Relevant Knowledge\n\n{kd}", cache_break=True, )) except Exception: pass # 工具列表段(默认关闭,太长) if d_switches.get("tool_list", False): tool_names = self.tool_manager.tool_names() if tool_names: tool_list_text = "\n".join(f"- {n}" for n in sorted(tool_names)) self._prompt_composer.add_dynamic(PromptSection( "tool_list", lambda t=tool_list_text: f"# Available Tools\n\n{t}", cache_break=False, )) # 解析 + 装配 return await self._prompt_composer.assemble_full() async def _inject_memory_context(self, query: str = "") -> None: """加载长期记忆并注入 system prompt。""" mem_text = await self.memory.initialize(query=query) enriched = self.config.system_prompt.rstrip("\n") if mem_text: enriched += "\n\n" + mem_text # 注入学习模式提示(历史工具使用建议) if self.config.memory.learning_enabled: pattern_hint = await self._inject_learning_patterns(query) if pattern_hint: enriched += "\n\n" + pattern_hint # 注入文件式记忆 (MEMORY.md) if self._memdir and self._memdir_manifest: memdir_text = await self._inject_memdir_context(query) if memdir_text: enriched += "\n\n" + memdir_text self.context.set_system_prompt(enriched) logger.info("Agent 已注入长期记忆上下文") async def _inject_memdir_context(self, query: str) -> str: """加载文件式记忆并构建注入文本。""" if not self._memdir or not self._memdir_manifest: return "" parts: List[str] = [] # 记忆操作指导(首次注入) memdir_prompt = self._memdir.build_system_prompt() parts.append(memdir_prompt) # AI 驱动的相关性选择 if self._memdir_manifest.entries: try: selected = await memory_selector.select( query=query, manifest=self._memdir_manifest, recent_tools=self.tool_manager.tool_names(), ) if selected: # 读取选中的记忆文件 parts.append("\n## 相关记忆\n") for fn in selected: entry = next( (e for e in self._memdir_manifest.entries if e.filename == fn), None ) if entry: # 加载完整内容 try: with open(entry.filepath, "r", encoding="utf-8") as _f: _, content = parse_frontmatter(_f.read()) except Exception: content = entry.content if not content: content = entry.content staleness = entry.staleness_note parts.append( f"\n" f"### [{entry.mem_type.value}] {entry.name}\n" f"{content[:2000]}" ) if staleness: parts.append(f"\n{staleness}") parts.append("") except Exception as e: logger.warning("AI 记忆选择失败: %s", e) return "\n".join(parts) async def _inject_learning_patterns(self, query: str) -> str: """查询学习模式,返回格式化的提示文本。""" from app.core.database import SessionLocal db = None try: db = SessionLocal() patterns = load_relevant_patterns( db, self._learning_scope_kind, self.memory.scope_id, query ) return format_pattern_hint(patterns, query) except Exception as e: logger.warning("加载学习模式失败: %s", e) return "" async def _inject_knowledge_context(self, query: str) -> None: """从知识进化库检索相关经验并注入 system prompt。""" try: enriched = knowledge_retriever.inject_knowledge( self.context.system_prompt, query ) if enriched != self.context.system_prompt: self.context.set_system_prompt(enriched) logger.info("Agent 已注入相关知识库经验") except Exception as e: logger.debug("知识检索注入跳过: %s", e) async def _save_learning_pattern( self, query: str, steps: List[AgentStep], success: bool, iterations_used: int, tool_calls_made: int, ) -> None: """从执行结果中提取模式并保存。""" from app.core.database import SessionLocal db = None try: db = SessionLocal() pattern_data = extract_pattern_from_result( query=query, steps=steps, success=success, iterations_used=iterations_used, tool_calls_made=tool_calls_made, ) save_learning_pattern( db, self._learning_scope_kind, self.memory.scope_id, pattern_data, ) except Exception as e: logger.warning("保存学习模式失败: %s", e) finally: if db: db.close() async def _extract_global_knowledge( self, user_input: str, final_answer: str, steps: List[AgentStep], self_review_score: float = 0.0, ) -> None: """从 Agent 执行结果中提取知识,写入全局知识池(Agent 间共享)。""" # 提取工具调用名称作为 tags tool_names = list(dict.fromkeys( s.tool_name for s in (steps or []) if s.tool_name and s.type == "tool_result" )) tags = tool_names[:5] if tool_names else ["对话"] # 提取关键信息:用户问题摘要 + 回答要点(前 500 字) content = ( f"问题: {user_input[:300]}\n" f"回答要点: {final_answer[:500]}" ) if tool_names: content += f"\n使用工具: {', '.join(tool_names[:5])}" source_agent_id = self.config.name if self.config.name != "default_agent" else "" source_user_id = self.config.user_id or "" # 置信度评估:基于 self_review 评分和工具执行成功数 confidence = "medium" if self_review_score >= 0.8: confidence = "high" elif self_review_score > 0 and self_review_score < 0.5: confidence = "low" elif tool_names and len(tool_names) >= 2: confidence = "high" # 多工具协作通常质量更高 # TTL: 高置信度知识有效期更长 ttl_hours = 720 if confidence == "high" else 168 if confidence == "medium" else 24 await self.memory.save_global_knowledge( content=content, source_agent_id=source_agent_id, source_user_id=source_user_id, tags=tags, confidence=confidence, ttl_hours=ttl_hours, ) async def _self_review(self, content: str, task_context: str = "") -> dict: """输出质量自检:用轻量 LLM 评判输出,返回 {score, passed, issues, suggestions}。""" criteria = ( "回答必须准确、完整、切题。" "包含具体可执行的步骤或代码示例。" "无明显事实错误或遗漏。" "格式清晰,便于阅读。" ) try: from app.agent_runtime.core import _LLMClient from app.agent_runtime.schemas import AgentLLMConfig review_config = AgentLLMConfig( provider=getattr(self.config.llm, 'provider', 'deepseek'), model="deepseek-v4-flash", temperature=0.1, max_tokens=800, request_timeout=30.0, ) if self.config.llm.api_key: review_config.api_key = self.config.llm.api_key if self.config.llm.base_url: review_config.base_url = self.config.llm.base_url client = _LLMClient(review_config) judge_prompt = ( "你是严格的内容质量评审专家。请根据以下标准对内容进行评分。\n\n" f"【评判标准】\n{criteria}\n\n" f"【待评审内容】\n{content[:8000]}\n" ) if task_context: judge_prompt += f"\n【任务背景】\n{task_context[:2000]}\n" judge_prompt += ( "\n请以 JSON 格式返回评审结果(严格只返回 JSON,不要任何其他文字):\n" '{"score": 0.75, "passed": true, "issues": ["问题1"], ' '"suggestions": ["建议1"], "summary": "一句话总结"}\n\n' "评分规则:1.0完美 0.8良好 0.6基本满足 0.4大部分未满足 0.2完全不满足\n" "score >= 0.6 时 passed=true,否则 passed=false\n" ) messages = [{"role": "user", "content": judge_prompt}] resp = await client.chat(messages=messages, tools=None, iteration=0) judge_text = getattr(resp, 'content', '') or ( resp.get('content', '') if isinstance(resp, dict) else str(resp) ) # 解析 JSON try: judge_clean = judge_text.strip() if judge_clean.startswith("```"): lines = judge_clean.split("\n") judge_clean = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]) result = json.loads(judge_clean) except json.JSONDecodeError: import re as _sr_re m = _sr_re.search(r'\{[^{}]*"score"\s*:\s*[\d.]+[^{}]*\}', judge_text, _sr_re.DOTALL) if m: try: result = json.loads(m.group()) except json.JSONDecodeError: result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""} else: result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""} score = float(result.get("score", 0.5)) threshold = self.config.llm.self_review_threshold passed = score >= threshold return { "score": score, "passed": passed, "threshold": threshold, "issues": result.get("issues", []), "suggestions": result.get("suggestions", []), "summary": result.get("summary", ""), } except Exception as e: logger.warning("self_review 执行失败: %s", e) return {"score": 0.0, "passed": False, "issues": [f"self_review 执行异常: {e}"], "suggestions": ["请检查 self_review 配置或 LLM 可用性"], "error": str(e)} @staticmethod def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]: """从 LLM 响应中提取工具调用列表。""" if response is None: return [] # OpenAI SDK 格式 if hasattr(response, "tool_calls") and response.tool_calls: result = [] for tc in response.tool_calls: result.append({ "id": tc.id, "type": tc.type, "function": { "name": tc.function.name, "arguments": tc.function.arguments, }, }) return result # 字典格式 if isinstance(response, dict): tc_list = response.get("tool_calls") or [] if tc_list: return tc_list # 检查 content 中是否嵌入了 DSML content = response.get("content") or "" if "invoke" in content or "function_call" in content: from app.services.llm_service import _parse_dsml_tool_invocations dsml = _parse_dsml_tool_invocations(content) if dsml: return [ { "id": f"dsml-{i}", "type": "function", "function": { "name": inv["name"], "arguments": json.dumps(inv["arguments"], ensure_ascii=False), }, } for i, inv in enumerate(dsml) ] return [] @staticmethod def _extract_content(response: Any) -> str: """从 LLM 响应中提取文本内容。""" if response is None: return "" if hasattr(response, "content"): return response.content or "" if isinstance(response, dict): return response.get("content") or "" return str(response) @staticmethod def _is_retryable(err_str: str) -> bool: """判断错误是否可重试(使用 ErrorClassifier)。""" try: error_type, _ = _error_classifier.classify(Exception(err_str)) return error_type == ErrorType.RETRYABLE except Exception: err_lower = err_str.lower() return any(kw in err_lower for kw in ( "timed out", "timeout", "connection error", "rate limit", "too many requests", "internal server error", "service unavailable", "temporarily unavailable", )) # LLM 缓存辅助 def _llm_cache_key(messages: list, model: str) -> str: import hashlib raw = json.dumps({"msgs": messages[-4:], "model": model}, sort_keys=True, ensure_ascii=False) return f"llm:{model}:{hashlib.sha256(raw.encode()).hexdigest()[:16]}" async def _llm_cache_get(key: str) -> Optional[str]: try: from app.core.redis_client import get_redis_client redis = get_redis_client() if redis: return await redis.get(key) except Exception: pass return None async def _llm_cache_set(key: str, value: str, ttl_ms: int): try: from app.core.redis_client import get_redis_client redis = get_redis_client() if redis: await redis.setex(key, max(1, int(ttl_ms / 1000)), value) except Exception: pass class _LLMClient: """轻量 LLM 客户端包装,复用已有 LLMService 能力。""" def __init__(self, config: Any): from app.services.llm_service import llm_service self._service = llm_service self._config = config async def chat( self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, iteration: int = 1, on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None, ) -> Any: """调用 LLM,主模型失败时自动切换 fallback_llm 重试。""" from openai import AsyncOpenAI from app.core.config import settings # 优先从配置读取,其次从 settings(.env 加载),最后 os.environ api_key = self._config.api_key or settings.OPENAI_API_KEY or "" base_url = self._config.base_url or settings.OPENAI_BASE_URL or "" if not api_key or api_key == "your-openai-api-key": api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or "" base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com" if not api_key: raise ValueError("未配置 API Key") return await self._do_chat( api_key=api_key, base_url=base_url, model=self._config.model, messages=messages, tools=tools, iteration=iteration, on_completion=on_completion, ) async def _do_chat( self, api_key: str, base_url: str, model: str, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, iteration: int = 1, on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None, _is_fallback: bool = False, ) -> Any: from openai import AsyncOpenAI from app.core.config import settings client = AsyncOpenAI(api_key=api_key, base_url=base_url) kwargs: Dict[str, Any] = { "model": model, "messages": messages, "temperature": self._config.temperature, "timeout": self._config.request_timeout, } if self._config.max_tokens: kwargs["max_tokens"] = self._config.max_tokens if self._config.extra_body: kwargs["extra_body"] = self._config.extra_body if tools: # Normalize tool schemas to OpenAI format: custom tools from the # marketplace may be stored as {"name":..., "parameters":...} # or {"function":{...}} without the required "type": "function". normalized = [] for t in tools: if isinstance(t, dict): if t.get("type") == "function": # Already in correct format: {"type":"function","function":{...}} normalized.append(t) elif "function" in t: # Has function key but missing type: {"function":{...}} normalized.append({"type": "function", "function": t["function"]}) else: # Raw schema: {"name":..., "parameters":...} normalized.append({"type": "function", "function": t}) else: normalized.append(t) kwargs["tools"] = normalized kwargs["tool_choice"] = "auto" # LLM 响应缓存(仅不用工具时缓存,避免复杂序列化) if self._config.cache_enabled and not tools and not _is_fallback: cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", "")) cached = await _llm_cache_get(cache_key) if cached is not None: logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model")) class _CachedMsg: content = cached tool_calls = None return _CachedMsg() start_time = time.perf_counter() last_error = None try: response = await client.chat.completions.create(**kwargs) except Exception as e: last_error = e # Reactive Compact: 上下文超限时压缩后重试 (Tier 3) if ( self.compaction_engine and is_context_length_error(e) and self.compaction_engine.config.reactive_compact_enabled ): logger.warning("检测到上下文超限,触发 ReactiveCompact: %s", str(e)[:100]) try: compact_result = await self.compaction_engine.reactive_compact( messages, e, self._config.context_window, ) if compact_result.strategy != CompactionStrategy.NONE: logger.info( "ReactiveCompact 完成: saved=%d tokens, 重试中...", compact_result.tokens_saved, ) return await self._do_chat( api_key=api_key, base_url=base_url, model=model, messages=compact_result.messages, tools=tools, iteration=iteration, on_completion=on_completion, _is_fallback=_is_fallback, ) except Exception as ce: logger.error("ReactiveCompact 失败: %s", ce) # 降级回退:主模型失败时尝试 fallback_llm(优先每 Agent 配置,其次全局配置) fallback = self._config.fallback_llm if not fallback: # 全局降级配置兜底 fb_model = settings.FALLBACK_LLM_MODEL if fb_model: fallback = { "model": fb_model, "api_key": settings.FALLBACK_LLM_API_KEY or None, "base_url": settings.FALLBACK_LLM_BASE_URL or None, } if fallback and isinstance(fallback, dict) and not _is_fallback: fb_model = fallback.get("model") fb_api_key = fallback.get("api_key") fb_base_url = fallback.get("base_url") if fb_model and (fb_api_key or fb_base_url): logger.warning( "主模型 %s 调用失败,降级到 %s: %s", model, fb_model, str(e)[:200], ) # 先报告主模型失败 latency_ms = int((time.perf_counter() - start_time) * 1000) if on_completion: on_completion({ "model": model, "provider": self._config.provider, "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "latency_ms": latency_ms, "iteration_number": iteration, "status": "fallback", "error_message": str(e), }) return await self._do_chat( api_key=fb_api_key or api_key, base_url=fb_base_url or base_url, model=fb_model, messages=messages, tools=tools, iteration=iteration, on_completion=on_completion, _is_fallback=True, ) raise latency_ms = int((time.perf_counter() - start_time) * 1000) message = response.choices[0].message # 缓存写入(仅不用工具时) if self._config.cache_enabled and not tools and message.content: ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", "")) await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms) # 提取 token 用量 usage = getattr(response, "usage", None) prompt_tokens = usage.prompt_tokens if usage else 0 completion_tokens = usage.completion_tokens if usage else 0 total_tokens = usage.total_tokens if usage else 0 # 调用完成回调 if on_completion: on_completion({ "model": model, "provider": self._config.provider, "prompt_tokens": prompt_tokens or 0, "completion_tokens": completion_tokens or 0, "total_tokens": total_tokens or 0, "latency_ms": latency_ms, "iteration_number": iteration, "status": "success", }) return message