aiagent/backend/app/agent_runtime/core.py

"""
Agent Runtime 核心 —— 自主 ReAct 循环。

流程：
1. 接收用户输入 → 追加到消息列表
2. 调用 LLM（携带 tools schema）
3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2
4. 如果 LLM 返回文本 → 作为最终回答返回
5. 超过 max_iterations → 强制终止
"""
from __future__ import annotations

import hashlib
import json
import logging
import time
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Protocol, TypedDict

from app.agent_runtime.schemas import (
    AgentConfig,
    AgentResult,
    AgentStep,
)
from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
from app.core.exceptions import WorkflowExecutionError
from app.core.hooks import HookManager, HookEvent, HookContext, HookResult
from app.agent_runtime.plan_mode import PlanMode, Plan, PlanStatus
from app.core.error_recovery import ErrorClassifier, ErrorType, ConversationRecovery
from app.core.memdir import MemoryDir, MemoryType as MemType, MemoryManifest, parse_frontmatter
from app.core.memory_selector import memory_selector
from app.core.compaction import CompactionEngine, CompactionResult, CompactionStrategy
from app.core.compaction_config import CompactionConfig
from app.core.token_counter import is_context_length_error
from app.core.streamlined_output import (
    StreamlinedTransformer,
    create_streamlined_transformer,
    get_tool_summary_text,
    ToolCounts,
    categorize_tool,
)
from app.core.prompt_sections import (
    PromptComposer,
    PromptSection,
    create_prompt_composer,
    create_default_static_sections,
    create_default_dynamic_sections,
    section_environment,
    section_language,
)
from app.core.token_budget import (
    TokenBudget,
    TokenBudgetConfig,
    create_token_budget,
)
from app.services.agent_learning_service import (
    extract_pattern_from_result,
    format_pattern_hint,
    load_relevant_patterns,
    save_learning_pattern,
)
from app.services.execution_logger import execution_logger as _exec_logger
from app.services.knowledge_retriever import knowledge_retriever

logger = logging.getLogger(__name__)


class LLMCallMetrics(TypedDict, total=False):
    """一次 LLM 调用的度量数据"""
    agent_id: Optional[str]
    session_id: str
    user_id: Optional[str]
    model: str
    provider: Optional[str]
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    latency_ms: int
    iteration_number: int
    step_type: str  # think / final
    tool_name: Optional[str]
    status: str  # success / error
    error_message: Optional[str]

# 全局错误分类器（可重试判定 + 退避策略）
_error_classifier = ErrorClassifier()


class AgentRuntime:
    """
    自主 Agent 运行时。

    用法：
        runtime = AgentRuntime(config)
        result = await runtime.run("帮我写个Python脚本")
    """

    def __init__(
        self,
        config: Optional[AgentConfig] = None,
        context: Optional[AgentContext] = None,
        memory: Optional[AgentMemory] = None,
        tool_manager: Optional[AgentToolManager] = None,
        execution_logger: Optional[Any] = None,
        on_tool_executed: Optional[Callable[[str], Any]] = None,
        on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
        hook_manager: Optional[HookManager] = None,
        streamlined: bool = False,
    ):
        self.config = config or AgentConfig()
        self.context = context or AgentContext(
            system_prompt=self.config.system_prompt,
            user_id=self.config.user_id,
        )
        _mem_scope = self.config.memory_scope_id or self.config.user_id or self.config.name
        self.memory = memory or AgentMemory(
            scope_id=_mem_scope,
            max_history=self.config.memory.max_history_messages,
            persist=self.config.memory.persist_to_db,
            vector_memory_enabled=self.config.memory.vector_memory_enabled,
            vector_memory_top_k=self.config.memory.vector_memory_top_k,
            vector_memory_rerank=self.config.memory.vector_memory_rerank,
            memory_type_filter=self.config.memory.memory_type_filter,
            team_id=self.config.memory.team_id,
            team_share_enabled=self.config.memory.team_share_enabled,
            memory_dir_enabled=self.config.memory.memory_dir_enabled,
            memory_dir_path=self.config.memory.memory_dir_path,
        )
        self.tool_manager = tool_manager or AgentToolManager(
            include_tools=self.config.tools.include_tools,
            exclude_tools=self.config.tools.exclude_tools,
            cache_enabled=self.config.tools.cache_enabled,
            cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
            cache_ttl_ms=self.config.tools.cache_ttl_ms,
            permission_level=self.config.tools.permission_level,
            auto_approve_rules=self.config.tools.auto_approve_rules,
            deny_tools=self.config.tools.deny_tools,
        )
        self.execution_logger = execution_logger
        self.on_tool_executed = on_tool_executed
        self.on_llm_call = on_llm_call
        self._memory_context_loaded = False
        self._llm_invocations = 0
        # 自主学习作用域：bare 聊天用 "bare"，Agent 用 "agent"
        self._learning_scope_kind = "bare" if "bare" in str(_mem_scope) else "agent"

        # Hook 管理器 (P1)
        self.hook_manager = hook_manager or HookManager()

        # 计划模式 (P2)
        self.plan_mode = PlanMode(self.config.llm) if self.config.llm.plan_mode_enabled else None

        # 对话自动压缩 (参考 Claude Code compact)
        self.compaction_engine: Optional[CompactionEngine] = None
        compaction_cfg = getattr(self.config.memory, 'compaction', None)
        if compaction_cfg is None:
            compaction_cfg = CompactionConfig()
        if compaction_cfg.enabled:
            self.compaction_engine = CompactionEngine(
                config=compaction_cfg,
                model=self.config.llm.model,
            )
            logger.info("对话压缩引擎已启用 (model=%s, window=%d)",
                        self.config.llm.model, self.config.llm.context_window)

        # 工具结果流式美化 (参考 Claude Code streamlinedTransform)
        self.streamlined = streamlined
        self._streamlined_transformer: Optional[StreamlinedTransformer] = None
        if streamlined:
            self._streamlined_transformer = create_streamlined_transformer(enabled=True)
            logger.info("工具结果流式美化已启用")

        # 系统提示词分层装配 (P2 — 参考 Claude Code systemPromptSections.ts)
        self._prompt_composer: Optional[PromptComposer] = None
        self._prompt_sections_enabled = self.config.prompt_sections.enabled
        if self._prompt_sections_enabled:
            ps_config = self.config.prompt_sections
            # 构建静态段（按开关过滤）
            static_sections = []
            s_switches = ps_config.static_sections
            if s_switches.get("persona", True):
                static_sections.append(PromptSection(
                    "persona",
                    lambda cfg=self.config: f"{cfg.system_prompt}\n\n"
                ))
            if s_switches.get("capabilities", True):
                from app.core.prompt_sections import section_capabilities
                static_sections.append(PromptSection("capabilities", section_capabilities))
            if s_switches.get("tool_instructions", True):
                from app.core.prompt_sections import section_tool_instructions
                static_sections.append(PromptSection("tool_instructions", section_tool_instructions))
            if s_switches.get("safety_rules", True):
                from app.core.prompt_sections import section_safety_rules
                static_sections.append(PromptSection("safety_rules", section_safety_rules))
            if s_switches.get("output_style", True):
                from app.core.prompt_sections import section_output_style
                static_sections.append(PromptSection("output_style", section_output_style))

            self._prompt_composer = PromptComposer()
            self._prompt_composer.add_static_sections(static_sections)
            logger.info("系统提示词分层装配已启用 (%d 静态段)", len(static_sections))

        # Token 预算管理 (P2 — 参考 Claude Code tokenBudget.ts)
        self._token_budget: Optional[TokenBudget] = None
        tb_config = self.config.token_budget
        if tb_config.enabled:
            self._token_budget = TokenBudget(
                config=TokenBudgetConfig(
                    enabled=True,
                    context_window=tb_config.context_window or self.config.llm.context_window,
                    output_reserve=tb_config.output_reserve,
                    warning_threshold_pct=tb_config.warning_threshold_pct,
                    compact_threshold_pct=tb_config.compact_threshold_pct,
                    hard_limit_pct=tb_config.hard_limit_pct,
                    user_budget=tb_config.user_budget,
                    auto_continue=tb_config.auto_continue,
                    compaction_after_warning=tb_config.compaction_after_warning,
                    max_compaction_attempts=tb_config.max_compaction_attempts,
                ),
                model=self.config.llm.model,
            )
            logger.info("Token 预算管理已启用 (window=%d, compact@%d%%)",
                        self._token_budget.config.context_window,
                        int(tb_config.compact_threshold_pct * 100))

        # 崩溃恢复 (P4)
        self.recovery = ConversationRecovery()
        self._recovery_snapshot_counter = 0

        # 文件式记忆 (MEMORY.md)
        self._memdir: Optional[MemoryDir] = None
        self._memdir_manifest: Optional[MemoryManifest] = None
        if self.config.memory.memory_dir_enabled:
            mem_path = self.config.memory.memory_dir_path
            if not mem_path:
                # 默认路径: 项目根目录下的 .claude/memory
                import os as _os
                mem_path = _os.path.join(
                    _os.path.dirname(_os.path.dirname(_os.path.dirname(__file__))),
                    ".claude", "memory",
                )
            self._memdir = MemoryDir(mem_path)
            # 启动时扫描一次
            self._memdir_manifest = self._memdir.scan()
            memory_selector.reset()
            logger.info("文件式记忆已启用: %s (%d 条)", mem_path,
                        self._memdir_manifest.total_files)

        # 预算回调：供 WorkflowEngine 注入，使 Agent 内部计数计入工作流预算
        # 返回 True 表示预算充足；返回 False 或抛出异常表示超限
        self.on_llm_invocation: Optional[Callable[[], Any]] = None

    def _attach_token_usage(self, result: AgentResult) -> AgentResult:
        """将 TokenBudget 摘要附加到 AgentResult（若启用）。"""
        if self._token_budget:
            from app.agent_runtime.schemas import TokenUsageInfo
            result.token_usage = TokenUsageInfo(**self._token_budget.summary())
        return result

    def _build_execution_log_kwargs(self, user_input: str, result: AgentResult, latency_ms: int) -> dict:
        """从 AgentResult 构建 execution_logger 所需的参数字典。"""
        tool_chain = []
        for s in result.steps:
            if s.type == "tool_result" and s.tool_name:
                tool_chain.append({
                    "tool_name": s.tool_name,
                    "tool_input": s.tool_input,
                    "tool_output": s.tool_result[:500] if s.tool_result else None,
                })
        steps_summary = [
            {"iteration": s.iteration, "type": s.type, "tool_name": s.tool_name,
             "content": (s.content or "")[:300]}
            for s in result.steps[-20:]  # 最多保留最近 20 步
        ]
        return dict(
            agent_id=None,  # 由调用方设置
            agent_name=self.config.name,
            user_id=self.config.user_id,
            session_id=self.context.session_id,
            input_text=user_input,
            output_text=result.content,
            output_truncated=result.truncated,
            success=result.success,
            error_message=result.error,
            latency_ms=latency_ms,
            iterations_used=result.iterations_used,
            tool_calls_made=result.tool_calls_made,
            tool_chain=tool_chain if tool_chain else None,
            steps=steps_summary if steps_summary else None,
            model=self.config.llm.model,
            provider=self.config.llm.provider,
        )

    def _fire_recovery_snapshot(self):
        """Fire-and-forget 保存崩溃恢复快照（每 5 次工具调用保存一次）。"""
        self._recovery_snapshot_counter += 1
        if self._recovery_snapshot_counter % 5 != 0:
            return
        try:
            import asyncio
            asyncio.ensure_future(
                self.recovery.save_snapshot(
                    session_id=self.context.session_id,
                    messages=self.context.messages,
                    extra={
                        "agent_name": self.config.name,
                        "iteration": self.context.iteration,
                        "tool_calls_made": self.context.tool_calls_made,
                    },
                )
            )
        except Exception:
            pass

    def _fire_execution_log(self, user_input: str, result: AgentResult, start_time: float):
        """Fire-and-forget 记录执行日志（非阻塞）。"""
        try:
            latency_ms = int((time.time() - start_time) * 1000)
            kwargs = self._build_execution_log_kwargs(user_input, result, latency_ms)
            _exec_logger.log_execution_fire_and_forget(**kwargs)
        except Exception:
            pass  # 日志记录失败不影响主流程

    async def run(self, user_input: str) -> AgentResult:
        """
        执行 Agent 单轮对话。

        流程：加载记忆 → 追加用户消息 → ReAct 循环 → 保存记忆 → 返回结果。
        """
        max_iter = max(1, self.config.llm.max_iterations)
        self.context.iteration = 0
        self.context.tool_calls_made = 0
        self._llm_invocations = 0  # 每次 run() 重置 LLM 调用计数
        _run_start = time.time()  # 执行开始时间，用于计算总延迟

        # 1. 系统提示词分层装配（首次加载全部段，后续只刷新动态段）
        if self._prompt_sections_enabled:
            system_prompt = await self._compose_system_prompt(user_input)
            self.context.set_system_prompt(system_prompt)
            if not self._memory_context_loaded:
                self._memory_context_loaded = True
                logger.info("分层装配已完成（静态段 + 动态段）")
        elif not self._memory_context_loaded:
            await self._inject_memory_context(user_input)
            self._memory_context_loaded = True
            await self._inject_knowledge_context(user_input)

        # 2. 追加用户消息
        self.context.add_user_message(user_input)

        # 2.5 计划模式 (P2) — 生成执行计划
        plan: Optional[Plan] = None
        if self.plan_mode and self.config.llm.plan_mode_enabled:
            try:
                plan = await self.plan_mode.generate_plan(
                    user_input=user_input,
                    available_tools=self.tool_manager.tool_names(),
                    messages_history=self.context.messages,
                )
                logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
                if self.config.llm.plan_approval_required:
                    approved = await self.plan_mode.present_plan(plan)
                    if not approved:
                        logger.info("计划模式: 计划被拒绝")
                        result = AgentResult(
                            success=False,
                            content=f"计划已被拒绝。\n\n{plan.to_markdown()}",
                            iterations_used=0,
                            tool_calls_made=0,
                            error="plan_rejected",
                        )
                        self._fire_execution_log(user_input, result, _run_start)
                        self._attach_token_usage(result)
                        return result
            except Exception as e:
                logger.warning("计划生成失败，回退到直接执行: %s", e)
                plan = None

        # 3. ReAct 循环
        llm = _LLMClient(self.config.llm)
        tool_schemas = self.tool_manager.get_tool_schemas()
        has_tools = self.tool_manager.has_tools()
        steps: List[AgentStep] = []
        _self_review_attempted = False  # 防止无限修正循环

        # 构建 LLM 调用回调（包装 on_llm_call，补充上下文）
        llm_callback_ctx = {"step_type": "think", "tool_name": None}

        def _llm_callback(metrics: Dict[str, Any]):
            # Token 预算追踪 (P2)
            if self._token_budget:
                prompt_tok = metrics.get("prompt_tokens", 0)
                comp_tok = metrics.get("completion_tokens", 0)
                if prompt_tok <= 0:
                    prompt_tok = self._token_budget.input_tokens  # fallback estimate
                self._token_budget.record_llm_call(
                    prompt_tokens=prompt_tok,
                    completion_tokens=comp_tok,
                    iteration=self.context.iteration,
                    step_type=llm_callback_ctx["step_type"],
                )
            if self.on_llm_call:
                metrics.update({
                    "session_id": self.context.session_id,
                    "user_id": self.config.user_id,
                    "step_type": llm_callback_ctx["step_type"],
                    "tool_name": llm_callback_ctx["tool_name"],
                })
                self.on_llm_call(metrics)

        while self.context.iteration < max_iter:
            self.context.iteration += 1

            # Token 预算检查：每次迭代前更新输入 token 估计
            if self._token_budget:
                self._token_budget.update_from_counter(self.context.messages)
                self._token_budget.reset_compaction_attempts()

            # 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
            _should_compact = self.compaction_engine and self.context.iteration > 1
            if _should_compact and self._token_budget and self._token_budget.needs_compaction:
                self._token_budget.record_compaction_attempt()
                logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
            if self.compaction_engine and self.context.iteration > 1:
                compact_result = await self.compaction_engine.maybe_compact(
                    self.context.messages,
                    self.config.llm.context_window,
                )
                if compact_result.strategy != CompactionStrategy.NONE:
                    self.context.replace_internal_messages(
                        [m for m in compact_result.messages
                         if m.get("role") != "system"]  # 去掉 system（由 context 管理）
                    )
                    logger.debug(
                        "压缩完成: strategy=%s saved=%d tokens",
                        compact_result.strategy.value, compact_result.tokens_saved,
                    )

            # 裁剪过长历史
            messages = self.memory.trim_messages(self.context.messages)

            # 预算检查：LLM 调用次数（在调用 LLM 之前检查，避免浪费额度）
            budget = self.config.budget
            if self._llm_invocations >= budget.max_llm_invocations:
                err = f"已超过 LLM 调用预算（{budget.max_llm_invocations} 次）"
                logger.warning(err)
                steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
                await self.memory.save_context(user_input, err, self.context.messages)
                result = AgentResult(success=False, content=err, truncated=True,
                                   iterations_used=self.context.iteration,
                                   tool_calls_made=self.context.tool_calls_made,
                                   steps=steps, error=err)
                self._fire_execution_log(user_input, result, _run_start)
                self._attach_token_usage(result)
                return result

            # 调用外部 LLM 预算回调（WorkflowEngine 注入，将 Agent 的 LLM 计入工作流预算）
            if self.on_llm_invocation:
                try:
                    self.on_llm_invocation()
                except Exception as e:
                    err = f"LLM 调用超出工作流预算: {e}"
                    logger.warning(err)
                    steps.append(AgentStep(iteration=self.context.iteration, type="final", content=err))
                    await self.memory.save_context(user_input, err, self.context.messages)
                    result = AgentResult(success=False, content=err, truncated=True,
                                       iterations_used=self.context.iteration,
                                       tool_calls_made=self.context.tool_calls_made,
                                       steps=steps, error=str(e))
                    self._fire_execution_log(user_input, result, _run_start)
                    self._attach_token_usage(result)
                    return result

            # 调用 LLM
            try:
                response = await llm.chat(
                    messages=messages,
                    tools=tool_schemas if has_tools and self.context.iteration == 1 else
                               (tool_schemas if has_tools else None),
                    iteration=self.context.iteration,
                    on_completion=_llm_callback,
                )
            except Exception as e:
                err_str = str(e)
                logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
                if self.context.iteration < max_iter and self._is_retryable(err_str):
                    steps.append(AgentStep(
                        iteration=self.context.iteration,
                        type="tool_result",
                        content=f"LLM 调用失败（可重试）: {err_str}",
                    ))
                    self._llm_invocations += 1  # 重试也计入 LLM 调用预算
                    continue
                result = AgentResult(
                    success=False,
                    content=f"LLM 调用失败: {err_str}",
                    iterations_used=self.context.iteration,
                    tool_calls_made=self.context.tool_calls_made,
                    error=err_str,
                )
                self._fire_execution_log(user_input, result, _run_start)
                self._attach_token_usage(result)
                return result

            # 记录 LLM 调用次数（内部计数）
            self._llm_invocations += 1

            # 解析工具调用
            tool_calls = self._extract_tool_calls(response)
            content = self._extract_content(response)
            reasoning = getattr(response, "reasoning_content", None) or (
                response.get("reasoning_content") if isinstance(response, dict) else None
            )

            if not tool_calls:
                # LLM 直接返回文本 → 结束
                self.context.add_assistant_message(content)
                final_text = content or "（模型未返回有效内容）"
                review_score = 0.0

                # 输出质量自检（默认关闭，Agent 节点可开启）
                if self.config.self_review_enabled and not _self_review_attempted:
                    review = await self._self_review(final_text, task_context=user_input)
                    steps.append(AgentStep(
                        iteration=self.context.iteration,
                        type="tool_result",
                        content=f"self_review: score={review['score']:.2f} passed={review['passed']}",
                        tool_name="self_review",
                        tool_input={"content": final_text[:200]},
                        tool_result=json.dumps(review, ensure_ascii=False),
                    ))
                    if review["passed"]:
                        review_score = review["score"]
                        logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], review["threshold"])
                    else:
                        logger.info("self_review 未通过 (%.2f < %.2f)，追加修正", review["score"], review["threshold"])
                        _self_review_attempted = True
                        # 追加修正提示
                        fix_prompt = (
                            f"你的上一个回答未通过质量检查（评分 {review['score']:.1f}/{review['threshold']}）。\n"
                            f"问题：{'；'.join(review['issues'][:3])}\n"
                            f"改进建议：{'；'.join(review['suggestions'][:3])}\n"
                            "请修正你的回答，确保满足上述建议。"
                        )
                        self.context.add_user_message(fix_prompt)
                        continue  # 回到 ReAct 循环，让 LLM 修正

                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="final",
                    content=final_text,
                    reasoning=reasoning,
                ))
                # 保存记忆
                await self.memory.save_context(user_input, final_text, self.context.messages)
                # 保存学习模式
                if self.config.memory.learning_enabled:
                    await self._save_learning_pattern(
                        user_input, steps, success=True,
                        iterations_used=self.context.iteration,
                        tool_calls_made=self.context.tool_calls_made,
                    )
                # 提取知识到全局知识池（Agent 间知识共享）
                await self._extract_global_knowledge(user_input, final_text, steps, review_score)
                result = AgentResult(
                    success=True,
                    content=final_text,
                    iterations_used=self.context.iteration,
                    tool_calls_made=self.context.tool_calls_made,
                    steps=steps,
                )
                self._fire_execution_log(user_input, result, _run_start)
                self._attach_token_usage(result)
                return result

            # 有工具调用 → 先记录 assistant 消息（含 tool_calls）
            self.context.add_assistant_message(content or "", tool_calls, reasoning)

            # 记录思考步骤（含工具调用意图）
            tc_names = [tc["function"]["name"] for tc in tool_calls]
            tc_args_list = []
            for tc in tool_calls:
                try:
                    tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
                except (json.JSONDecodeError, TypeError):
                    raw_args = tc["function"].get("arguments", "")
                    logger.warning("工具参数 JSON 解析失败，使用空对象: %.200s", str(raw_args))
                    tc_args_list.append({})

            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="think",
                content=content or f"调用工具: {', '.join(tc_names)}",
                reasoning=reasoning,
                tool_name=tc_names[0] if len(tc_names) == 1 else None,
                tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
            ))

            if self.execution_logger:
                self.execution_logger.info(
                    f"Agent 调用 {len(tool_calls)} 个工具",
                    data={"tool_calls": tc_names,
                          "iteration": self.context.iteration},
                )

            # 逐一执行工具
            for tc in tool_calls:
                tfn = tc.get("function", {})
                tname = tfn.get("name", "unknown")
                tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")

                try:
                    targs = json.loads(tfn.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    targs = {}

                # Hook: PreToolUse — 可拦截/修改工具调用
                hook_ctx = HookContext(
                    event=HookEvent.PRE_TOOL_USE,
                    tool_name=tname,
                    tool_input=targs,
                    session_id=self.context.session_id,
                    agent_name=self.config.name,
                    user_id=self.config.user_id,
                )
                hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
                if not hook_res.allowed:
                    result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
                    self.context.add_tool_result(tcid, tname, result)
                    continue
                if hook_res.modified_input:
                    targs = hook_res.modified_input
                # 审批检查需要原始参数，所以审批在前；但如果 hook 改了参数，需要重新构建
                if hook_res.modified_input and tname in self.config.tools.require_approval:
                    tfn["arguments"] = json.dumps(targs, ensure_ascii=False)

                # 工具执行前审批检查
                if tname in self.config.tools.require_approval:
                    from app.services.approval_manager import approval_manager as _am
                    logger.info("Agent 工具需审批 [%s]: %s", tname, targs)
                    approval_req = await _am.submit(
                        tool_name=tname, args=targs,
                        timeout_ms=self.config.tools.approval_timeout_ms,
                    )
                    decision = approval_req.decision
                    if decision == "denied":
                        result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。"
                        self.context.add_tool_result(tcid, tname, result)
                        continue
                    elif decision == "skip":
                        result = f"[审批跳过] 工具 {tname} 被跳过。"
                        self.context.add_tool_result(tcid, tname, result)
                        continue
                    # decision == "approved" → 继续执行

                logger.info("Agent 执行工具 [%s]: %s", tname, targs)
                try:
                    result = await self.tool_manager.execute(tname, targs)
                except Exception as tool_err:
                    logger.error("工具 '%s' 执行异常: %s", tname, tool_err, exc_info=True)
                    result = json.dumps({
                        "error": f"工具 '{tname}' 执行异常: {tool_err}"
                    }, ensure_ascii=False)

                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="tool_result",
                    content=f"工具 {tname} 返回结果",
                    tool_name=tname,
                    tool_input=targs,
                    tool_result=result[:500] + "..." if len(result) > 500 else result,
                ))

                self.context.add_tool_result(tcid, tname, result)
                self.context.tool_calls_made += 1

                # Hook: PostToolUse — 工具执行后处理
                post_ctx = HookContext(
                    event=HookEvent.POST_TOOL_USE,
                    tool_name=tname,
                    tool_input=targs,
                    tool_output=result,
                    session_id=self.context.session_id,
                    agent_name=self.config.name,
                    user_id=self.config.user_id,
                )
                await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)

                # 崩溃恢复快照 (P4)
                self._fire_recovery_snapshot()

                # 预算检查：工具调用次数
                if self.context.tool_calls_made > budget.max_tool_calls:
                    err = f"已超过工具调用预算（{budget.max_tool_calls} 次）"
                    logger.warning(err)
                    steps.append(AgentStep(iteration=self.context.iteration, type="tool_result",
                                           content=err, tool_name=tname))
                    result = AgentResult(success=False, content=err, truncated=True,
                                       iterations_used=self.context.iteration,
                                       tool_calls_made=self.context.tool_calls_made,
                                       steps=steps, error=err)
                    self._fire_execution_log(user_input, result, _run_start)
                    self._attach_token_usage(result)
                    return result

                if self.on_tool_executed:
                    try:
                        await self.on_tool_executed(tname)
                    except WorkflowExecutionError:
                        raise
                    except Exception:
                        pass

                if self.execution_logger:
                    preview = result[:300] + "..." if len(result) > 300 else result
                    self.execution_logger.info(
                        f"工具 {tname} 执行完成",
                        data={"tool_name": tname, "result_preview": preview},
                    )

        # 达到最大迭代次数
        last_content = ""
        for m in reversed(self.context.messages):
            if m.get("role") == "assistant" and m.get("content"):
                last_content = m["content"]
                break

        logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
        await self.memory.save_context(user_input, last_content or "（已达最大迭代次数）", self.context.messages)
        # 保存学习模式（即使截断，标记为未成功以便后续分析）
        if self.config.memory.learning_enabled:
            await self._save_learning_pattern(
                user_input, steps, success=False,
                iterations_used=self.context.iteration,
                tool_calls_made=self.context.tool_calls_made,
            )
        # 提取知识到全局知识池（即便截断，工具调用序列仍有参考价值）
        if last_content:
            await self._extract_global_knowledge(user_input, last_content, steps)
        if last_content:
            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="final",
                content=last_content,
            ))
        truncation_msg = f"已达最大迭代次数 ({max_iter})，任务被截断"
        result = AgentResult(
            success=False,
            content=last_content or truncation_msg,
            truncated=True,
            iterations_used=self.context.iteration,
            tool_calls_made=self.context.tool_calls_made,
            steps=steps,
            error=truncation_msg,
        )
        self._fire_execution_log(user_input, result, _run_start)
        self._attach_token_usage(result)
        return result

    async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
        """
        流式执行 Agent 单轮对话（支持 streamlined 模式）。

        与 run() 逻辑相同，但在每个关键步骤 yield SSE 事件。
        当 streamlined=True 时，工具调用会被折叠为累计摘要。
        """
        if self._streamlined_transformer:
            self._streamlined_transformer.reset()
            async for event in self._run_stream_impl(user_input):
                transformed = self._streamlined_transformer.transform(event)
                if transformed is not None:
                    yield transformed
            flushed = self._streamlined_transformer.flush()
            if flushed:
                yield flushed
        else:
            async for event in self._run_stream_impl(user_input):
                yield event

    async def _run_stream_impl(self, user_input: str) -> AsyncGenerator[dict, None]:
        """
        流式执行 Agent 单轮对话（内部实现）。

        与 run() 逻辑相同，但在每个关键步骤 yield SSE 事件：
        - think: LLM 思考中，准备调用工具
        - tool_call: 即将执行工具
        - tool_result: 工具执行完毕
        - final: 最终回答
        - error: 出错/预算超限
        """
        max_iter = max(1, self.config.llm.max_iterations)
        self.context.iteration = 0
        self.context.tool_calls_made = 0

        # 1. 系统提示词分层装配
        if self._prompt_sections_enabled:
            system_prompt = await self._compose_system_prompt(user_input)
            self.context.set_system_prompt(system_prompt)
            if not self._memory_context_loaded:
                self._memory_context_loaded = True
                logger.info("分层装配已完成（静态段 + 动态段）")
        elif not self._memory_context_loaded:
            await self._inject_memory_context(user_input)
            self._memory_context_loaded = True
            await self._inject_knowledge_context(user_input)

        # 2. 追加用户消息
        self.context.add_user_message(user_input)

        # 2.5 计划模式 (P2) — 流式生成执行计划
        plan: Optional[Plan] = None
        if self.plan_mode and self.config.llm.plan_mode_enabled:
            yield {"type": "plan_generating", "content": "正在生成执行计划…", "iteration": 0}
            try:
                plan = await self.plan_mode.generate_plan(
                    user_input=user_input,
                    available_tools=self.tool_manager.tool_names(),
                    messages_history=self.context.messages,
                )
                logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
                yield {
                    "type": "plan",
                    "content": plan.to_markdown(),
                    "plan_data": plan.to_dict(),
                    "iteration": 0,
                    "session_id": self.context.session_id,
                }
                if self.config.llm.plan_approval_required:
                    # 等待外部审批（通过 on_approval_required 回调）
                    approved = await self.plan_mode.present_plan(plan)
                    if not approved:
                        logger.info("计划模式: 计划被拒绝")
                        yield {
                            "type": "plan_rejected",
                            "content": "计划已被拒绝",
                            "plan_data": plan.to_dict(),
                            "iteration": 0,
                            "session_id": self.context.session_id,
                        }
                        return
                    yield {
                        "type": "plan_approved",
                        "content": "计划已批准，开始执行",
                        "iteration": 0,
                        "session_id": self.context.session_id,
                    }
            except Exception as e:
                logger.warning("计划生成失败，回退到直接执行: %s", e)
                yield {"type": "plan_failed", "content": f"计划生成失败: {e}", "iteration": 0}
                plan = None

        # 3. ReAct 循环
        llm = _LLMClient(self.config.llm)
        tool_schemas = self.tool_manager.get_tool_schemas()
        has_tools = self.tool_manager.has_tools()
        steps: List[AgentStep] = []
        _self_review_attempted = False

        llm_callback_ctx = {"step_type": "think", "tool_name": None}

        def _llm_callback(metrics: Dict[str, Any]):
            # Token 预算追踪 (P2)
            if self._token_budget:
                prompt_tok = metrics.get("prompt_tokens", 0)
                comp_tok = metrics.get("completion_tokens", 0)
                if prompt_tok <= 0:
                    prompt_tok = self._token_budget.input_tokens  # fallback estimate
                self._token_budget.record_llm_call(
                    prompt_tokens=prompt_tok,
                    completion_tokens=comp_tok,
                    iteration=self.context.iteration,
                    step_type=llm_callback_ctx["step_type"],
                )
            if self.on_llm_call:
                metrics.update({
                    "session_id": self.context.session_id,
                    "user_id": self.config.user_id,
                    "step_type": llm_callback_ctx["step_type"],
                    "tool_name": llm_callback_ctx["tool_name"],
                })
                self.on_llm_call(metrics)

        while self.context.iteration < max_iter:
            self.context.iteration += 1

            # Token 预算检查：每次迭代前更新输入 token 估计
            if self._token_budget:
                self._token_budget.update_from_counter(self.context.messages)
                self._token_budget.reset_compaction_attempts()

            # 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
            if self.compaction_engine and self.context.iteration > 1:
                if self._token_budget and self._token_budget.needs_compaction:
                    self._token_budget.record_compaction_attempt()
                    logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
                compact_result = await self.compaction_engine.maybe_compact(
                    self.context.messages,
                    self.config.llm.context_window,
                )
                if compact_result.strategy != CompactionStrategy.NONE:
                    self.context.replace_internal_messages(
                        [m for m in compact_result.messages
                         if m.get("role") != "system"]
                    )
                    logger.debug(
                        "压缩完成: strategy=%s saved=%d tokens",
                        compact_result.strategy.value, compact_result.tokens_saved,
                    )

            messages = self.memory.trim_messages(self.context.messages)

            # 预算检查：LLM 调用次数（在调用 LLM 之前检查，避免浪费额度）
            budget = self.config.budget
            if self._llm_invocations >= budget.max_llm_invocations:
                err = f"已超过 LLM 调用预算（{budget.max_llm_invocations} 次）"
                logger.warning(err)
                yield {"type": "error", "content": err, "iteration": self.context.iteration,
                       "truncated": True}
                await self.memory.save_context(user_input, err, self.context.messages)
                return

            # 调用外部 LLM 预算回调（WorkflowEngine 注入）
            if self.on_llm_invocation:
                try:
                    self.on_llm_invocation()
                except Exception as e:
                    err = f"LLM 调用超出工作流预算: {e}"
                    logger.warning(err)
                    yield {"type": "error", "content": err, "iteration": self.context.iteration,
                           "truncated": True}
                    return

            # think 事件：告知前端 Agent 正在思考（让 UI 即时反馈，避免假死感）
            yield {"type": "think", "content": "", "reasoning": None, "iteration": self.context.iteration}

            # 调用 LLM
            try:
                response = await llm.chat(
                    messages=messages,
                    tools=tool_schemas if has_tools and self.context.iteration == 1 else
                               (tool_schemas if has_tools else None),
                    iteration=self.context.iteration,
                    on_completion=_llm_callback,
                )
            except Exception as e:
                err_str = str(e)
                logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
                if self.context.iteration < max_iter and self._is_retryable(err_str):
                    yield {"type": "error", "content": f"LLM 调用失败（可重试）: {err_str}",
                           "iteration": self.context.iteration}
                    continue
                yield {"type": "error", "content": f"LLM 调用失败: {err_str}",
                       "iteration": self.context.iteration}
                return

            # 记录 LLM 调用次数（内部计数）
            self._llm_invocations += 1

            # 解析工具调用
            tool_calls = self._extract_tool_calls(response)
            content = self._extract_content(response)
            reasoning = getattr(response, "reasoning_content", None) or (
                response.get("reasoning_content") if isinstance(response, dict) else None
            )

            if not tool_calls:
                # LLM 直接返回文本 → 结束
                self.context.add_assistant_message(content)
                final_text = content or "（模型未返回有效内容）"
                review_score = 0.0

                # 输出质量自检（默认关闭）
                if self.config.self_review_enabled and not _self_review_attempted:
                    review = await self._self_review(final_text, task_context=user_input)
                    yield {
                        "type": "tool_result",
                        "content": f"self_review: score={review['score']:.2f} passed={review['passed']}",
                        "tool_name": "self_review",
                        "iteration": self.context.iteration,
                        "session_id": self.context.session_id,
                    }
                    if review["passed"]:
                        review_score = review["score"]
                        logger.info("self_review 通过 (%.2f >= %.2f)", review["score"], review["threshold"])
                    else:
                        logger.info("self_review 未通过 (%.2f < %.2f)，追加修正", review["score"], review["threshold"])
                        _self_review_attempted = True
                        yield {
                            "type": "think",
                            "content": f"自检未通过（{review['score']:.1f}），正在修正：{'；'.join(review['suggestions'][:2])}",
                            "iteration": self.context.iteration,
                            "session_id": self.context.session_id,
                        }
                        fix_prompt = (
                            f"你的上一个回答未通过质量检查（评分 {review['score']:.1f}/{review['threshold']}）。\n"
                            f"问题：{'；'.join(review['issues'][:3])}\n"
                            f"改进建议：{'；'.join(review['suggestions'][:3])}\n"
                            "请修正你的回答，确保满足上述建议。"
                        )
                        self.context.add_user_message(fix_prompt)
                        continue  # 回到 ReAct 循环，让 LLM 修正

                token_usage_final = self._token_budget.summary() if self._token_budget else None
                yield {
                    "type": "final",
                    "content": final_text,
                    "reasoning": reasoning,
                    "iteration": self.context.iteration,
                    "iterations_used": self.context.iteration,
                    "tool_calls_made": self.context.tool_calls_made,
                    "session_id": self.context.session_id,
                    "token_usage": token_usage_final,
                }
                await self.memory.save_context(user_input, final_text, self.context.messages)
                # 保存学习模式
                if self.config.memory.learning_enabled:
                    await self._save_learning_pattern(
                        user_input, steps, success=True,
                        iterations_used=self.context.iteration,
                        tool_calls_made=self.context.tool_calls_made,
                    )
                # 提取知识到全局知识池（Agent 间知识共享）
                await self._extract_global_knowledge(user_input, final_text, steps, review_score)
                return

            # 有工具调用 → 先记录 assistant 消息
            self.context.add_assistant_message(content or "", tool_calls, reasoning)

            # yield think 事件
            tc_names = [tc["function"]["name"] for tc in tool_calls]
            tc_args_list = []
            for tc in tool_calls:
                try:
                    tc_args_list.append(json.loads(tc["function"].get("arguments", "{}")))
                except (json.JSONDecodeError, TypeError):
                    raw_args = tc["function"].get("arguments", "")
                    logger.warning("工具参数 JSON 解析失败，使用空对象: %.200s", str(raw_args))
                    tc_args_list.append({})

            yield {
                "type": "think",
                "content": content or f"调用工具: {', '.join(tc_names)}",
                "reasoning": reasoning,
                "tool_names": tc_names,
                "iteration": self.context.iteration,
            }

            steps.append(AgentStep(
                iteration=self.context.iteration,
                type="think",
                content=content or f"调用工具: {', '.join(tc_names)}",
                reasoning=reasoning,
                tool_name=tc_names[0] if len(tc_names) == 1 else None,
                tool_input=tc_args_list[0] if len(tc_args_list) == 1 else None,
            ))

            if self.execution_logger:
                self.execution_logger.info(
                    f"Agent 调用 {len(tool_calls)} 个工具",
                    data={"tool_calls": tc_names, "iteration": self.context.iteration},
                )

            # 逐一执行工具
            for tc in tool_calls:
                tfn = tc.get("function", {})
                tname = tfn.get("name", "unknown")
                tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")

                try:
                    targs = json.loads(tfn.get("arguments", "{}"))
                except (json.JSONDecodeError, TypeError):
                    targs = {}

                # Hook: PreToolUse — 可拦截/修改工具调用 (流式)
                hook_ctx = HookContext(
                    event=HookEvent.PRE_TOOL_USE,
                    tool_name=tname,
                    tool_input=targs,
                    session_id=self.context.session_id,
                    agent_name=self.config.name,
                    user_id=self.config.user_id,
                )
                hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
                if not hook_res.allowed:
                    result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
                    yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
                    self.context.add_tool_result(tcid, tname, result)
                    continue
                if hook_res.modified_input:
                    targs = hook_res.modified_input

                # yield tool_call 事件
                yield {
                    "type": "tool_call",
                    "name": tname,
                    "input": targs,
                    "iteration": self.context.iteration,
                }

                # 工具执行前审批检查（流式：先 create → yield 事件带 ID → 等待决定）
                if tname in self.config.tools.require_approval:
                    from app.services.approval_manager import approval_manager as _am
                    logger.info("Agent 工具需审批 [%s]: %s", tname, targs)
                    approval_req = _am.create(tool_name=tname, args=targs)
                    yield {
                        "type": "approval_required",
                        "approval_id": approval_req.approval_id,
                        "tool_name": tname,
                        "args": targs,
                        "iteration": self.context.iteration,
                    }
                    decision = await _am.wait_for_decision(
                        approval_req.approval_id,
                        timeout_ms=self.config.tools.approval_timeout_ms,
                    )
                    if decision == "denied":
                        result = f"[审批拒绝] 工具 {tname} 需要人工审批但被拒绝。"
                        yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
                        self.context.add_tool_result(tcid, tname, result)
                        continue
                    elif decision == "skip":
                        result = f"[审批跳过] 工具 {tname} 被跳过。"
                        yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
                        self.context.add_tool_result(tcid, tname, result)
                        continue
                    # decision == "approved" → 继续执行

                logger.info("Agent 执行工具 [%s]: %s", tname, targs)
                try:
                    result = await self.tool_manager.execute(tname, targs)
                except Exception as tool_err:
                    logger.error("工具 '%s' 执行异常: %s", tname, tool_err, exc_info=True)
                    result = json.dumps({
                        "error": f"工具 '{tname}' 执行异常: {tool_err}"
                    }, ensure_ascii=False)

                # yield tool_result 事件
                yield {
                    "type": "tool_result",
                    "name": tname,
                    "result": result[:500] + "..." if len(result) > 500 else result,
                    "iteration": self.context.iteration,
                }

                steps.append(AgentStep(
                    iteration=self.context.iteration,
                    type="tool_result",
                    content=f"工具 {tname} 返回结果",
                    tool_name=tname,
                    tool_input=targs,
                    tool_result=result[:500] + "..." if len(result) > 500 else result,
                ))

                self.context.add_tool_result(tcid, tname, result)
                self.context.tool_calls_made += 1

                # Hook: PostToolUse — 工具执行后处理 (流式)
                post_ctx = HookContext(
                    event=HookEvent.POST_TOOL_USE,
                    tool_name=tname,
                    tool_input=targs,
                    tool_output=result,
                    session_id=self.context.session_id,
                    agent_name=self.config.name,
                    user_id=self.config.user_id,
                )
                await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)

                # 崩溃恢复快照 (P4)
                self._fire_recovery_snapshot()

                # 预算检查：工具调用次数
                if self.context.tool_calls_made > budget.max_tool_calls:
                    err = f"已超过工具调用预算（{budget.max_tool_calls} 次）"
                    logger.warning(err)
                    yield {"type": "error", "content": err, "iteration": self.context.iteration,
                           "truncated": True}
                    return

                if self.on_tool_executed:
                    try:
                        await self.on_tool_executed(tname)
                    except WorkflowExecutionError:
                        raise
                    except Exception:
                        pass

                if self.execution_logger:
                    preview = result[:300] + "..." if len(result) > 300 else result
                    self.execution_logger.info(
                        f"工具 {tname} 执行完成",
                        data={"tool_name": tname, "result_preview": preview},
                    )

        # Hook: Stop — 对话完成
        stop_ctx = HookContext(
            event=HookEvent.STOP,
            session_id=self.context.session_id,
            agent_name=self.config.name,
            user_id=self.config.user_id,
        )
        await self.hook_manager.trigger(HookEvent.STOP, stop_ctx)

        # 达到最大迭代次数
        last_content = ""
        for m in reversed(self.context.messages):
            if m.get("role") == "assistant" and m.get("content"):
                last_content = m["content"]
                break

        logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
        await self.memory.save_context(user_input, last_content or "（已达最大迭代次数）", self.context.messages)
        # 保存学习模式（即便截断，工具调用模式仍有参考价值）
        if self.config.memory.learning_enabled:
            await self._save_learning_pattern(
                user_input, steps, success=True,
                iterations_used=self.context.iteration,
                tool_calls_made=self.context.tool_calls_made,
            )
        # 提取知识到全局知识池（即便截断，工具调用序列仍有参考价值）
        if last_content:
            await self._extract_global_knowledge(user_input, last_content, steps)
        token_usage_truncated = self._token_budget.summary() if self._token_budget else None
        yield {
            "type": "final",
            "content": last_content or "已达最大迭代次数，但模型未返回最终回答。",
            "iteration": self.context.iteration,
            "iterations_used": self.context.iteration,
            "tool_calls_made": self.context.tool_calls_made,
            "truncated": True,
            "session_id": self.context.session_id,
            "token_usage": token_usage_truncated,
        }

    async def _compose_system_prompt(self, query: str = "") -> str:
        """使用分层装配构建完整系统提示词。

        将静态段 + 动态段并行解析后拼接，替代原先的字符串拼接方式。
        返回最终的 system_prompt 字符串。
        """
        if not self._prompt_composer:
            # 降级：使用原有字符串拼接方式
            enriched = self.config.system_prompt.rstrip("\n")
            mem_text = await self.memory.initialize(query=query)
            if mem_text:
                enriched += "\n\n" + mem_text
            if self.config.memory.learning_enabled:
                pattern_hint = await self._inject_learning_patterns(query)
                if pattern_hint:
                    enriched += "\n\n" + pattern_hint
            if self._memdir and self._memdir_manifest:
                memdir_text = await self._inject_memdir_context(query)
                if memdir_text:
                    enriched += "\n\n" + memdir_text
            try:
                enriched = knowledge_retriever.inject_knowledge(enriched, query)
            except Exception:
                pass
            return enriched

        # 分层装配路径
        ps_config = self.config.prompt_sections
        d_switches = ps_config.dynamic_sections

        # 清除上一次运行的动态段
        # (静态段保留缓存，动态段每次重算)
        self._prompt_composer._dynamic_sections.clear()

        # 动态段：环境信息
        if d_switches.get("environment", True):
            self._prompt_composer.add_dynamic(PromptSection(
                "environment",
                lambda uid=self.config.user_id: section_environment(uid),
                cache_break=True,
            ))

        # 动态段：语言偏好
        if d_switches.get("language", True):
            lang = ps_config.language
            if lang:
                self._prompt_composer.add_dynamic(PromptSection(
                    "language",
                    lambda l=lang: section_language(l),
                    cache_break=False,
                ))

        # 动态段：长期记忆上下文
        if d_switches.get("memory_context", True):
            mem_text = await self.memory.initialize(query=query)
            if mem_text:
                self._prompt_composer.add_dynamic(PromptSection(
                    "memory_context",
                    lambda t=mem_text: f"# Long-term Memory\n\n{t}",
                    cache_break=True,
                ))

        # 动态段：学习模式提示
        if self.config.memory.learning_enabled:
            pattern_hint = await self._inject_learning_patterns(query)
            if pattern_hint:
                self._prompt_composer.add_dynamic(PromptSection(
                    "learning_patterns",
                    lambda p=pattern_hint: p,
                    cache_break=True,
                ))

        # 动态段：文件式记忆
        if self._memdir and self._memdir_manifest:
            memdir_text = await self._inject_memdir_context(query)
            if memdir_text:
                self._prompt_composer.add_dynamic(PromptSection(
                    "memdir",
                    lambda t=memdir_text: t,
                    cache_break=True,
                ))

        # 动态段：知识库检索
        if d_switches.get("memory_context", True):
            try:
                base_enriched = knowledge_retriever.inject_knowledge(
                    self.config.system_prompt, query
                )
                if base_enriched != self.config.system_prompt:
                    # 提取增量部分
                    knowledge_delta = base_enriched[len(self.config.system_prompt):].strip()
                    if knowledge_delta:
                        self._prompt_composer.add_dynamic(PromptSection(
                            "knowledge_base",
                            lambda kd=knowledge_delta: f"# Relevant Knowledge\n\n{kd}",
                            cache_break=True,
                        ))
            except Exception:
                pass

        # 工具列表段（默认关闭，太长）
        if d_switches.get("tool_list", False):
            tool_names = self.tool_manager.tool_names()
            if tool_names:
                tool_list_text = "\n".join(f"- {n}" for n in sorted(tool_names))
                self._prompt_composer.add_dynamic(PromptSection(
                    "tool_list",
                    lambda t=tool_list_text: f"# Available Tools\n\n{t}",
                    cache_break=False,
                ))

        # 解析 + 装配
        return await self._prompt_composer.assemble_full()

    async def _inject_memory_context(self, query: str = "") -> None:
        """加载长期记忆并注入 system prompt。"""
        mem_text = await self.memory.initialize(query=query)
        enriched = self.config.system_prompt.rstrip("\n")

        if mem_text:
            enriched += "\n\n" + mem_text

        # 注入学习模式提示（历史工具使用建议）
        if self.config.memory.learning_enabled:
            pattern_hint = await self._inject_learning_patterns(query)
            if pattern_hint:
                enriched += "\n\n" + pattern_hint

        # 注入文件式记忆 (MEMORY.md)
        if self._memdir and self._memdir_manifest:
            memdir_text = await self._inject_memdir_context(query)
            if memdir_text:
                enriched += "\n\n" + memdir_text

        self.context.set_system_prompt(enriched)
        logger.info("Agent 已注入长期记忆上下文")

    async def _inject_memdir_context(self, query: str) -> str:
        """加载文件式记忆并构建注入文本。"""
        if not self._memdir or not self._memdir_manifest:
            return ""

        parts: List[str] = []

        # 记忆操作指导（首次注入）
        memdir_prompt = self._memdir.build_system_prompt()
        parts.append(memdir_prompt)

        # AI 驱动的相关性选择
        if self._memdir_manifest.entries:
            try:
                selected = await memory_selector.select(
                    query=query,
                    manifest=self._memdir_manifest,
                    recent_tools=self.tool_manager.tool_names(),
                )
                if selected:
                    # 读取选中的记忆文件
                    parts.append("\n## 相关记忆\n")
                    for fn in selected:
                        entry = next(
                            (e for e in self._memdir_manifest.entries
                             if e.filename == fn), None
                        )
                        if entry:
                            # 加载完整内容
                            try:
                                with open(entry.filepath, "r", encoding="utf-8") as _f:
                                    _, content = parse_frontmatter(_f.read())
                            except Exception:
                                content = entry.content
                            if not content:
                                content = entry.content
                            staleness = entry.staleness_note
                            parts.append(
                                f"<system-reminder>\n"
                                f"### [{entry.mem_type.value}] {entry.name}\n"
                                f"{content[:2000]}"
                            )
                            if staleness:
                                parts.append(f"\n{staleness}")
                            parts.append("</system-reminder>")
            except Exception as e:
                logger.warning("AI 记忆选择失败: %s", e)

        return "\n".join(parts)

    async def _inject_learning_patterns(self, query: str) -> str:
        """查询学习模式，返回格式化的提示文本。"""
        from app.core.database import SessionLocal
        db = None
        try:
            db = SessionLocal()
            patterns = load_relevant_patterns(
                db, self._learning_scope_kind, self.memory.scope_id, query
            )
            return format_pattern_hint(patterns, query)
        except Exception as e:
            logger.warning("加载学习模式失败: %s", e)
            return ""

    async def _inject_knowledge_context(self, query: str) -> None:
        """从知识进化库检索相关经验并注入 system prompt。"""
        try:
            enriched = knowledge_retriever.inject_knowledge(
                self.context.system_prompt, query
            )
            if enriched != self.context.system_prompt:
                self.context.set_system_prompt(enriched)
                logger.info("Agent 已注入相关知识库经验")
        except Exception as e:
            logger.debug("知识检索注入跳过: %s", e)

    async def _save_learning_pattern(
        self, query: str, steps: List[AgentStep],
        success: bool, iterations_used: int, tool_calls_made: int,
    ) -> None:
        """从执行结果中提取模式并保存。"""
        from app.core.database import SessionLocal
        db = None
        try:
            db = SessionLocal()
            pattern_data = extract_pattern_from_result(
                query=query,
                steps=steps,
                success=success,
                iterations_used=iterations_used,
                tool_calls_made=tool_calls_made,
            )
            save_learning_pattern(
                db, self._learning_scope_kind,
                self.memory.scope_id, pattern_data,
            )
        except Exception as e:
            logger.warning("保存学习模式失败: %s", e)
        finally:
            if db:
                db.close()

    async def _extract_global_knowledge(
        self, user_input: str, final_answer: str, steps: List[AgentStep],
        self_review_score: float = 0.0,
    ) -> None:
        """从 Agent 执行结果中提取知识，写入全局知识池（Agent 间共享）。"""
        # 提取工具调用名称作为 tags
        tool_names = list(dict.fromkeys(
            s.tool_name for s in (steps or [])
            if s.tool_name and s.type == "tool_result"
        ))
        tags = tool_names[:5] if tool_names else ["对话"]

        # 提取关键信息：用户问题摘要 + 回答要点（前 500 字）
        content = (
            f"问题: {user_input[:300]}\n"
            f"回答要点: {final_answer[:500]}"
        )
        if tool_names:
            content += f"\n使用工具: {', '.join(tool_names[:5])}"

        source_agent_id = self.config.name if self.config.name != "default_agent" else ""
        source_user_id = self.config.user_id or ""

        # 置信度评估：基于 self_review 评分和工具执行成功数
        confidence = "medium"
        if self_review_score >= 0.8:
            confidence = "high"
        elif self_review_score > 0 and self_review_score < 0.5:
            confidence = "low"
        elif tool_names and len(tool_names) >= 2:
            confidence = "high"  # 多工具协作通常质量更高
        # TTL: 高置信度知识有效期更长
        ttl_hours = 720 if confidence == "high" else 168 if confidence == "medium" else 24

        await self.memory.save_global_knowledge(
            content=content,
            source_agent_id=source_agent_id,
            source_user_id=source_user_id,
            tags=tags,
            confidence=confidence,
            ttl_hours=ttl_hours,
        )

    async def _self_review(self, content: str, task_context: str = "") -> dict:
        """输出质量自检：用轻量 LLM 评判输出，返回 {score, passed, issues, suggestions}。"""
        criteria = (
            "回答必须准确、完整、切题。"
            "包含具体可执行的步骤或代码示例。"
            "无明显事实错误或遗漏。"
            "格式清晰，便于阅读。"
        )
        try:
            from app.agent_runtime.core import _LLMClient
            from app.agent_runtime.schemas import AgentLLMConfig

            review_config = AgentLLMConfig(
                provider=getattr(self.config.llm, 'provider', 'deepseek'),
                model="deepseek-v4-flash",
                temperature=0.1,
                max_tokens=800,
                request_timeout=30.0,
            )
            if self.config.llm.api_key:
                review_config.api_key = self.config.llm.api_key
            if self.config.llm.base_url:
                review_config.base_url = self.config.llm.base_url

            client = _LLMClient(review_config)

            judge_prompt = (
                "你是严格的内容质量评审专家。请根据以下标准对内容进行评分。\n\n"
                f"【评判标准】\n{criteria}\n\n"
                f"【待评审内容】\n{content[:8000]}\n"
            )
            if task_context:
                judge_prompt += f"\n【任务背景】\n{task_context[:2000]}\n"

            judge_prompt += (
                "\n请以 JSON 格式返回评审结果（严格只返回 JSON，不要任何其他文字）：\n"
                '{"score": 0.75, "passed": true, "issues": ["问题1"], '
                '"suggestions": ["建议1"], "summary": "一句话总结"}\n\n'
                "评分规则：1.0完美 0.8良好 0.6基本满足 0.4大部分未满足 0.2完全不满足\n"
                "score >= 0.6 时 passed=true，否则 passed=false\n"
            )

            messages = [{"role": "user", "content": judge_prompt}]
            resp = await client.chat(messages=messages, tools=None, iteration=0)
            judge_text = getattr(resp, 'content', '') or (
                resp.get('content', '') if isinstance(resp, dict) else str(resp)
            )

            # 解析 JSON
            try:
                judge_clean = judge_text.strip()
                if judge_clean.startswith("```"):
                    lines = judge_clean.split("\n")
                    judge_clean = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
                result = json.loads(judge_clean)
            except json.JSONDecodeError:
                import re as _sr_re
                m = _sr_re.search(r'\{[^{}]*"score"\s*:\s*[\d.]+[^{}]*\}', judge_text, _sr_re.DOTALL)
                if m:
                    try:
                        result = json.loads(m.group())
                    except json.JSONDecodeError:
                        result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""}
                else:
                    result = {"score": 0.5, "passed": False, "issues": ["无法解析评审结果"], "suggestions": [], "summary": ""}

            score = float(result.get("score", 0.5))
            threshold = self.config.llm.self_review_threshold
            passed = score >= threshold

            return {
                "score": score,
                "passed": passed,
                "threshold": threshold,
                "issues": result.get("issues", []),
                "suggestions": result.get("suggestions", []),
                "summary": result.get("summary", ""),
            }
        except Exception as e:
            logger.warning("self_review 执行失败: %s", e)
            return {"score": 0.0, "passed": False, "issues": [f"self_review 执行异常: {e}"],
                    "suggestions": ["请检查 self_review 配置或 LLM 可用性"], "error": str(e)}

    @staticmethod
    def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
        """从 LLM 响应中提取工具调用列表。"""
        if response is None:
            return []
        # OpenAI SDK 格式
        if hasattr(response, "tool_calls") and response.tool_calls:
            result = []
            for tc in response.tool_calls:
                result.append({
                    "id": tc.id,
                    "type": tc.type,
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                })
            return result
        # 字典格式
        if isinstance(response, dict):
            tc_list = response.get("tool_calls") or []
            if tc_list:
                return tc_list
            # 检查 content 中是否嵌入了 DSML
            content = response.get("content") or ""
            if "invoke" in content or "function_call" in content:
                from app.services.llm_service import _parse_dsml_tool_invocations
                dsml = _parse_dsml_tool_invocations(content)
                if dsml:
                    return [
                        {
                            "id": f"dsml-{i}",
                            "type": "function",
                            "function": {
                                "name": inv["name"],
                                "arguments": json.dumps(inv["arguments"], ensure_ascii=False),
                            },
                        }
                        for i, inv in enumerate(dsml)
                    ]
        return []

    @staticmethod
    def _extract_content(response: Any) -> str:
        """从 LLM 响应中提取文本内容。"""
        if response is None:
            return ""
        if hasattr(response, "content"):
            return response.content or ""
        if isinstance(response, dict):
            return response.get("content") or ""
        return str(response)

    @staticmethod
    def _is_retryable(err_str: str) -> bool:
        """判断错误是否可重试（使用 ErrorClassifier）。"""
        try:
            error_type, _ = _error_classifier.classify(Exception(err_str))
            return error_type == ErrorType.RETRYABLE
        except Exception:
            err_lower = err_str.lower()
            return any(kw in err_lower for kw in (
                "timed out", "timeout", "connection error",
                "rate limit", "too many requests", "internal server error",
                "service unavailable", "temporarily unavailable",
            ))


# LLM 缓存辅助
def _llm_cache_key(messages: list, model: str) -> str:
    import hashlib
    raw = json.dumps({"msgs": messages[-4:], "model": model}, sort_keys=True, ensure_ascii=False)
    return f"llm:{model}:{hashlib.sha256(raw.encode()).hexdigest()[:16]}"

async def _llm_cache_get(key: str) -> Optional[str]:
    try:
        from app.core.redis_client import get_redis_client
        redis = get_redis_client()
        if redis:
            return await redis.get(key)
    except Exception:
        pass
    return None

async def _llm_cache_set(key: str, value: str, ttl_ms: int):
    try:
        from app.core.redis_client import get_redis_client
        redis = get_redis_client()
        if redis:
            await redis.setex(key, max(1, int(ttl_ms / 1000)), value)
    except Exception:
        pass


class _LLMClient:
    """轻量 LLM 客户端包装，复用已有 LLMService 能力。"""

    def __init__(self, config: Any):
        from app.services.llm_service import llm_service
        self._service = llm_service
        self._config = config

    async def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        iteration: int = 1,
        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
    ) -> Any:
        """调用 LLM，主模型失败时自动切换 fallback_llm 重试。"""
        from openai import AsyncOpenAI
        from app.core.config import settings

        # 优先从配置读取，其次从 settings（.env 加载），最后 os.environ
        api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
        base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""

        if not api_key or api_key == "your-openai-api-key":
            api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
            base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"

        if not api_key:
            raise ValueError("未配置 API Key")

        return await self._do_chat(
            api_key=api_key, base_url=base_url, model=self._config.model,
            messages=messages, tools=tools, iteration=iteration,
            on_completion=on_completion,
        )

    async def _do_chat(
        self,
        api_key: str,
        base_url: str,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        iteration: int = 1,
        on_completion: Optional[Callable[[Dict[str, Any]], Any]] = None,
        _is_fallback: bool = False,
    ) -> Any:
        from openai import AsyncOpenAI
        from app.core.config import settings

        client = AsyncOpenAI(api_key=api_key, base_url=base_url)

        kwargs: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": self._config.temperature,
            "timeout": self._config.request_timeout,
        }
        if self._config.max_tokens:
            kwargs["max_tokens"] = self._config.max_tokens
        if self._config.extra_body:
            kwargs["extra_body"] = self._config.extra_body
        if tools:
            # Normalize tool schemas to OpenAI format: custom tools from the
            # marketplace may be stored as {"name":..., "parameters":...}
            # or {"function":{...}} without the required "type": "function".
            normalized = []
            for t in tools:
                if isinstance(t, dict):
                    if t.get("type") == "function":
                        # Already in correct format: {"type":"function","function":{...}}
                        normalized.append(t)
                    elif "function" in t:
                        # Has function key but missing type: {"function":{...}}
                        normalized.append({"type": "function", "function": t["function"]})
                    else:
                        # Raw schema: {"name":..., "parameters":...}
                        normalized.append({"type": "function", "function": t})
                else:
                    normalized.append(t)
            kwargs["tools"] = normalized
            kwargs["tool_choice"] = "auto"

        # LLM 响应缓存（仅不用工具时缓存，避免复杂序列化）
        if self._config.cache_enabled and not tools and not _is_fallback:
            cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
            cached = await _llm_cache_get(cache_key)
            if cached is not None:
                logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
                class _CachedMsg:
                    content = cached
                    tool_calls = None
                return _CachedMsg()

        start_time = time.perf_counter()
        last_error = None
        try:
            response = await client.chat.completions.create(**kwargs)
        except Exception as e:
            last_error = e

            # Reactive Compact: 上下文超限时压缩后重试 (Tier 3)
            if (
                self.compaction_engine
                and is_context_length_error(e)
                and self.compaction_engine.config.reactive_compact_enabled
            ):
                logger.warning("检测到上下文超限，触发 ReactiveCompact: %s", str(e)[:100])
                try:
                    compact_result = await self.compaction_engine.reactive_compact(
                        messages, e, self._config.context_window,
                    )
                    if compact_result.strategy != CompactionStrategy.NONE:
                        logger.info(
                            "ReactiveCompact 完成: saved=%d tokens, 重试中...",
                            compact_result.tokens_saved,
                        )
                        return await self._do_chat(
                            api_key=api_key, base_url=base_url,
                            model=model,
                            messages=compact_result.messages,
                            tools=tools,
                            iteration=iteration,
                            on_completion=on_completion,
                            _is_fallback=_is_fallback,
                        )
                except Exception as ce:
                    logger.error("ReactiveCompact 失败: %s", ce)

            # 降级回退：主模型失败时尝试 fallback_llm
            fallback = self._config.fallback_llm
            if fallback and isinstance(fallback, dict) and not _is_fallback:
                fb_model = fallback.get("model")
                fb_api_key = fallback.get("api_key")
                fb_base_url = fallback.get("base_url")
                if fb_model and (fb_api_key or fb_base_url):
                    logger.warning(
                        "主模型 %s 调用失败，降级到 %s: %s",
                        model, fb_model, str(e)[:200],
                    )
                    # 先报告主模型失败
                    latency_ms = int((time.perf_counter() - start_time) * 1000)
                    if on_completion:
                        on_completion({
                            "model": model, "provider": self._config.provider,
                            "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0,
                            "latency_ms": latency_ms, "iteration_number": iteration,
                            "status": "fallback", "error_message": str(e),
                        })
                    return await self._do_chat(
                        api_key=fb_api_key or api_key,
                        base_url=fb_base_url or base_url,
                        model=fb_model,
                        messages=messages, tools=tools,
                        iteration=iteration, on_completion=on_completion,
                        _is_fallback=True,
                    )
            raise

        latency_ms = int((time.perf_counter() - start_time) * 1000)
        message = response.choices[0].message

        # 缓存写入（仅不用工具时）
        if self._config.cache_enabled and not tools and message.content:
            ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
            await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)

        # 提取 token 用量
        usage = getattr(response, "usage", None)
        prompt_tokens = usage.prompt_tokens if usage else 0
        completion_tokens = usage.completion_tokens if usage else 0
        total_tokens = usage.total_tokens if usage else 0

        # 调用完成回调
        if on_completion:
            on_completion({
                "model": model,
                "provider": self._config.provider,
                "prompt_tokens": prompt_tokens or 0,
                "completion_tokens": completion_tokens or 0,
                "total_tokens": total_tokens or 0,
                "latency_ms": latency_ms,
                "iteration_number": iteration,
                "status": "success",
            })

        return message