331 lines
12 KiB
Python
331 lines
12 KiB
Python
|
|
"""
|
|||
|
|
Agent Runtime 核心 —— 自主 ReAct 循环。
|
|||
|
|
|
|||
|
|
流程:
|
|||
|
|
1. 接收用户输入 → 追加到消息列表
|
|||
|
|
2. 调用 LLM(携带 tools schema)
|
|||
|
|
3. 如果 LLM 返回工具调用 → 执行工具 → 结果追加到消息列表 → 回到 2
|
|||
|
|
4. 如果 LLM 返回文本 → 作为最终回答返回
|
|||
|
|
5. 超过 max_iterations → 强制终止
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import logging
|
|||
|
|
from typing import Any, Callable, Dict, List, Optional
|
|||
|
|
|
|||
|
|
from app.agent_runtime.schemas import (
|
|||
|
|
AgentConfig,
|
|||
|
|
AgentResult,
|
|||
|
|
)
|
|||
|
|
from app.agent_runtime.context import AgentContext
|
|||
|
|
from app.agent_runtime.memory import AgentMemory
|
|||
|
|
from app.agent_runtime.tool_manager import AgentToolManager
|
|||
|
|
|
|||
|
|
logger = logging.getLogger(__name__)

# Lower-cased substrings that identify transient API failures worth retrying.
# Matched case-insensitively against str(exception) in
# AgentRuntime._is_retryable.
_RETRYABLE_ERRORS = (
    "timed out",
    "timeout",
    "connection error",
    "temporarily unavailable",
    "server disconnected",
    "rate limit",
    "too many requests",
    "internal server error",
    "service unavailable",
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class AgentRuntime:
    """
    Autonomous agent runtime driving a ReAct loop.

    Usage:
        runtime = AgentRuntime(config)
        result = await runtime.run("帮我写个Python脚本")
    """

    def __init__(
        self,
        config: Optional[AgentConfig] = None,
        context: Optional[AgentContext] = None,
        memory: Optional[AgentMemory] = None,
        tool_manager: Optional[AgentToolManager] = None,
        execution_logger: Optional[Any] = None,
        on_tool_executed: Optional[Callable[[str], Any]] = None,
    ):
        """
        Wire up the runtime's collaborators.

        Every collaborator can be injected (e.g. for testing); anything not
        supplied is constructed from the resolved ``config`` defaults.

        Args:
            config: Agent configuration; a default AgentConfig when omitted.
            context: Conversation state (messages, iteration counters).
            memory: Short/long-term memory manager.
            tool_manager: Registry and executor for the agent's tools.
            execution_logger: Optional structured logger with an
                ``info(msg, data=...)`` interface used for progress events.
            on_tool_executed: Optional async callback awaited with the tool
                name after each successful tool execution (best-effort).
        """
        self.config = config or AgentConfig()
        self.context = context or AgentContext(
            system_prompt=self.config.system_prompt,
            user_id=self.config.user_id,
        )
        # Memory is scoped per user when a user_id exists, else per agent name.
        self.memory = memory or AgentMemory(
            scope_id=self.config.user_id or self.config.name,
            max_history=self.config.memory.max_history_messages,
            persist=self.config.memory.persist_to_db,
        )
        self.tool_manager = tool_manager or AgentToolManager(
            include_tools=self.config.tools.include_tools,
            exclude_tools=self.config.tools.exclude_tools,
        )
        self.execution_logger = execution_logger
        self.on_tool_executed = on_tool_executed
        # Long-term memory is folded into the system prompt lazily, on the
        # first run() call only.
        self._memory_context_loaded = False
|
|||
|
|
|
|||
|
|
async def run(self, user_input: str) -> AgentResult:
|
|||
|
|
"""
|
|||
|
|
执行 Agent 单轮对话。
|
|||
|
|
|
|||
|
|
流程:加载记忆 → 追加用户消息 → ReAct 循环 → 保存记忆 → 返回结果。
|
|||
|
|
"""
|
|||
|
|
max_iter = max(1, self.config.llm.max_iterations)
|
|||
|
|
self.context.iteration = 0
|
|||
|
|
self.context.tool_calls_made = 0
|
|||
|
|
|
|||
|
|
# 1. 首次运行时加载长期记忆到 system prompt
|
|||
|
|
if not self._memory_context_loaded:
|
|||
|
|
await self._inject_memory_context()
|
|||
|
|
self._memory_context_loaded = True
|
|||
|
|
|
|||
|
|
# 2. 追加用户消息
|
|||
|
|
self.context.add_user_message(user_input)
|
|||
|
|
|
|||
|
|
# 3. ReAct 循环
|
|||
|
|
llm = _LLMClient(self.config.llm)
|
|||
|
|
tool_schemas = self.tool_manager.get_tool_schemas()
|
|||
|
|
has_tools = self.tool_manager.has_tools()
|
|||
|
|
|
|||
|
|
while self.context.iteration < max_iter:
|
|||
|
|
self.context.iteration += 1
|
|||
|
|
|
|||
|
|
# 裁剪过长历史
|
|||
|
|
messages = self.memory.trim_messages(self.context.messages)
|
|||
|
|
|
|||
|
|
# 调用 LLM
|
|||
|
|
try:
|
|||
|
|
response = await llm.chat(
|
|||
|
|
messages=messages,
|
|||
|
|
tools=tool_schemas if has_tools and self.context.iteration == 1 else
|
|||
|
|
(tool_schemas if has_tools else None),
|
|||
|
|
iteration=self.context.iteration,
|
|||
|
|
)
|
|||
|
|
except Exception as e:
|
|||
|
|
err_str = str(e)
|
|||
|
|
logger.error("LLM 调用失败 (iteration=%s): %s", self.context.iteration, err_str)
|
|||
|
|
if self.context.iteration < max_iter and self._is_retryable(err_str):
|
|||
|
|
continue
|
|||
|
|
return AgentResult(
|
|||
|
|
success=False,
|
|||
|
|
content=f"LLM 调用失败: {err_str}",
|
|||
|
|
iterations_used=self.context.iteration,
|
|||
|
|
tool_calls_made=self.context.tool_calls_made,
|
|||
|
|
error=err_str,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 解析工具调用
|
|||
|
|
tool_calls = self._extract_tool_calls(response)
|
|||
|
|
content = self._extract_content(response)
|
|||
|
|
|
|||
|
|
if not tool_calls:
|
|||
|
|
# LLM 直接返回文本 → 结束
|
|||
|
|
self.context.add_assistant_message(content)
|
|||
|
|
final_text = content or "(模型未返回有效内容)"
|
|||
|
|
# 保存记忆
|
|||
|
|
await self.memory.save_context(user_input, final_text)
|
|||
|
|
return AgentResult(
|
|||
|
|
success=True,
|
|||
|
|
content=final_text,
|
|||
|
|
iterations_used=self.context.iteration,
|
|||
|
|
tool_calls_made=self.context.tool_calls_made,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 有工具调用 → 先记录 assistant 消息(含 tool_calls + reasoning_content)
|
|||
|
|
reasoning = getattr(response, "reasoning_content", None) or (
|
|||
|
|
response.get("reasoning_content") if isinstance(response, dict) else None
|
|||
|
|
)
|
|||
|
|
self.context.add_assistant_message(content or "", tool_calls, reasoning)
|
|||
|
|
if self.execution_logger:
|
|||
|
|
self.execution_logger.info(
|
|||
|
|
f"Agent 调用 {len(tool_calls)} 个工具",
|
|||
|
|
data={"tool_calls": [tc["function"]["name"] for tc in tool_calls],
|
|||
|
|
"iteration": self.context.iteration},
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 逐一执行工具
|
|||
|
|
for tc in tool_calls:
|
|||
|
|
tfn = tc.get("function", {})
|
|||
|
|
tname = tfn.get("name", "unknown")
|
|||
|
|
tcid = tc.get("id", f"call_{self.context.iteration}_{self.context.tool_calls_made}")
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
targs = json.loads(tfn.get("arguments", "{}"))
|
|||
|
|
except (json.JSONDecodeError, TypeError):
|
|||
|
|
targs = {}
|
|||
|
|
|
|||
|
|
logger.info("Agent 执行工具 [%s]: %s", tname, targs)
|
|||
|
|
result = await self.tool_manager.execute(tname, targs)
|
|||
|
|
|
|||
|
|
self.context.add_tool_result(tcid, tname, result)
|
|||
|
|
self.context.tool_calls_made += 1
|
|||
|
|
|
|||
|
|
if self.on_tool_executed:
|
|||
|
|
try:
|
|||
|
|
await self.on_tool_executed(tname)
|
|||
|
|
except Exception:
|
|||
|
|
pass
|
|||
|
|
|
|||
|
|
if self.execution_logger:
|
|||
|
|
preview = result[:300] + "..." if len(result) > 300 else result
|
|||
|
|
self.execution_logger.info(
|
|||
|
|
f"工具 {tname} 执行完成",
|
|||
|
|
data={"tool_name": tname, "result_preview": preview},
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# 达到最大迭代次数
|
|||
|
|
last_content = ""
|
|||
|
|
for m in reversed(self.context.messages):
|
|||
|
|
if m.get("role") == "assistant" and m.get("content"):
|
|||
|
|
last_content = m["content"]
|
|||
|
|
break
|
|||
|
|
|
|||
|
|
logger.warning("Agent 达到最大迭代次数 (%s)", max_iter)
|
|||
|
|
await self.memory.save_context(user_input, last_content or "(已达最大迭代次数)")
|
|||
|
|
return AgentResult(
|
|||
|
|
success=True,
|
|||
|
|
content=last_content or "已达最大迭代次数,但模型未返回最终回答。",
|
|||
|
|
truncated=True,
|
|||
|
|
iterations_used=self.context.iteration,
|
|||
|
|
tool_calls_made=self.context.tool_calls_made,
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
async def _inject_memory_context(self) -> None:
|
|||
|
|
"""加载长期记忆并注入 system prompt。"""
|
|||
|
|
mem_text = await self.memory.initialize()
|
|||
|
|
if mem_text:
|
|||
|
|
enriched = (
|
|||
|
|
self.config.system_prompt.rstrip("\n")
|
|||
|
|
+ "\n\n"
|
|||
|
|
+ mem_text
|
|||
|
|
)
|
|||
|
|
self.context.set_system_prompt(enriched)
|
|||
|
|
logger.info("Agent 已注入长期记忆上下文")
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _extract_tool_calls(response: Any) -> List[Dict[str, Any]]:
|
|||
|
|
"""从 LLM 响应中提取工具调用列表。"""
|
|||
|
|
if response is None:
|
|||
|
|
return []
|
|||
|
|
# OpenAI SDK 格式
|
|||
|
|
if hasattr(response, "tool_calls") and response.tool_calls:
|
|||
|
|
result = []
|
|||
|
|
for tc in response.tool_calls:
|
|||
|
|
result.append({
|
|||
|
|
"id": tc.id,
|
|||
|
|
"type": tc.type,
|
|||
|
|
"function": {
|
|||
|
|
"name": tc.function.name,
|
|||
|
|
"arguments": tc.function.arguments,
|
|||
|
|
},
|
|||
|
|
})
|
|||
|
|
return result
|
|||
|
|
# 字典格式
|
|||
|
|
if isinstance(response, dict):
|
|||
|
|
tc_list = response.get("tool_calls") or []
|
|||
|
|
if tc_list:
|
|||
|
|
return tc_list
|
|||
|
|
# 检查 content 中是否嵌入了 DSML
|
|||
|
|
content = response.get("content") or ""
|
|||
|
|
if "invoke" in content or "function_call" in content:
|
|||
|
|
from app.services.llm_service import _parse_dsml_tool_invocations
|
|||
|
|
dsml = _parse_dsml_tool_invocations(content)
|
|||
|
|
if dsml:
|
|||
|
|
return [
|
|||
|
|
{
|
|||
|
|
"id": f"dsml-{i}",
|
|||
|
|
"type": "function",
|
|||
|
|
"function": {
|
|||
|
|
"name": inv["name"],
|
|||
|
|
"arguments": json.dumps(inv["arguments"], ensure_ascii=False),
|
|||
|
|
},
|
|||
|
|
}
|
|||
|
|
for i, inv in enumerate(dsml)
|
|||
|
|
]
|
|||
|
|
return []
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _extract_content(response: Any) -> str:
|
|||
|
|
"""从 LLM 响应中提取文本内容。"""
|
|||
|
|
if response is None:
|
|||
|
|
return ""
|
|||
|
|
if hasattr(response, "content"):
|
|||
|
|
return response.content or ""
|
|||
|
|
if isinstance(response, dict):
|
|||
|
|
return response.get("content") or ""
|
|||
|
|
return str(response)
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _is_retryable(err_str: str) -> bool:
|
|||
|
|
"""判断错误是否可重试。"""
|
|||
|
|
err_lower = err_str.lower()
|
|||
|
|
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class _LLMClient:
    """
    Thin LLM client wrapper.

    Talks to an OpenAI-compatible chat-completions endpoint directly via the
    AsyncOpenAI SDK. The ReAct loop is driven entirely by AgentRuntime, so
    this client performs exactly one request per chat() call and never runs
    an internal tool-call loop of its own.
    """

    def __init__(self, config: Any):
        # llm_service is kept for parity with the rest of the app; the chat
        # path below does not currently go through it.
        from app.services.llm_service import llm_service
        self._service = llm_service
        self._config = config

    async def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        iteration: int = 1,
    ) -> Any:
        """
        Issue one chat-completions request and return the first choice's message.

        Args:
            messages: Full conversation so far (system/user/assistant/tool).
            tools: Optional tool schemas; when provided, tool_choice="auto".
            iteration: Current ReAct iteration. Accepted for interface
                compatibility; it does not affect the request.

        Raises:
            ValueError: when no API key can be resolved.
        """
        from openai import AsyncOpenAI
        from app.core.config import settings

        # Resolve credentials: explicit config first, then settings (.env).
        api_key = self._config.api_key or settings.OPENAI_API_KEY or ""
        base_url = self._config.base_url or settings.OPENAI_BASE_URL or ""

        if not api_key or api_key == "your-openai-api-key":
            # OpenAI key missing or a placeholder -> try DeepSeek instead.
            api_key = self._config.api_key or settings.DEEPSEEK_API_KEY or ""
            base_url = self._config.base_url or settings.DEEPSEEK_BASE_URL or "https://api.deepseek.com"

        if not api_key:
            raise ValueError("未配置 API Key")

        kwargs: Dict[str, Any] = {
            "model": self._config.model,
            "messages": messages,
            "temperature": self._config.temperature,
            "timeout": self._config.request_timeout,
        }
        if self._config.max_tokens:
            kwargs["max_tokens"] = self._config.max_tokens
        if self._config.extra_body:
            kwargs["extra_body"] = self._config.extra_body
        if tools:
            kwargs["tools"] = tools
            kwargs["tool_choice"] = "auto"

        # Close the client after the call so its underlying HTTP connection
        # pool is released (previously a new client leaked per request).
        client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        try:
            response = await client.chat.completions.create(**kwargs)
        finally:
            await client.close()
        return response.choices[0].message
|