Files
aiagent/backend/app/api/agent_chat.py
renjianbo a924486f26 feat: add Suyao Feishu bot and per-agent memory config support
- Create suyao_app_service.py and suyao_ws_handler.py for 苏瑶 Feishu bot
- Add SUYAO_APP_ID/SUYAO_APP_SECRET/SUYAO_AGENT_ID config fields
- Fix node config extraction bug (n.get("config") → n.get("data")) in all WS handlers
- Add _build_memory_config_from_node() to support per-agent memory settings
  (max_history_messages, vector_memory_top_k, persist_to_db, etc.)
- Create 苏瑶1号 (Plan A: long context), 苏瑶2号 (Plan B: emotion tracking),
  苏瑶3号 (Plan C: knowledge graph + RAG) agents with different memory strategies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-02 21:44:47 +08:00

419 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent 独立聊天 API — 不依赖工作流 DAG直接与 Agent Runtime 对话。
POST /api/v1/agent-chat/bare
{"message": "你好,帮我..."}
{"content": "...", "iterations": 3, "tool_calls": 5}
"""
from __future__ import annotations
import logging
import json
from typing import Any, AsyncGenerator, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from app.core.database import get_db
from sqlalchemy.orm import Session
from app.api.auth import get_current_user
from app.models.user import User
from app.models.agent import Agent
from app.models.agent_llm_log import AgentLLMLog
from app.agent_runtime import (
AgentRuntime,
AgentConfig,
AgentLLMConfig,
AgentToolConfig,
AgentBudgetConfig,
AgentMemoryConfig,
AgentStep,
AgentOrchestrator,
OrchestratorAgentConfig,
)
from app.core.config import settings
# Module-level logger for best-effort warnings (e.g. failed log writes).
logger = logging.getLogger(__name__)
# All routes below are mounted under /api/v1/agent-chat.
router = APIRouter(prefix="/api/v1/agent-chat", tags=["agent-chat"])
def _make_llm_logger(
    db: Session,
    agent_id: Optional[str] = None,
    user_id: Optional[str] = None,
):
    """Build an LLM-call metrics callback that persists to the AgentLLMLog table.

    Args:
        db: request-scoped SQLAlchemy session used for the inserts.
        agent_id: agent the calls belong to, or None for bare/orchestrated chat.
        user_id: id of the requesting user, recorded on each row.

    Returns:
        A ``_log(metrics: dict)`` closure. Logging is best-effort: any failure
        is logged as a warning and never propagates into the chat request.
    """
    def _log(metrics: dict):
        try:
            log = AgentLLMLog(
                agent_id=agent_id,
                session_id=metrics.get("session_id"),
                user_id=user_id,
                model=metrics.get("model", ""),
                provider=metrics.get("provider"),
                prompt_tokens=metrics.get("prompt_tokens", 0),
                completion_tokens=metrics.get("completion_tokens", 0),
                total_tokens=metrics.get("total_tokens", 0),
                latency_ms=metrics.get("latency_ms", 0),
                iteration_number=metrics.get("iteration_number", 0),
                step_type=metrics.get("step_type"),
                tool_name=metrics.get("tool_name"),
                status=metrics.get("status", "success"),
                error_message=metrics.get("error_message"),
            )
            db.add(log)
            db.commit()
        except Exception as e:
            # BUG FIX: a failed flush/commit leaves the shared Session in a
            # broken state; roll back so the rest of the request can still
            # use it. The rollback itself is also best-effort.
            try:
                db.rollback()
            except Exception:
                pass
            logger.warning("写入 AgentLLMLog 失败: %s", e)
    return _log
async def _sse_stream(gen: AsyncGenerator[dict, None]) -> AsyncGenerator[str, None]:
"""将 run_stream 生成的 dict 事件格式化为 SSE 文本流。"""
async for event in gen:
event_type = event.get("type", "message")
data = {k: v for k, v in event.items() if k != "type"}
yield f"event: {event_type}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
class ChatRequest(BaseModel):
    """Request body shared by all chat endpoints in this module."""
    message: str  # user message sent to the agent
    session_id: Optional[str] = None  # presumably resumes a prior session — verify against runtime
    model: Optional[str] = None  # per-request override of the configured model
    temperature: Optional[float] = None  # per-request sampling-temperature override
    max_iterations: Optional[int] = None  # per-request iteration-cap override
class ChatResponse(BaseModel):
    """Result of one non-streaming chat turn."""
    content: str  # final assistant answer text
    iterations_used: int
    tool_calls_made: int
    truncated: bool  # presumably True when an iteration/budget limit was hit — TODO confirm
    session_id: str  # taken from runtime.context.session_id
    agent_id: Optional[str] = None  # populated only by the /{agent_id} route
    steps: List[AgentStep] = Field(default_factory=list, description="执行追踪步骤")
class OrchestrateAgentItem(BaseModel):
    """Definition of a single agent participating in an orchestration."""
    id: str
    name: str = "Agent"
    system_prompt: str = "你是一个有用的AI助手。"
    model: str = "deepseek-v4-flash"  # overridden by OrchestrateRequest.model when set
    provider: str = "deepseek"
    temperature: float = 0.7
    max_iterations: int = 10
    tools: List[str] = Field(default_factory=list)  # tool names enabled for this agent
    description: str = ""
class OrchestrateRequest(BaseModel):
    """Multi-agent orchestration request."""
    message: str
    mode: str = "debate"  # one of: route / sequential / debate
    agents: List[OrchestrateAgentItem] = Field(..., min_length=1)  # at least one agent required
    model: Optional[str] = None  # optional model override applied to every agent
class OrchestrateStepItem(BaseModel):
    """One agent's turn inside an orchestration run."""
    agent_id: str
    agent_name: str
    input: str = ""
    output: str = ""
    iterations_used: int = 0
    tool_calls_made: int = 0
    error: Optional[str] = None  # set when this agent's turn failed
class OrchestrateResponse(BaseModel):
    """Multi-agent orchestration response."""
    mode: str  # echoes the mode the orchestrator actually ran
    final_answer: str
    steps: List[OrchestrateStepItem] = Field(default_factory=list)
    agent_results: List[Dict[str, Any]] = Field(default_factory=list)  # raw per-agent results from the orchestrator
@router.post("/orchestrate", response_model=OrchestrateResponse)
async def orchestrate_agents(
    req: OrchestrateRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Run multi-agent orchestration in route / sequential / debate mode."""
    # Translate the request items into orchestrator configs; a request-level
    # model override beats each item's own model.
    agent_configs = []
    for item in req.agents:
        agent_configs.append(
            OrchestratorAgentConfig(
                id=item.id,
                name=item.name,
                system_prompt=item.system_prompt,
                model=req.model or item.model,
                provider=item.provider,
                temperature=item.temperature,
                max_iterations=item.max_iterations,
                tools=item.tools,
                description=item.description,
            )
        )
    llm_logger = _make_llm_logger(db, agent_id=None, user_id=current_user.id)
    orchestrator = AgentOrchestrator(
        default_llm_config=AgentLLMConfig(
            model=req.model or "deepseek-v4-flash",
            temperature=0.3,
        ),
    )
    result = await orchestrator.run(
        req.mode, req.message, agent_configs, on_llm_call=llm_logger
    )
    step_items = [
        OrchestrateStepItem(
            agent_id=step.agent_id,
            agent_name=step.agent_name,
            input=step.input,
            output=step.output,
            iterations_used=step.iterations_used,
            tool_calls_made=step.tool_calls_made,
            error=step.error,
        )
        for step in result.steps
    ]
    return OrchestrateResponse(
        mode=result.mode,
        final_answer=result.final_answer,
        steps=step_items,
        agent_results=result.agent_results,
    )
@router.post("/bare", response_model=ChatResponse)
async def chat_bare(
    req: ChatRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Chat with a default-configured agent — no stored Agent record needed.

    Prefers gpt-4o-mini when a real OpenAI key is configured, otherwise
    falls back to deepseek-v4-flash.
    """
    uid = current_user.id
    # Memory is scoped per user so bare-chat history never leaks across accounts.
    bare_scope = f"{uid}:__bare__" if uid else "__bare__"
    default_model = (
        "gpt-4o-mini"
        if settings.OPENAI_API_KEY and settings.OPENAI_API_KEY != "your-openai-api-key"
        else "deepseek-v4-flash"
    )
    config = AgentConfig(
        name="bare_agent",
        system_prompt="你是一个有用的AI助手。请使用可用工具来帮助用户完成任务。",
        llm=AgentLLMConfig(
            model=req.model or default_model,
            # BUG FIX: `req.temperature or 0.7` silently discarded a valid
            # explicit 0.0; compare against None so zero is honored.
            temperature=req.temperature if req.temperature is not None else 0.7,
            # 0 iterations is never meaningful, so the falsy fallback is intended.
            max_iterations=req.max_iterations or 10,
        ),
        user_id=uid,
        memory_scope_id=bare_scope,
    )
    on_llm_call = _make_llm_logger(db, agent_id=None, user_id=current_user.id)
    runtime = AgentRuntime(config=config, on_llm_call=on_llm_call)
    result = await runtime.run(req.message)
    return ChatResponse(
        content=result.content,
        iterations_used=result.iterations_used,
        tool_calls_made=result.tool_calls_made,
        truncated=result.truncated,
        session_id=runtime.context.session_id,
        steps=result.steps,
    )
@router.post("/bare/stream")
async def chat_bare_stream(
    req: ChatRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Streaming (SSE) variant of /bare with the same default configuration."""
    uid = current_user.id
    # Memory is scoped per user so bare-chat history never leaks across accounts.
    bare_scope = f"{uid}:__bare__" if uid else "__bare__"
    default_model = (
        "gpt-4o-mini"
        if settings.OPENAI_API_KEY and settings.OPENAI_API_KEY != "your-openai-api-key"
        else "deepseek-v4-flash"
    )
    config = AgentConfig(
        name="bare_agent",
        system_prompt="你是一个有用的AI助手。请使用可用工具来帮助用户完成任务。",
        llm=AgentLLMConfig(
            model=req.model or default_model,
            # BUG FIX: `req.temperature or 0.7` silently discarded a valid
            # explicit 0.0; compare against None so zero is honored.
            temperature=req.temperature if req.temperature is not None else 0.7,
            # 0 iterations is never meaningful, so the falsy fallback is intended.
            max_iterations=req.max_iterations or 10,
        ),
        user_id=uid,
        memory_scope_id=bare_scope,
    )
    on_llm_call = _make_llm_logger(db, agent_id=None, user_id=current_user.id)
    runtime = AgentRuntime(config=config, on_llm_call=on_llm_call)
    return StreamingResponse(
        _sse_stream(runtime.run_stream(req.message)),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable nginx proxy buffering so SSE chunks flush immediately.
            "X-Accel-Buffering": "no",
        },
    )
@router.post("/{agent_id}", response_model=ChatResponse)
async def chat_with_agent(
    agent_id: str,
    req: ChatRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Chat with a stored Agent; its workflow config drives the Runtime.

    Raises:
        HTTPException 404: the agent does not exist.
        HTTPException 403: caller is neither the agent's owner nor an admin.
    """
    agent = db.query(Agent).filter(Agent.id == agent_id).first()
    if not agent:
        raise HTTPException(status_code=404, detail="Agent 不存在")
    if agent.user_id and agent.user_id != current_user.id and current_user.role != "admin":
        raise HTTPException(status_code=403, detail="无权访问该 Agent")
    # Extract the agent/llm node config from the stored workflow definition.
    wc = agent.workflow_config or {}
    nodes = wc.get("nodes", [])
    agent_node_cfg = _find_agent_node_config(nodes)
    # Build the system prompt; inject the agent's display name once at the top.
    system_prompt = agent_node_cfg.get("system_prompt") or agent.description or "你是一个有用的AI助手。"
    if agent.name:
        name_prefix = f"你的名字是{agent.name}"
        if name_prefix not in system_prompt:
            system_prompt = f"{name_prefix}\n\n{system_prompt}"
    # Merge execution budget: Agent.budget_config values (clamped to >= 1)
    # override the defaults.
    budget = AgentBudgetConfig()
    if agent.budget_config and isinstance(agent.budget_config, dict):
        bc = agent.budget_config
        if "max_llm_invocations" in bc and bc["max_llm_invocations"] is not None:
            budget.max_llm_invocations = max(1, int(bc["max_llm_invocations"]))
        if "max_tool_calls" in bc and bc["max_tool_calls"] is not None:
            budget.max_tool_calls = max(1, int(bc["max_tool_calls"]))
    uid = current_user.id
    # Per-user, per-agent memory scope.
    mem_scope = f"{uid}:{agent_id}" if uid else str(agent_id)
    memory_cfg = _build_memory_config_from_node(agent_node_cfg)
    config = AgentConfig(
        name=agent.name,
        system_prompt=system_prompt,
        llm=AgentLLMConfig(
            provider=agent_node_cfg.get("provider", "openai"),
            model=req.model or agent_node_cfg.get("model", "gpt-4o-mini"),
            # BUG FIX: `req.temperature or ...` silently discarded a valid
            # explicit 0.0 override; compare against None instead.
            temperature=(
                req.temperature
                if req.temperature is not None
                else float(agent_node_cfg.get("temperature", 0.7))
            ),
            # 0 iterations is never meaningful, so the falsy fallback is intended.
            max_iterations=req.max_iterations or int(agent_node_cfg.get("max_iterations", 10)),
        ),
        tools=AgentToolConfig(
            include_tools=agent_node_cfg.get("tools", []),
            exclude_tools=agent_node_cfg.get("exclude_tools", []),
        ),
        memory=memory_cfg,
        budget=budget,
        user_id=uid,
        memory_scope_id=mem_scope,
    )
    on_llm_call = _make_llm_logger(db, agent_id=agent_id, user_id=current_user.id)
    runtime = AgentRuntime(config=config, on_llm_call=on_llm_call)
    result = await runtime.run(req.message)
    return ChatResponse(
        content=result.content,
        iterations_used=result.iterations_used,
        tool_calls_made=result.tool_calls_made,
        truncated=result.truncated,
        session_id=runtime.context.session_id,
        agent_id=agent_id,
        steps=result.steps,
    )
@router.post("/{agent_id}/stream")
async def chat_with_agent_stream(
    agent_id: str,
    req: ChatRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Streaming (SSE) variant of the /{agent_id} chat endpoint.

    Raises:
        HTTPException 404: the agent does not exist.
        HTTPException 403: caller is neither the agent's owner nor an admin.
    """
    agent = db.query(Agent).filter(Agent.id == agent_id).first()
    if not agent:
        raise HTTPException(status_code=404, detail="Agent 不存在")
    if agent.user_id and agent.user_id != current_user.id and current_user.role != "admin":
        raise HTTPException(status_code=403, detail="无权访问该 Agent")
    # Extract the agent/llm node config from the stored workflow definition.
    wc = agent.workflow_config or {}
    nodes = wc.get("nodes", [])
    agent_node_cfg = _find_agent_node_config(nodes)
    # Build the system prompt; inject the agent's display name once at the top.
    system_prompt = agent_node_cfg.get("system_prompt") or agent.description or "你是一个有用的AI助手。"
    if agent.name:
        name_prefix = f"你的名字是{agent.name}"
        if name_prefix not in system_prompt:
            system_prompt = f"{name_prefix}\n\n{system_prompt}"
    # Merge execution budget: Agent.budget_config values (clamped to >= 1)
    # override the defaults.
    budget = AgentBudgetConfig()
    if agent.budget_config and isinstance(agent.budget_config, dict):
        bc = agent.budget_config
        if "max_llm_invocations" in bc and bc["max_llm_invocations"] is not None:
            budget.max_llm_invocations = max(1, int(bc["max_llm_invocations"]))
        if "max_tool_calls" in bc and bc["max_tool_calls"] is not None:
            budget.max_tool_calls = max(1, int(bc["max_tool_calls"]))
    uid = current_user.id
    # Per-user, per-agent memory scope.
    mem_scope = f"{uid}:{agent_id}" if uid else str(agent_id)
    memory_cfg = _build_memory_config_from_node(agent_node_cfg)
    config = AgentConfig(
        name=agent.name,
        system_prompt=system_prompt,
        llm=AgentLLMConfig(
            provider=agent_node_cfg.get("provider", "openai"),
            model=req.model or agent_node_cfg.get("model", "gpt-4o-mini"),
            # BUG FIX: `req.temperature or ...` silently discarded a valid
            # explicit 0.0 override; compare against None instead.
            temperature=(
                req.temperature
                if req.temperature is not None
                else float(agent_node_cfg.get("temperature", 0.7))
            ),
            # 0 iterations is never meaningful, so the falsy fallback is intended.
            max_iterations=req.max_iterations or int(agent_node_cfg.get("max_iterations", 10)),
        ),
        tools=AgentToolConfig(
            include_tools=agent_node_cfg.get("tools", []),
            exclude_tools=agent_node_cfg.get("exclude_tools", []),
        ),
        memory=memory_cfg,
        budget=budget,
        user_id=uid,
        memory_scope_id=mem_scope,
    )
    on_llm_call = _make_llm_logger(db, agent_id=agent_id, user_id=current_user.id)
    runtime = AgentRuntime(config=config, on_llm_call=on_llm_call)
    return StreamingResponse(
        _sse_stream(runtime.run_stream(req.message)),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable nginx proxy buffering so SSE chunks flush immediately.
            "X-Accel-Buffering": "no",
        },
    )
def _find_agent_node_config(nodes: list) -> Dict[str, Any]:
"""从工作流节点列表中查找第一个 agent 类型或 llm 类型的节点配置。"""
if not nodes:
return {}
for node in nodes:
typ = node.get("type", "")
if typ in ("agent", "llm", "template"):
return node.get("data") or {}
return {}
def _build_memory_config_from_node(agent_node_cfg: dict) -> AgentMemoryConfig:
    """Translate a workflow node's memory_* keys into an AgentMemoryConfig."""
    cfg = agent_node_cfg
    # Coerce each setting explicitly; missing keys fall back to the defaults
    # below (20 history messages, top-5 vector recall, everything enabled).
    kwargs = {
        "max_history_messages": int(cfg.get("memory_max_history", 20)),
        "vector_memory_top_k": int(cfg.get("memory_vector_top_k", 5)),
        "persist_to_db": bool(cfg.get("memory_persist", True)),
        "vector_memory_enabled": bool(cfg.get("memory_vector_enabled", True)),
        "learning_enabled": bool(cfg.get("memory_learning", True)),
    }
    return AgentMemoryConfig(**kwargs)