""" Agent 独立聊天 API — 不依赖工作流 DAG,直接与 Agent Runtime 对话。 POST /api/v1/agent-chat/bare {"message": "你好,帮我..."} → {"content": "...", "iterations": 3, "tool_calls": 5} """ from __future__ import annotations import logging import json from typing import Any, AsyncGenerator, Dict, List, Optional from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from app.core.database import get_db from sqlalchemy.orm import Session from app.api.auth import get_current_user from app.models.user import User from app.models.agent import Agent from app.models.agent_llm_log import AgentLLMLog from app.agent_runtime import ( AgentRuntime, AgentConfig, AgentLLMConfig, AgentToolConfig, AgentBudgetConfig, AgentMemoryConfig, AgentStep, AgentOrchestrator, OrchestratorAgentConfig, ) from app.core.config import settings logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/v1/agent-chat", tags=["agent-chat"]) def _make_llm_logger( db: Session, agent_id: Optional[str] = None, user_id: Optional[str] = None, ): """创建 LLM 调用日志回调,写入 AgentLLMLog 表。""" def _log(metrics: dict): try: log = AgentLLMLog( agent_id=agent_id, session_id=metrics.get("session_id"), user_id=user_id, model=metrics.get("model", ""), provider=metrics.get("provider"), prompt_tokens=metrics.get("prompt_tokens", 0), completion_tokens=metrics.get("completion_tokens", 0), total_tokens=metrics.get("total_tokens", 0), latency_ms=metrics.get("latency_ms", 0), iteration_number=metrics.get("iteration_number", 0), step_type=metrics.get("step_type"), tool_name=metrics.get("tool_name"), status=metrics.get("status", "success"), error_message=metrics.get("error_message"), ) db.add(log) db.commit() except Exception as e: logger.warning("写入 AgentLLMLog 失败: %s", e) return _log async def _sse_stream(gen: AsyncGenerator[dict, None]) -> AsyncGenerator[str, None]: """将 run_stream 生成的 dict 事件格式化为 SSE 文本流。""" async for event in gen: event_type = event.get("type", "message") data = {k: v for k, v in event.items() if k != "type"} yield f"event: {event_type}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n" class ChatRequest(BaseModel): message: str session_id: Optional[str] = None model: Optional[str] = None temperature: Optional[float] = None max_iterations: Optional[int] = None class ChatResponse(BaseModel): content: str iterations_used: int tool_calls_made: int truncated: bool session_id: str agent_id: Optional[str] = None steps: List[AgentStep] = Field(default_factory=list, description="执行追踪步骤") class OrchestrateAgentItem(BaseModel): """编排中单个 Agent 的定义""" id: str name: str = "Agent" system_prompt: str = "你是一个有用的AI助手。" model: str = "deepseek-v4-flash" provider: str = "deepseek" temperature: float = 0.7 max_iterations: int = 10 tools: List[str] = Field(default_factory=list) description: str = "" class OrchestrateRequest(BaseModel): """多 Agent 编排请求""" message: str mode: str = "debate" agents: List[OrchestrateAgentItem] = Field(..., min_length=1) model: Optional[str] = None class OrchestrateStepItem(BaseModel): """编排步骤""" agent_id: str agent_name: str input: str = "" output: str = "" iterations_used: int = 0 tool_calls_made: int = 0 error: Optional[str] = None class OrchestrateResponse(BaseModel): """多 Agent 编排响应""" mode: str final_answer: str steps: List[OrchestrateStepItem] = Field(default_factory=list) agent_results: List[Dict[str, Any]] = Field(default_factory=list) @router.post("/orchestrate", response_model=OrchestrateResponse) async def orchestrate_agents( req: OrchestrateRequest, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """多 Agent 编排:支持 route / sequential / debate 三种模式。""" agents = [ OrchestratorAgentConfig( id=a.id, name=a.name, system_prompt=a.system_prompt, model=req.model or a.model, provider=a.provider, temperature=a.temperature, max_iterations=a.max_iterations, tools=a.tools, description=a.description, ) for a in req.agents ] on_llm_call = _make_llm_logger(db, agent_id=None, user_id=current_user.id) orchestrator = AgentOrchestrator( default_llm_config=AgentLLMConfig( model=req.model or "deepseek-v4-flash", temperature=0.3, ), ) result = await orchestrator.run(req.mode, req.message, agents, on_llm_call=on_llm_call) return OrchestrateResponse( mode=result.mode, final_answer=result.final_answer, steps=[ OrchestrateStepItem( agent_id=s.agent_id, agent_name=s.agent_name, input=s.input, output=s.output, iterations_used=s.iterations_used, tool_calls_made=s.tool_calls_made, error=s.error, ) for s in result.steps ], agent_results=result.agent_results, ) class GraphOrchestrateRequest(BaseModel): """图编排请求 — 以 nodes + edges 描述 DAG""" message: str nodes: List[Dict[str, Any]] = Field(..., description="编排节点列表") edges: List[Dict[str, Any]] = Field(default_factory=list, description="编排连线列表") model: Optional[str] = None @router.post("/orchestrate/graph", response_model=OrchestrateResponse) async def orchestrate_graph( req: GraphOrchestrateRequest, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """图编排模式:按 DAG 拓扑顺序执行 Agent 和条件节点。""" on_llm_call = _make_llm_logger(db, agent_id=None, user_id=current_user.id) orchestrator = AgentOrchestrator( default_llm_config=AgentLLMConfig( model=req.model or "deepseek-v4-flash", temperature=0.3, ), ) result = await orchestrator._graph( req.message, req.nodes, req.edges, on_llm_call=on_llm_call, ) return OrchestrateResponse( mode=result.mode, final_answer=result.final_answer, steps=[ OrchestrateStepItem( agent_id=s.agent_id, agent_name=s.agent_name, input=s.input, output=s.output, iterations_used=s.iterations_used, tool_calls_made=s.tool_calls_made, error=s.error, ) for s in result.steps ], agent_results=result.agent_results, ) @router.post("/bare", response_model=ChatResponse) async def chat_bare( req: ChatRequest, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """无需 Agent 配置,使用默认设置直接对话。""" uid = current_user.id bare_scope = f"{uid}:__bare__" if uid else "__bare__" config = AgentConfig( name="bare_agent", system_prompt="你是一个有用的AI助手。请使用可用工具来帮助用户完成任务。", llm=AgentLLMConfig( model=req.model or ( "gpt-4o-mini" if settings.OPENAI_API_KEY and settings.OPENAI_API_KEY != "your-openai-api-key" else "deepseek-v4-flash" ), temperature=req.temperature or 0.7, max_iterations=req.max_iterations or 10, ), user_id=uid, memory_scope_id=bare_scope, ) on_llm_call = _make_llm_logger(db, agent_id=None, user_id=current_user.id) runtime = AgentRuntime(config=config, on_llm_call=on_llm_call) result = await runtime.run(req.message) return ChatResponse( content=result.content, iterations_used=result.iterations_used, tool_calls_made=result.tool_calls_made, truncated=result.truncated, session_id=runtime.context.session_id, steps=result.steps, ) @router.post("/bare/stream") async def chat_bare_stream( req: ChatRequest, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """无需 Agent 配置,使用默认设置直接对话(流式 SSE)。""" uid = current_user.id bare_scope = f"{uid}:__bare__" if uid else "__bare__" config = AgentConfig( name="bare_agent", system_prompt="你是一个有用的AI助手。请使用可用工具来帮助用户完成任务。", llm=AgentLLMConfig( model=req.model or ( "gpt-4o-mini" if settings.OPENAI_API_KEY and settings.OPENAI_API_KEY != "your-openai-api-key" else "deepseek-v4-flash" ), temperature=req.temperature or 0.7, max_iterations=req.max_iterations or 10, ), user_id=uid, memory_scope_id=bare_scope, ) on_llm_call = _make_llm_logger(db, agent_id=None, user_id=current_user.id) runtime = AgentRuntime(config=config, on_llm_call=on_llm_call) return StreamingResponse( _sse_stream(runtime.run_stream(req.message)), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", }, ) @router.post("/{agent_id}", response_model=ChatResponse) async def chat_with_agent( agent_id: str, req: ChatRequest, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """与指定的 Agent 对话。Agent 的工作流配置会用于构建 Runtime。""" agent = db.query(Agent).filter(Agent.id == agent_id).first() if not agent: raise HTTPException(status_code=404, detail="Agent 不存在") if agent.user_id and agent.user_id != current_user.id and current_user.role != "admin": raise HTTPException(status_code=403, detail="无权访问该 Agent") # 从 Agent 配置构建 Runtime wc = agent.workflow_config or {} nodes = wc.get("nodes", []) # 查找 agent 节点的配置(或第一个 llm 节点的配置) agent_node_cfg = _find_agent_node_config(nodes) # 构建 system prompt,并自动注入智能体名称 system_prompt = agent_node_cfg.get("system_prompt") or agent.description or "你是一个有用的AI助手。" if agent.name: name_prefix = f"你的名字是{agent.name}" if name_prefix not in system_prompt: system_prompt = f"{name_prefix}。\n\n{system_prompt}" # 合并执行预算:Agent.budget_config 覆盖默认值 budget = AgentBudgetConfig() if agent.budget_config and isinstance(agent.budget_config, dict): bc = agent.budget_config if "max_llm_invocations" in bc and bc["max_llm_invocations"] is not None: budget.max_llm_invocations = max(1, int(bc["max_llm_invocations"])) if "max_tool_calls" in bc and bc["max_tool_calls"] is not None: budget.max_tool_calls = max(1, int(bc["max_tool_calls"])) uid = current_user.id mem_scope = f"{uid}:{agent_id}" if uid else str(agent_id) memory_cfg = _build_memory_config_from_node(agent_node_cfg) config = AgentConfig( name=agent.name, system_prompt=system_prompt, llm=AgentLLMConfig( provider=agent_node_cfg.get("provider", "openai"), model=req.model or agent_node_cfg.get("model", "gpt-4o-mini"), temperature=req.temperature or float(agent_node_cfg.get("temperature", 0.7)), max_iterations=req.max_iterations or int(agent_node_cfg.get("max_iterations", 10)), ), tools=AgentToolConfig( include_tools=agent_node_cfg.get("tools", []), exclude_tools=agent_node_cfg.get("exclude_tools", []), ), memory=memory_cfg, budget=budget, user_id=uid, memory_scope_id=mem_scope, ) on_llm_call = _make_llm_logger(db, agent_id=agent_id, user_id=current_user.id) runtime = AgentRuntime(config=config, on_llm_call=on_llm_call) result = await runtime.run(req.message) return ChatResponse( content=result.content, iterations_used=result.iterations_used, tool_calls_made=result.tool_calls_made, truncated=result.truncated, session_id=runtime.context.session_id, agent_id=agent_id, steps=result.steps, ) @router.post("/{agent_id}/stream") async def chat_with_agent_stream( agent_id: str, req: ChatRequest, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """与指定的 Agent 对话(流式 SSE)。""" agent = db.query(Agent).filter(Agent.id == agent_id).first() if not agent: raise HTTPException(status_code=404, detail="Agent 不存在") if agent.user_id and agent.user_id != current_user.id and current_user.role != "admin": raise HTTPException(status_code=403, detail="无权访问该 Agent") wc = agent.workflow_config or {} nodes = wc.get("nodes", []) agent_node_cfg = _find_agent_node_config(nodes) system_prompt = agent_node_cfg.get("system_prompt") or agent.description or "你是一个有用的AI助手。" if agent.name: name_prefix = f"你的名字是{agent.name}" if name_prefix not in system_prompt: system_prompt = f"{name_prefix}。\n\n{system_prompt}" budget = AgentBudgetConfig() if agent.budget_config and isinstance(agent.budget_config, dict): bc = agent.budget_config if "max_llm_invocations" in bc and bc["max_llm_invocations"] is not None: budget.max_llm_invocations = max(1, int(bc["max_llm_invocations"])) if "max_tool_calls" in bc and bc["max_tool_calls"] is not None: budget.max_tool_calls = max(1, int(bc["max_tool_calls"])) uid = current_user.id mem_scope = f"{uid}:{agent_id}" if uid else str(agent_id) memory_cfg = _build_memory_config_from_node(agent_node_cfg) config = AgentConfig( name=agent.name, system_prompt=system_prompt, llm=AgentLLMConfig( provider=agent_node_cfg.get("provider", "openai"), model=req.model or agent_node_cfg.get("model", "gpt-4o-mini"), temperature=req.temperature or float(agent_node_cfg.get("temperature", 0.7)), max_iterations=req.max_iterations or int(agent_node_cfg.get("max_iterations", 10)), ), tools=AgentToolConfig( include_tools=agent_node_cfg.get("tools", []), exclude_tools=agent_node_cfg.get("exclude_tools", []), ), memory=memory_cfg, budget=budget, user_id=uid, memory_scope_id=mem_scope, ) on_llm_call = _make_llm_logger(db, agent_id=agent_id, user_id=current_user.id) runtime = AgentRuntime(config=config, on_llm_call=on_llm_call) return StreamingResponse( _sse_stream(runtime.run_stream(req.message)), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", }, ) def _find_agent_node_config(nodes: list) -> Dict[str, Any]: """从工作流节点列表中查找第一个 agent 类型或 llm 类型的节点配置。""" if not nodes: return {} for node in nodes: typ = node.get("type", "") if typ in ("agent", "llm", "template"): return node.get("data") or {} return {} def _build_memory_config_from_node(agent_node_cfg: dict) -> AgentMemoryConfig: """从 Agent 工作流节点配置中提取记忆配置。""" return AgentMemoryConfig( max_history_messages=int(agent_node_cfg.get("memory_max_history", 20)), vector_memory_top_k=int(agent_node_cfg.get("memory_vector_top_k", 5)), persist_to_db=bool(agent_node_cfg.get("memory_persist", True)), vector_memory_enabled=bool(agent_node_cfg.get("memory_vector_enabled", True)), learning_enabled=bool(agent_node_cfg.get("memory_learning", True)), )