# backend/app/services/agent_monitoring_service.py

"""
Agent 监控服务 — 提供 Agent 专属统计数据
"""
from sqlalchemy.orm import Session
from sqlalchemy import func, and_
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from app.models.agent_llm_log import AgentLLMLog
from app.models.agent import Agent
from app.models.execution import Execution
import logging

logger = logging.getLogger(__name__)


class AgentMonitoringService:
    """Agent 监控服务"""

    @staticmethod
    def get_overview(db: Session, user_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Return headline usage totals for Agents.

        Args:
            db: SQLAlchemy session used to run the queries.
            user_id: When truthy, only agents whose ``Agent.user_id`` equals
                this value are in scope; otherwise every agent is in scope.

        Returns:
            A dict with the keys:

            - ``agent_count``: number of agents in scope.
            - ``chat_count``: ``Execution`` rows whose ``agent_id`` belongs
              to an agent in scope.
            - ``llm_call_count``: ``AgentLLMLog`` rows for agents in scope.
            - ``total_prompt_tokens`` / ``total_completion_tokens``: sums of
              the matching ``AgentLLMLog`` columns (0 when no rows match).
            - ``total_tokens``: prompt tokens plus completion tokens.
            - ``tool_call_count``: ``AgentLLMLog`` rows for agents in scope
              whose ``tool_name`` is not NULL.
        """
        # Keep the agent scope as a query so it can be embedded as an
        # ``IN (SELECT ...)`` subquery. Loading every id into Python and
        # binding them individually makes the statement grow with the number
        # of agents and may exceed backend limits on bound parameters.
        scoped_agent_ids = db.query(Agent.id)
        if user_id:
            scoped_agent_ids = scoped_agent_ids.filter(Agent.user_id == user_id)

        agent_count = scoped_agent_ids.distinct().count()

        overview: Dict[str, Any] = {
            "agent_count": agent_count,
            "chat_count": 0,
            "llm_call_count": 0,
            "total_prompt_tokens": 0,
            "total_completion_tokens": 0,
            "total_tokens": 0,
            "tool_call_count": 0,
        }
        if not agent_count:
            # No agents in scope: every dependent count is zero by definition,
            # so skip the remaining queries.
            return overview

        # Conversations: Execution rows produced by the agents in scope.
        chat_count = db.query(func.count(Execution.id)).filter(
            Execution.agent_id.in_(scoped_agent_ids)
        ).scalar() or 0

        # LLM statistics in one round trip. COUNT(tool_name) only counts rows
        # where tool_name is not NULL, which yields the tool-call total
        # without a separate query.
        llm_count, prompt_sum, completion_sum, tool_call_count = db.query(
            func.count(AgentLLMLog.id),
            func.coalesce(func.sum(AgentLLMLog.prompt_tokens), 0),
            func.coalesce(func.sum(AgentLLMLog.completion_tokens), 0),
            func.count(AgentLLMLog.tool_name),
        ).filter(
            AgentLLMLog.agent_id.in_(scoped_agent_ids)
        ).one()

        total_prompt = prompt_sum or 0
        total_completion = completion_sum or 0

        overview.update(
            {
                "chat_count": chat_count,
                "llm_call_count": llm_count or 0,
                "total_prompt_tokens": total_prompt,
                "total_completion_tokens": total_completion,
                "total_tokens": total_prompt + total_completion,
                "tool_call_count": tool_call_count or 0,
            }
        )
        return overview

    @staticmethod
    def get_llm_calls(
        db: Session,
        user_id: Optional[str] = None,
        days: int = 7,
        limit: int = 50,
    ) -> List[Dict[str, Any]]:
        """
        Return the most recent LLM call records, newest first.

        Args:
            db: SQLAlchemy session used to run the query.
            user_id: When truthy, only rows whose ``AgentLLMLog.user_id``
                equals this value are returned.
            days: Size of the look-back window, counted back from the
                current ``datetime.utcnow()``.
            limit: Maximum number of rows to return.

        Returns:
            One dict per log row; ``created_at`` is rendered with
            ``isoformat()`` or ``None`` when the column is empty.
        """
        window_start = datetime.utcnow() - timedelta(days=days)

        # Successive filter() calls are combined with AND.
        query = db.query(AgentLLMLog).filter(AgentLLMLog.created_at >= window_start)
        if user_id:
            query = query.filter(AgentLLMLog.user_id == user_id)
        query = query.order_by(AgentLLMLog.created_at.desc()).limit(limit)

        payload = []
        for record in query.all():
            created = record.created_at
            payload.append(
                {
                    "id": record.id,
                    "agent_id": record.agent_id,
                    "session_id": record.session_id,
                    "model": record.model,
                    "provider": record.provider,
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "total_tokens": record.total_tokens,
                    "latency_ms": record.latency_ms,
                    "iteration_number": record.iteration_number,
                    "step_type": record.step_type,
                    "tool_name": record.tool_name,
                    "status": record.status,
                    "error_message": record.error_message,
                    "created_at": created.isoformat() if created else None,
                }
            )
        return payload

    @staticmethod
    def get_agent_stats(
        db: Session,
        user_id: Optional[str] = None,
        days: int = 7,
    ) -> List[Dict[str, Any]]:
        """
        Return LLM usage statistics grouped by Agent.

        Args:
            db: SQLAlchemy session used to run the query.
            user_id: When truthy, only rows whose ``AgentLLMLog.user_id``
                equals this value are aggregated.
            days: Size of the look-back window, counted back from the
                current ``datetime.utcnow()``.

        Returns:
            One dict per ``agent_id`` found in the log, ordered by call count
            descending. Log rows without a matching ``Agent`` are kept (outer
            join) and reported with placeholder id/name strings.
        """
        end_time = datetime.utcnow()
        start_time = end_time - timedelta(days=days)

        filters = [AgentLLMLog.created_at >= start_time]
        if user_id:
            filters.append(AgentLLMLog.user_id == user_id)

        rows = db.query(
            AgentLLMLog.agent_id,
            Agent.name.label("agent_name"),
            func.count(AgentLLMLog.id).label("call_count"),
            func.coalesce(func.sum(AgentLLMLog.prompt_tokens), 0).label("total_prompt"),
            func.coalesce(func.sum(AgentLLMLog.completion_tokens), 0).label("total_completion"),
            func.coalesce(func.sum(AgentLLMLog.total_tokens), 0).label("total_tokens"),
            func.coalesce(func.avg(AgentLLMLog.latency_ms), 0).label("avg_latency"),
            # COUNT(column) skips NULLs, so this counts rows that carry a tool name.
            func.count(AgentLLMLog.tool_name).label("tool_calls"),
        ).outerjoin(
            Agent, AgentLLMLog.agent_id == Agent.id
        ).filter(
            and_(*filters)
        ).group_by(
            # Agent.name is selected without an aggregate, so it must be part
            # of GROUP BY; strict SQL backends reject the statement otherwise.
            # The join is on Agent.id, so adding it does not split any group.
            AgentLLMLog.agent_id,
            Agent.name,
        ).order_by(
            func.count(AgentLLMLog.id).desc()
        ).all()

        return [
            {
                "agent_id": r.agent_id or "未知",
                "agent_name": r.agent_name or "未知 Agent",
                "call_count": r.call_count,
                "total_prompt_tokens": r.total_prompt,
                "total_completion_tokens": r.total_completion,
                "total_tokens": r.total_tokens,
                "avg_latency_ms": round(r.avg_latency, 2) if r.avg_latency else 0,
                "tool_call_count": r.tool_calls or 0,
            }
            for r in rows
        ]

    @staticmethod
    def get_tool_usage(
        db: Session,
        user_id: Optional[str] = None,
        days: int = 7,
    ) -> List[Dict[str, Any]]:
        """
        Return call-frequency statistics grouped by tool name.

        Args:
            db: SQLAlchemy session used to run the query.
            user_id: When truthy, only rows whose ``AgentLLMLog.user_id``
                equals this value are aggregated.
            days: Size of the look-back window, counted back from the
                current ``datetime.utcnow()``.

        Returns:
            One dict per tool name, ordered by call count descending. Only
            log rows with a non-NULL ``tool_name`` are considered.
        """
        cutoff = datetime.utcnow() - timedelta(days=days)

        criteria = [
            AgentLLMLog.created_at >= cutoff,
            AgentLLMLog.tool_name.isnot(None),
        ]
        if user_id:
            criteria.append(AgentLLMLog.user_id == user_id)

        query = (
            db.query(
                AgentLLMLog.tool_name,
                func.count(AgentLLMLog.id).label("call_count"),
                func.coalesce(func.sum(AgentLLMLog.total_tokens), 0).label("total_tokens"),
                func.coalesce(func.avg(AgentLLMLog.latency_ms), 0).label("avg_latency"),
            )
            .filter(*criteria)  # positional criteria are combined with AND
            .group_by(AgentLLMLog.tool_name)
            .order_by(func.count(AgentLLMLog.id).desc())
        )

        usage = []
        for row in query.all():
            latency = row.avg_latency
            usage.append(
                {
                    "tool_name": row.tool_name or "未知",
                    "call_count": row.call_count,
                    "total_tokens": row.total_tokens,
                    "avg_latency_ms": round(latency, 2) if latency else 0,
                }
            )
        return usage

    @staticmethod
    def get_daily_trend(
        db: Session,
        user_id: Optional[str] = None,
        days: int = 7,
    ) -> List[Dict[str, Any]]:
        """
        Return the LLM call count and token total for each of the last ``days`` days.

        Args:
            db: SQLAlchemy session used to run the queries.
            user_id: When truthy, only rows whose ``AgentLLMLog.user_id``
                equals this value are counted.
            days: Number of 24-hour buckets to report, oldest first.

        Returns:
            A list of ``days`` dicts with ``date`` (``"%m-%d"`` of the bucket
            start), ``call_count`` and ``total_tokens``. Empty when ``days``
            is zero or negative.

        NOTE(review): buckets are rolling 24-hour windows that end at the
        current ``datetime.utcnow()``, not calendar days, and each is
        labelled with its start date. Rows from the last 24 hours are
        therefore reported under yesterday's date. Confirm whether
        calendar-day alignment is wanted before changing it.
        """
        end_time = datetime.utcnow()
        one_day = timedelta(days=1)

        # Criteria shared by every bucket. A lower bound for the whole window
        # is unnecessary: each bucket's own start is already at or after it.
        scope = []
        if user_id:
            scope.append(AgentLLMLog.user_id == user_id)

        results = []
        for offset in range(days, 0, -1):
            day_start = end_time - timedelta(days=offset)
            day_end = day_start + one_day

            # Fetch count and token sum together: one query per bucket
            # instead of two.
            day_count, day_tokens = db.query(
                func.count(AgentLLMLog.id),
                func.coalesce(func.sum(AgentLLMLog.total_tokens), 0),
            ).filter(
                *scope,
                AgentLLMLog.created_at >= day_start,
                AgentLLMLog.created_at < day_end,
            ).one()

            results.append({
                "date": day_start.strftime("%m-%d"),
                "call_count": day_count or 0,
                "total_tokens": day_tokens or 0,
            })

        return results