# aiagent/backend/app/api/system_logs.py

"""
系统日志统一查询 API
"""
import logging
from collections import deque
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional, List, Dict, Any

from fastapi import APIRouter, Depends, Query, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import func, text, case, literal_column
from sqlalchemy.orm import Session

from app.core.database import get_db
from app.core.config import settings
from app.api.auth import get_current_user
from app.models.user import User
from app.models.execution_log import ExecutionLog
from app.models.agent_execution_log import AgentExecutionLog
from app.models.agent_llm_log import AgentLLMLog

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for the system-log endpoints. Every route declared on it is served
# under the /api/v1/system-logs prefix and shares the error responses below
# in the generated API documentation.
router = APIRouter(
    prefix="/api/v1/system-logs",
    tags=["system-logs"],
    responses={
        401: {"description": "未授权"},          # "Unauthorized"
        403: {"description": "无权访问"},        # "Access denied"
        500: {"description": "服务器内部错误"}   # "Internal server error"
    }
)


# ── Pydantic Schemas ─────────────────────────────────────────────

# Response schema for one row of the unified log listing. The field names
# match the column labels produced by the per-source queries built in
# _build_union_query, so rows can be validated by attribute name.
class UnifiedLogItem(BaseModel):
    id: str
    source: str          # which table the row came from: "execution" | "agent" | "llm"
    level: Optional[str] = None          # stored level for execution logs; derived from status for agent/llm rows
    message: str
    timestamp: Optional[datetime] = None
    resource_type: Optional[str] = None  # node type for execution rows, 'agent_chat' or 'llm_call' otherwise
    resource_id: Optional[str] = None    # execution id for execution rows, agent id otherwise
    duration_ms: Optional[int] = None
    username: Optional[str] = None       # the queries in this module always select NULL for this column

    class Config:
        # Let the model be populated from objects exposing attributes
        # (e.g. query result rows), not only from dicts.
        from_attributes = True


# Response schema for the /stats endpoint: per-level counters, per-source
# counters and an hourly trend series.
class LogStatsResponse(BaseModel):
    total_count: int
    error_count: int
    warn_count: int
    info_count: int
    source_breakdown: Dict[str, int]        # e.g. {"execution": 123, "agent": 45, "llm": 67}
    hourly_trend: List[Dict[str, Any]]       # e.g. [{"hour": "2026-05-10T14:00:00", "count": 5}, ...]

    class Config:
        # Let the model be populated from objects exposing attributes,
        # not only from dicts.
        from_attributes = True


# Response schema for one line read from the application log file.
class AppLogItem(BaseModel):
    line_number: int   # 1-based position of the line within the log file
    content: str       # line text with the trailing line terminator removed


# ── Helpers ──────────────────────────────────────────────────────

def _apply_log_filters(
    query,
    level_col,
    message_col,
    timestamp_col,
    level: Optional[str],
    keyword: Optional[str],
    start_date: Optional[datetime],
    end_date: Optional[datetime],
):
    """Apply the filters shared by every log source to one per-table query.

    ``level_col`` must be the same expression the query exposes as its
    ``level`` column, so that a level filter always agrees with the level
    shown on the returned rows.
    """
    if level:
        query = query.filter(level_col == level.upper())
    if keyword:
        # autoescape keeps "%" and "_" typed by the user literal instead of
        # letting them act as LIKE wildcards.
        query = query.filter(message_col.contains(keyword, autoescape=True))
    if start_date:
        query = query.filter(timestamp_col >= start_date)
    if end_date:
        query = query.filter(timestamp_col <= end_date)
    return query


def _build_union_query(
    db: Session,
    source: Optional[str] = None,
    level: Optional[str] = None,
    keyword: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    user_id: Optional[str] = None,
):
    """Build the per-source queries that the caller combines with UNION ALL.

    Each query selects the same nine labelled columns (id, source, level,
    message, timestamp, resource_type, resource_id, duration_ms, username)
    so the results can be unioned and read uniformly.

    Args:
        db: SQLAlchemy session used to create the queries.
        source: "execution", "agent" or "llm" to select one table; ``None``
            or "all" selects all three. Any other value yields no queries.
        level: Optional level filter, compared case-insensitively against
            the level each row is reported with.
        keyword: Optional substring that the message column must contain.
        start_date: Optional inclusive lower bound on the row timestamp.
        end_date: Optional inclusive upper bound on the row timestamp.
        user_id: Optional owner filter; applied to agent execution logs only.

    Returns:
        A list of zero to three un-executed queries, in the order
        execution, agent, llm.
    """
    queries: list = []

    # 1) Workflow execution logs: the level is stored on the row.
    if source in (None, "all", "execution"):
        q = db.query(
            ExecutionLog.id.label("id"),
            literal_column("'execution'").label("source"),
            ExecutionLog.level.label("level"),
            ExecutionLog.message.label("message"),
            ExecutionLog.timestamp.label("timestamp"),
            ExecutionLog.node_type.label("resource_type"),
            ExecutionLog.execution_id.label("resource_id"),
            ExecutionLog.duration.label("duration_ms"),
            literal_column("NULL").label("username"),
        )
        queries.append(_apply_log_filters(
            q, ExecutionLog.level, ExecutionLog.message, ExecutionLog.timestamp,
            level, keyword, start_date, end_date,
        ))

    # 2) Agent execution logs: "failed" is reported as ERROR, everything
    #    else as INFO. There is no WARN mapping, so a WARN filter matches
    #    no agent rows (it previously returned rows labelled ERROR).
    if source in (None, "all", "agent"):
        agent_level = case(
            (AgentExecutionLog.status == "failed", "ERROR"),
            else_="INFO",
        )
        q = db.query(
            AgentExecutionLog.id.label("id"),
            literal_column("'agent'").label("source"),
            agent_level.label("level"),
            AgentExecutionLog.user_message.label("message"),
            AgentExecutionLog.created_at.label("timestamp"),
            literal_column("'agent_chat'").label("resource_type"),
            AgentExecutionLog.agent_id.label("resource_id"),
            AgentExecutionLog.total_latency_ms.label("duration_ms"),
            literal_column("NULL").label("username"),
        )
        q = _apply_log_filters(
            q, agent_level, AgentExecutionLog.user_message,
            AgentExecutionLog.created_at,
            level, keyword, start_date, end_date,
        )
        if user_id:
            q = q.filter(AgentExecutionLog.user_id == user_id)
        queries.append(q)

    # 3) LLM call logs: "error" -> ERROR, "rate_limited" -> WARN, anything
    #    else -> INFO. Filtering on the same expression keeps an INFO filter
    #    consistent with the rows that are labelled INFO.
    if source in (None, "all", "llm"):
        llm_level = case(
            (AgentLLMLog.status == "error", "ERROR"),
            (AgentLLMLog.status == "rate_limited", "WARN"),
            else_="INFO",
        )
        q = db.query(
            AgentLLMLog.id.label("id"),
            literal_column("'llm'").label("source"),
            llm_level.label("level"),
            AgentLLMLog.assistant_content.label("message"),
            AgentLLMLog.created_at.label("timestamp"),
            literal_column("'llm_call'").label("resource_type"),
            AgentLLMLog.agent_id.label("resource_id"),
            AgentLLMLog.latency_ms.label("duration_ms"),
            literal_column("NULL").label("username"),
        )
        queries.append(_apply_log_filters(
            q, llm_level, AgentLLMLog.assistant_content, AgentLLMLog.created_at,
            level, keyword, start_date, end_date,
        ))

    # NOTE(review): user_id narrows only the agent query above; execution and
    # LLM rows are returned unfiltered. Confirm that is intended before this
    # helper is used for non-admin callers.
    return queries


def _check_admin(current_user: User):
    """Allow the request to proceed only for users whose role is "admin".

    A user object without a ``role`` attribute is treated as non-admin.
    Raises ``ForbiddenError`` for every non-admin caller.
    """
    role = getattr(current_user, "role", None)
    if role == "admin":
        return

    # Imported at the point of use, as in the original implementation.
    from app.core.exceptions import ForbiddenError
    raise ForbiddenError("仅管理员可访问系统日志")


# ── Endpoints ────────────────────────────────────────────────────

def _parse_iso_datetime(value: Optional[str], field_name: str) -> Optional[datetime]:
    """Parse an optional ISO-format query parameter.

    Returns ``None`` when the parameter is absent or empty, and raises an
    HTTP 400 (instead of an unhandled ``ValueError``) when it is malformed.
    """
    if not value:
        return None
    try:
        return datetime.fromisoformat(value)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"参数 {field_name} 不是有效的 ISO 时间格式",
        ) from exc


@router.get("", response_model=List[UnifiedLogItem])
async def get_system_logs(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
    source: Optional[str] = Query(None, description="日志来源: execution/agent/llm/all"),
    level: Optional[str] = Query(None, description="日志级别: INFO/WARN/ERROR"),
    keyword: Optional[str] = Query(None, description="关键词搜索"),
    start_date: Optional[str] = Query(None, description="开始时间 ISO格式"),
    end_date: Optional[str] = Query(None, description="结束时间 ISO格式"),
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=1000),
):
    """
    Unified log listing across execution_logs / agent_execution_logs /
    agent_llm_logs, newest first.

    The endpoint is admin-only: ``_check_admin`` rejects every other caller,
    so no per-user restriction is applied to the queries.

    Args:
        source: Log source to read (execution/agent/llm/all); defaults to all.
        level: Optional level filter (INFO/WARN/ERROR).
        keyword: Optional substring to search for in the message.
        start_date: Optional inclusive lower time bound, ISO format.
        end_date: Optional inclusive upper time bound, ISO format.
        skip: Number of rows to skip (pagination offset).
        limit: Maximum number of rows to return.

    Raises:
        ForbiddenError: The caller is not an admin.
        HTTPException: 400 when start_date or end_date is not valid ISO format.
    """
    _check_admin(current_user)

    sd = _parse_iso_datetime(start_date, "start_date")
    ed = _parse_iso_datetime(end_date, "end_date")

    # Only admins get past _check_admin, so the user filter is never needed.
    queries = _build_union_query(db, source or "all", level, keyword, sd, ed, None)
    if not queries:
        # Unknown source value: nothing to query.
        return []

    union = queries[0]
    for q in queries[1:]:
        union = union.union_all(q)

    # The response carries no total, so no separate COUNT query is issued.
    return union.order_by(text("timestamp DESC")).offset(skip).limit(limit).all()


@router.get("/stats", response_model=LogStatsResponse)
async def get_system_logs_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return system-log statistics: per-level and per-source counts for the
    current UTC day, plus an hourly row count for the latest 24 hour slots.

    Levels follow the mapping used by the unified log listing: agent status
    "failed" and LLM status "error" count as errors, LLM status
    "rate_limited" counts as a warning, and everything else as info.

    Raises:
        ForbiddenError: The caller is not an admin.
    """
    _check_admin(current_user)

    # Naive UTC "now"; same value datetime.utcnow() produced, without the
    # deprecated call.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)

    def count_rows(id_col, *criteria) -> int:
        """Count rows matching all criteria; a NULL result counts as 0."""
        return db.query(func.count(id_col)).filter(*criteria).scalar() or 0

    # Workflow execution logs
    exec_today = ExecutionLog.timestamp >= today_start
    exec_total = count_rows(ExecutionLog.id, exec_today)
    exec_errors = count_rows(ExecutionLog.id, exec_today, ExecutionLog.level == "ERROR")
    exec_warns = count_rows(ExecutionLog.id, exec_today, ExecutionLog.level == "WARN")

    # Agent execution logs
    agent_today = AgentExecutionLog.created_at >= today_start
    agent_total = count_rows(AgentExecutionLog.id, agent_today)
    agent_errors = count_rows(
        AgentExecutionLog.id, agent_today, AgentExecutionLog.status == "failed"
    )

    # LLM call logs
    llm_today = AgentLLMLog.created_at >= today_start
    llm_total = count_rows(AgentLLMLog.id, llm_today)
    llm_errors = count_rows(AgentLLMLog.id, llm_today, AgentLLMLog.status == "error")
    # Rate-limited calls are listed as WARN, so count them as warnings here
    # too rather than letting them fall into info_count.
    llm_warns = count_rows(
        AgentLLMLog.id, llm_today, AgentLLMLog.status == "rate_limited"
    )

    total_count = exec_total + agent_total + llm_total
    error_count = exec_errors + agent_errors + llm_errors
    warn_count = exec_warns + llm_warns
    info_count = total_count - error_count - warn_count

    # Hourly trend: 24 one-hour slots, oldest first, the last one being the
    # current (partial) hour.
    current_hour = now.replace(minute=0, second=0, microsecond=0)
    hourly: list[dict[str, Any]] = []
    for hours_back in range(23, -1, -1):
        slot_start = current_hour - timedelta(hours=hours_back)
        slot_end = slot_start + timedelta(hours=1)

        slot_count = (
            count_rows(
                ExecutionLog.id,
                ExecutionLog.timestamp >= slot_start,
                ExecutionLog.timestamp < slot_end,
            )
            + count_rows(
                AgentExecutionLog.id,
                AgentExecutionLog.created_at >= slot_start,
                AgentExecutionLog.created_at < slot_end,
            )
            + count_rows(
                AgentLLMLog.id,
                AgentLLMLog.created_at >= slot_start,
                AgentLLMLog.created_at < slot_end,
            )
        )
        hourly.append({
            "hour": slot_start.isoformat(),
            "count": slot_count
        })

    return {
        "total_count": total_count,
        "error_count": error_count,
        "warn_count": warn_count,
        "info_count": info_count,
        "source_breakdown": {
            "execution": exec_total,
            "agent": agent_total,
            "llm": llm_total
        },
        "hourly_trend": hourly
    }


@router.get("/app-logs", response_model=List[AppLogItem])
async def get_app_logs(
    current_user: User = Depends(get_current_user),
    lines: int = Query(200, ge=10, le=2000, description="读取行数"),
    level: Optional[str] = Query(None, description="按级别过滤: INFO/WARNING/ERROR"),
):
    """Return the last ``lines`` lines of the application log file.

    Lines are returned in file order (oldest first) with their 1-based line
    numbers. When ``level`` is given, only tail lines containing that text
    (case-insensitive substring match) are kept, so fewer than ``lines``
    entries may be returned.

    Returns an empty list when the log file does not exist or cannot be read.

    Raises:
        ForbiddenError: The caller is not an admin.
    """
    _check_admin(current_user)

    log_file = Path(settings.LOG_DIR) / "app.log"
    level_filter = level.upper() if level else None

    try:
        with open(log_file, "r", encoding="utf-8", errors="replace") as f:
            # A bounded deque keeps only the last `lines` (number, text)
            # pairs, so the whole file is never held in memory.
            tail = deque(enumerate(f, start=1), maxlen=lines)
    except FileNotFoundError:
        # No log file yet: nothing to show.
        return []
    except OSError:
        # Still best-effort, but leave a trace of why the tail is empty.
        logger.exception("Failed to read application log file %s", log_file)
        return []

    result: list[dict] = []
    for line_number, raw in tail:
        content = raw.rstrip("\n").rstrip("\r")
        if level_filter and level_filter not in content.upper():
            continue
        result.append({
            "line_number": line_number,
            "content": content
        })

    return result