第一次提交
This commit is contained in:
414
backend/app/api/alert_rules.py
Normal file
414
backend/app/api/alert_rules.py
Normal file
@@ -0,0 +1,414 @@
|
||||
"""
Alert rule management API
"""
|
||||
from fastapi import APIRouter, Depends, Query, HTTPException, status
|
||||
from sqlalchemy.orm import Session
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
import logging
|
||||
from app.core.database import get_db
|
||||
from app.models.alert_rule import AlertRule, AlertLog
|
||||
from app.api.auth import get_current_user
|
||||
from app.models.user import User
|
||||
from app.core.exceptions import NotFoundError, ValidationError, ConflictError
|
||||
from app.services.alert_service import AlertService
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router shared by every alert-rule endpoint in this module. The
# ``responses`` mapping lists the error status codes documented for all
# routes registered on it.
router = APIRouter(
    prefix="/api/v1/alert-rules",
    tags=["alert-rules"],
    responses={
        status_code: {"description": text}
        for status_code, text in (
            (401, "未授权"),
            (404, "资源不存在"),
            (400, "请求参数错误"),
            (500, "服务器内部错误"),
        )
    },
)
|
||||
|
||||
|
||||
class AlertRuleCreate(BaseModel):
    """Request body for creating an alert rule."""

    # Display name of the rule.
    name: str
    # Optional free-text description.
    description: Optional[str] = None
    # One of: execution_failed, execution_timeout, error_rate, resource_usage
    alert_type: str
    # One of: workflow, agent, system
    target_type: str
    # Identifier of the monitored target; optional.
    target_id: Optional[str] = None
    # Trigger conditions as a free-form mapping.
    conditions: Dict[str, Any]
    # One of: email, webhook, internal
    notification_type: str
    # Optional free-form settings for the chosen notification channel.
    notification_config: Optional[Dict[str, Any]] = None
    # Rules are enabled unless stated otherwise.
    enabled: bool = True
|
||||
|
||||
|
||||
class AlertRuleUpdate(BaseModel):
    """Request body for updating an alert rule.

    Every field is optional and defaults to ``None``.
    """

    # Identification
    name: Optional[str] = None
    description: Optional[str] = None

    # What is monitored and under which conditions
    alert_type: Optional[str] = None
    target_type: Optional[str] = None
    target_id: Optional[str] = None
    conditions: Optional[Dict[str, Any]] = None

    # How the alert is delivered
    notification_type: Optional[str] = None
    notification_config: Optional[Dict[str, Any]] = None

    # Whether the rule is active
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class AlertRuleResponse(BaseModel):
    """Response schema for a single alert rule."""

    # Identification
    id: str
    name: str
    description: Optional[str]

    # What is monitored and under which conditions
    alert_type: str
    target_type: str
    target_id: Optional[str]
    conditions: Dict[str, Any]

    # How the alert is delivered
    notification_type: str
    notification_config: Optional[Dict[str, Any]]

    # Runtime state
    enabled: bool
    trigger_count: int
    last_triggered_at: Optional[datetime]

    # Ownership and audit timestamps
    user_id: str
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow the schema to be populated from object attributes
        # (e.g. ORM instances) rather than only from dicts.
        from_attributes = True
|
||||
|
||||
|
||||
class AlertLogResponse(BaseModel):
    """Response schema for a single alert log entry."""

    # Identification
    id: str
    rule_id: str

    # Alert content
    alert_type: str
    severity: str
    message: str
    details: Optional[Dict[str, Any]]

    # Processing state and notification outcome
    status: str
    notification_type: Optional[str]
    notification_result: Optional[str]

    # Timeline
    triggered_at: datetime
    acknowledged_at: Optional[datetime]
    acknowledged_by: Optional[str]

    class Config:
        # Allow the schema to be populated from object attributes
        # (e.g. ORM instances) rather than only from dicts.
        from_attributes = True
|
||||
|
||||
|
||||
@router.get("", response_model=List[AlertRuleResponse])
async def get_alert_rules(
    skip: int = Query(0, ge=0, description="跳过记录数"),
    limit: int = Query(100, ge=1, le=100, description="每页记录数"),
    alert_type: Optional[str] = Query(None, description="告警类型筛选"),
    enabled: Optional[bool] = Query(None, description="是否启用筛选"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List the alert rules owned by the current user.

    Results are ordered by creation time, newest first, and paginated
    with ``skip`` / ``limit``. ``alert_type`` (when non-empty) and
    ``enabled`` (when not ``None``) narrow the result set.
    """
    # Ownership is always enforced; the remaining criteria are optional.
    criteria = [AlertRule.user_id == current_user.id]
    if alert_type:
        criteria.append(AlertRule.alert_type == alert_type)
    if enabled is not None:
        criteria.append(AlertRule.enabled == enabled)

    newest_first = AlertRule.created_at.desc()
    page = (
        db.query(AlertRule)
        .filter(*criteria)
        .order_by(newest_first)
        .offset(skip)
        .limit(limit)
    )
    return page.all()
|
||||
|
||||
|
||||
@router.post("", response_model=AlertRuleResponse, status_code=status.HTTP_201_CREATED)
async def create_alert_rule(
    rule_data: AlertRuleCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Create an alert rule for the current user.

    Raises:
        ValidationError: if the alert type, target type or notification
            type is not one of the supported values.
        ConflictError: if the user already has a rule with the same name.
    """
    # Reject values outside the supported enumerations, in the order
    # alert type -> target type -> notification type.
    if rule_data.alert_type not in (
        'execution_failed', 'execution_timeout', 'error_rate', 'resource_usage'
    ):
        raise ValidationError(f"不支持的告警类型: {rule_data.alert_type}")

    if rule_data.target_type not in ('workflow', 'agent', 'system'):
        raise ValidationError(f"不支持的目标类型: {rule_data.target_type}")

    if rule_data.notification_type not in ('email', 'webhook', 'internal'):
        raise ValidationError(f"不支持的通知方式: {rule_data.notification_type}")

    # Rule names must be unique per user.
    same_name = (
        db.query(AlertRule)
        .filter(
            AlertRule.name == rule_data.name,
            AlertRule.user_id == current_user.id,
        )
        .first()
    )
    if same_name:
        raise ConflictError(f"告警规则名称 '{rule_data.name}' 已存在")

    # Copy the payload fields onto a new row owned by the caller.
    copied_fields = (
        "name",
        "description",
        "alert_type",
        "target_type",
        "target_id",
        "conditions",
        "notification_type",
        "notification_config",
        "enabled",
    )
    values = {field: getattr(rule_data, field) for field in copied_fields}
    alert_rule = AlertRule(**values, user_id=current_user.id)

    db.add(alert_rule)
    db.commit()
    # Reload so the returned object carries database-populated columns.
    db.refresh(alert_rule)

    logger.info(f"用户 {current_user.username} 创建了告警规则: {alert_rule.name} ({alert_rule.id})")
    return alert_rule
|
||||
|
||||
|
||||
@router.get("/{rule_id}", response_model=AlertRuleResponse)
async def get_alert_rule(
    rule_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Return one alert rule by id.

    Only rules owned by the current user are visible.

    Raises:
        NotFoundError: if no rule with this id belongs to the user.
    """
    owned_match = db.query(AlertRule).filter(
        AlertRule.id == rule_id,
        AlertRule.user_id == current_user.id,
    )
    rule = owned_match.first()

    if rule:
        return rule

    raise NotFoundError(f"告警规则不存在: {rule_id}")
|
||||
|
||||
|
||||
@router.put("/{rule_id}", response_model=AlertRuleResponse)
async def update_alert_rule(
    rule_id: str,
    rule_data: AlertRuleUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Update an alert rule owned by the current user.

    Only payload fields whose value is not ``None`` are applied; a field
    left as ``None`` keeps its stored value. As a consequence, nullable
    fields cannot be reset to null through this endpoint.

    Raises:
        NotFoundError: if no rule with this id belongs to the user.
        ConflictError: if the new name is already used by another rule
            of the same user.
        ValidationError: if a supplied alert type, target type or
            notification type is not one of the supported values.
    """
    owner_id = current_user.id

    rule = (
        db.query(AlertRule)
        .filter(AlertRule.id == rule_id, AlertRule.user_id == owner_id)
        .first()
    )
    if not rule:
        raise NotFoundError(f"告警规则不存在: {rule_id}")

    # --- name: must stay unique among the user's other rules ---
    new_name = rule_data.name
    if new_name is not None:
        name_taken = (
            db.query(AlertRule)
            .filter(
                AlertRule.name == new_name,
                AlertRule.user_id == owner_id,
                AlertRule.id != rule_id,
            )
            .first()
        )
        if name_taken:
            raise ConflictError(f"告警规则名称 '{rule_data.name}' 已存在")
        rule.name = new_name

    # --- description ---
    new_description = rule_data.description
    if new_description is not None:
        rule.description = new_description

    # --- alert type: restricted to the supported set ---
    new_alert_type = rule_data.alert_type
    if new_alert_type is not None:
        if new_alert_type not in (
            'execution_failed', 'execution_timeout', 'error_rate', 'resource_usage'
        ):
            raise ValidationError(f"不支持的告警类型: {rule_data.alert_type}")
        rule.alert_type = new_alert_type

    # --- target type: restricted to the supported set ---
    new_target_type = rule_data.target_type
    if new_target_type is not None:
        if new_target_type not in ('workflow', 'agent', 'system'):
            raise ValidationError(f"不支持的目标类型: {rule_data.target_type}")
        rule.target_type = new_target_type

    # --- target id ---
    new_target_id = rule_data.target_id
    if new_target_id is not None:
        rule.target_id = new_target_id

    # --- conditions ---
    new_conditions = rule_data.conditions
    if new_conditions is not None:
        rule.conditions = new_conditions

    # --- notification type: restricted to the supported set ---
    new_notification_type = rule_data.notification_type
    if new_notification_type is not None:
        if new_notification_type not in ('email', 'webhook', 'internal'):
            raise ValidationError(f"不支持的通知方式: {rule_data.notification_type}")
        rule.notification_type = new_notification_type

    # --- notification config ---
    new_notification_config = rule_data.notification_config
    if new_notification_config is not None:
        rule.notification_config = new_notification_config

    # --- enabled flag (False is a valid update, hence the None check) ---
    new_enabled = rule_data.enabled
    if new_enabled is not None:
        rule.enabled = new_enabled

    db.commit()
    # Reload so the response reflects the stored state.
    db.refresh(rule)

    logger.info(f"用户 {current_user.username} 更新了告警规则: {rule.name} ({rule.id})")
    return rule
|
||||
|
||||
|
||||
@router.delete("/{rule_id}", status_code=status.HTTP_200_OK)
async def delete_alert_rule(
    rule_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Delete an alert rule owned by the current user.

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        NotFoundError: if no rule with this id belongs to the user.
    """
    owned_match = db.query(AlertRule).filter(
        AlertRule.id == rule_id,
        AlertRule.user_id == current_user.id,
    )
    rule = owned_match.first()
    if not rule:
        raise NotFoundError(f"告警规则不存在: {rule_id}")

    # Keep the name for the log line; it is read before the row is removed.
    deleted_name = rule.name

    db.delete(rule)
    db.commit()

    logger.info(f"用户 {current_user.username} 删除了告警规则: {deleted_name} ({rule_id})")
    return {"message": "告警规则已删除"}
|
||||
|
||||
|
||||
@router.get("/{rule_id}/logs", response_model=List[AlertLogResponse])
async def get_alert_logs(
    rule_id: str,
    skip: int = Query(0, ge=0, description="跳过记录数"),
    limit: int = Query(50, ge=1, le=100, description="每页记录数"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List the alert logs of one rule, most recently triggered first.

    The rule must belong to the current user. Results are paginated
    with ``skip`` / ``limit``.

    Raises:
        NotFoundError: if no rule with this id belongs to the user.
    """
    # Ownership check: logs are only reachable through an owned rule.
    owned_rule = (
        db.query(AlertRule)
        .filter(
            AlertRule.id == rule_id,
            AlertRule.user_id == current_user.id,
        )
        .first()
    )
    if not owned_rule:
        raise NotFoundError(f"告警规则不存在: {rule_id}")

    latest_first = AlertLog.triggered_at.desc()
    log_page = (
        db.query(AlertLog)
        .filter(AlertLog.rule_id == rule_id)
        .order_by(latest_first)
        .offset(skip)
        .limit(limit)
    )
    return log_page.all()
|
||||
|
||||
|
||||
@router.post("/{rule_id}/acknowledge/{log_id}", status_code=status.HTTP_200_OK)
async def acknowledge_alert(
    rule_id: str,
    log_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Acknowledge one alert log entry of a rule owned by the current user.

    Sets the log's status to ``"acknowledged"`` and records the current
    UTC time and the acknowledging user's id.

    Returns:
        A dict with a confirmation ``message`` and the updated ``log``.

    Raises:
        NotFoundError: if the rule does not belong to the user, or the
            log entry does not exist under that rule.
    """
    # The rule must exist and belong to the caller.
    owned_rule = (
        db.query(AlertRule)
        .filter(
            AlertRule.id == rule_id,
            AlertRule.user_id == current_user.id,
        )
        .first()
    )
    if not owned_rule:
        raise NotFoundError(f"告警规则不存在: {rule_id}")

    # The log entry must belong to that rule.
    alert_log = (
        db.query(AlertLog)
        .filter(AlertLog.id == log_id, AlertLog.rule_id == rule_id)
        .first()
    )
    if not alert_log:
        raise NotFoundError(f"告警日志不存在: {log_id}")

    # Mark as acknowledged by the caller, stamped with naive UTC time.
    alert_log.status = "acknowledged"
    alert_log.acknowledged_at = datetime.utcnow()
    alert_log.acknowledged_by = current_user.id

    db.commit()
    db.refresh(alert_log)

    return {"message": "告警已确认", "log": alert_log}
|
||||
|
||||
|
||||
@router.post("/check", status_code=status.HTTP_200_OK)
async def check_alerts(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Manually run an alert check over the current user's enabled rules.

    Only rules whose ``alert_type`` is ``'error_rate'`` are evaluated by
    this endpoint; enabled rules of other types are loaded but skipped.
    For each rule that ``AlertService.check_error_rate`` reports as
    triggered, an ``AlertLog`` row is created, the rule's trigger
    counter and last-trigger time are updated, the change is committed,
    and a notification is sent via ``AlertService.send_notification``.

    A failure while processing one rule is logged and does not stop the
    remaining rules from being processed. An alert is counted as soon as
    it has been committed, even if sending its notification then fails.

    Returns:
        A dict with a human-readable ``message`` and ``triggered_count``.
    """
    rules = db.query(AlertRule).filter(
        AlertRule.enabled == True,  # noqa: E712 - SQLAlchemy column comparison
        AlertRule.user_id == current_user.id
    ).all()

    # Capture ids while the rows are freshly loaded: after a failed commit
    # the instances are expired and reading ``rule.id`` inside the error
    # handler could itself raise.
    rules_with_ids = [(rule.id, rule) for rule in rules]

    triggered_count = 0

    for rule_id, rule in rules_with_ids:
        try:
            if rule.alert_type != 'error_rate':
                continue

            # The error-rate check inspects the relevant executions.
            if not await AlertService.check_error_rate(db, rule):
                continue

            # One timestamp for both the log row and the rule, so the two
            # records of the same event agree.
            now = datetime.utcnow()
            conditions = rule.conditions or {}

            alert_log = AlertLog(
                rule_id=rule.id,
                alert_type=rule.alert_type,
                severity=conditions.get('severity', 'warning'),
                message=f"错误率告警: {rule.target_type} 错误率超过阈值",
                details={
                    "target_type": rule.target_type,
                    "target_id": rule.target_id
                },
                status='pending',
                notification_type=rule.notification_type,
                triggered_at=now
            )
            db.add(alert_log)

            # Tolerate a NULL counter on rows that never triggered before.
            rule.trigger_count = (rule.trigger_count or 0) + 1
            rule.last_triggered_at = now

            db.commit()
            db.refresh(alert_log)

            # The alert is persisted at this point, so it counts as
            # triggered regardless of the notification outcome.
            triggered_count += 1

            await AlertService.send_notification(db, alert_log, rule)
        except Exception as e:
            # Reset the session; without this, a failed flush/commit
            # leaves it unusable for every remaining rule.
            db.rollback()
            logger.exception(f"检查告警规则失败 {rule_id}: {str(e)}")
            continue

    return {
        "message": f"告警检查完成,触发 {triggered_count} 个告警",
        "triggered_count": triggered_count
    }
|
||||
Reference in New Issue
Block a user