""" 用户行为自动采集中间件 — 非侵入式记录 API 调用行为 """ import time import logging from typing import Optional from starlette.middleware.base import BaseHTTPMiddleware from starlette.requests import Request from app.services.behavior_collector import behavior_collector logger = logging.getLogger(__name__) # URL → (category, action_prefix) 映射 URL_CATEGORY_MAP = [ ("/api/v1/agents", "agent", "manage_agent"), ("/api/v1/workflows", "workflow", "manage_workflow"), ("/api/v1/executions", "execution", "view_execution"), ("/api/v1/agent-chat", "agent", "chat_agent"), ("/api/v1/auth/", "general", "auth"), ("/api/v1/goals", "decision", "manage_goal"), ("/api/v1/tasks", "decision", "manage_task"), ("/api/v1/agent-schedules", "agent", "manage_schedule"), ("/api/v1/model-configs", "general", "manage_config"), ("/api/v1/data-sources", "general", "manage_datasource"), ] # Read-only HTTP methods (less interesting for behavior learning) READ_METHODS = {"GET", "HEAD", "OPTIONS"} def _classify_request(path: str, method: str) -> tuple: """根据 URL 路径和 HTTP 方法分类行为。""" for prefix, category, action_prefix in URL_CATEGORY_MAP: if path.startswith(prefix): if method in READ_METHODS: return category, f"{action_prefix}_view" elif method == "POST": return category, f"{action_prefix}_create" elif method in ("PUT", "PATCH"): return category, f"{action_prefix}_update" elif method == "DELETE": return category, f"{action_prefix}_delete" return category, action_prefix return "general", f"api_{method.lower()}" def _extract_user_id(request: Request) -> Optional[str]: """从请求中提取用户 ID(JWT token 解析)。""" try: # 尝试从 auth header 解析 auth = request.headers.get("Authorization", "") if auth.startswith("Bearer "): token = auth[7:] # 简单解析 JWT payload(不验证签名,只提取 user_id) import base64, json parts = token.split(".") if len(parts) >= 2: payload = parts[1] # 补齐 padding(正确处理 len%4==0 的情况) padding = (4 - len(payload) % 4) % 4 payload += "=" * padding decoded = base64.urlsafe_b64decode(payload) data = json.loads(decoded) return data.get("user_id") or data.get("sub") except Exception: pass return None class BehaviorCollectionMiddleware(BaseHTTPMiddleware): """自动采集 API 调用行为的中间件。""" async def dispatch(self, request: Request, call_next): start_time = time.time() # 跳过非 API 路径 path = request.url.path if not path.startswith("/api/"): return await call_next(request) response = await call_next(request) # 异步记录行为(fire-and-forget) try: user_id = _extract_user_id(request) if user_id: category, action = _classify_request(path, request.method) duration_ms = int((time.time() - start_time) * 1000) behavior_collector.log_fire_and_forget( user_id=user_id, category=category, action=action, context={ "url": path, "method": request.method, "query_params": str(request.query_params) if request.query_params else None, }, result={ "status_code": response.status_code, "duration_ms": duration_ms, }, source="api", ip_address=request.client.host if request.client else None, ) except Exception as e: logger.debug("行为采集失败: %s", e) return response