111 lines
4.0 KiB
Python
111 lines
4.0 KiB
Python
|
|
"""
|
|||
|
|
Automatic user-behavior collection middleware: non-intrusively records API call behavior.
|
|||
|
|
"""
|
|||
|
|
import time
|
|||
|
|
import logging
|
|||
|
|
from typing import Optional
|
|||
|
|
|
|||
|
|
from starlette.middleware.base import BaseHTTPMiddleware
|
|||
|
|
from starlette.requests import Request
|
|||
|
|
|
|||
|
|
from app.services.behavior_collector import behavior_collector
|
|||
|
|
|
|||
|
|
logger = logging.getLogger(__name__)

# Routing table used to classify requests. Each entry is
# (URL path prefix, behavior category, action prefix).
# Entries are checked in list order and the first matching prefix wins.
URL_CATEGORY_MAP = [
    ("/api/v1/agents", "agent", "manage_agent"),
    ("/api/v1/workflows", "workflow", "manage_workflow"),
    ("/api/v1/executions", "execution", "view_execution"),
    ("/api/v1/agent-chat", "agent", "chat_agent"),
    ("/api/v1/auth/", "general", "auth"),
    ("/api/v1/goals", "decision", "manage_goal"),
    ("/api/v1/tasks", "decision", "manage_task"),
    ("/api/v1/agent-schedules", "agent", "manage_schedule"),
    ("/api/v1/model-configs", "general", "manage_config"),
    ("/api/v1/data-sources", "general", "manage_datasource"),
]

# HTTP methods treated as read-only; requests using them are classified as
# "view" actions (less interesting for behavior learning).
READ_METHODS = {"GET", "HEAD", "OPTIONS"}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _classify_request(path: str, method: str) -> tuple:
    """Classify a request into a ``(category, action)`` pair.

    The first ``URL_CATEGORY_MAP`` entry whose prefix starts ``path`` supplies
    the category and the base action name. The HTTP method then selects a
    suffix: ``_view`` for methods in ``READ_METHODS``, ``_create`` for POST,
    ``_update`` for PUT/PATCH and ``_delete`` for DELETE. Any other method
    yields the base action name unchanged.

    When no prefix matches, the result is ``("general", "api_<method>")``
    with the method lower-cased.
    """
    # First matching routing entry, or None when the path is not mapped.
    entry = next(
        (row for row in URL_CATEGORY_MAP if path.startswith(row[0])),
        None,
    )
    if entry is None:
        return "general", f"api_{method.lower()}"

    _, category, base_action = entry

    # Read-only methods are checked first, before the write-method table.
    if method in READ_METHODS:
        suffix = "_view"
    else:
        suffix = {
            "POST": "_create",
            "PUT": "_update",
            "PATCH": "_update",
            "DELETE": "_delete",
        }.get(method, "")

    return category, f"{base_action}{suffix}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _extract_user_id(request: Request) -> Optional[str]:
    """Best-effort extraction of the caller's user ID from a Bearer JWT.

    Reads the ``Authorization`` header, base64url-decodes the payload (second)
    segment of the token and returns its ``user_id`` claim, falling back to
    ``sub`` when ``user_id`` is missing or falsy.

    SECURITY NOTE: the token signature is NOT verified here. The returned
    value is attacker-controllable and must only be used as an analytics
    hint, never for authentication or authorization decisions.

    Args:
        request: Incoming request; only ``request.headers`` is read.

    Returns:
        The claim value as stored in the token, or ``None`` when the header
        is absent, is not a Bearer credential, has fewer than two
        dot-separated segments, or its payload does not decode to a JSON
        object. This function does not raise for malformed tokens.
    """
    import base64
    import json

    header = request.headers.get("Authorization", "")

    # HTTP authentication scheme names are case-insensitive, so accept
    # "Bearer", "bearer", "BEARER", ...
    scheme, _, credentials = header.partition(" ")
    if scheme.lower() != "bearer":
        return None

    segments = credentials.strip().split(".")
    if len(segments) < 2:
        return None

    payload = segments[1]
    # JWT segments drop base64 padding; restore it (adds nothing when the
    # length is already a multiple of 4).
    payload += "=" * (-len(payload) % 4)

    try:
        claims = json.loads(base64.urlsafe_b64decode(payload))
    except (ValueError, RecursionError):
        # ValueError covers invalid base64 (binascii.Error), invalid UTF-8
        # (UnicodeDecodeError) and invalid JSON (JSONDecodeError);
        # RecursionError covers absurdly nested JSON.
        return None

    # A valid JWT payload is a JSON object; ignore arrays, strings, numbers.
    if not isinstance(claims, dict):
        return None

    return claims.get("user_id") or claims.get("sub")
|
|||
|
|
|
|||
|
|
|
|||
|
|
class BehaviorCollectionMiddleware(BaseHTTPMiddleware):
    """Middleware that records API calls as user-behavior events.

    Requests whose path does not start with ``/api/`` pass straight through.
    For API requests the downstream handler runs first; afterwards, if a user
    ID can be extracted from the request, one behavior record is handed to
    ``behavior_collector.log_fire_and_forget``. Any failure while recording
    is logged at debug level and never affects the response.
    """

    async def dispatch(self, request: Request, call_next):
        """Process one request and, for API paths, log the call afterwards.

        Args:
            request: The incoming request.
            call_next: Callable that invokes the next handler and returns
                the response.

        Returns:
            The response produced by ``call_next``, unmodified.
        """
        path = request.url.path

        # Skip non-API paths entirely (no timing, no logging).
        if not path.startswith("/api/"):
            return await call_next(request)

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed (or go negative) by wall-clock adjustments.
        started = time.perf_counter()
        response = await call_next(request)

        # Record the behavior; collection errors must never break the request.
        # NOTE(review): log_fire_and_forget is presumably non-blocking, as
        # its name suggests -- confirm against behavior_collector.
        try:
            user_id = _extract_user_id(request)
            if user_id:
                category, action = _classify_request(path, request.method)
                duration_ms = int((time.perf_counter() - started) * 1000)
                query_params = request.query_params
                client = request.client
                behavior_collector.log_fire_and_forget(
                    user_id=user_id,
                    category=category,
                    action=action,
                    context={
                        "url": path,
                        "method": request.method,
                        "query_params": str(query_params) if query_params else None,
                    },
                    result={
                        "status_code": response.status_code,
                        "duration_ms": duration_ms,
                    },
                    source="api",
                    ip_address=client.host if client else None,
                )
        except Exception as e:
            # Top-level best-effort boundary: keep the traceback for debugging
            # but do not propagate.
            logger.debug("行为采集失败: %s", e, exc_info=True)

        return response
|