## 安全修复 (12项)

- Webhook接口添加全局Token认证,过滤敏感请求头
- 修复JWT Base64 padding公式,防止签名验证绕过
- 数据库密码/飞书Token从源码移除,改为环境变量
- 工作流引擎添加路径遍历防护 (_resolve_safe_path)
- eval()添加模板长度上限检查
- 审批API添加认证依赖
- 前端v-html增强XSS转义,console.log仅开发模式输出
- 500错误不再暴露内部异常详情

## Agent运行时修复 (7项)

- 删除_inject_knowledge_context中未定义db变量的finally块
- 工具执行添加try/except保护,异常不崩溃Agent
- LLM重试计入budget计数器
- self_review异常时passed=False
- max_iterations截断标记success=False
- 工具参数JSON解析失败时记录警告日志
- run()开始时重置_llm_invocations计数器

## 配置与基础设施

- DEBUG默认False,SQL_ECHO独立配置项
- init_db()补全13个缺失模型导入
- 新增WEBHOOK_AUTH_TOKEN/SQL_ECHO配置项
- 新增.env.example模板文件

## 前端修复 (12项)

- 登录改用URLSearchParams替代FormData
- 401拦截器通过Pinia store统一清理状态
- SSE流超时从60s延长至300s
- final/error事件时清除streamTimeout
- localStorage聊天记录添加24h TTL
- safeParseArgCount替代模板中裸JSON.parse
- fetchUser 401时同时清除user对象

## 新增模块

- 知识进化: knowledge_extractor/retriever/tasks
- 数字孪生: shadow_executor/comparison模型
- 行为采集: behavior_middleware/collector/fingerprint_engine
- 代码审查: code_review_agent/document_review_agent
- 反馈学习: feedback_learner
- 瓶颈检测/优化引擎/成本估算/需求估算
- 速率限制器 (rate_limiter)
- Alembic迁移 015-020

## 文档

- 商业化落地计划
- 8篇docs文档 (架构/API/部署/开发/贡献等)
- Docker Compose生产配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
111 lines
4.0 KiB
Python
111 lines
4.0 KiB
Python
"""
|
||
用户行为自动采集中间件 — 非侵入式记录 API 调用行为
|
||
"""
|
||
import time
|
||
import logging
|
||
from typing import Optional
|
||
|
||
from starlette.middleware.base import BaseHTTPMiddleware
|
||
from starlette.requests import Request
|
||
|
||
from app.services.behavior_collector import behavior_collector
|
||
|
||
logger = logging.getLogger(__name__)

# Route table consumed by _classify_request. Each entry is
# (URL prefix, behavior category, action prefix); entries are scanned in
# order and the first prefix that the request path starts with is used.
URL_CATEGORY_MAP = [
    ("/api/v1/agents",          "agent",     "manage_agent"),
    ("/api/v1/workflows",       "workflow",  "manage_workflow"),
    ("/api/v1/executions",      "execution", "view_execution"),
    ("/api/v1/agent-chat",      "agent",     "chat_agent"),
    ("/api/v1/auth/",           "general",   "auth"),
    ("/api/v1/goals",           "decision",  "manage_goal"),
    ("/api/v1/tasks",           "decision",  "manage_task"),
    ("/api/v1/agent-schedules", "agent",     "manage_schedule"),
    ("/api/v1/model-configs",   "general",   "manage_config"),
    ("/api/v1/data-sources",    "general",   "manage_datasource"),
]

# HTTP methods treated as read-only; requests using them are classified
# with the "_view" action suffix (less interesting for behavior learning).
READ_METHODS = {"GET", "HEAD", "OPTIONS"}
|
||
|
||
|
||
def _classify_request(path: str, method: str) -> tuple:
    """Classify a request into a ``(category, action)`` pair.

    The first ``URL_CATEGORY_MAP`` entry whose prefix ``path`` starts with
    supplies the category and the action prefix. The HTTP method then picks
    the action suffix: ``_view`` for methods in ``READ_METHODS``, ``_create``
    for POST, ``_update`` for PUT/PATCH and ``_delete`` for DELETE. Any other
    method yields the bare action prefix.

    Paths that match no entry are reported as
    ``("general", "api_<lower-cased method>")``.
    """
    matched = next(
        (entry for entry in URL_CATEGORY_MAP if path.startswith(entry[0])),
        None,
    )
    if matched is None:
        return "general", f"api_{method.lower()}"

    _, category, action_prefix = matched

    # Read-only methods are checked first, exactly as the write methods
    # would otherwise never be reached for them.
    if method in READ_METHODS:
        suffix = "_view"
    else:
        write_suffixes = {
            "POST": "_create",
            "PUT": "_update",
            "PATCH": "_update",
            "DELETE": "_delete",
        }
        # Unknown methods get no suffix, i.e. the bare action prefix.
        suffix = write_suffixes.get(method, "")

    return category, f"{action_prefix}{suffix}"
|
||
|
||
|
||
def _extract_user_id(request: Request) -> Optional[str]:
    """Best-effort extraction of the user id from a bearer JWT.

    Reads the ``Authorization`` header, base64url-decodes the second
    dot-separated segment of the token and parses it as JSON.

    SECURITY NOTE: the token signature is NOT verified here, so the returned
    value is whatever the client put in the payload. It must only be used
    for behavior analytics, never for authentication or authorization.

    Args:
        request: Incoming request whose headers are inspected.

    Returns:
        The payload's ``user_id`` claim, falling back to ``sub`` when
        ``user_id`` is missing or falsy. ``None`` when there is no bearer
        token, the token is malformed, or the payload is not a JSON object.
    """
    import base64
    import json

    auth = request.headers.get("Authorization", "")
    scheme, _, token = auth.partition(" ")
    # HTTP authentication scheme names are case-insensitive.
    if scheme.lower() != "bearer":
        return None

    parts = token.strip().split(".")
    if len(parts) < 2:
        return None

    payload = parts[1]
    # JWT segments drop base64 padding; restore it (adds nothing when the
    # length is already a multiple of 4).
    payload += "=" * (-len(payload) % 4)

    try:
        data = json.loads(base64.urlsafe_b64decode(payload))
    except ValueError:
        # Covers binascii.Error (bad base64), non-ASCII header text,
        # UnicodeDecodeError and json.JSONDecodeError - all are ValueError
        # subclasses. A malformed token simply means "no user".
        return None

    if not isinstance(data, dict):
        # Valid JSON but not an object, so there are no claims to read.
        return None

    return data.get("user_id") or data.get("sub")
|
||
|
||
|
||
class BehaviorCollectionMiddleware(BaseHTTPMiddleware):
    """Middleware that records API calls as user-behavior events."""

    async def dispatch(self, request: Request, call_next):
        """Forward the request and report it to the behavior collector.

        Requests whose path does not start with ``/api/`` are passed through
        untouched. For API requests the downstream call is timed, and when a
        user id can be extracted from the request, one event is handed to
        ``behavior_collector.log_fire_and_forget``. Any exception raised
        while building or submitting the event is logged at debug level and
        suppressed, so collection never affects the response.

        Args:
            request: The incoming request.
            call_next: Callable that invokes the rest of the stack and
                returns the response.

        Returns:
            The response produced by ``call_next``, unchanged.
        """
        path = request.url.path
        # Skip non-API paths entirely.
        if not path.startswith("/api/"):
            return await call_next(request)

        # perf_counter is monotonic; time.time() can jump when the system
        # clock is adjusted and yield negative or inflated durations.
        started = time.perf_counter()
        response = await call_next(request)
        # Measure right after the downstream call so the figure does not
        # include the collection work below.
        duration_ms = int((time.perf_counter() - started) * 1000)

        try:
            user_id = _extract_user_id(request)
            if user_id:
                category, action = _classify_request(path, request.method)
                behavior_collector.log_fire_and_forget(
                    user_id=user_id,
                    category=category,
                    action=action,
                    context={
                        "url": path,
                        "method": request.method,
                        # NOTE(review): the raw query string is stored and
                        # may carry sensitive values - confirm this is
                        # acceptable for the collector's storage.
                        "query_params": str(request.query_params) if request.query_params else None,
                    },
                    result={
                        "status_code": response.status_code,
                        "duration_ms": duration_ms,
                    },
                    source="api",
                    ip_address=request.client.host if request.client else None,
                )
        except Exception as e:
            # Deliberate best-effort boundary: never fail the request
            # because behavior collection failed.
            logger.debug("行为采集失败: %s", e)

        return response
|