fix: 修复35个安全与功能缺陷,补全知识进化/数字孪生/行为采集模块

## 安全修复 (12项)
- Webhook接口添加全局Token认证,过滤敏感请求头
- 修复JWT Base64 padding公式,防止签名验证绕过
- 数据库密码/飞书Token从源码移除,改为环境变量
- 工作流引擎添加路径遍历防护 (_resolve_safe_path)
- eval()添加模板长度上限检查
- 审批API添加认证依赖
- 前端v-html增强XSS转义,console.log仅开发模式输出
- 500错误不再暴露内部异常详情

## Agent运行时修复 (7项)
- 删除_inject_knowledge_context中未定义db变量的finally块
- 工具执行添加try/except保护,异常不崩溃Agent
- LLM重试计入budget计数器
- self_review异常时passed=False
- max_iterations截断标记success=False
- 工具参数JSON解析失败时记录警告日志
- run()开始时重置_llm_invocations计数器

## 配置与基础设施
- DEBUG默认False,SQL_ECHO独立配置项
- init_db()补全13个缺失模型导入
- 新增WEBHOOK_AUTH_TOKEN/SQL_ECHO配置项
- 新增.env.example模板文件

## 前端修复 (12项)
- 登录改用URLSearchParams替代FormData
- 401拦截器通过Pinia store统一清理状态
- SSE流超时从60s延长至300s
- final/error事件时清除streamTimeout
- localStorage聊天记录添加24h TTL
- safeParseArgCount替代模板中裸JSON.parse
- fetchUser 401时同时清除user对象

## 新增模块
- 知识进化: knowledge_extractor/retriever/tasks
- 数字孪生: shadow_executor/comparison模型
- 行为采集: behavior_middleware/collector/fingerprint_engine
- 代码审查: code_review_agent/document_review_agent
- 反馈学习: feedback_learner
- 瓶颈检测/优化引擎/成本估算/需求估算
- 速率限制器 (rate_limiter)
- Alembic迁移 015-020

## 文档
- 商业化落地计划
- 8篇docs文档 (架构/API/部署/开发/贡献等)
- Docker Compose生产配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-10 19:50:20 +08:00
parent f79dc0b3c6
commit ab1589921a
77 changed files with 9442 additions and 265 deletions

View File

@@ -8,6 +8,7 @@ from pydantic import BaseModel
from typing import Optional, Dict, Any
import logging
from app.core.database import get_db
from app.core.config import settings
from app.models.workflow import Workflow
from app.models.execution import Execution
from app.tasks.workflow_tasks import execute_workflow_task
@@ -17,6 +18,35 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/webhooks", tags=["webhooks"])
# Deny-list of sensitive HTTP headers that must never be captured or stored,
# even when their name matches one of the allowed prefixes below.
_EXCLUDED_HEADERS = {
    'host', 'content-length', 'connection', 'user-agent',
    'authorization', 'cookie', 'set-cookie', 'x-forwarded-for',
    'x-real-ip', 'x-auth-token', 'x-api-key',
    # The webhook auth token is presumably delivered as X-Webhook-Token
    # (the routes hand `x_webhook_token` to the verifier — TODO confirm the
    # header name). It matches the 'x-' prefix, so without this entry the
    # shared secret would be copied into the workflow input data.
    'x-webhook-token',
}

# Allow-list of header-name prefixes: only headers whose lower-cased name
# starts with one of these (and is not excluded above) are captured.
_ALLOWED_WEBHOOK_HEADERS_PREFIXES = ('x-', 'webhook-', 'gitlab-', 'github-')
def _verify_webhook_token(x_webhook_token: Optional[str]) -> None:
    """Reject the request unless it carries the configured webhook token.

    When ``settings.WEBHOOK_AUTH_TOKEN`` is empty or unset, no check is
    performed and the call returns normally.

    Args:
        x_webhook_token: Token supplied by the caller, or ``None`` if absent.

    Raises:
        HTTPException: 401 when a token is configured and the supplied value
            is missing or does not match it.
    """
    # Function-scope import so this helper does not depend on the module's
    # import block.
    import hmac

    expected = settings.WEBHOOK_AUTH_TOKEN
    if not expected:
        return

    # Compare in constant time: a plain `!=` stops at the first differing
    # character and leaks timing information about the secret. Encoding to
    # bytes first lets compare_digest accept non-ASCII values as well.
    supplied = (x_webhook_token or "").encode("utf-8")
    if not hmac.compare_digest(supplied, expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Webhook token 认证失败")
def _filter_webhook_headers(raw_headers: Dict[str, str]) -> Dict[str, str]:
    """Return the subset of webhook request headers that may be kept.

    A header is kept when its lower-cased name is not in
    ``_EXCLUDED_HEADERS`` and starts with one of
    ``_ALLOWED_WEBHOOK_HEADERS_PREFIXES``. Kept entries retain the key and
    value exactly as they appear in ``raw_headers``.

    Args:
        raw_headers: Mapping of header name to header value.

    Returns:
        A new dict containing only the headers that passed both checks.
    """
    def _is_capturable(name: str) -> bool:
        # The deny-list is checked first, so it wins over the prefix allow-list.
        normalized = name.lower()
        if normalized in _EXCLUDED_HEADERS:
            return False
        return any(
            normalized.startswith(prefix)
            for prefix in _ALLOWED_WEBHOOK_HEADERS_PREFIXES
        )

    return {
        name: content
        for name, content in raw_headers.items()
        if _is_capturable(name)
    }
class WebhookTriggerRequest(BaseModel):
"""Webhook触发请求模型"""
@@ -47,35 +77,34 @@ async def trigger_workflow_by_webhook(
db: 数据库会话
"""
try:
# 验证 Webhook Token
_verify_webhook_token(x_webhook_token)
# 查找工作流
workflow = db.query(Workflow).filter(Workflow.id == workflow_id).first()
if not workflow:
raise HTTPException(status_code=404, detail="工作流不存在")
# 检查工作流状态
if workflow.status not in ['published', 'running']:
raise HTTPException(
status_code=400,
status_code=400,
detail=f"工作流状态为 {workflow.status}无法通过Webhook触发"
)
# 获取请求数据
try:
body_data = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {}
except:
except Exception:
body_data = {}
# 获取查询参数
query_params = dict(request.query_params)
# 获取请求头(排除一些系统头)
headers = {}
excluded_headers = ['host', 'content-length', 'connection', 'user-agent']
for key, value in request.headers.items():
if key.lower() not in excluded_headers:
headers[key] = value
# 获取请求头(仅保留安全的业务头)
headers = _filter_webhook_headers(dict(request.headers))
# 构建输入数据:合并查询参数、请求体和请求头
input_data = {
**query_params,
@@ -88,7 +117,7 @@ async def trigger_workflow_by_webhook(
'path': str(request.url.path)
}
}
# 创建执行记录
execution = Execution(
workflow_id=workflow_id,
@@ -98,7 +127,7 @@ async def trigger_workflow_by_webhook(
db.add(execution)
db.commit()
db.refresh(execution)
# 异步执行工作流
workflow_data = {
'nodes': workflow.nodes,
@@ -110,24 +139,24 @@ async def trigger_workflow_by_webhook(
workflow_data,
input_data
)
# 更新执行记录的task_id
execution.task_id = task.id
db.commit()
db.refresh(execution)
return {
"status": "success",
"message": "工作流已触发执行",
"execution_id": str(execution.id),
"task_id": task.id
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Webhook触发工作流失败: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=f"触发工作流失败: {str(e)}")
logger.error("Webhook触发工作流失败: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail="触发工作流失败")
@router.post("/trigger/by-name/{workflow_name}")
@@ -149,31 +178,30 @@ async def trigger_workflow_by_name(
db: 数据库会话
"""
try:
# 验证 Webhook Token
_verify_webhook_token(x_webhook_token)
# 查找工作流按名称且状态为published或running
workflow = db.query(Workflow).filter(
Workflow.name == workflow_name,
Workflow.status.in_(['published', 'running'])
).first()
if not workflow:
raise HTTPException(status_code=404, detail=f"未找到名称为 '{workflow_name}' 的已发布工作流")
# 获取请求数据
try:
body_data = await request.json() if request.headers.get("content-type", "").startswith("application/json") else {}
except:
except Exception:
body_data = {}
# 获取查询参数
query_params = dict(request.query_params)
# 获取请求头
headers = {}
excluded_headers = ['host', 'content-length', 'connection', 'user-agent']
for key, value in request.headers.items():
if key.lower() not in excluded_headers:
headers[key] = value
# 获取请求头(仅保留安全的业务头)
headers = _filter_webhook_headers(dict(request.headers))
# 构建输入数据
input_data = {
**query_params,
@@ -186,7 +214,7 @@ async def trigger_workflow_by_name(
'path': str(request.url.path)
}
}
# 创建执行记录
execution = Execution(
workflow_id=workflow.id,
@@ -196,7 +224,7 @@ async def trigger_workflow_by_name(
db.add(execution)
db.commit()
db.refresh(execution)
# 异步执行工作流
workflow_data = {
'nodes': workflow.nodes,
@@ -208,12 +236,12 @@ async def trigger_workflow_by_name(
workflow_data,
input_data
)
# 更新执行记录的task_id
execution.task_id = task.id
db.commit()
db.refresh(execution)
return {
"status": "success",
"message": "工作流已触发执行",
@@ -221,9 +249,9 @@ async def trigger_workflow_by_name(
"task_id": task.id,
"workflow_id": workflow.id
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Webhook触发工作流失败: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=f"触发工作流失败: {str(e)}")
logger.error("Webhook触发工作流失败: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail="触发工作流失败")