Files
aiagent/backend/app/services/behavior_collector.py
renjianbo ab1589921a fix: 修复35个安全与功能缺陷,补全知识进化/数字孪生/行为采集模块
## 安全修复 (12项)
- Webhook接口添加全局Token认证,过滤敏感请求头
- 修复JWT Base64 padding公式,防止签名验证绕过
- 数据库密码/飞书Token从源码移除,改为环境变量
- 工作流引擎添加路径遍历防护 (_resolve_safe_path)
- eval()添加模板长度上限检查
- 审批API添加认证依赖
- 前端v-html增强XSS转义,console.log仅开发模式输出
- 500错误不再暴露内部异常详情

## Agent运行时修复 (7项)
- 删除_inject_knowledge_context中未定义db变量的finally块
- 工具执行添加try/except保护,异常不崩溃Agent
- LLM重试计入budget计数器
- self_review异常时passed=False
- max_iterations截断标记success=False
- 工具参数JSON解析失败时记录警告日志
- run()开始时重置_llm_invocations计数器

## 配置与基础设施
- DEBUG默认False,SQL_ECHO独立配置项
- init_db()补全13个缺失模型导入
- 新增WEBHOOK_AUTH_TOKEN/SQL_ECHO配置项
- 新增.env.example模板文件

## 前端修复 (12项)
- 登录改用URLSearchParams替代FormData
- 401拦截器通过Pinia store统一清理状态
- SSE流超时从60s延长至300s
- final/error事件时清除streamTimeout
- localStorage聊天记录添加24h TTL
- safeParseArgCount替代模板中裸JSON.parse
- fetchUser 401时同时清除user对象

## 新增模块
- 知识进化: knowledge_extractor/retriever/tasks
- 数字孪生: shadow_executor/comparison模型
- 行为采集: behavior_middleware/collector/fingerprint_engine
- 代码审查: code_review_agent/document_review_agent
- 反馈学习: feedback_learner
- 瓶颈检测/优化引擎/成本估算/需求估算
- 速率限制器 (rate_limiter)
- Alembic迁移 015-020

## 文档
- 商业化落地计划
- 8篇docs文档 (架构/API/部署/开发/贡献等)
- Docker Compose生产配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-10 19:50:20 +08:00

155 lines
4.8 KiB
Python

"""
用户行为采集服务 — 非侵入式记录用户操作,用于数字分身学习
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any, Dict, List, Optional
from sqlalchemy.orm import Session
from sqlalchemy import desc
from app.core.database import SessionLocal
from app.models.user_behavior import UserBehaviorLog, BehaviorCategory
logger = logging.getLogger(__name__)
class BehaviorCollector:
    """Record and query user behavior logs.

    Intended to be used through the module-level ``behavior_collector``
    instance. Every public method is best-effort: database failures are
    logged and converted into a neutral return value (``None``, ``[]`` or an
    empty stats dict) instead of propagating to the caller.
    """

    def __init__(self) -> None:
        # Strong references to in-flight fire-and-forget tasks. The event loop
        # itself only keeps weak references to tasks, so a task nobody else
        # references could be garbage-collected before it finishes.
        self._background_tasks = set()

    @staticmethod
    def _close_session(db: "Optional[Session]") -> None:
        """Close *db* if a session was opened; never raises."""
        if db is None:
            return
        try:
            db.close()
        except Exception:
            # Closing is cleanup only; a failure here must not mask the
            # result (or the original error) of the calling method.
            logger.debug("关闭数据库会话失败", exc_info=True)

    def _on_background_done(self, task) -> None:
        """Done-callback for fire-and-forget tasks: drop the reference and report errors."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        # Retrieving the exception also prevents asyncio's
        # "Task exception was never retrieved" message.
        error = task.exception()
        if error is not None:
            logger.warning("写入用户行为日志失败: %s", error)

    def log_sync(
        self,
        *,
        user_id: str,
        category: str,
        action: str,
        context: Optional[Dict[str, Any]] = None,
        result: Optional[Dict[str, Any]] = None,
        source: str = "api",
        session_id: Optional[str] = None,
        ip_address: Optional[str] = None,
        user_agent: Optional[str] = None,
    ) -> Optional[str]:
        """Write one behavior log entry synchronously.

        All arguments are keyword-only and are passed unchanged to the
        ``UserBehaviorLog`` constructor.

        Returns:
            The new entry's id converted to ``str``, or ``None`` when the
            write failed (the failure is logged as a warning).
        """
        db: "Optional[Session]" = None
        try:
            db = SessionLocal()
            entry = UserBehaviorLog(
                user_id=user_id,
                category=category,
                action=action,
                context=context,
                result=result,
                source=source,
                session_id=session_id,
                ip_address=ip_address,
                user_agent=user_agent,
            )
            db.add(entry)
            db.commit()
            # Reload the row so ``entry.id`` is populated before the session closes.
            db.refresh(entry)
            return str(entry.id)
        except Exception as e:
            logger.warning("写入用户行为日志失败: %s", e)
            if db is not None:
                try:
                    db.rollback()
                except Exception:
                    logger.debug("回滚用户行为日志事务失败", exc_info=True)
            return None
        finally:
            self._close_session(db)

    async def log(self, **kwargs) -> Optional[str]:
        """Write one entry without blocking the event loop.

        Runs :meth:`log_sync` with the same keyword arguments in the running
        loop's default executor and returns its result.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: self.log_sync(**kwargs))

    def log_fire_and_forget(self, **kwargs):
        """Schedule a write and return without waiting for its result.

        When an event loop is running in the current thread, the write is
        scheduled on it as a background task. When no loop is running (for
        example when called from plain synchronous code), the entry is written
        synchronously via :meth:`log_sync` so that it is not silently lost.

        This method never raises; scheduling problems are logged as warnings.
        """
        try:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No running loop: a coroutine created here would never be
                # awaited, so write directly instead of dropping the entry.
                self.log_sync(**kwargs)
                return
            task = loop.create_task(self.log(**kwargs))
            self._background_tasks.add(task)
            task.add_done_callback(self._on_background_done)
        except Exception as e:
            logger.warning("写入用户行为日志失败: %s", e)

    # ─── Query methods ───

    def get_user_behaviors(
        self,
        user_id: str,
        category: Optional[str] = None,
        limit: int = 50,
        skip: int = 0,
    ) -> List[Dict[str, Any]]:
        """Return a user's behavior history, newest first.

        Args:
            user_id: Only rows with this ``user_id`` are returned.
            category: Optional filter on the ``category`` column; a falsy
                value disables the filter.
            limit: Maximum number of rows to return.
            skip: Number of rows to skip before the first returned row.

        Returns:
            A list of dicts with the keys ``id``, ``category``, ``action``,
            ``context``, ``result``, ``source`` and ``created_at`` (ISO-8601
            string or ``None``). An empty list is returned when the query
            fails.
        """
        db: "Optional[Session]" = None
        try:
            db = SessionLocal()
            query = db.query(UserBehaviorLog).filter(UserBehaviorLog.user_id == user_id)
            if category:
                query = query.filter(UserBehaviorLog.category == category)
            query = (
                query.order_by(desc(UserBehaviorLog.created_at))
                .offset(skip)
                .limit(limit)
            )
            return [
                {
                    "id": row.id,
                    "category": row.category,
                    "action": row.action,
                    "context": row.context,
                    "result": row.result,
                    "source": row.source,
                    "created_at": row.created_at.isoformat() if row.created_at else None,
                }
                for row in query.all()
            ]
        except Exception as e:
            logger.warning("查询用户行为日志失败: %s", e)
            return []
        finally:
            self._close_session(db)

    def get_behavior_stats(self, user_id: str) -> Dict[str, Any]:
        """Return a summary of how many behaviors a user has logged.

        Returns:
            A dict with ``user_id``, ``total_behaviors`` (count of all rows
            for the user) and ``by_category`` (one count per
            ``BehaviorCategory`` value). When the query fails, the total is
            ``0`` and ``by_category`` is empty.
        """
        db: "Optional[Session]" = None
        try:
            db = SessionLocal()
            from sqlalchemy import func

            total = (
                db.query(func.count(UserBehaviorLog.id))
                .filter(UserBehaviorLog.user_id == user_id)
                .scalar()
                or 0
            )
            by_category = {}
            for cat in BehaviorCategory:
                # ``scalar()`` may yield None; normalise it to 0.
                by_category[cat.value] = (
                    db.query(func.count(UserBehaviorLog.id))
                    .filter(
                        UserBehaviorLog.user_id == user_id,
                        UserBehaviorLog.category == cat.value,
                    )
                    .scalar()
                    or 0
                )
            return {
                "user_id": user_id,
                "total_behaviors": total,
                "by_category": by_category,
            }
        except Exception as e:
            logger.warning("查询行为统计失败: %s", e)
            return {"user_id": user_id, "total_behaviors": 0, "by_category": {}}
        finally:
            self._close_session(db)
# Global singleton instance
behavior_collector = BehaviorCollector()