fix: 修复35个安全与功能缺陷,补全知识进化/数字孪生/行为采集模块

## 安全修复 (12项)
- Webhook接口添加全局Token认证,过滤敏感请求头
- 修复JWT Base64 padding公式,防止签名验证绕过
- 数据库密码/飞书Token从源码移除,改为环境变量
- 工作流引擎添加路径遍历防护 (_resolve_safe_path)
- eval()添加模板长度上限检查
- 审批API添加认证依赖
- 前端v-html增强XSS转义,console.log仅开发模式输出
- 500错误不再暴露内部异常详情

## Agent运行时修复 (7项)
- 删除_inject_knowledge_context中未定义db变量的finally块
- 工具执行添加try/except保护,异常不崩溃Agent
- LLM重试计入budget计数器
- self_review异常时passed=False
- max_iterations截断标记success=False
- 工具参数JSON解析失败时记录警告日志
- run()开始时重置_llm_invocations计数器

## 配置与基础设施
- DEBUG默认False,SQL_ECHO独立配置项
- init_db()补全13个缺失模型导入
- 新增WEBHOOK_AUTH_TOKEN/SQL_ECHO配置项
- 新增.env.example模板文件

## 前端修复 (12项)
- 登录改用URLSearchParams替代FormData
- 401拦截器通过Pinia store统一清理状态
- SSE流超时从60s延长至300s
- final/error事件时清除streamTimeout
- localStorage聊天记录添加24h TTL
- safeParseArgCount替代模板中裸JSON.parse
- fetchUser 401时同时清除user对象

## 新增模块
- 知识进化: knowledge_extractor/retriever/tasks
- 数字孪生: shadow_executor/comparison模型
- 行为采集: behavior_middleware/collector/fingerprint_engine
- 代码审查: code_review_agent/document_review_agent
- 反馈学习: feedback_learner
- 瓶颈检测/优化引擎/成本估算/需求估算
- 速率限制器 (rate_limiter)
- Alembic迁移 015-020

## 文档
- 商业化落地计划
- 8篇docs文档 (架构/API/部署/开发/贡献等)
- Docker Compose生产配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-10 19:50:20 +08:00
parent f79dc0b3c6
commit ab1589921a
77 changed files with 9442 additions and 265 deletions

View File

@@ -0,0 +1,229 @@
"""
知识提取器 - 从 Agent 执行日志中用 LLM 提取可复用知识
"""
from __future__ import annotations
import json
import logging
from typing import Any, Dict, List, Optional
from sqlalchemy import desc
from app.core.database import SessionLocal
logger = logging.getLogger(__name__)
# Prompt template for LLM-based knowledge extraction.
# Placeholders: {input_text}, {output_text}, {success}, {tool_chain},
# {iterations_used}, {tool_calls_made}. The model is instructed to reply with
# JSON only: either a knowledge record (title/category/tags/situation/solution/
# caveats/confidence) or {"skip": true, "reason": ...} when the execution log
# contains nothing worth keeping. Literal braces are escaped as {{ }} for
# str.format(). NOTE(review): some Chinese punctuation looks stripped
# (e.g. missing comma/paren after "只输出 JSON") — confirm against upstream.
KNOWLEDGE_EXTRACTION_PROMPT = """你是一个知识工程专家。请从以下 Agent 执行记录中提取可复用的经验知识。
执行输入: {input_text}
执行输出: {output_text}
是否成功: {success}
工具调用链: {tool_chain}
迭代次数: {iterations_used}
工具调用总次数: {tool_calls_made}
请提取为以下 JSON 格式(只输出 JSON不要其他文本
{{
"title": "一句话标题处理MySQL死锁的重试策略",
"category": "bug_fix / best_practice / workaround / optimization / insight",
"tags": ["tag1", "tag2"],
"situation": "什么场景下适用",
"solution": "具体解决方案或操作步骤",
"caveats": "注意事项或已知限制",
"confidence": 0.0-1.0
}}
如果这条执行记录没有值得沉淀的知识,返回:
{{"skip": true, "reason": "原因"}}
"""
class KnowledgeExtractor:
    """Extract reusable knowledge from agent execution logs via an LLM.

    The extractor renders one execution log into KNOWLEDGE_EXTRACTION_PROMPT,
    asks the configured LLM for a JSON knowledge record, validates/normalizes
    the response, and (in the pipeline) persists new records as
    ``KnowledgeEntry`` rows.
    """

    def __init__(self, llm_model: str = "deepseek-v4-flash"):
        # Model name forwarded verbatim to the runtime LLM client.
        self.llm_model = llm_model

    def _sync_llm_call(self, prompt: str) -> str:
        """Call the LLM synchronously and return the raw text content.

        NOTE(review): uses ``asyncio.run``, so this must not be invoked from
        a thread that already has a running event loop (it would raise
        RuntimeError there) — intended for Celery/cron-style workers.
        """
        # Local imports keep the heavy agent runtime out of module import time.
        from app.agent_runtime.core import _LLMClient
        from app.agent_runtime.schemas import AgentLLMConfig
        import asyncio as aio

        client = _LLMClient(AgentLLMConfig(
            provider="deepseek",
            model=self.llm_model,
            temperature=0.3,
            max_iterations=1,
        ))
        result = aio.run(client.chat(
            messages=[{"role": "user", "content": prompt}],
            tools=None,
            iteration=1,
        ))
        # The client may return a dict with "content" or some bare value.
        return result.get("content", "") if isinstance(result, dict) else str(result)

    @staticmethod
    def _parse_llm_json(llm_output: str) -> Optional[Dict[str, Any]]:
        """Locate and parse the outermost {...} span in *llm_output*.

        Returns the parsed dict, or None when no JSON object is found,
        parsing fails, or the payload is valid JSON but not an object.
        """
        json_start = llm_output.find("{")
        json_end = llm_output.rfind("}") + 1
        if json_start < 0 or json_end <= json_start:
            return None
        try:
            data = json.loads(llm_output[json_start:json_end])
        except json.JSONDecodeError:
            logger.warning("知识提取 JSON 解析失败: %s", llm_output[:200])
            return None
        # The model could legally emit a JSON array/scalar; only dicts are usable.
        return data if isinstance(data, dict) else None

    def extract_from_execution(self, log: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Extract one knowledge record from a single execution log (sync).

        Parameters
        ----------
        log: dict with keys input_text / output_text / success / tool_chain /
             iterations_used / tool_calls_made (all optional).

        Returns
        -------
        A normalized dict (title, category, tags, situation, solution,
        caveats, confidence), or None when the log is too thin, the LLM call
        fails, the response is unparseable, the model opts to skip, or
        confidence is below 0.3.
        """
        input_text = log.get("input_text") or ""
        output_text = log.get("output_text") or ""
        success = bool(log.get("success", False))
        tool_chain = log.get("tool_chain") or []

        # Too little material to learn anything from.
        if len(input_text) < 20 or len(output_text) < 50:
            return None
        # A failed run with no tool activity carries no actionable experience.
        if not success and not tool_chain:
            return None

        tool_chain_str = json.dumps(tool_chain[:5], ensure_ascii=False) if tool_chain else ""
        prompt = KNOWLEDGE_EXTRACTION_PROMPT.format(
            input_text=input_text[:2000],
            output_text=output_text[:2000],
            # BUG FIX: was `"" if success else ""`, which rendered the
            # {success} field empty for both branches.
            success="是" if success else "否",
            tool_chain=tool_chain_str,
            iterations_used=log.get("iterations_used", 0),
            tool_calls_made=log.get("tool_calls_made", 0),
        )

        try:
            llm_output = self._sync_llm_call(prompt)
        except Exception as e:
            logger.warning("知识提取 LLM 调用失败: %s", e)
            return None

        data = self._parse_llm_json(llm_output)
        if data is None or data.get("skip"):
            return None

        # Guard against non-numeric confidence values from the LLM
        # (e.g. "high" or null); fall back to the neutral default.
        try:
            confidence = float(data.get("confidence", 0.5))
        except (TypeError, ValueError):
            confidence = 0.5
        if confidence < 0.3:
            return None

        tags = data.get("tags")
        return {
            # `or ...` also normalizes explicit JSON nulls, which would
            # otherwise leak None into string concatenation downstream.
            "title": str(data.get("title") or "未命名知识"),
            "category": str(data.get("category") or "insight"),
            "tags": tags if isinstance(tags, list) else [],
            "situation": str(data.get("situation") or ""),
            "solution": str(data.get("solution") or ""),
            "caveats": str(data.get("caveats") or ""),
            "confidence": confidence,
        }

    def run_extraction_pipeline(self, limit: int = 10) -> int:
        """Process up to *limit* unprocessed successful logs; return the
        number of new KnowledgeEntry rows stored.

        Every examined log is marked ``knowledge_extracted=True`` regardless
        of outcome, so it is never revisited. Entries whose title already
        exists are treated as duplicates and skipped. Any unexpected error
        rolls back and returns 0; the session is always closed.
        """
        from app.models.agent_execution_log import AgentExecutionLog
        from app.models.knowledge_entry import KnowledgeEntry

        db = None
        extracted = 0
        try:
            db = SessionLocal()
            logs = (
                db.query(AgentExecutionLog)
                .filter(
                    AgentExecutionLog.success.is_(True),
                    AgentExecutionLog.knowledge_extracted.is_(False),
                    AgentExecutionLog.output_text.isnot(None),
                )
                .order_by(desc(AgentExecutionLog.created_at))
                .limit(limit)
                .all()
            )
            if not logs:
                logger.info("知识提取管道: 没有待处理的新日志")
                return 0
            logger.info("知识提取管道: 找到 %d 条待处理日志", len(logs))
            for log_entry in logs:
                knowledge = self.extract_from_execution({
                    "input_text": log_entry.input_text,
                    "output_text": log_entry.output_text,
                    "success": log_entry.success,
                    "tool_chain": log_entry.tool_chain,
                    "iterations_used": log_entry.iterations_used,
                    "tool_calls_made": log_entry.tool_calls_made,
                })
                if not knowledge:
                    # Nothing worth keeping: mark processed so we don't retry.
                    log_entry.knowledge_extracted = True
                    db.commit()
                    continue
                # De-duplicate on exact title match.
                existing = (
                    db.query(KnowledgeEntry)
                    .filter(KnowledgeEntry.title == knowledge["title"])
                    .first()
                )
                if existing:
                    log_entry.knowledge_extracted = True
                    db.commit()
                    continue
                # Text blob later embedded for semantic retrieval.
                embedding_text = "\n".join((
                    knowledge["title"],
                    knowledge["situation"],
                    knowledge["solution"],
                    knowledge["caveats"],
                ))
                entry = KnowledgeEntry(
                    title=knowledge["title"],
                    category=knowledge["category"],
                    tags=knowledge["tags"],
                    situation=knowledge["situation"],
                    solution=knowledge["solution"],
                    caveats=knowledge["caveats"],
                    source_execution_ids=[str(log_entry.id)],
                    source_agent_name=log_entry.agent_name,
                    source_model=log_entry.model,
                    embedding_text=embedding_text,
                    extracted_by="llm_auto",
                    confidence=knowledge["confidence"],
                )
                db.add(entry)
                log_entry.knowledge_extracted = True
                # Per-log commit: one bad record doesn't discard earlier work.
                db.commit()
                extracted += 1
                logger.info(
                    "知识提取: %s -> %s (conf=%.2f)",
                    log_entry.agent_name, knowledge["title"], knowledge["confidence"],
                )
            return extracted
        except Exception as e:
            logger.error("知识提取管道异常: %s", e)
            if db:
                try:
                    db.rollback()
                except Exception:
                    pass
            return 0
        finally:
            if db:
                try:
                    db.close()
                except Exception:
                    pass
# Shared module-level instance built with the default model; importers use
# this rather than constructing their own extractor.
knowledge_extractor = KnowledgeExtractor()