Files
aiagent/backend/app/services/knowledge_retriever.py
renjianbo ab1589921a fix: 修复35个安全与功能缺陷,补全知识进化/数字孪生/行为采集模块
## 安全修复 (12项)
- Webhook接口添加全局Token认证,过滤敏感请求头
- 修复JWT Base64 padding公式,防止签名验证绕过
- 数据库密码/飞书Token从源码移除,改为环境变量
- 工作流引擎添加路径遍历防护 (_resolve_safe_path)
- eval()添加模板长度上限检查
- 审批API添加认证依赖
- 前端v-html增强XSS转义,console.log仅开发模式输出
- 500错误不再暴露内部异常详情

## Agent运行时修复 (7项)
- 删除_inject_knowledge_context中未定义db变量的finally块
- 工具执行添加try/except保护,异常不崩溃Agent
- LLM重试计入budget计数器
- self_review异常时passed=False
- max_iterations截断标记success=False
- 工具参数JSON解析失败时记录警告日志
- run()开始时重置_llm_invocations计数器

## 配置与基础设施
- DEBUG默认False,SQL_ECHO独立配置项
- init_db()补全13个缺失模型导入
- 新增WEBHOOK_AUTH_TOKEN/SQL_ECHO配置项
- 新增.env.example模板文件

## 前端修复 (12项)
- 登录改用URLSearchParams替代FormData
- 401拦截器通过Pinia store统一清理状态
- SSE流超时从60s延长至300s
- final/error事件时清除streamTimeout
- localStorage聊天记录添加24h TTL
- safeParseArgCount替代模板中裸JSON.parse
- fetchUser 401时同时清除user对象

## 新增模块
- 知识进化: knowledge_extractor/retriever/tasks
- 数字孪生: shadow_executor/comparison模型
- 行为采集: behavior_middleware/collector/fingerprint_engine
- 代码审查: code_review_agent/document_review_agent
- 反馈学习: feedback_learner
- 瓶颈检测/优化引擎/成本估算/需求估算
- 速率限制器 (rate_limiter)
- Alembic迁移 015-020

## 文档
- 商业化落地计划
- 8篇docs文档 (架构/API/部署/开发/贡献等)
- Docker Compose生产配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-10 19:50:20 +08:00

116 lines
4.0 KiB
Python

"""
知识检索服务 — 从知识库中语义检索相关经验,注入到 Agent 执行上下文
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional
from sqlalchemy.orm import Session
from sqlalchemy import desc, func
from app.core.database import SessionLocal
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class KnowledgeRetriever:
    """Retrieve knowledge-base entries and inject them into an agent prompt.

    Matching is keyword based: the query is split on whitespace and each
    keyword is searched with SQL ``LIKE`` in the entry's title, situation and
    solution columns. The original docstring notes this may later be replaced
    by embedding-based semantic retrieval.
    """

    # Escape character passed to ``LIKE ... ESCAPE``. "/" is the same
    # character SQLAlchemy's own ``autoescape`` option uses by default.
    _LIKE_ESCAPE = "/"

    def __init__(self, top_k: int = 3, min_score: float = 0.3):
        """Store the retrieval settings.

        Args:
            top_k: Maximum number of entries ``retrieve`` returns.
            min_score: Score threshold kept on the instance.
        """
        self.top_k = top_k
        # NOTE(review): min_score is stored but no method of this class reads
        # it yet; confirm whether it should filter on confidence.
        self.min_score = min_score

    @classmethod
    def _like_pattern(cls, keyword: str) -> str:
        """Return a ``%keyword%`` LIKE pattern with wildcards in *keyword* escaped."""
        esc = cls._LIKE_ESCAPE
        # Escape the escape character first so the escapes added for "%" and
        # "_" are not themselves doubled.
        literal = (
            keyword.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )
        return f"%{literal}%"

    def retrieve(self, query: str, category: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return up to ``top_k`` active entries matching the query keywords.

        Only the first 100 characters of *query* are considered; they are
        split on whitespace, tokens shorter than 2 characters are dropped and
        at most 10 keywords are used. An entry matches when any keyword occurs
        in its title, situation or solution. If no keyword survives, no
        keyword filter is applied. Results are ordered by confidence, then by
        retrieval count, both descending.

        Side effect: ``retrieval_count`` of every returned entry is
        incremented and committed.

        Args:
            query: Free-text query (typically the user input).
            category: When truthy, restrict results to this category.

        Returns:
            A list of plain dicts with the keys ``id``, ``title``,
            ``category``, ``tags``, ``situation``, ``solution``, ``caveats``
            and ``confidence``. An empty list is returned when anything inside
            the database section fails; the failure is logged as a warning.
        """
        from sqlalchemy import or_
        # Imported lazily, and outside the try block, exactly like the
        # original: a failing model import propagates to the caller.
        from app.models.knowledge_entry import KnowledgeEntry

        db: Optional[Session] = None
        try:
            db = SessionLocal()
            q = db.query(KnowledgeEntry).filter(
                KnowledgeEntry.is_active == True,  # noqa: E712 - builds a SQL expression
            )
            if category:
                q = q.filter(KnowledgeEntry.category == category)

            # Keyword match against title / situation / solution.
            keywords = [w for w in query[:100].split() if len(w) >= 2]
            if keywords:
                searched_columns = (
                    KnowledgeEntry.title,
                    KnowledgeEntry.situation,
                    KnowledgeEntry.solution,
                )
                conditions = []
                for kw in keywords[:10]:
                    # Escape "%" and "_" so user text is matched literally
                    # instead of acting as a wildcard that matches every row.
                    pattern = self._like_pattern(kw)
                    for column in searched_columns:
                        conditions.append(
                            column.like(pattern, escape=self._LIKE_ESCAPE)
                        )
                q = q.filter(or_(*conditions))

            entries = (
                q.order_by(desc(KnowledgeEntry.confidence), desc(KnowledgeEntry.retrieval_count))
                .limit(self.top_k)
                .all()
            )

            results = []
            for entry in entries:
                # Track how often each entry has been handed out.
                entry.retrieval_count = (entry.retrieval_count or 0) + 1
                # Copy the values into a plain dict before the commit/close so
                # callers never touch a detached ORM object.
                results.append({
                    "id": str(entry.id),
                    "title": entry.title,
                    "category": entry.category,
                    "tags": entry.tags or [],
                    "situation": entry.situation,
                    "solution": entry.solution,
                    "caveats": entry.caveats,
                    "confidence": entry.confidence,
                })
            db.commit()
            return results
        except Exception as exc:
            # Retrieval is best-effort: log (with traceback) and fall back to
            # "no knowledge" rather than failing the caller.
            logger.warning("知识检索失败: %s", exc, exc_info=True)
            if db:
                try:
                    db.rollback()
                except Exception:
                    # Nothing more can be done if the rollback itself fails.
                    pass
            return []
        finally:
            if db:
                try:
                    db.close()
                except Exception:
                    # Closing is best-effort as well.
                    pass

    def format_for_prompt(self, entries: List[Dict[str, Any]]) -> str:
        """Render retrieved entries as a Markdown section for a prompt.

        Args:
            entries: Dicts shaped like the ones ``retrieve`` returns. Each
                must contain ``title``; ``situation``, ``solution`` and
                ``caveats`` are emitted only when truthy.

        Returns:
            The Markdown text (with Chinese headings), or ``""`` when
            *entries* is empty. The confidence percentage is left out of an
            entry's heading when its confidence is missing or ``None``.
        """
        if not entries:
            return ""

        parts = ["\n## 相关知识库经验(请参考以下经验来更好地完成任务)\n"]
        for i, entry in enumerate(entries, 1):
            confidence = entry.get("confidence")
            if confidence is None:
                # Formatting None with ".0%" raises TypeError, so the
                # percentage is simply omitted here.
                parts.append(f"### {i}. {entry['title']}")
            else:
                parts.append(f"### {i}. {entry['title']} (置信度: {confidence:.0%})")
            if entry.get("situation"):
                parts.append(f"**适用场景**: {entry['situation']}")
            if entry.get("solution"):
                parts.append(f"**方案**: {entry['solution']}")
            if entry.get("caveats"):
                parts.append(f"**注意**: {entry['caveats']}")
            # Blank line between entries.
            parts.append("")
        return "\n".join(parts)

    def inject_knowledge(self, system_prompt: str, user_input: str) -> str:
        """Append knowledge retrieved for *user_input* to *system_prompt*.

        Returns:
            ``system_prompt`` followed by the formatted knowledge section, or
            ``system_prompt`` unchanged when nothing was retrieved.
        """
        entries = self.retrieve(user_input)
        knowledge_text = self.format_for_prompt(entries)
        if knowledge_text:
            return system_prompt + knowledge_text
        return system_prompt
# Shared module-level instance built with the default settings (top_k=3, min_score=0.3).
knowledge_retriever = KnowledgeRetriever()