Files
aiagent/backend/app/services/tool_discovery.py
renjianbo ab1589921a fix: 修复35个安全与功能缺陷,补全知识进化/数字孪生/行为采集模块
## 安全修复 (12项)
- Webhook接口添加全局Token认证,过滤敏感请求头
- 修复JWT Base64 padding公式,防止签名验证绕过
- 数据库密码/飞书Token从源码移除,改为环境变量
- 工作流引擎添加路径遍历防护 (_resolve_safe_path)
- eval()添加模板长度上限检查
- 审批API添加认证依赖
- 前端v-html增强XSS转义,console.log仅开发模式输出
- 500错误不再暴露内部异常详情

## Agent运行时修复 (7项)
- 删除_inject_knowledge_context中未定义db变量的finally块
- 工具执行添加try/except保护,异常不崩溃Agent
- LLM重试计入budget计数器
- self_review异常时passed=False
- max_iterations截断标记success=False
- 工具参数JSON解析失败时记录警告日志
- run()开始时重置_llm_invocations计数器

## 配置与基础设施
- DEBUG默认False,SQL_ECHO独立配置项
- init_db()补全13个缺失模型导入
- 新增WEBHOOK_AUTH_TOKEN/SQL_ECHO配置项
- 新增.env.example模板文件

## 前端修复 (12项)
- 登录改用URLSearchParams替代FormData
- 401拦截器通过Pinia store统一清理状态
- SSE流超时从60s延长至300s
- final/error事件时清除streamTimeout
- localStorage聊天记录添加24h TTL
- safeParseArgCount替代模板中裸JSON.parse
- fetchUser 401时同时清除user对象

## 新增模块
- 知识进化: knowledge_extractor/retriever/tasks
- 数字孪生: shadow_executor/comparison模型
- 行为采集: behavior_middleware/collector/fingerprint_engine
- 代码审查: code_review_agent/document_review_agent
- 反馈学习: feedback_learner
- 瓶颈检测/优化引擎/成本估算/需求估算
- 速率限制器 (rate_limiter)
- Alembic迁移 015-020

## 文档
- 商业化落地计划
- 8篇docs文档 (架构/API/部署/开发/贡献等)
- Docker Compose生产配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-10 19:50:20 +08:00

273 lines
9.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
新工具自动发现与集成 — 扫描内外工具源,自动评估和生成集成方案
"""
from __future__ import annotations
import logging
import os
import json
from typing import Any, Dict, List, Optional
from app.services.llm_service import llm_service
logger = logging.getLogger(__name__)
# Prompt template (Chinese) asking the LLM to judge whether a new tool is worth
# integrating into the platform. It is rendered with ``str.format``: the
# ``{tool_info}`` placeholder is substituted with the tool description, and the
# doubled braces around the sample JSON are format-escapes that render as a
# literal ``{...}`` object. The sample shows the keys the reply is expected to
# carry: match_score, scenarios, complexity, adapter_type, integration_plan, risks.
TOOL_DISCOVERY_PROMPT = """你是一个工具集成分析专家。分析以下新工具是否适合集成到 AI Agent 平台。
当前平台能力:
- Agent 工作流编排 (DAG)
- LLM 调用 (多模型支持)
- API 调用节点
- 代码执行节点 (Python/JS)
- 数据库查询节点
- 条件分支节点
- 通知节点 (飞书/Email)
新工具信息:
{tool_info}
请分析:
1. 匹配度 (0-1): 该工具与平台现有能力的互补程度
2. 适用场景: 什么情况下 Agent 需要使用此工具
3. 集成复杂度: low/medium/high
4. 建议的 adapter 类型: api_wrapper / code_executor / plugin
5. 集成方案: 简要描述如何接入
6. 潜在风险: 使用此工具需要注意的问题
返回 JSON 格式:
{{"match_score": 0.8, "scenarios": ["场景1"], "complexity": "medium", "adapter_type": "api_wrapper", "integration_plan": "方案描述", "risks": ["风险1"]}}
"""
class ToolDiscovery:
    """Discover candidate tools, score them with the LLM and scaffold adapters.

    The class scans the platform's ``tools`` directory for modules that are not
    yet known to the tool registry, asks the LLM to evaluate each candidate,
    ranks the evaluations and can emit a source-code skeleton for an adapter.
    """

    # Keys of the evaluation result that the LLM reply must never overwrite.
    _RESERVED_KEYS = frozenset({"tool_name", "llm_raw", "error"})

    def __init__(self):
        # Static catalogue of external places where new tools may be found.
        self._external_sources = [
            {"name": "github_trending", "url": "https://github.com/trending/python?since=weekly"},
            {"name": "mcp_marketplace", "url": "https://github.com/modelcontextprotocol/servers"},
        ]

    def scan_internal_tools(self, tools_dir: str = "") -> List[Dict[str, Any]]:
        """List the tool modules found in the platform's ``tools`` directory.

        Args:
            tools_dir: Directory to scan. When empty, ``<package>/tools`` is
                tried first and ``<package parent>/tools`` second, both
                resolved relative to this file.

        Returns:
            One dict per ``*.py`` file whose name does not start with an
            underscore, with keys ``tool_name``, ``source``, ``registered`` and
            ``file_path``. Entries are ordered by filename so that callers
            which truncate the list get a deterministic subset. An empty list
            is returned when the directory does not exist.
        """
        if not tools_dir:
            base = os.path.dirname(os.path.dirname(__file__))
            tools_dir = os.path.join(base, "tools")
            if not os.path.isdir(tools_dir):
                tools_dir = os.path.join(os.path.dirname(base), "tools")

        discovered: List[Dict[str, Any]] = []
        if not os.path.isdir(tools_dir):
            return discovered

        # Imported lazily so a missing tools directory never touches the registry.
        from app.services.tool_registry import tool_registry

        # If the registry exposes no ``get``, every tool is reported as unregistered.
        lookup = getattr(tool_registry, "get", None)

        # os.listdir() order is arbitrary; sort for reproducible results.
        for filename in sorted(os.listdir(tools_dir)):
            # The underscore check also covers __init__.py and other dunder files.
            if filename.startswith("_") or not filename.endswith(".py"):
                continue
            tool_name = filename[:-3]
            registered = lookup(tool_name) if callable(lookup) else None
            discovered.append({
                "tool_name": tool_name,
                "source": "internal",
                "registered": registered is not None,
                "file_path": os.path.join(tools_dir, filename),
            })
        return discovered

    def scan_external_sources(self) -> List[Dict[str, Any]]:
        """Describe the configured external tool sources (GitHub trending, MCP marketplace).

        No network request is made: each URL is only validated by constructing
        a ``urllib.request.Request``. ``status == "available"`` therefore means
        "well-formed URL", not "reachable host".

        Returns:
            One dict per source with ``source_name``, ``url`` and ``status``;
            entries whose URL is rejected additionally carry ``error``.
        """
        import urllib.request

        discovered: List[Dict[str, Any]] = []
        for source in self._external_sources:
            entry: Dict[str, Any] = {
                "source_name": source["name"],
                "url": source["url"],
            }
            try:
                # Request() raises ValueError for a URL without a usable scheme.
                urllib.request.Request(source["url"], headers={"User-Agent": "AI-Agent-Platform"})
            except ValueError as e:
                logger.warning("外部源 %s 不可用: %s", source["name"], e)
                entry["status"] = "unavailable"
                entry["error"] = str(e)
            else:
                entry["status"] = "available"
            discovered.append(entry)
        return discovered

    def evaluate_tool(self, tool_name: str, tool_description: str = "",
                      tool_source: str = "", tool_docs: str = "") -> Dict[str, Any]:
        """Ask the LLM how valuable it would be to integrate a single tool.

        Args:
            tool_name: Name of the tool being evaluated.
            tool_description: Short description inserted into the prompt.
            tool_source: Where the tool was found (e.g. ``"internal"``).
            tool_docs: Documentation or code excerpt; only the first 3000
                characters are sent.

        Returns:
            A dict that always contains ``tool_name``, ``match_score`` (float
            in ``[0, 1]``), ``scenarios``, ``complexity``, ``adapter_type``,
            ``integration_plan`` and ``risks``. ``llm_raw`` holds the first 500
            characters of the reply on success; ``error`` holds the exception
            text when the LLM call or JSON parsing fails. The method never
            raises for those failures.
        """
        tool_info = f"""
工具名称: {tool_name}
来源: {tool_source}
描述: {tool_description}
文档/代码摘要: {(tool_docs or "")[:3000]}
"""
        prompt = TOOL_DISCOVERY_PROMPT.format(tool_info=tool_info)
        result: Dict[str, Any] = {
            "tool_name": tool_name,
            "match_score": 0.0,
            "scenarios": [],
            "complexity": "unknown",
            "adapter_type": "unknown",
            "integration_plan": "",
            "risks": [],
        }
        try:
            response = llm_service.chat_sync(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=1000,
            )
            content = response.get("content", "") if isinstance(response, dict) else response
            # A dict reply may carry an explicit None content; normalise to text.
            content = "" if content is None else str(content)

            json_match = self._extract_json(content)
            if json_match:
                self._merge_evaluation(result, json.loads(json_match))
            result["llm_raw"] = content[:500]
        except Exception as e:
            # Best-effort boundary: a failed evaluation must not abort a batch run.
            logger.error("LLM 评估工具 %s 失败: %s", tool_name, e)
            result["error"] = str(e)
        return result

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert an LLM-supplied score to a float clamped to ``[0, 1]`` (0.0 if unusable)."""
        try:
            score = float(value)
        except (TypeError, ValueError):
            return 0.0
        if score != score:  # NaN is the only value unequal to itself
            return 0.0
        return min(max(score, 0.0), 1.0)

    @staticmethod
    def _as_str_list(value: Any) -> List[str]:
        """Normalise a scalar, ``None`` or sequence into a list of strings."""
        if value is None:
            return []
        if isinstance(value, str):
            return [value] if value else []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    def _merge_evaluation(self, result: Dict[str, Any], evaluation: Any) -> None:
        """Merge a parsed LLM evaluation into ``result`` without trusting its shape.

        Reserved keys (``tool_name``, ``llm_raw``, ``error``) are never
        overwritten, ``None`` values keep the existing default, and the known
        fields are coerced to the types downstream code relies on.
        """
        if not isinstance(evaluation, dict):
            return
        for key, value in evaluation.items():
            if key in self._RESERVED_KEYS or value is None:
                continue
            result[key] = value

        result["match_score"] = self._coerce_score(result.get("match_score"))
        for key in ("scenarios", "risks"):
            result[key] = self._as_str_list(result.get(key))
        for key in ("complexity", "adapter_type", "integration_plan"):
            value = result.get(key)
            if value is not None and not isinstance(value, str):
                result[key] = str(value)

    def _extract_json(self, text: str) -> Optional[str]:
        """Pull the JSON object out of an LLM reply.

        Lookup order:
            1. the first fenced code block (optionally tagged ``json``) that
               holds a ``{...}`` object;
            2. the first substring that decodes as a JSON object with a
               ``match_score`` key (nested objects and braces inside string
               values are handled by the JSON decoder);
            3. the first flat ``{...}`` span mentioning ``"match_score"``,
               returned even if it is not valid JSON so that the caller can
               report the parse error.

        Returns:
            The JSON text, or ``None`` when nothing suitable is found.
        """
        import re

        if not text:
            return None

        fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
        if fenced:
            return fenced.group(1)

        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                obj, end = decoder.raw_decode(text, start)
            except ValueError:
                pass  # not a JSON object at this brace; try the next one
            else:
                if isinstance(obj, dict) and "match_score" in obj:
                    return text[start:end]
            start = text.find("{", start + 1)

        legacy = re.search(r'\{[^{}]*"match_score"[^{}]*\}', text, re.DOTALL)
        return legacy.group(0) if legacy else None

    def discover_and_rank(self, limit: int = 10) -> Dict[str, Any]:
        """Run the full pipeline: scan internal tools, evaluate and rank them.

        Args:
            limit: Maximum number of unregistered tools to evaluate. Negative
                values are treated as 0.

        Returns:
            A report with the counters ``total_discovered``, ``unregistered``
            and ``evaluated``, the lists ``high_match`` (score >= 0.8),
            ``medium_match`` (0.5 <= score < 0.8) and ``all_ranked`` (sorted by
            score, best first), and a human-readable ``recommendation``.
        """
        internal = self.scan_internal_tools()
        # Only tools the registry does not know yet are worth evaluating.
        unregistered = [tool for tool in internal if not tool["registered"]]

        cap = None if limit is None else max(limit, 0)
        evaluations = [
            self.evaluate_tool(tool_name=tool["tool_name"], tool_source="internal")
            for tool in unregistered[:cap]
        ]

        def score_of(evaluation: Dict[str, Any]) -> float:
            # Coerce defensively so one malformed score cannot break the sort.
            return self._coerce_score(evaluation.get("match_score", 0))

        evaluations.sort(key=score_of, reverse=True)
        high_match = [e for e in evaluations if score_of(e) >= 0.8]
        medium_match = [e for e in evaluations if 0.5 <= score_of(e) < 0.8]

        return {
            "total_discovered": len(internal),
            "unregistered": len(unregistered),
            "evaluated": len(evaluations),
            "high_match": high_match,
            "medium_match": medium_match,
            "all_ranked": evaluations,
            "recommendation": (
                f"发现 {len(high_match)} 个高匹配工具建议立即集成, "
                f"{len(medium_match)} 个中匹配工具可进一步评估"
            ),
        }

    def generate_adapter(self, tool_name: str, evaluation: Dict[str, Any]) -> str:
        """Return adapter source code for ``tool_name`` based on its evaluation.

        ``evaluation["adapter_type"]`` selects the template: ``"api_wrapper"``
        (also the default when the key is missing) or ``"code_executor"``; any
        other value yields the plugin template.
        """
        evaluation = evaluation or {}
        adapter_type = evaluation.get("adapter_type", "api_wrapper")
        if adapter_type == "api_wrapper":
            return self._generate_api_adapter(tool_name, evaluation)
        if adapter_type == "code_executor":
            return self._generate_code_adapter(tool_name, evaluation)
        return self._generate_plugin_adapter(tool_name, evaluation)

    @staticmethod
    def _safe_identifier(tool_name: str) -> str:
        """Turn an arbitrary tool name into a valid, non-keyword Python identifier.

        Names that are already valid identifiers are returned unchanged.
        """
        import keyword
        import re

        ident = re.sub(r"\W", "_", str(tool_name))
        if not ident.isidentifier():
            # Fall back to plain ASCII and make sure it does not start with a digit.
            ident = re.sub(r"[^0-9A-Za-z_]", "_", ident)
            if not ident or ident[0].isdigit():
                ident = f"tool_{ident}"
        if keyword.iskeyword(ident):
            ident += "_"
        return ident

    @staticmethod
    def _docstring_text(value: Any) -> str:
        """Escape text so it cannot terminate or corrupt a generated docstring."""
        return str(value).replace("\\", "\\\\").replace('"""', "'''")

    def _generate_api_adapter(self, tool_name: str, eval_result: Dict[str, Any]) -> str:
        """Generate the source of an API-wrapper adapter stub.

        The tool name and the LLM-provided texts are sanitised before being
        placed into the generated module so the output is always valid Python.
        """
        ident = self._safe_identifier(tool_name)
        doc_name = self._docstring_text(tool_name)
        score = self._docstring_text(eval_result.get("match_score", "N/A"))
        scenarios = self._docstring_text(
            ", ".join(self._as_str_list(eval_result.get("scenarios", [])))
        )
        return f'''"""
{doc_name} — 自动发现的工具适配器
匹配度: {score}
场景: {scenarios}
"""
from typing import Any, Dict, Optional


async def {ident}(params: Dict[str, Any]) -> Dict[str, Any]:
    """Auto-generated adapter for {ident}"""
    try:
        # TODO: 根据 API 文档实现具体调用逻辑
        result = {{
            "success": True,
            "data": None,
            "message": "Adapter stub — 请根据 API 文档完善",
        }}
        return result
    except Exception as e:
        return {{"success": False, "error": str(e)}}
'''

    def _generate_code_adapter(self, tool_name: str, eval_result: Dict[str, Any]) -> str:
        """Generate the source of a code-executor adapter.

        SECURITY: the generated adapter runs caller-supplied code in a
        subprocess without sandboxing; review it before registering the tool.
        """
        ident = self._safe_identifier(tool_name)
        doc_name = self._docstring_text(tool_name)
        score = self._docstring_text(eval_result.get("match_score", "N/A"))
        return f'''"""
{doc_name} — 代码执行器适配器
匹配度: {score}
"""
import subprocess
from typing import Any, Dict


async def {ident}(code: str, language: str = "python") -> Dict[str, Any]:
    """Auto-generated code executor adapter for {ident}"""
    try:
        # python takes inline code via -c; node needs -e (its -c only syntax-checks a file)
        if language == "python":
            command = ["python", "-c", code]
        else:
            command = ["node", "-e", code]
        result = subprocess.run(
            command,
            capture_output=True, text=True, timeout=30
        )
        return {{
            "success": result.returncode == 0,
            "stdout": result.stdout,
            "stderr": result.stderr,
        }}
    except Exception as e:
        return {{"success": False, "error": str(e)}}
'''

    def _generate_plugin_adapter(self, tool_name: str, eval_result: Dict[str, Any]) -> str:
        """Generate the source of a plugin-class adapter stub.

        The class is named ``<CamelCasedToolName>Plugin`` and its ``name``
        attribute keeps the original, unsanitised tool name.
        """
        ident = self._safe_identifier(tool_name)
        class_name = ident.title().replace("_", "")
        doc_name = self._docstring_text(tool_name)
        score = self._docstring_text(eval_result.get("match_score", "N/A"))
        # json.dumps yields a double-quoted literal that is also valid Python.
        name_literal = json.dumps(str(tool_name), ensure_ascii=False)
        return f'''"""
{doc_name} — 插件适配器
匹配度: {score}
"""
from typing import Any, Dict


class {class_name}Plugin:
    """Auto-generated plugin adapter for {ident}"""

    def __init__(self):
        self.name = {name_literal}

    async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """执行插件逻辑"""
        # TODO: 根据文档实现
        return {{"success": True, "data": None}}
'''
# Module-level ToolDiscovery instance, created once when this module is imported.
tool_discovery = ToolDiscovery()