Files
aiagent/backend/app/services/builtin_tools.py

2318 lines
82 KiB
Python
Raw Normal View History

2026-01-23 09:49:45 +08:00
"""
内置工具实现
"""
2026-03-06 22:31:41 +08:00
from typing import Dict, Any, Optional, List, Tuple
from pathlib import Path
2026-01-23 09:49:45 +08:00
import httpx
import json
import os
import logging
import re
import math
from datetime import datetime, timedelta
import platform
import sys
2026-03-06 22:31:41 +08:00
import asyncio
import subprocess
import shlex
2026-03-06 22:31:41 +08:00
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError
from app.core.config import settings
2026-01-23 09:49:45 +08:00
logger = logging.getLogger(__name__)
def _local_file_workspace_root() -> Path:
    """Return the resolved root directory that file_read/file_write may touch.

    A non-blank ``settings.LOCAL_FILE_TOOLS_ROOT`` takes precedence (user
    home expanded, then resolved). Otherwise the root is the directory four
    levels above this source file.
    """
    configured = getattr(settings, "LOCAL_FILE_TOOLS_ROOT", None) or ""
    configured = configured.strip()
    if not configured:
        # backend/app/services/builtin_tools.py -> climb four parents, which
        # the original layout note describes as the repository root.
        location = Path(__file__).resolve()
        for _ in range(4):
            location = location.parent
        return location
    return Path(configured).expanduser().resolve()
def _sanitize_tool_path_string(file_path: str) -> str:
"""
去掉 LLM/DSML/Markdown 泄漏到路径末尾的非法字符 123.md<引号避免 WinError 123
"""
if not isinstance(file_path, str):
return str(file_path)
s = file_path.replace("\ufeff", "").strip()
s = s.strip('"').strip("'")
# 行尾非法Windows 文件名不能含 <>:"|?*;模型常把半角/全角尖括号粘在扩展名后
tail_bad = '<>"|?*'
while s and s[-1] in tail_bad:
s = s[:-1].rstrip()
# 若仍含未配对尖括号片段在行尾(如 xxx.md<
s = re.sub(r'[<][|]?\s*$', "", s).rstrip()
return s
def _resolve_path_under_workspace(file_path: str) -> Tuple[Optional[Path], Optional[str]]:
    """Resolve a caller-supplied path and require it to live in the workspace.

    Returns ``(path, None)`` on success or ``(None, message)`` when the path
    cannot be resolved or falls outside the workspace root. Relative paths
    are interpreted relative to the workspace root.
    """
    cleaned = _sanitize_tool_path_string(file_path)
    workspace = _local_file_workspace_root()
    try:
        candidate = Path(cleaned).expanduser()
        anchored = candidate if candidate.is_absolute() else workspace / candidate
        target = anchored.resolve()
    except (OSError, ValueError) as exc:
        return None, str(exc)

    try:
        # relative_to raises ValueError when target is not below workspace.
        target.relative_to(workspace)
    except ValueError:
        return None, f"不允许访问工作区外路径,允许根目录: {workspace}"
    return target, None
_HTTP_RESPONSE_HEADER_ALLOWLIST = frozenset(
{
"content-type",
"content-length",
"content-encoding",
"date",
"last-modified",
"location",
"server",
"cache-control",
}
)
def _compact_http_response_headers(response: httpx.Response) -> Dict[str, str]:
    """Return a trimmed header mapping suitable for the LLM context.

    ``Set-Cookie``/``Cookie`` are always dropped. Allow-listed headers are
    kept with values clipped to 2048 characters. When none of the headers is
    allow-listed, the first 12 remaining headers are kept instead, clipped to
    512 characters each.
    """

    def _clip(value: str, limit: int) -> str:
        return value if len(value) <= limit else value[:limit] + "..."

    visible = [
        (name, value)
        for name, value in response.headers.multi_items()
        if name.lower() not in ("set-cookie", "cookie")
    ]
    compact = {
        name: _clip(value, 2048)
        for name, value in visible
        if name.lower() in _HTTP_RESPONSE_HEADER_ALLOWLIST
    }
    if compact:
        return compact
    # Fallback: nothing matched the allow-list, so expose a small sample.
    return {name: _clip(value, 512) for name, value in visible[:12]}
def _truncate_http_body_for_tool(body: Any, max_chars: int) -> Tuple[Any, bool, Optional[str]]:
"""
HTTP 正文限制在 max_chars 字符以内避免门户首页等大 HTML 撑爆模型 context
返回 (可能被截断后的 body, 是否截断, 说明文案)
"""
if max_chars <= 0:
max_chars = 32_000
note: Optional[str] = None
if isinstance(body, str):
if len(body) <= max_chars:
return body, False, None
note = (
f"正文已截断:原始约 {len(body)} 字符,仅保留前 {max_chars} 字符。"
"门户/频道首页通常过大,摘要请优先使用具体文章页 URL。"
)
return body[:max_chars], True, note
try:
serialized = json.dumps(body, ensure_ascii=False)
except (TypeError, ValueError):
serialized = str(body)
if len(serialized) <= max_chars:
return body, False, None
note = f"JSON 响应已截断:序列化长度约 {len(serialized)} 字符,仅保留前 {max_chars} 字符。"
return serialized[:max_chars], True, note
2026-01-23 09:49:45 +08:00
async def http_request_tool(
    url: str,
    method: str = "GET",
    headers: Optional[Dict[str, str]] = None,
    body: Any = None,
    max_body_chars: Optional[int] = None,
) -> str:
    """Send an HTTP request and return the outcome as a JSON string.

    Args:
        url: Target URL.
        method: HTTP verb (GET, POST, PUT, DELETE); matched case-insensitively.
        headers: Optional request headers.
        body: Sent as the JSON request body for POST/PUT only.
        max_body_chars: Maximum number of response-body characters to include.
            Defaults to ``settings.HTTP_REQUEST_MAX_BODY_CHARS`` (32000 when
            unset); the effective value is clamped to 4096..200000.

    Returns:
        JSON with ``status_code``, ``headers`` and ``body`` (plus
        ``body_truncated``/``truncation_note`` when the body was cut), or
        ``{"error": ...}`` when anything fails.
    """
    try:
        if max_body_chars is None:
            char_limit = int(getattr(settings, "HTTP_REQUEST_MAX_BODY_CHARS", 32_000) or 32_000)
        else:
            char_limit = max_body_chars
        char_limit = max(4096, min(char_limit, 200_000))

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            # One deferred call per supported verb; only the chosen one runs.
            senders = {
                "GET": lambda: client.get(url, headers=headers),
                "POST": lambda: client.post(url, json=body, headers=headers),
                "PUT": lambda: client.put(url, json=body, headers=headers),
                "DELETE": lambda: client.delete(url, headers=headers),
            }
            send = senders.get(method.upper())
            if send is None:
                raise ValueError(f"不支持的HTTP方法: {method}")
            response = await send()

            # Prefer the parsed JSON body; fall back to the raw text.
            try:
                payload = response.json()
            except Exception:
                payload = response.text

            shown_body, was_cut, cut_note = _truncate_http_body_for_tool(payload, char_limit)
            result: Dict[str, Any] = {
                "status_code": response.status_code,
                "headers": _compact_http_response_headers(response),
                "body": shown_body,
            }
            if was_cut:
                result["body_truncated"] = True
            if cut_note:
                result["truncation_note"] = cut_note
            return json.dumps(result, ensure_ascii=False)
    except Exception as e:
        logger.error(f"HTTP请求工具执行失败: {str(e)}")
        return json.dumps({"error": str(e)}, ensure_ascii=False)
_FILE_READ_TEXT_SUFFIXES = frozenset(
{
".txt",
".md",
".markdown",
".csv",
".tsv",
".json",
".jsonl",
".xml",
".html",
".htm",
".css",
".js",
".mjs",
".cjs",
".ts",
".tsx",
".jsx",
".vue",
".py",
".java",
".go",
".rs",
".sql",
".sh",
".bat",
".ps1",
".yaml",
".yml",
".toml",
".ini",
".cfg",
".log",
".rst",
".tex",
".gitignore",
".env",
".properties",
}
)
_FILE_READ_IMAGE_SUFFIXES = frozenset(
{".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tif", ".tiff"}
)
def _truncate_utf8_text(s: str, max_bytes: int) -> str:
raw = s.encode("utf-8")
if len(raw) <= max_bytes:
return s
cut = raw[:max_bytes].decode("utf-8", errors="ignore")
return cut + "\n\n...[内容已按字节上限截断]"
def _read_pdf_text_sync(path: Path, max_bytes: int) -> str:
    """Extract the text layer of a PDF (blocking; uses ``pypdf``).

    Pages without text are skipped and the rest are joined with blank lines.
    Returns a fixed placeholder message when no page yields text; otherwise
    the text truncated to ``max_bytes`` UTF-8 bytes.
    """
    from pypdf import PdfReader

    page_texts = [page.extract_text() or "" for page in PdfReader(str(path)).pages]
    body = "\n\n".join(chunk for chunk in page_texts if chunk.strip())
    if body.strip():
        return _truncate_utf8_text(body, max_bytes)
    return "[PDF 未解析出文本,可能是扫描件或图片型 PDF可将页导出为图片后上传或安装 OCR 流程]"
def _read_docx_text_sync(path: Path, max_bytes: int) -> str:
    """Extract paragraph text from a .docx file (blocking; uses ``python-docx``).

    Blank paragraphs are skipped and the rest are joined with newlines. A
    placeholder message is used when nothing is found. The result is
    truncated to ``max_bytes`` UTF-8 bytes.
    """
    import docx

    document = docx.Document(str(path))
    kept = []
    for paragraph in document.paragraphs:
        content = paragraph.text
        if content and content.strip():
            kept.append(content)
    joined = "\n".join(kept)
    return _truncate_utf8_text(joined or "[docx 中未找到段落文本]", max_bytes)
def _read_xlsx_text_sync(path: Path, max_bytes: int) -> str:
    """Dump an .xlsx workbook as tab-separated text (blocking; uses ``openpyxl``).

    Each worksheet contributes a ``## <title>`` heading followed by one
    tab-joined line per row that has at least one non-blank cell. The workbook
    is opened read-only with ``data_only=True`` and always closed. The result
    is truncated to ``max_bytes`` UTF-8 bytes.
    """
    from openpyxl import load_workbook

    workbook = load_workbook(filename=str(path), read_only=True, data_only=True)
    try:
        rendered: List[str] = []
        for sheet in workbook.worksheets:
            rendered.append(f"## {sheet.title}")
            for row in sheet.iter_rows(values_only=True):
                cells = [str(value) if value is not None else "" for value in row]
                if not any(cell.strip() for cell in cells):
                    continue  # skip rows that are entirely blank
                rendered.append("\t".join(cells))
        body = "\n".join(rendered)
    finally:
        workbook.close()
    return _truncate_utf8_text(body or "[xlsx 中未读到单元格文本]", max_bytes)
def _tessdata_dir_for_ocr() -> Optional[Path]:
    """Locate the tessdata directory to pass to Tesseract via ``--tessdata-dir``.

    A non-blank ``settings.TESSERACT_TESSDATA_DIR`` is authoritative: it is
    returned when it is a directory, and ``None`` is returned otherwise (no
    fallback). Without that setting, ``<workspace root>/tessdata`` is used if
    it exists and contains at least one ``*.traineddata`` file.
    """
    configured = (getattr(settings, "TESSERACT_TESSDATA_DIR", None) or "").strip()
    if configured:
        explicit = Path(configured).expanduser().resolve()
        return explicit if explicit.is_dir() else None

    bundled = _local_file_workspace_root() / "tessdata"
    if not bundled.is_dir():
        return None
    has_models = next(bundled.glob("*.traineddata"), None) is not None
    return bundled.resolve() if has_models else None
def _read_image_ocr_sync(path: Path, max_bytes: int) -> str:
    """Run Tesseract OCR on an image file and return the recognized text (blocking).

    ``settings.TESSERACT_CMD`` (if set) selects the tesseract executable, and
    the directory from ``_tessdata_dir_for_ocr()`` (if any) is passed as
    ``--tessdata-dir``. Recognition is tried with ``chi_sim+eng`` first and
    ``eng`` second; the first non-blank result wins.

    Raises:
        Exception: the last OCR error, when no language produced text and at
            least one attempt raised.

    Returns:
        The recognized text truncated to ``max_bytes`` UTF-8 bytes, or a fixed
        placeholder message when nothing was recognized and no attempt failed.
    """
    from PIL import Image
    import pytesseract

    cmd = (getattr(settings, "TESSERACT_CMD", None) or "").strip()
    if cmd:
        pytesseract.pytesseract.tesseract_cmd = cmd
    tess_cfg = ""
    td = _tessdata_dir_for_ocr()
    if td is not None:
        tess_cfg = f"--tessdata-dir {shlex.quote(str(td))}"

    recognized = ""
    last_err: Optional[Exception] = None
    # Open the image in a context manager so the underlying file handle is
    # released even when OCR raises (the original never closed it).
    with Image.open(path) as source:
        img = source if source.mode in ("RGB", "L") else source.convert("RGB")
        for lang in ("chi_sim+eng", "eng"):
            try:
                chunk = pytesseract.image_to_string(img, lang=lang, config=tess_cfg) or ""
            except Exception as e:
                last_err = e
                continue
            if chunk.strip():
                recognized = chunk
                break

    if not recognized.strip() and last_err:
        raise last_err
    if not recognized.strip():
        return "[图片中未识别到文字;可尝试更清晰、正向拍摄的作业照片]"
    return _truncate_utf8_text(recognized, max_bytes)
2026-01-23 09:49:45 +08:00
def _file_read_error(message: str, path: Optional[Path] = None) -> str:
    """Serialize a file_read failure; ``file_path`` is included when ``path`` is given."""
    payload: Dict[str, Any] = {"error": message}
    if path is not None:
        payload["file_path"] = str(path)
    return json.dumps(payload, ensure_ascii=False)


async def file_read_tool(file_path: str) -> str:
    """Read a workspace file and return its text content as JSON.

    Images go through OCR, ``.pdf``/``.docx``/``.xlsx`` through their parsers,
    and everything else (known text suffixes, no suffix, unknown suffixes) is
    read as UTF-8 with undecodable bytes replaced. Legacy ``.doc`` is rejected.

    Args:
        file_path: Path relative to the workspace root, or an absolute path
            inside the workspace.

    Returns:
        JSON with ``file_path``, ``content``, ``size`` (UTF-8 byte length of
        ``content``) and ``extract_mode``; or JSON with ``error`` (and usually
        ``file_path``) on failure.
    """
    # suffix -> (extract_mode, blocking reader, name used in the "dependency
    # missing" message, pip package to suggest, label in the "failed" message)
    document_readers = {
        ".pdf": ("pdf", _read_pdf_text_sync, "PDF", "pypdf", "PDF"),
        ".docx": ("docx", _read_docx_text_sync, "Word", "python-docx", "docx"),
        ".xlsx": ("xlsx", _read_xlsx_text_sync, "Excel", "openpyxl", "xlsx"),
    }
    try:
        max_bytes = int(getattr(settings, "LOCAL_FILE_READ_MAX_BYTES", 2_097_152) or 2_097_152)
        path, err = _resolve_path_under_workspace(file_path)
        # Guard on the path too: an empty error string used to slip through
        # and crash on ``None.is_file()``.
        if err or path is None:
            return _file_read_error(err or "无法解析文件路径")
        if not path.is_file():
            return _file_read_error(f"文件不存在或不是普通文件: {path}")
        fsize = path.stat().st_size
        if fsize > max_bytes:
            return _file_read_error(f"文件过大({fsize} 字节),上限 {max_bytes}")

        suffix = path.suffix.lower()
        if suffix == ".doc":
            return _file_read_error("暂不支持旧版 .doc请在 Word 中另存为 .docx 后上传", path)

        content: str
        if suffix in _FILE_READ_IMAGE_SUFFIXES:
            extract_mode = "image_ocr"
            try:
                content = await asyncio.to_thread(_read_image_ocr_sync, path, max_bytes)
            except ImportError as e:
                return _file_read_error(
                    f"图片识别依赖未安装: {e}。请在后端环境执行 pip install Pillow pytesseract",
                    path,
                )
            except Exception as e:
                err_low = str(e).lower()
                # Map "tesseract binary missing" style failures to setup help.
                if "tesseract" in err_low or "tesseractnotfound" in type(e).__name__.lower():
                    return _file_read_error(
                        "未找到 Tesseract OCR。请安装 TesseractWindows 可装官方安装包),"
                        "并在 .env 中配置 TESSERACT_CMD 指向 tesseract.exe或将其加入 PATH"
                        "中文识别需额外下载 chi_sim 语言包。",
                        path,
                    )
                logger.warning("图片 OCR 失败: %s", e)
                return _file_read_error(f"图片识别失败: {e}", path)
        elif suffix in document_readers:
            extract_mode, reader, dep_name, pip_name, label = document_readers[suffix]
            try:
                content = await asyncio.to_thread(reader, path, max_bytes)
            except ImportError as e:
                return _file_read_error(
                    f"{dep_name} 解析依赖未安装: {e}。请 pip install {pip_name}", path
                )
            except Exception as e:
                logger.warning("%s 解析失败: %s", label, e)
                return _file_read_error(f"{label} 解析失败: {e}", path)
        else:
            # Known text suffixes, suffix-less files and unknown suffixes all
            # take this path. The read runs in a worker thread so it does not
            # block the event loop, like the parsers above.
            extract_mode = "text"
            raw_text = await asyncio.to_thread(
                path.read_text, encoding="utf-8", errors="replace"
            )
            content = _truncate_utf8_text(raw_text, max_bytes)

        out = {
            "file_path": str(path),
            "content": content,
            "size": len(content.encode("utf-8")),
            "extract_mode": extract_mode,
        }
        logger.info("file_read %s mode=%s content_len=%s", path, extract_mode, out["size"])
        return json.dumps(out, ensure_ascii=False)
    except FileNotFoundError:
        return json.dumps({"error": f"文件不存在: {file_path}"}, ensure_ascii=False)
    except Exception as e:
        logger.error(f"文件读取工具执行失败: {str(e)}")
        return json.dumps({"error": str(e)}, ensure_ascii=False)
2026-01-23 09:49:45 +08:00
async def file_write_tool(file_path: str, content: str, mode: str = "w") -> str:
    """Write UTF-8 text to a file inside the workspace.

    Args:
        file_path: Path relative to the workspace root, or an absolute path
            inside the workspace. Missing parent directories are created.
        content: Text to write; non-string values are converted with ``str()``.
        mode: ``"a"`` appends; any other value overwrites (``"w"``).

    Returns:
        JSON with ``success``, ``file_path``, ``mode`` and ``content_length``
        (UTF-8 byte length), or ``{"error": ...}`` on failure. Content larger
        than ``settings.LOCAL_FILE_WRITE_MAX_BYTES`` (default 2 MiB) is rejected.
    """
    try:
        max_bytes = int(getattr(settings, "LOCAL_FILE_WRITE_MAX_BYTES", 2_097_152) or 2_097_152)
        raw = content if isinstance(content, str) else str(content)
        enc_len = len(raw.encode("utf-8"))
        if enc_len > max_bytes:
            return json.dumps(
                {"error": f"写入内容过大({enc_len} 字节 UTF-8上限 {max_bytes}"},
                ensure_ascii=False,
            )
        path, err = _resolve_path_under_workspace(file_path)
        # Guard on the path too: an empty error string used to slip through
        # and crash on ``None.parent``.
        if err or path is None:
            return json.dumps({"error": err or "无法解析文件路径"}, ensure_ascii=False)

        write_mode = "a" if mode == "a" else "w"

        def _write() -> None:
            path.parent.mkdir(parents=True, exist_ok=True)
            with path.open(write_mode, encoding="utf-8") as f:
                f.write(raw)

        # Disk I/O runs in a worker thread so the event loop is not blocked.
        await asyncio.to_thread(_write)
        return json.dumps(
            {
                "success": True,
                "file_path": str(path),
                "mode": write_mode,
                "content_length": enc_len,
            },
            ensure_ascii=False,
        )
    except Exception as e:
        logger.error(f"文件写入工具执行失败: {str(e)}")
        return json.dumps({"error": str(e)}, ensure_ascii=False)
2026-01-23 09:49:45 +08:00
def _first_sentences(text: str, limit: int = 3) -> str:
    """Return the leading ``limit`` sentences of ``text`` with punctuation intact.

    A sentence ends at ``.``/``!``/``?`` followed by whitespace or end of
    text, or at a CJK terminator (``。``/``!``/``?``), which needs no
    trailing whitespace. Text with fewer sentences is returned whole.
    """
    stripped = text.strip()
    seen = 0
    for match in re.finditer(r'[.!?]+(?=\s|$)|[\u3002\uff01\uff1f]+', stripped):
        seen += 1
        if seen == limit:
            return stripped[:match.end()]
    return stripped


async def text_analyze_tool(text: str, operation: str = "count") -> str:
    """Analyze text: basic counts, frequent words, or a leading-sentence summary.

    Args:
        text: Text to analyze.
        operation: ``count`` (characters, words, lines, paragraphs),
            ``keywords`` (the 10 most frequent words longer than two
            characters) or ``summary`` (the first three sentences).

    Returns:
        JSON with the operation's result, or ``{"error": ...}`` for an
        unsupported operation or a failure.
    """
    from collections import Counter

    try:
        if operation == "count":
            return json.dumps({
                "char_count": len(text),
                # Only plain spaces are removed, not other whitespace.
                "char_count_no_spaces": len(text.replace(" ", "")),
                "word_count": len(text.split()),
                "line_count": len(text.splitlines()),
                "paragraph_count": len([p for p in text.split("\n\n") if p.strip()]),
            }, ensure_ascii=False)
        elif operation == "keywords":
            # Frequency-based extraction; ties keep first-seen order.
            frequencies = Counter(
                word for word in re.findall(r'\b\w+\b', text.lower()) if len(word) > 2
            )
            return json.dumps({
                "keywords": [
                    {"word": word, "frequency": count}
                    for word, count in frequencies.most_common(10)
                ]
            }, ensure_ascii=False)
        elif operation == "summary":
            # Slice the original text instead of re-joining fragments, so the
            # final period is not doubled and empty text yields "".
            summary = _first_sentences(text, 3)
            return json.dumps({
                "summary": summary,
                "original_length": len(text),
                "summary_length": len(summary),
            }, ensure_ascii=False)
        else:
            return json.dumps({
                "error": f"不支持的操作类型: {operation}"
            }, ensure_ascii=False)
    except Exception as e:
        logger.error(f"文本分析工具执行失败: {str(e)}")
        return json.dumps({
            "error": str(e)
        }, ensure_ascii=False)
async def datetime_tool(operation: str = "now", format: str = "%Y-%m-%d %H:%M:%S",
                        timezone: Optional[str] = None) -> str:
    """Report the current date and time.

    Args:
        operation: ``now`` (formatted time, POSIX timestamp and ISO string) or
            ``format`` (formatted time only). Both use the current time.
        format: ``strftime`` pattern for the formatted value.
        timezone: Optional zone name. ``UTC``/``Z`` or an IANA key such as
            ``Asia/Shanghai``. When omitted, the server's local time is used
            and the result is timezone-naive, as before.

    Returns:
        JSON with the requested fields, or ``{"error": ...}`` for an unknown
        operation, an unknown timezone, or a failure.
    """
    try:
        tzinfo = None
        if timezone:
            # Local imports: the ``timezone`` parameter shadows datetime.timezone.
            from datetime import timezone as _dt_timezone

            zone_name = timezone.strip()
            if zone_name.upper() in ("UTC", "Z"):
                # Works even when no tz database is installed.
                tzinfo = _dt_timezone.utc
            else:
                from zoneinfo import ZoneInfo

                try:
                    tzinfo = ZoneInfo(zone_name)
                except (KeyError, ValueError, OSError):
                    # ZoneInfoNotFoundError is a KeyError; malformed keys raise
                    # ValueError; directory-like keys can raise OSError.
                    return json.dumps({
                        "error": f"无效的时区: {timezone}"
                    }, ensure_ascii=False)

        if operation == "now":
            now = datetime.now(tzinfo)
            return json.dumps({
                "datetime": now.strftime(format),
                "timestamp": now.timestamp(),
                "iso_format": now.isoformat()
            }, ensure_ascii=False)
        elif operation == "format":
            now = datetime.now(tzinfo)
            return json.dumps({
                "formatted": now.strftime(format)
            }, ensure_ascii=False)
        else:
            return json.dumps({
                "error": f"不支持的操作类型: {operation}"
            }, ensure_ascii=False)
    except Exception as e:
        logger.error(f"日期时间工具执行失败: {str(e)}")
        return json.dumps({
            "error": str(e)
        }, ensure_ascii=False)
class _UnsafeMathExpression(ValueError):
    """Raised when an expression uses syntax outside the calculator whitelist."""


def _safe_math_eval(expression: str) -> Any:
    """Evaluate an arithmetic expression by walking its AST; ``eval`` is never called.

    Allowed: int/float literals, ``+ - * / // % **``, unary ``+``/``-``,
    parentheses, the constants ``pi`` and ``e``, and positional calls to
    ``abs, sqrt, sin, cos, tan, log, exp, pow``. Anything else raises
    ``_UnsafeMathExpression``.
    """
    import ast
    import operator

    functions = {
        "abs": abs,
        "sqrt": math.sqrt,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "log": math.log,
        "exp": math.exp,
        "pow": pow,
    }
    constants = {"pi": math.pi, "e": math.e}
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node: Any) -> Any:
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is an int subclass; reject it along with str/bytes/complex.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise _UnsafeMathExpression("unsupported literal")
            return value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.Name) and node.id in constants:
            return constants[node.id]
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in functions
            and not node.keywords
        ):
            return functions[node.func.id](*[evaluate(arg) for arg in node.args])
        # Attribute access, subscripts, boolean ops, lambdas, comprehensions,
        # starred arguments, unknown names, ... are all rejected here.
        raise _UnsafeMathExpression(type(node).__name__)

    # eval() tolerated leading whitespace; ast.parse does not, so strip first.
    tree = ast.parse(expression.strip(), mode="eval")
    return evaluate(tree.body)


async def math_calculate_tool(expression: str) -> str:
    """Evaluate a basic math expression and return the result as JSON.

    The previous implementation passed the string to ``eval`` behind a
    character check that was skipped entirely whenever any allowed function
    name appeared in the expression, so arbitrary Python could run. The
    expression is now evaluated by a whitelist AST walker.

    Args:
        expression: e.g. ``"2+2"``, ``"sqrt(16)"``, ``"sin(0)"``, ``"pi*2"``.

    Returns:
        JSON with ``expression``, ``result`` and ``result_type``; or
        ``{"error": ...}`` when the expression is rejected or fails.
    """
    try:
        result = _safe_math_eval(expression)
        return json.dumps({
            "expression": expression,
            "result": result,
            "result_type": type(result).__name__
        }, ensure_ascii=False)
    except _UnsafeMathExpression:
        return json.dumps({
            "error": "表达式包含不安全的字符"
        }, ensure_ascii=False)
    except Exception as e:
        logger.error(f"数学计算工具执行失败: {str(e)}")
        return json.dumps({
            "error": str(e)
        }, ensure_ascii=False)
async def system_info_tool() -> str:
    """Describe the host platform, the Python runtime and the file-tool workspace.

    Returns:
        JSON with platform details, the Python version, and
        ``local_file_workspace_root`` (the directory file_read/file_write are
        confined to); or ``{"error": ...}`` on failure.
    """
    try:
        version = sys.version_info
        info: Dict[str, Any] = {}
        info["platform"] = platform.system()
        info["platform_version"] = platform.version()
        info["architecture"] = platform.machine()
        info["processor"] = platform.processor()
        info["python_version"] = sys.version
        info["python_version_info"] = {
            "major": version.major,
            "minor": version.minor,
            "micro": version.micro,
        }
        # Same root that file_read/file_write enforce.
        info["local_file_workspace_root"] = str(_local_file_workspace_root())
        return json.dumps(info, ensure_ascii=False)
    except Exception as e:
        logger.error(f"系统信息工具执行失败: {str(e)}")
        return json.dumps({"error": str(e)}, ensure_ascii=False)
async def json_process_tool(json_string: str, operation: str = "parse") -> str:
    """Parse, pretty-print or validate a JSON string.

    Args:
        json_string: The JSON text to process.
        operation: ``parse`` (decoded value plus its Python type name),
            ``stringify`` (2-space-indented re-serialization; input that is
            not valid JSON is serialized as a plain value) or ``validate``.

    Returns:
        JSON with the operation's result, or ``{"error": ...}`` for an
        unsupported operation or a failure.
    """
    try:
        if operation == "parse":
            data = json.loads(json_string)
            return json.dumps({
                "parsed": data,
                "type": type(data).__name__
            }, ensure_ascii=False)
        elif operation == "stringify":
            # Re-serialize valid JSON; otherwise serialize the input itself.
            # Only decode failures are tolerated (JSONDecodeError is a
            # ValueError; non-string input raises TypeError). The bare
            # ``except:`` used here before also swallowed everything else.
            try:
                data = json.loads(json_string)
            except (ValueError, TypeError):
                data = json_string
            return json.dumps({
                "stringified": json.dumps(data, ensure_ascii=False, indent=2)
            }, ensure_ascii=False)
        elif operation == "validate":
            try:
                json.loads(json_string)
            except json.JSONDecodeError as e:
                return json.dumps({
                    "valid": False,
                    "error": str(e)
                }, ensure_ascii=False)
            return json.dumps({
                "valid": True
            }, ensure_ascii=False)
        else:
            return json.dumps({
                "error": f"不支持的操作类型: {operation}"
            }, ensure_ascii=False)
    except Exception as e:
        logger.error(f"JSON处理工具执行失败: {str(e)}")
        return json.dumps({
            "error": str(e)
        }, ensure_ascii=False)
2026-03-06 22:31:41 +08:00
def _validate_sql_query(sql: str) -> Tuple[bool, Optional[str]]:
2026-01-23 09:49:45 +08:00
"""
2026-03-06 22:31:41 +08:00
验证SQL查询的安全性
2026-01-23 09:49:45 +08:00
Args:
2026-03-06 22:31:41 +08:00
sql: SQL查询语句
2026-01-23 09:49:45 +08:00
Returns:
2026-03-06 22:31:41 +08:00
(是否安全, 错误信息)
2026-01-23 09:49:45 +08:00
"""
2026-03-06 22:31:41 +08:00
# 去除注释和多余空白
sql_clean = re.sub(r'--.*?\n', '', sql)
sql_clean = re.sub(r'/\*.*?\*/', '', sql_clean, flags=re.DOTALL)
sql_clean = ' '.join(sql_clean.split())
# 转换为小写以便检查
sql_lower = sql_clean.lower().strip()
# 只允许SELECT查询
if not sql_lower.startswith('select'):
return False, "只允许SELECT查询不允许INSERT、UPDATE、DELETE、DROP等操作"
# 检查危险关键字
dangerous_keywords = [
'insert', 'update', 'delete', 'drop', 'truncate', 'alter',
'create', 'grant', 'revoke', 'exec', 'execute', 'call',
'commit', 'rollback', 'savepoint'
]
for keyword in dangerous_keywords:
# 使用单词边界匹配,避免误判(如"select"中包含"select"
pattern = r'\b' + keyword + r'\b'
if re.search(pattern, sql_lower):
return False, f"检测到危险关键字: {keyword.upper()},查询被拒绝"
# 检查是否有多个SQL语句防止SQL注入
if ';' in sql_clean and sql_clean.count(';') > 1:
return False, "不允许执行多个SQL语句"
return True, None
async def _execute_default_db_query(sql: str, timeout: int = 30) -> List[Dict[str, Any]]:
    """Run a query on the application's default database engine.

    The blocking SQLAlchemy call runs in a worker thread and is awaited with
    a timeout.

    Args:
        sql: SQL text to execute (callers are expected to validate it first).
        timeout: Maximum seconds to wait for the result.

    Returns:
        One dict per row, keyed by column name.

    Raises:
        Exception: on timeout, database error or any other failure; the
            original exception is attached as ``__cause__``.
    """
    from app.core.database import engine

    def _execute() -> List[Dict[str, Any]]:
        with engine.connect() as connection:
            result = connection.execute(text(sql))
            columns = result.keys()
            rows = result.fetchall()
            return [dict(zip(columns, row)) for row in rows]

    try:
        # NOTE: wait_for only stops waiting; the worker thread running the
        # query is not interrupted when the timeout fires.
        return await asyncio.wait_for(asyncio.to_thread(_execute), timeout=timeout)
    except asyncio.TimeoutError as e:
        raise Exception(f"查询超时(超过{timeout}秒)") from e
    except SQLAlchemyError as e:
        raise Exception(f"数据库查询失败: {str(e)}") from e
    except Exception as e:
        raise Exception(f"执行查询时发生错误: {str(e)}") from e
async def _execute_data_source_query(data_source_id: str, sql: str, timeout: int = 30) -> List[Dict[str, Any]]:
    """Run a query through a configured external data source.

    Looks up the ``DataSource`` row, requires its status to be ``active``,
    builds a connector from its type and config, and runs
    ``connector.query({'sql': sql})`` in a worker thread with a timeout.

    Args:
        data_source_id: ID of the data source to query.
        sql: SQL text to execute.
        timeout: Maximum seconds to wait for the result.

    Returns:
        The connector result as a list; a non-list result is wrapped in a
        one-element list, and a falsy non-list result becomes ``[]``.

    Raises:
        Exception: on timeout, or on any other failure (including a missing
            or inactive data source), with the message prefixed by
            "数据源查询失败" and the original exception as ``__cause__``.
    """
    from app.core.database import SessionLocal
    from app.models.data_source import DataSource
    from app.services.data_source_connector import create_connector

    db = SessionLocal()
    try:
        data_source = db.query(DataSource).filter(DataSource.id == data_source_id).first()
        if not data_source:
            raise Exception(f"数据源不存在: {data_source_id}")
        if data_source.status != 'active':
            raise Exception(f"数据源状态异常: {data_source.status}")

        connector = create_connector(data_source.type, data_source.config)
        query_params = {'sql': sql}
        result = await asyncio.wait_for(
            asyncio.to_thread(connector.query, query_params),
            timeout=timeout
        )
        # Normalize to a list.
        if not isinstance(result, list):
            result = [result] if result else []
        return result
    except asyncio.TimeoutError as e:
        raise Exception(f"查询超时(超过{timeout}秒)") from e
    except Exception as e:
        raise Exception(f"数据源查询失败: {str(e)}") from e
    finally:
        db.close()
async def database_query_tool(
    query: str,
    database: str = "default",
    data_source_id: Optional[str] = None,
    timeout: int = 30
) -> str:
    """Validate and run a read-only SQL query, returning the rows as JSON.

    Args:
        query: SQL statement; must pass ``_validate_sql_query``.
        database: Unused; kept for backward compatibility.
        data_source_id: When given, the query runs on that data source;
            otherwise on the default database.
        timeout: Seconds to wait; must satisfy ``0 < timeout <= 300``.

    Returns:
        JSON with ``success``, ``row_count``, ``data`` and ``query``
        (non-JSON values are rendered with ``str``), or JSON with ``error``.
    """

    def _reply(payload: Dict[str, Any], **dump_options: Any) -> str:
        return json.dumps(payload, ensure_ascii=False, **dump_options)

    try:
        accepted, rejection = _validate_sql_query(query)
        if not accepted:
            return _reply({"error": rejection, "query": query})

        if timeout <= 0 or timeout > 300:
            return _reply({"error": "超时时间必须在1-300秒之间", "timeout": timeout})

        if data_source_id:
            rows = await _execute_data_source_query(data_source_id, query, timeout)
        else:
            rows = await _execute_default_db_query(query, timeout)

        return _reply(
            {
                "success": True,
                "row_count": len(rows),
                "data": rows,
                "query": query,
            },
            default=str,
        )
    except Exception as e:
        logger.error(f"数据库查询工具执行失败: {str(e)}")
        return _reply({"error": str(e), "query": query})
# Characters that would let an "adb shell" argument start a second command
# or a redirection once adb hands the joined string to the device shell.
_ADB_SHELL_METACHARS = re.compile(r'[;&|`$<>(){}\\\'"\n\r]')


def _build_adb_command(
    command: str,
    filter_tag: Optional[str],
    level: Optional[str],
    max_lines: int,
) -> Tuple[Optional[List[str]], Optional[str]]:
    """Translate tool arguments into an adb argv list; returns ``(argv, None)`` or ``(None, error)``."""
    if command == "logcat":
        # Options must precede filterspecs: -d dumps the buffer and exits,
        # -t limits the dump to the most recent lines.
        adb_cmd = ["adb", "logcat", "-d", "-t", str(max_lines)]
        priority: Optional[str] = None
        if level:
            priority = level.upper()
            if priority not in ("V", "D", "I", "W", "E", "F", "S"):
                return None, f"无效的日志级别: {priority},支持: V/D/I/W/E/F/S"
        if filter_tag:
            # A tag must not look like an option (e.g. "-c" clears the log).
            if filter_tag.startswith("-") or any(ch.isspace() for ch in filter_tag):
                return None, f"无效的日志标签: {filter_tag}"
            spec = filter_tag if ":" in filter_tag else f"{filter_tag}:{priority or 'V'}"
            # "*:S" silences every other tag so the tag filter takes effect.
            adb_cmd.extend([spec, "*:S"])
        elif priority:
            adb_cmd.append(f"*:{priority}")
        return adb_cmd, None

    if command == "devices":
        return ["adb", "devices", "-l"], None

    if command == "shell":
        # For shell, filter_tag carries the command line to run.
        if not filter_tag:
            return None, "shell命令需要提供filter_tag参数作为命令"
        safe_commands = ["getprop", "dumpsys", "pm", "am", "settings"]
        cmd_parts = filter_tag.split()
        if not cmd_parts or cmd_parts[0] not in safe_commands:
            return None, f"不允许执行命令: {filter_tag},只允许: {', '.join(safe_commands)}"
        # Without this, "getprop ; <anything>" passes the whitelist and the
        # device shell runs the second command.
        if _ADB_SHELL_METACHARS.search(filter_tag):
            return None, "shell命令包含不允许的特殊字符"
        return ["adb", "shell"] + cmd_parts, None

    return None, f"不支持的ADB命令: {command},支持: logcat/devices/shell"


async def adb_log_tool(
    command: str = "logcat",
    filter_tag: Optional[str] = None,
    level: Optional[str] = None,
    max_lines: int = 100,
    timeout: int = 10
) -> str:
    """Run a restricted adb command (logcat, devices or shell) and return JSON.

    Args:
        command: ``logcat`` (dump the log), ``devices`` (list devices) or
            ``shell`` (run a whitelisted device command).
        filter_tag: For logcat, a log tag to show exclusively (e.g.
            ``ActivityManager``). For shell, the command line to run; it must
            start with getprop/dumpsys/pm/am/settings and contain no shell
            metacharacters.
        level: Minimum logcat priority (V/D/I/W/E/F/S), case-insensitive.
        max_lines: Maximum output lines to return (1-10000).
        timeout: Seconds to allow the adb process (1-60).

    Returns:
        JSON with ``success``, ``command``, ``return_code``, ``output``,
        ``output_lines``, ``error`` (stderr or null) and ``timestamp``; or
        JSON with ``error`` when validation or execution fails.
    """
    adb_cmd: Optional[List[str]] = None
    try:
        if timeout <= 0 or timeout > 60:
            return json.dumps({
                "error": "超时时间必须在1-60秒之间",
                "timeout": timeout
            }, ensure_ascii=False)
        if max_lines <= 0 or max_lines > 10000:
            return json.dumps({
                "error": "最大行数必须在1-10000之间",
                "max_lines": max_lines
            }, ensure_ascii=False)

        adb_cmd, build_error = _build_adb_command(command, filter_tag, level, max_lines)
        if adb_cmd is None:
            return json.dumps({"error": build_error}, ensure_ascii=False)

        def _execute_adb():
            try:
                return subprocess.run(
                    adb_cmd,
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                    encoding='utf-8',
                    errors='replace'  # tolerate undecodable device output
                )
            except subprocess.TimeoutExpired as e:
                raise Exception(f"ADB命令执行超时超过{timeout}秒)") from e
            except FileNotFoundError as e:
                raise Exception("未找到adb命令请确保已安装Android SDK Platform Tools并配置PATH") from e
            except Exception as e:
                raise Exception(f"执行ADB命令失败: {str(e)}") from e

        # subprocess.run enforces the timeout itself; the extra 2 seconds
        # give it room to report the timeout before wait_for gives up.
        result = await asyncio.wait_for(
            asyncio.to_thread(_execute_adb),
            timeout=timeout + 2
        )

        output_lines = result.stdout.split('\n') if result.stdout else []
        error_lines = result.stderr.split('\n') if result.stderr else []
        total_lines = len(output_lines)
        if total_lines > max_lines:
            output_lines = output_lines[:max_lines]
            output_lines.append(f"... (已截断,共 {total_lines} 行)")

        return json.dumps({
            "success": result.returncode == 0,
            "command": " ".join(adb_cmd),
            "return_code": result.returncode,
            "output": "\n".join(output_lines),
            "output_lines": len(output_lines),
            "error": "\n".join(error_lines) if error_lines and any(error_lines) else None,
            "timestamp": datetime.now().isoformat()
        }, ensure_ascii=False)
    except asyncio.TimeoutError:
        return json.dumps({
            "error": f"ADB命令执行超时超过{timeout}秒)",
            "command": " ".join(adb_cmd) if adb_cmd else command
        }, ensure_ascii=False)
    except Exception as e:
        logger.error(f"ADB日志工具执行失败: {str(e)}")
        return json.dumps({
            "error": str(e),
            "command": " ".join(adb_cmd) if adb_cmd else command
        }, ensure_ascii=False)
2026-01-23 09:49:45 +08:00
# Tool definitions (OpenAI function-calling format)
# Function-calling schema for the http_request tool.
HTTP_REQUEST_SCHEMA = {
    "type": "function",
    "function": {
        "name": "http_request",
        "description": "发送HTTP请求支持GET、POST、PUT、DELETE方法。可以用于调用API、获取网页内容等。",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "请求的URL地址",
                },
                "method": {
                    "type": "string",
                    "enum": ["GET", "POST", "PUT", "DELETE"],
                    "description": "HTTP请求方法",
                    "default": "GET",
                },
                "headers": {
                    "type": "object",
                    "description": "HTTP请求头可选",
                    "additionalProperties": {"type": "string"},
                },
                "body": {
                    "type": "object",
                    "description": "请求体POST/PUT时使用可选",
                },
                "max_body_chars": {
                    "type": "integer",
                    "description": (
                        "响应正文写入结果的最大字符数(可选)。"
                        "门户首页等大 HTML 默认会按平台配置截断;摘要单篇文章可适当调大(如 80000仍可能受模型总上下文限制。"
                    ),
                },
            },
            "required": ["url", "method"],
        },
    },
}
# Function-calling schema for the file_read tool.
FILE_READ_SCHEMA = {
    "type": "function",
    "function": {
        "name": "file_read",
        "description": (
            "读取工作区内文件并尽量提取可读文本UTF-8 文本、.pdf 文字层、.docx 段落、.xlsx 单元格、"
            "常见图片(.png/.jpg 等)用 OCR 提取文字(需服务器安装 Tesseract可选配置 TESSERACT_CMD"
            "用户上传附件后消息中会给出相对路径,请用本工具读取该路径。路径须落在 LOCAL_FILE_TOOLS_ROOT 下。"
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    "description": "要读取的文件路径(相对于项目根目录或绝对路径)",
                },
            },
            "required": ["file_path"],
        },
    },
}
# Function-calling schema for the file_write tool.
FILE_WRITE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "file_write",
        "description": "写入本地文本文件UTF-8w 覆盖 / a 追加)。路径须落在工作区内;父目录不存在会自动创建。勿写入密码、密钥等大型敏感内容。",
        "parameters": {
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    "description": "要写入的文件路径(相对于项目根目录或绝对路径)",
                },
                "content": {
                    "type": "string",
                    "description": "要写入的内容",
                },
                "mode": {
                    "type": "string",
                    "enum": ["w", "a"],
                    "description": "写入模式w=覆盖写入a=追加写入",
                    "default": "w",
                },
            },
            "required": ["file_path", "content"],
        },
    },
}
# Function-calling schema for the text_analyze tool.
TEXT_ANALYZE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "text_analyze",
        "description": "分析文本内容,支持统计字数、提取关键词、生成摘要等功能。",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {
                    "type": "string",
                    "description": "要分析的文本内容",
                },
                "operation": {
                    "type": "string",
                    "enum": ["count", "keywords", "summary"],
                    "description": "操作类型count=统计字数等信息keywords=提取关键词summary=生成摘要",
                    "default": "count",
                },
            },
            "required": ["text"],
        },
    },
}
# Function-calling schema for the datetime tool.
DATETIME_SCHEMA = {
    "type": "function",
    "function": {
        "name": "datetime",
        "description": "获取和处理日期时间信息,支持获取当前时间、格式化时间等。",
        "parameters": {
            "type": "object",
            "properties": {
                "operation": {
                    "type": "string",
                    "enum": ["now", "format"],
                    "description": "操作类型now=获取当前时间format=格式化时间",
                    "default": "now",
                },
                "format": {
                    "type": "string",
                    "description": "时间格式字符串(如:%Y-%m-%d %H:%M:%S",
                    "default": "%Y-%m-%d %H:%M:%S",
                },
            },
        },
    },
}
# Function-calling schema for the math_calculate tool.
MATH_CALCULATE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "math_calculate",
        "description": "执行数学计算支持基本运算和常用数学函数如sqrt, sin, cos, log等",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "数学表达式2+2, sqrt(16), sin(0), pow(2,3)",
                },
            },
            "required": ["expression"],
        },
    },
}
# Function-calling schema for the system_info tool (takes no arguments).
SYSTEM_INFO_SCHEMA = {
    "type": "function",
    "function": {
        "name": "system_info",
        "description": "获取系统信息(含 local_file_workspace_rootfile_read/file_write 允许访问的根目录)。用户问「工作区路径」时应调用本工具并如实转述该字段。",
        "parameters": {
            "type": "object",
            "properties": {},
        },
    },
}
# Function-calling schema for the json_process tool.
JSON_PROCESS_SCHEMA = {
    "type": "function",
    "function": {
        "name": "json_process",
        "description": "处理JSON数据支持解析、序列化、验证等功能。",
        "parameters": {
            "type": "object",
            "properties": {
                "json_string": {
                    "type": "string",
                    "description": "JSON字符串",
                },
                "operation": {
                    "type": "string",
                    "enum": ["parse", "stringify", "validate"],
                    "description": "操作类型parse=解析JSONstringify=序列化为JSONvalidate=验证JSON格式",
                    "default": "parse",
                },
            },
            "required": ["json_string"],
        },
    },
}
2026-03-06 22:31:41 +08:00
# Function-calling schema for the adb_log tool.
ADB_LOG_SCHEMA = {
    "type": "function",
    "function": {
        "name": "adb_log",
        "description": "执行ADB命令获取Android设备日志。支持logcat获取日志、devices列出设备、shell执行shell命令。可以过滤日志标签和级别。",
        "parameters": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "enum": ["logcat", "devices", "shell"],
                    "description": "ADB命令类型logcat=获取日志devices=列出连接的设备shell=执行shell命令",
                    "default": "logcat",
                },
                "filter_tag": {
                    "type": "string",
                    "description": "日志标签过滤(如 'ActivityManager', 'SystemServer'或shell命令当command=shell时",
                },
                "level": {
                    "type": "string",
                    "enum": ["V", "D", "I", "W", "E", "F", "S"],
                    "description": "日志级别过滤V=Verbose, D=Debug, I=Info, W=Warning, E=Error, F=Fatal, S=Silent",
                },
                "max_lines": {
                    "type": "integer",
                    "description": "最大返回行数默认100行避免输出过长",
                    "default": 100,
                },
                "timeout": {
                    "type": "integer",
                    "description": "命令执行超时时间默认10秒最大60秒",
                    "default": 10,
                },
            },
        },
    },
}
2026-01-23 09:49:45 +08:00
# Function-calling schema for the database_query tool.
DATABASE_QUERY_SCHEMA = {
    "type": "function",
    "function": {
        "name": "database_query",
        "description": "执行数据库查询只允许SELECT查询支持默认数据库和指定数据源。可以查询工作流、Agent、执行记录等系统数据或通过数据源ID查询外部数据库。",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "SQL查询语句只允许SELECT查询不允许INSERT、UPDATE、DELETE等操作",
                },
                "data_source_id": {
                    "type": "string",
                    "description": "数据源ID可选如果提供则使用指定的数据源否则使用默认数据库",
                },
                "timeout": {
                    "type": "integer",
                    "description": "查询超时时间默认30秒最大300秒",
                    "default": 30,
                    "minimum": 1,
                    "maximum": 300,
                },
            },
            "required": ["query"],
        },
    },
}
# ─── Crypto / hash / encoding utilities ──────────────────
async def crypto_util_tool(
    operation: str = "uuid",
    data: Optional[str] = None,
    algorithm: str = "sha256",
) -> str:
    """Generate a UUID, Base64-encode/decode text, or hash text.

    Args:
        operation: ``uuid`` (random UUID4), ``base64_encode``,
            ``base64_decode`` or ``hash``.
        data: Input text; required (non-empty) for everything except ``uuid``.
            It is encoded as UTF-8, and decoded Base64 must be valid UTF-8.
        algorithm: Hash algorithm for ``hash``: md5, sha1, sha256 or sha512.

    Returns:
        JSON with the operation's result, or ``{"error": ...}``.
    """
    import hashlib
    import base64
    import uuid as _uuid

    def _reply(payload: Dict[str, Any]) -> str:
        return json.dumps(payload, ensure_ascii=False)

    try:
        if operation == "uuid":
            return _reply({"uuid": str(_uuid.uuid4()), "type": "uuid4"})

        if operation not in ("base64_encode", "base64_decode", "hash"):
            return _reply(
                {"error": f"不支持的操作: {operation},支持: uuid/base64_encode/base64_decode/hash"}
            )

        # Every remaining operation needs input data.
        if not data:
            return _reply({"error": "data 参数不能为空"})

        if operation == "base64_encode":
            encoded = base64.b64encode(data.encode("utf-8")).decode("utf-8")
            return _reply({"encoded": encoded})

        if operation == "base64_decode":
            try:
                decoded = base64.b64decode(data.encode("utf-8")).decode("utf-8")
                return _reply({"decoded": decoded})
            except Exception as e:
                return _reply({"error": f"Base64 解码失败: {e}"})

        # operation == "hash"
        hashers = {
            "md5": hashlib.md5,
            "sha1": hashlib.sha1,
            "sha256": hashlib.sha256,
            "sha512": hashlib.sha512,
        }
        hasher = hashers.get(algorithm)
        if hasher is None:
            return _reply(
                {"error": f"不支持的算法: {algorithm},支持: {list(hashers.keys())}"}
            )
        digest = hasher(data.encode("utf-8")).hexdigest()
        return _reply({"algorithm": algorithm, "hash": digest})
    except Exception as e:
        logger.error(f"crypto_util 工具执行失败: {e}", exc_info=True)
        return _reply({"error": f"操作失败: {e}"})
# ─── 随机生成工具 ─────────────────────────────────────────
async def random_generate_tool(
    generate_type: str = "password",
    length: int = 16,
    count: int = 1,
    min_val: float = 0,
    max_val: float = 100,
) -> str:
    """Generate random passwords, strings, integers or floats.

    All values are drawn from the operating system's CSPRNG (``secrets`` /
    ``secrets.SystemRandom``).

    Args:
        generate_type: ``password`` (letters + digits + symbols, with at
            least one character of each class), ``string`` (letters +
            digits), ``number`` (integer) or ``float``.
        length: Password length (clamped to 8..128) or string length
            (clamped to 1..256).
        count: How many values to produce, clamped to 1..50.
        min_val: Lower bound for ``number``/``float``.
        max_val: Upper bound for ``number``/``float``. Reversed bounds are
            swapped; for ``number`` both bounds are truncated with ``int``.

    Returns:
        A JSON string with the generated values, or ``{"error": ...}`` for
        an unknown type or on failure.
    """
    import string as _string
    import secrets

    symbols = "!@#$%^&*()_+-=[]{}|;:,.<>?"
    try:
        # SystemRandom supplies shuffle()/uniform() backed by os.urandom, so
        # every branch uses a cryptographically secure source.
        sys_rng = secrets.SystemRandom()
        # Tool arguments come from JSON and may arrive as 2.0 instead of 2.
        count = max(1, min(int(count), 50))

        if generate_type == "password":
            length = max(8, min(int(length), 128))
            char_classes = (
                _string.ascii_lowercase,
                _string.ascii_uppercase,
                _string.digits,
                symbols,
            )
            alphabet = _string.ascii_letters + _string.digits + symbols
            passwords = []
            for _ in range(count):
                # Seed with one character per class so the documented
                # composition always holds, then shuffle so the guaranteed
                # characters are not at predictable positions.
                chars = [secrets.choice(pool) for pool in char_classes]
                chars.extend(
                    secrets.choice(alphabet)
                    for _ in range(length - len(char_classes))
                )
                sys_rng.shuffle(chars)
                passwords.append("".join(chars))
            return json.dumps({
                "passwords": passwords,
                "length": length,
                "strength": "high" if length >= 16 else "medium" if length >= 12 else "basic",
            }, ensure_ascii=False)

        if generate_type == "string":
            length = max(1, min(int(length), 256))
            alphabet = _string.ascii_letters + _string.digits
            strings = [
                "".join(secrets.choice(alphabet) for _ in range(length))
                for _ in range(count)
            ]
            return json.dumps({"strings": strings, "length": length}, ensure_ascii=False)

        if generate_type == "number":
            low, high = int(min_val), int(max_val)
            if low > high:
                low, high = high, low
            # randbelow(n) yields 0..n-1, so +1 makes the range inclusive.
            numbers = [secrets.randbelow(high - low + 1) + low for _ in range(count)]
            return json.dumps({"numbers": numbers, "range": [low, high]}, ensure_ascii=False)

        if generate_type == "float":
            if min_val > max_val:
                min_val, max_val = max_val, min_val
            floats = [round(sys_rng.uniform(min_val, max_val), 6) for _ in range(count)]
            return json.dumps({"floats": floats, "range": [min_val, max_val]}, ensure_ascii=False)

        return json.dumps(
            {"error": f"不支持的类型: {generate_type},支持: password/string/number/float"},
            ensure_ascii=False,
        )
    except Exception as e:
        logger.error(f"random_generate 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"生成失败: {e}"}, ensure_ascii=False)
# ─── 邮件发送工具 ─────────────────────────────────────────
async def send_email_tool(
    to: str,
    subject: str,
    body: str,
    smtp_host: str = "",
    smtp_port: int = 587,
    smtp_user: str = "",
    smtp_password: str = "",
    from_name: str = "",
    body_type: str = "plain",
) -> str:
    """Send an e-mail through an SMTP server.

    Args:
        to: Recipient address(es); several may be separated by commas.
        subject: Message subject.
        body: Message body.
        smtp_host: SMTP server host; falls back to env ``SMTP_HOST``.
        smtp_port: SMTP port (default 587). Port 465 is connected over
            implicit TLS; any other port is upgraded with STARTTLS.
        smtp_user: SMTP user name / sender address; falls back to env
            ``SMTP_USER``.
        smtp_password: SMTP password or authorization code; falls back to
            env ``SMTP_PASSWORD``.
        from_name: Optional display name for the sender.
        body_type: ``html`` for an HTML body; anything else is sent as
            plain text.

    Returns:
        A JSON string: ``{"success": true, ...}`` with the recipient list,
        or ``{"error": ...}`` when configuration/recipients are missing or
        sending fails.
    """
    try:
        import os
        from email.utils import formataddr

        # Explicit arguments win; otherwise fall back to the environment.
        host = smtp_host or os.getenv("SMTP_HOST", "")
        user = smtp_user or os.getenv("SMTP_USER", "")
        password = smtp_password or os.getenv("SMTP_PASSWORD", "")
        if not host or not user or not password:
            return json.dumps({
                "error": "SMTP 配置不完整。请在 .env 中设置 SMTP_HOST/SMTP_USER/SMTP_PASSWORD或在调用时传入 smtp_host/smtp_user/smtp_password 参数",
            }, ensure_ascii=False)

        # Validate recipients before touching the optional SMTP dependency.
        recipients = [addr.strip() for addr in to.split(",") if addr.strip()]
        if not recipients:
            return json.dumps({"error": "收件人地址不能为空"}, ensure_ascii=False)

        import aiosmtplib
        from email.mime.text import MIMEText
        from email.mime.multipart import MIMEMultipart

        port = int(smtp_port)
        # Port 465 expects TLS from the first byte; STARTTLS there fails.
        # use_tls and start_tls are mutually exclusive in aiosmtplib.
        implicit_tls = port == 465

        # Header values must be single-line; collapsing line breaks also
        # prevents header injection through tool arguments.
        clean_subject = " ".join(subject.splitlines())
        clean_from_name = " ".join(from_name.splitlines()) if from_name else ""

        msg = MIMEMultipart()
        # formataddr quotes/encodes display names (non-ASCII, commas, ...).
        msg["From"] = formataddr((clean_from_name, user)) if clean_from_name else user
        msg["To"] = ", ".join(recipients)
        msg["Subject"] = clean_subject
        msg.attach(MIMEText(body, body_type if body_type == "html" else "plain", "utf-8"))

        await aiosmtplib.send(
            msg,
            hostname=host,
            port=port,
            username=user,
            password=password,
            use_tls=implicit_tls,
            start_tls=not implicit_tls,
        )
        return json.dumps({
            "success": True,
            "message": f"邮件已发送到 {len(recipients)} 个收件人",
            "recipients": recipients,
        }, ensure_ascii=False)
    except Exception as e:
        logger.error(f"send_email 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"邮件发送失败: {e}"}, ensure_ascii=False)
# ─── URL 解析工具 ─────────────────────────────────────────
async def url_parse_tool(
    url: str,
    operation: str = "parse",
    key: Optional[str] = None,
    params: Optional[str] = None,
) -> str:
    """Parse a URL, read its query parameters, or append parameters to it.

    Args:
        url: The URL to process.
        operation:
            - ``parse``: split the URL into scheme/hostname/port/path/
              params/query/fragment.
            - ``get_param``: return the first value of query parameter
              ``key``.
            - ``all_params``: return every query parameter (single values
              are unwrapped, repeated keys stay lists).
            - ``build``: merge ``params`` into the URL's query string.
        key: Query parameter name for ``get_param``.
        params: JSON object string for ``build``, e.g.
            ``{"page": "1", "size": "10"}``.

    Returns:
        A JSON string with the result, or ``{"error": ...}`` on invalid
        input or failure. Parameters with an empty value (``?a=``) are
        reported as ``""`` rather than dropped.
    """
    from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

    def _reply(payload):
        return json.dumps(payload, ensure_ascii=False)

    try:
        if operation == "parse":
            parsed = urlparse(url)
            return _reply({
                "scheme": parsed.scheme,
                "hostname": parsed.hostname,
                "port": parsed.port,
                "path": parsed.path,
                "params": parsed.params,
                "query": parsed.query,
                "fragment": parsed.fragment,
            })

        if operation == "get_param":
            if not key:
                return _reply({"error": "需要提供 key 参数"})
            # keep_blank_values so "?a=" yields "" instead of looking absent,
            # matching what the build operation already does.
            query_params = parse_qs(urlparse(url).query, keep_blank_values=True)
            value = query_params.get(key, [None])[0]
            return _reply({"key": key, "value": value})

        if operation == "all_params":
            query_params = parse_qs(urlparse(url).query, keep_blank_values=True)
            # parse_qs returns {key: [v1, v2]}; unwrap single-value lists.
            flat = {
                name: values[0] if len(values) == 1 else values
                for name, values in query_params.items()
            }
            return _reply({"params": flat})

        if operation == "build":
            parsed = urlparse(url)
            merged = parse_qs(parsed.query, keep_blank_values=True)
            if params:
                try:
                    extra = json.loads(params)
                except json.JSONDecodeError as e:
                    return _reply({"error": f"params JSON 解析失败: {e}"})
                # Report a non-object payload instead of silently ignoring it.
                if not isinstance(extra, dict):
                    return _reply({"error": "params 必须是 JSON 对象"})
                merged.update(extra)
            rebuilt = parsed._replace(query=urlencode(merged, doseq=True))
            return _reply({"url": urlunparse(rebuilt)})

        return _reply(
            {"error": f"不支持的操作: {operation},支持: parse/get_param/all_params/build"}
        )
    except Exception as e:
        logger.error(f"url_parse 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"URL 处理失败: {e}"}, ensure_ascii=False)
# ─── 正则表达式工具 ─────────────────────────────────────
async def regex_test_tool(
    pattern: str,
    test_string: str,
    operation: str = "match",
    flags: str = "",
    replacement: str = "",
) -> str:
    """Run a regular-expression operation against a string.

    Args:
        pattern: Regular expression to apply.
        test_string: Text the expression is applied to.
        operation:
            - ``match``: first match found by searching the string.
            - ``findall``: all matches (at most 100 are returned; ``count``
              is the full number).
            - ``replace``: substitute matches with ``replacement``.
            - ``split``: split the string on the pattern.
            - ``validate``: whether the whole string matches.
        flags: Any combination of ``i`` (ignore case), ``m`` (multiline)
            and ``s`` (dot matches newline); other letters are ignored.
        replacement: Replacement text for ``replace``.

    Returns:
        A JSON string with the result, or ``{"error": ...}`` for an invalid
        pattern, an unknown operation, or any other failure.
    """
    import re as _re

    def _reply(payload):
        return json.dumps(payload, ensure_ascii=False)

    try:
        # Translate the flag letters into a combined re flag value.
        flag_by_letter = {
            "i": _re.IGNORECASE,
            "m": _re.MULTILINE,
            "s": _re.DOTALL,
        }
        combined_flags = 0
        for letter in flags.lower():
            combined_flags |= flag_by_letter.get(letter, 0)

        if operation == "match":
            found = _re.search(pattern, test_string, combined_flags)
            if not found:
                return _reply({"match": None, "message": "无匹配"})
            details = {"match": found.group(0), "start": found.start(), "end": found.end()}
            positional = found.groups()
            if positional:
                details["groups"] = list(positional)
            named = found.groupdict()
            if named:
                details["named_groups"] = dict(named)
            return _reply(details)

        if operation == "findall":
            hits = _re.findall(pattern, test_string, combined_flags)
            return _reply({"count": len(hits), "matches": hits[:100]})

        if operation == "replace":
            replaced = _re.sub(pattern, replacement, test_string, flags=combined_flags)
            return _reply({"result": replaced})

        if operation == "split":
            pieces = _re.split(pattern, test_string, flags=combined_flags)
            return _reply({"parts": pieces, "count": len(pieces)})

        if operation == "validate":
            whole = _re.fullmatch(pattern, test_string, combined_flags)
            return _reply({"valid": bool(whole), "pattern": pattern})

        return _reply(
            {"error": f"不支持的操作: {operation},支持: match/findall/replace/split/validate"}
        )
    except _re.error as e:
        # Invalid pattern (or replacement template): report without logging.
        return json.dumps({"error": f"正则表达式语法错误: {e}"}, ensure_ascii=False)
    except Exception as e:
        logger.error(f"regex_test 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"操作失败: {e}"}, ensure_ascii=False)
# ─── 定时任务工具 ─────────────────────────────────────────
async def schedule_create_tool(
    agent_id: str,
    name: str,
    cron_expression: str,
    input_message: str,
    webhook_url: Optional[str] = None,
) -> str:
    """Create an enabled scheduled task for an Agent.

    Args:
        agent_id: ID of the Agent the task belongs to.
        name: Task name.
        cron_expression: Cron expression passed to ``compute_next_run``
            (the error message describes the standard 5-field format).
        input_message: Message stored on the schedule as its input.
        webhook_url: Optional webhook URL stored on the schedule.

    Returns:
        A JSON string: the created schedule's id, name, cron and next run
        time on success, or ``{"error": ...}`` when the Agent is missing,
        the cron expression is rejected, no user exists, or anything else
        fails.
    """
    try:
        from app.core.database import SessionLocal
        from app.models.agent import Agent
        from app.models.agent_schedule import AgentSchedule
        from app.services.agent_schedule_service import compute_next_run

        session = SessionLocal()
        try:
            owner_agent = session.query(Agent).filter(Agent.id == agent_id).first()
            if not owner_agent:
                return json.dumps({"error": f"Agent 不存在: {agent_id}"}, ensure_ascii=False)

            # Reject the cron expression before anything is written.
            try:
                next_run = compute_next_run(cron_expression)
            except (ValueError, KeyError) as e:
                return json.dumps({"error": f"cron 表达式无效: {e}(标准 5 位格式,如 0 9 * * *"}, ensure_ascii=False)

            # Prefer the Agent's own user; otherwise fall back to the first
            # user row in the database.
            owner_id = owner_agent.user_id
            if not owner_id:
                from app.models.user import User

                fallback_user = session.query(User).first()
                if not fallback_user:
                    return json.dumps({"error": "数据库无用户,无法创建定时任务"}, ensure_ascii=False)
                owner_id = fallback_user.id

            new_schedule = AgentSchedule(
                agent_id=agent_id,
                name=name,
                cron_expression=cron_expression,
                input_message=input_message,
                webhook_url=webhook_url,
                enabled=True,
                next_run_at=next_run,
                user_id=owner_id,
            )
            session.add(new_schedule)
            session.commit()
            # Reload the row so attributes such as the id can be read back.
            session.refresh(new_schedule)

            payload = {
                "success": True,
                "schedule_id": new_schedule.id,
                "name": new_schedule.name,
                "cron": new_schedule.cron_expression,
                "next_run_at": next_run.isoformat(),
                "message": f"定时任务「{name}」已创建,将于 {next_run.isoformat()} 首次执行",
            }
            return json.dumps(payload, ensure_ascii=False)
        finally:
            session.close()
    except Exception as e:
        logger.error(f"schedule_create 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"创建定时任务失败: {e}"}, ensure_ascii=False)
async def schedule_list_tool(agent_id: str) -> str:
    """List every scheduled task that belongs to an Agent.

    Args:
        agent_id: ID of the Agent whose schedules are listed.

    Returns:
        A JSON string with ``count`` and ``schedules`` (ordered by
        ``created_at`` descending), or ``{"error": ...}`` on failure.
    """
    try:
        from app.core.database import SessionLocal
        from app.models.agent_schedule import AgentSchedule

        session = SessionLocal()
        try:
            by_agent = session.query(AgentSchedule).filter(
                AgentSchedule.agent_id == agent_id,
            )
            rows = by_agent.order_by(AgentSchedule.created_at.desc()).all()
            # Timestamps may be unset, in which case null is emitted.
            entries = [
                {
                    "id": row.id,
                    "name": row.name,
                    "cron": row.cron_expression,
                    "enabled": row.enabled,
                    "next_run": row.next_run_at.isoformat() if row.next_run_at else None,
                    "last_run": row.last_run_at.isoformat() if row.last_run_at else None,
                    "last_status": row.last_run_status,
                }
                for row in rows
            ]
            return json.dumps(
                {"count": len(entries), "schedules": entries},
                ensure_ascii=False,
            )
        finally:
            session.close()
    except Exception as e:
        logger.error(f"schedule_list 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"查询定时任务失败: {e}"}, ensure_ascii=False)
async def schedule_delete_tool(schedule_id: str) -> str:
    """Delete one scheduled task by its ID.

    Args:
        schedule_id: ID of the schedule to delete.

    Returns:
        A JSON string: ``{"success": true, "message": ...}`` after the
        deletion is committed, or ``{"error": ...}`` when the schedule
        does not exist or the operation fails.
    """
    try:
        from app.core.database import SessionLocal
        from app.models.agent_schedule import AgentSchedule

        session = SessionLocal()
        try:
            lookup = session.query(AgentSchedule).filter(AgentSchedule.id == schedule_id)
            doomed = lookup.first()
            if not doomed:
                return json.dumps({"error": f"定时任务不存在: {schedule_id}"}, ensure_ascii=False)

            # Remember the name first; it is needed for the reply after the
            # row has been deleted.
            deleted_name = doomed.name
            session.delete(doomed)
            session.commit()

            reply = {
                "success": True,
                "message": f"定时任务「{deleted_name}」已删除",
            }
            return json.dumps(reply, ensure_ascii=False)
        finally:
            session.close()
    except Exception as e:
        logger.error(f"schedule_delete 工具执行失败: {e}", exc_info=True)
        return json.dumps({"error": f"删除定时任务失败: {e}"}, ensure_ascii=False)
# Function-calling schema for the crypto_util tool. No argument is required;
# the defaults mirror crypto_util_tool's signature.
CRYPTO_UTIL_SCHEMA = {
    "type": "function",
    "function": {
        "name": "crypto_util",
        "description": "加密/哈希/编码工具:生成 UUID、Base64 编解码、计算 MD5/SHA256 等哈希值。",
        "parameters": {
            "type": "object",
            "properties": {
                # Which operation to run.
                "operation": {
                    "type": "string",
                    "enum": ["uuid", "base64_encode", "base64_decode", "hash"],
                    "description": "操作uuid=生成唯一ID, base64_encode=编码, base64_decode=解码, hash=计算哈希",
                    "default": "uuid",
                },
                # Input text for the Base64 and hash operations.
                "data": {"type": "string", "description": "输入数据base64编解码或哈希时必填"},
                # Digest algorithm for the hash operation.
                "algorithm": {
                    "type": "string",
                    "enum": ["md5", "sha1", "sha256", "sha512"],
                    "description": "哈希算法hash 操作时使用,默认 sha256",
                    "default": "sha256",
                },
            },
        },
    },
}
# Function-calling schema for the random_generate tool. Every argument is
# optional and carries a default.
RANDOM_GENERATE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "random_generate",
        "description": "安全随机数据生成:密码、随机字符串、随机整数、随机浮点数。使用 secrets 模块保证安全性。",
        "parameters": {
            "type": "object",
            "properties": {
                # Kind of value to generate.
                "generate_type": {
                    "type": "string",
                    "enum": ["password", "string", "number", "float"],
                    "description": "类型password=安全密码, string=随机字符串, number=随机整数, float=随机浮点数",
                    "default": "password",
                },
                # Length of a password or string.
                "length": {
                    "type": "integer",
                    "description": "密码/字符串长度password 默认16最少8string 默认16",
                    "default": 16,
                    "minimum": 1,
                    "maximum": 256,
                },
                # Number of values to return.
                "count": {
                    "type": "integer",
                    "description": "生成数量默认1最多50",
                    "default": 1,
                    "minimum": 1,
                    "maximum": 50,
                },
                # Bounds used by the number/float types.
                "min_val": {"type": "number", "description": "最小值number/float 类型使用)", "default": 0},
                "max_val": {"type": "number", "description": "最大值number/float 类型使用)", "default": 100},
            },
        },
    },
}
# Function-calling schema for the send_email tool. Only the recipient,
# subject and body are required; the SMTP settings are optional.
SEND_EMAIL_SCHEMA = {
    "type": "function",
    "function": {
        "name": "send_email",
        "description": "发送邮件。SMTP 配置可从环境变量读取SMTP_HOST/SMTP_USER/SMTP_PASSWORD也可在调用时直接传入。",
        "parameters": {
            "type": "object",
            "properties": {
                # Message fields.
                "to": {"type": "string", "description": "收件人邮箱地址,多个用逗号分隔,如 a@x.com,b@x.com"},
                "subject": {"type": "string", "description": "邮件主题"},
                "body": {"type": "string", "description": "邮件正文内容"},
                # SMTP connection settings.
                "smtp_host": {
                    "type": "string",
                    "description": "SMTP 服务器地址(如 smtp.qq.com留空从环境变量 SMTP_HOST 读取",
                },
                "smtp_port": {"type": "integer", "description": "SMTP 端口,默认 587", "default": 587},
                "smtp_user": {
                    "type": "string",
                    "description": "发件人邮箱 / SMTP 用户名,留空从环境变量 SMTP_USER 读取",
                },
                "smtp_password": {
                    "type": "string",
                    "description": "SMTP 授权码/密码,留空从环境变量 SMTP_PASSWORD 读取",
                },
                # Presentation options.
                "from_name": {"type": "string", "description": "发件人显示名称(可选)"},
                "body_type": {
                    "type": "string",
                    "enum": ["plain", "html"],
                    "description": "正文类型,默认 plain",
                    "default": "plain",
                },
            },
            "required": ["to", "subject", "body"],
        },
    },
}
# Function-calling schema for the url_parse tool. Only the URL is required.
URL_PARSE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "url_parse",
        "description": "URL 解析与构建工具:解析 URL 各部分、提取查询参数、构建带参数的 URL。",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "要处理的 URL"},
                # Which operation to run.
                "operation": {
                    "type": "string",
                    "enum": ["parse", "get_param", "all_params", "build"],
                    "description": "操作parse=解析结构, get_param=取指定参数, all_params=所有参数, build=构建URL追加参数",
                },
                # Parameter name for get_param.
                "key": {"type": "string", "description": "查询参数名get_param 操作时使用)"},
                # JSON text holding the parameters for build.
                "params": {
                    "type": "string",
                    "description": "参数 JSONbuild 操作时使用),如 {\"page\":\"1\"}",
                },
            },
            "required": ["url"],
        },
    },
}
# Function-calling schema for the regex_test tool. The pattern and the
# target string are required.
REGEX_TEST_SCHEMA = {
    "type": "function",
    "function": {
        "name": "regex_test",
        "description": "正则表达式测试工具:匹配查找、查找全部、替换、分割、验证等操作。",
        "parameters": {
            "type": "object",
            "properties": {
                "pattern": {
                    "type": "string",
                    "description": "正则表达式模式,如 \\d{4}-\\d{2}-\\d{2}",
                },
                "test_string": {"type": "string", "description": "要测试的目标字符串"},
                # Which operation to run.
                "operation": {
                    "type": "string",
                    "enum": ["match", "findall", "replace", "split", "validate"],
                    "description": "操作match=首个匹配, findall=全部匹配, replace=替换, split=分割, validate=验证完整匹配",
                    "default": "match",
                },
                # Flag letters: i, m, s.
                "flags": {
                    "type": "string",
                    "description": "标志组合i=忽略大小写, m=多行, s=点匹配换行(如 is 表示忽略大小写+点匹配换行)",
                },
                # Replacement text for the replace operation.
                "replacement": {"type": "string", "description": "替换文本replace 操作时使用)"},
            },
            "required": ["pattern", "test_string"],
        },
    },
}
# Function-calling schema for the schedule_create tool. Everything except
# the webhook URL is required.
SCHEDULE_CREATE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "schedule_create",
        "description": "为 Agent 创建定时任务,按 cron 表达式周期自动执行。可用于设置每日推送、定时检查等。",
        "parameters": {
            "type": "object",
            "properties": {
                "agent_id": {"type": "string", "description": "Agent ID即当前对话所属的 Agent"},
                "name": {"type": "string", "description": "任务名称,如「每日早报推送」「每小时检查」"},
                # Cron expression; the description lists common examples.
                "cron_expression": {
                    "type": "string",
                    "description": "标准 5 位 cron 表达式,空格分隔。常见:每分钟=* * * * *,每小时=0 * * * *每天9点=0 9 * * *每天18点=0 18 * * *每周一9点=0 9 * * 1每月1号=0 9 1 * *",
                },
                # Message delivered to the Agent on each trigger.
                "input_message": {
                    "type": "string",
                    "description": "每次定时触发时发送给 Agent 的消息内容,如「帮我生成今日工作汇总并推送」",
                },
                # Optional notification webhook.
                "webhook_url": {
                    "type": "string",
                    "description": "可选,飞书机器人 Webhook URL执行完成后推送到飞书群",
                },
            },
            "required": ["agent_id", "name", "cron_expression", "input_message"],
        },
    },
}
# Function-calling schema for the schedule_list tool (single required
# argument: the Agent ID).
SCHEDULE_LIST_SCHEMA = {
    "type": "function",
    "function": {
        "name": "schedule_list",
        "description": "列出某个 Agent 的所有定时任务。",
        "parameters": {
            "type": "object",
            "properties": {
                "agent_id": {"type": "string", "description": "Agent ID要查询的 Agent"},
            },
            "required": ["agent_id"],
        },
    },
}
# Function-calling schema for the schedule_delete tool (single required
# argument: the schedule ID).
SCHEDULE_DELETE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "schedule_delete",
        "description": "删除指定的定时任务。",
        "parameters": {
            "type": "object",
            "properties": {
                "schedule_id": {"type": "string", "description": "定时任务 ID可从 schedule_list 查询获得)"},
            },
            "required": ["schedule_id"],
        },
    },
}
# ── agent_call ─────────────────────────────────────────────────
async def agent_call_tool(
    agent_name: str,
    query: str,
    max_iterations: int = 10,
) -> str:
    """Delegate a query to another Agent and return its reply.

    The target is looked up by a ``LIKE '%name%'`` match on the Agent name
    (at most five candidates; an exact name match wins, otherwise the first
    candidate is used). The first ``agent``/``llm`` node of its
    ``workflow_config`` supplies the prompt, model and tool settings for a
    single ``AgentRuntime`` run.

    Args:
        agent_name: Name (or part of the name) of the target Agent.
        query: User message handed to the target Agent.
        max_iterations: Upper bound on reasoning steps (default 10). A
            positive ``max_iterations`` on the node lowers it further.

    Returns:
        A JSON string. On success: agent name, status, iteration/tool-call
        counts and the reply. Otherwise an ``error`` code
        (``agent_not_found``, ``no_agent_node``, ``execution_failed``) or
        the runtime's own error with ``status`` set to ``error``.
    """
    try:
        from app.core.database import SessionLocal
        from app.models.agent import Agent
        from app.agent_runtime.core import AgentRuntime
        from app.agent_runtime.schemas import (
            AgentConfig,
            AgentLLMConfig,
            AgentToolConfig,
        )

        # 1. Look the Agent up by fuzzy name match.
        db = SessionLocal()
        try:
            candidates = (
                db.query(Agent)
                .filter(Agent.name.like(f"%{agent_name}%"))
                .limit(5)
                .all()
            )
            if not candidates:
                return json.dumps(
                    {
                        "error": "agent_not_found",
                        "message": (
                            f"未找到匹配「{agent_name}」的 Agent。"
                            "请确认 Agent 名称是否正确,或在 Agent 管理中先创建目标 Agent。"
                        ),
                    },
                    ensure_ascii=False,
                )
            # Exact match first, otherwise the first candidate.
            target = next(
                (a for a in candidates if a.name == agent_name),
                candidates[0],
            )
            # Copy the needed attributes while the session is still open so
            # nothing below depends on a detached ORM instance.
            target_name = target.name
            wf = target.workflow_config or {}
        finally:
            db.close()

        # 2. Pull the agent/llm node configuration out of the workflow.
        nodes = wf.get("nodes") or []
        agent_node = next(
            (
                n for n in nodes
                if isinstance(n, dict) and n.get("type") in ("agent", "llm")
            ),
            None,
        )
        if not agent_node:
            return json.dumps(
                {
                    "error": "no_agent_node",
                    "message": f"Agent「{target_name}」的工作流中未找到 Agent/LLM 节点,无法执行",
                },
                ensure_ascii=False,
            )

        nd = agent_node.get("data", {}) or {}
        system_prompt = nd.get("system_prompt") or nd.get("prompt") or (
            "你是一个有用的AI助手。"
        )
        # Stored node data may hold explicit nulls/empty strings; treat them
        # like missing keys instead of failing or passing None through.
        model = nd.get("model") or "deepseek-v4-flash"
        provider = nd.get("provider") or "deepseek"
        raw_temperature = nd.get("temperature")
        temperature = 0.7 if raw_temperature is None else float(raw_temperature)
        node_max_iter = int(nd.get("max_iterations") or 0)
        if node_max_iter > 0:
            max_iterations = min(max_iterations, node_max_iter)

        # 3. Build the runtime configuration and execute.
        config = AgentConfig(
            name=target_name or "sub_agent",
            system_prompt=system_prompt,
            llm=AgentLLMConfig(
                provider=provider,
                model=model,
                temperature=temperature,
                max_iterations=max_iterations,
            ),
            tools=AgentToolConfig(
                include_tools=nd.get("tools") or [],
                exclude_tools=nd.get("exclude_tools") or [],
            ),
            memory={
                "enabled": nd.get("memory", True),
                "persist_to_db": nd.get("memory", True),
            },
        )
        runtime = AgentRuntime(config=config)
        result = await runtime.run(query)

        if result.success:
            out = {
                "agent": target_name,
                "status": "success",
                "iterations": result.iterations_used,
                "tool_calls": result.tool_calls_made,
                "reply": result.content,
            }
        else:
            out = {
                "agent": target_name,
                "status": "error",
                "error": result.error,
                "reply": result.content or f"Agent 执行失败: {result.error}",
            }
        return json.dumps(out, ensure_ascii=False)
    except Exception as e:
        logger.error(f"agent_call 工具执行失败: {e}", exc_info=True)
        return json.dumps(
            {
                "error": "execution_failed",
                "message": f"调用 Agent 时出错: {e}",
            },
            ensure_ascii=False,
        )
# Function-calling schema for the agent_call tool. The target Agent name
# and the query are required; the step limit is optional.
AGENT_CALL_SCHEMA = {
    "type": "function",
    "function": {
        "name": "agent_call",
        # Adjacent literals are concatenated into one description string.
        "description": (
            "调用另一个已注册的 Agent 来处理任务并返回结果。适合用来将子任务委托给"
            "具备特定专长的 Agent如家庭医生助手、代码助手等。会话不会被接管"
            "结果会以文本形式返回给调用方用于整合回复。"
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "agent_name": {
                    "type": "string",
                    "description": "目标 Agent 名称,支持模糊匹配(如「家庭医生」「代码助手」)",
                },
                "query": {
                    "type": "string",
                    "description": "发给目标 Agent 的查询内容,可以包含上下文信息",
                },
                "max_iterations": {
                    "type": "integer",
                    "description": "最大推理步数(默认 10控制 Agent 的思考深度",
                    "default": 10,
                },
            },
            "required": ["agent_name", "query"],
        },
    },
}