feat: multi-backend web_search with Bing API + DuckDuckGo fallback
web_search_tool now tries Bing Web Search API first (works in China, free tier 1000 searches/month via Azure), then falls back to DuckDuckGo. Both backends support optional SEARCH_PROXY for network-restricted environments. New settings: - BING_SEARCH_API_KEY: Azure Bing Search API key - SEARCH_PROXY: HTTP proxy for outbound search requests Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,6 +37,11 @@ class Settings(BaseSettings):
|
||||
# http_request tool: maximum number of response-body characters written into the
# LLM context (oversized HTML/JSON is truncated to avoid exceeding the model context).
HTTP_REQUEST_MAX_BODY_CHARS: int = 32_000

# web_search tool: Bing Search API key (Azure free tier, 1000 calls/month).
# Leave empty to use DuckDuckGo instead.
BING_SEARCH_API_KEY: str = ""
# web_search tool: HTTP proxy address, used when DuckDuckGo is unreachable from
# the local network, e.g. http://127.0.0.1:7890
SEARCH_PROXY: str = ""

# Image OCR (file_read on png/jpg etc.): path to the Tesseract executable,
# Windows example: C:/Program Files/Tesseract-OCR/tesseract.exe
TESSERACT_CMD: str = ""
# Custom tessdata directory (containing chi_sim.traineddata etc.). When empty,
# LOCAL_FILE_TOOLS_ROOT/tessdata is used automatically if it holds .traineddata files.
||||
|
||||
@@ -2571,67 +2571,123 @@ GIT_OPERATION_SCHEMA = {
|
||||
|
||||
# ── web_search ───────────────────────────────────────────────
|
||||
|
||||
async def _search_via_bing(query: str, max_results: int) -> str:
    """Search the web via the Bing Web Search API (requires BING_SEARCH_API_KEY).

    Returns a JSON string: {"results": [...], "count": N, "source": "bing"} on
    success, {"message": ..., "results": []} when Bing finds nothing, or
    {"error": ...} on configuration/HTTP failure.
    """
    from app.core.config import settings

    api_key = settings.BING_SEARCH_API_KEY
    if not api_key:
        return json.dumps({"error": "Bing Search API Key 未配置"}, ensure_ascii=False)

    # Optional outbound HTTP proxy for network-restricted environments.
    proxy = settings.SEARCH_PROXY or None
    client_kwargs = {"timeout": 15.0}
    if proxy:
        client_kwargs["proxy"] = proxy

    async with httpx.AsyncClient(**client_kwargs) as client:
        resp = await client.get(
            "https://api.bing.microsoft.com/v7.0/search",
            headers={"Ocp-Apim-Subscription-Key": api_key},
            # Bing caps `count` at 50 per request; mkt=zh-CN biases results for CN users.
            params={"q": query, "count": min(max_results, 50), "mkt": "zh-CN", "textFormat": "Raw"},
        )
    if resp.status_code != 200:
        return json.dumps({"error": f"Bing 搜索失败 HTTP {resp.status_code}: {resp.text[:200]}"}, ensure_ascii=False)

    data = resp.json()
    # `webPages` may be absent entirely when there are no organic results.
    web_pages = (data.get("webPages") or {}).get("value", [])
    if not web_pages:
        return json.dumps({"message": "Bing 未找到搜索结果,请尝试更具体的关键词", "results": []}, ensure_ascii=False)

    results = []
    for i, page in enumerate(web_pages[:max_results]):
        results.append({
            "index": i + 1,
            "title": page.get("name", ""),
            "url": page.get("url", ""),
            # Truncate snippets so the tool output stays small in the LLM context.
            "snippet": (page.get("snippet", "") or "")[:300],
        })
    return json.dumps({"results": results, "count": len(results), "source": "bing"}, ensure_ascii=False)
|
||||
async def _search_via_duckduckgo(query: str, max_results: int) -> str:
    """Search the web via DuckDuckGo's static HTML endpoint (no API key; proxy-aware).

    Returns a JSON string: {"results": [...], "count": N, "source": "duckduckgo"}
    on success, {"message": ..., "results": []} when nothing matched, or
    {"error": ...} on HTTP failure.
    """
    import re as _re
    import urllib.parse

    from app.core.config import settings

    safe_q = urllib.parse.quote(query, safe="")
    url = f"https://html.duckduckgo.com/html/?q={safe_q}"

    # Optional outbound HTTP proxy (DuckDuckGo is unreachable from some networks).
    proxy = settings.SEARCH_PROXY or None
    client_kwargs = {"timeout": 15.0, "follow_redirects": True}
    if proxy:
        client_kwargs["proxy"] = proxy

    async with httpx.AsyncClient(**client_kwargs) as client:
        resp = await client.get(url, headers={
            # Browser-like User-Agent; the HTML endpoint may reject obvious bots.
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        })
        if resp.status_code != 200:
            return json.dumps({"error": f"DuckDuckGo 搜索失败 HTTP {resp.status_code}"}, ensure_ascii=False)
        html = resp.text

    results = []
    # Primary pattern: result title anchor followed by its snippet anchor.
    blocks = _re.findall(
        r'<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>.*?<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
        html, _re.DOTALL | _re.IGNORECASE,
    )
    if not blocks:
        # Fallback pattern for an alternate markup variant of the results page.
        blocks = _re.findall(
            r'<a[^>]*rel="nofollow"[^>]*class="result__url"[^>]*href="([^"]*)"[^>]*>.*?<a[^>]*class="result__a"[^>]*>(.*?)</a>.*?<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
            html, _re.DOTALL | _re.IGNORECASE,
        )

    for i, (link, title, snippet) in enumerate(blocks[:max_results]):
        # Strip residual tags (e.g. <b> highlight markup) from title and snippet.
        title_clean = _re.sub(r"<.*?>", "", title).strip()
        snippet_clean = _re.sub(r"<.*?>", "", snippet).strip()
        results.append({
            "index": i + 1,
            "title": title_clean,
            # NOTE(review): DDG's HTML page often links through a redirect
            # (//duckduckgo.com/l/?uddg=...) — confirm callers accept that form.
            "url": link,
            "snippet": snippet_clean[:300],
        })

    if not results:
        return json.dumps({"message": "DuckDuckGo 未找到搜索结果,请尝试更具体的关键词", "results": []}, ensure_ascii=False)

    return json.dumps({"results": results, "count": len(results), "source": "duckduckgo"}, ensure_ascii=False)
|
||||
|
||||
|
||||
async def web_search_tool(query: str, max_results: int = 8) -> str:
    """Search the web, preferring Bing and falling back to DuckDuckGo.

    Args:
        query: Search keywords.
        max_results: Maximum number of results to return (default 8).

    Returns:
        JSON string: {"results": [...], "count": N, "source": ...} on success,
        {"message": ..., "results": []} when nothing was found, or
        {"error": ...} when every backend failed.
    """
    from app.core.config import settings

    # 1. Prefer the Bing Search API when a key is configured.
    if settings.BING_SEARCH_API_KEY:
        try:
            result = await _search_via_bing(query, max_results)
            parsed = json.loads(result)
            if "results" in parsed and parsed["results"]:
                return result
            # Empty — or error — payload from Bing: fall through to DuckDuckGo.
            logger.info("Bing 搜索无结果,降级到 DuckDuckGo")
        except Exception as e:
            logger.warning("Bing 搜索异常,降级到 DuckDuckGo: %s", e)

    # 2. Fall back to DuckDuckGo HTML search.
    try:
        return await _search_via_duckduckgo(query, max_results)
    except Exception as e:
        logger.error(f"web_search 全部后端失败: {e}")
        # Plain literal: the message has no placeholders, so the original's
        # f-string prefix was spurious (same runtime bytes either way).
        return json.dumps({"error": "搜索失败: 所有搜索后端均不可用(Bing/DuckDuckGo),请检查网络或配置 SEARCH_PROXY"}, ensure_ascii=False)
|
||||
|
||||
|
||||
WEB_SEARCH_SCHEMA = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "web_search",
|
||||
"description": "搜索互联网获取网页信息,返回标题、摘要和 URL 列表。适合查找最新文档、技术方案、新闻资讯。",
|
||||
"description": "搜索互联网获取网页信息(Bing 优先,DuckDuckGo 备用),返回标题、摘要和 URL 列表。适合查找最新文档、技术方案、新闻资讯。",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
Reference in New Issue
Block a user