diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 7b5bf24..0bb0fbe 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -37,6 +37,11 @@ class Settings(BaseSettings):
# http_request 工具:写入 LLM 上下文的响应体最大字符数(HTML/JSON 过大时截断,避免超过模型 context)
HTTP_REQUEST_MAX_BODY_CHARS: int = 32_000
+ # web_search 工具:Bing Search API Key(Azure 免费层 1000 次/月),留空则使用 DuckDuckGo
+ BING_SEARCH_API_KEY: str = ""
+ # web_search 工具:HTTP 代理地址(DuckDuckGo 在国内无法访问时使用),如 http://127.0.0.1:7890
+ SEARCH_PROXY: str = ""
+
# 图片 OCR(file_read 对 png/jpg 等):Tesseract 可执行文件路径,Windows 示例 C:/Program Files/Tesseract-OCR/tesseract.exe
TESSERACT_CMD: str = ""
# 自定义 tessdata 目录(内含 chi_sim.traineddata 等)。留空时若 LOCAL_FILE_TOOLS_ROOT/tessdata 下存在 .traineddata 则自动使用
diff --git a/backend/app/services/builtin_tools.py b/backend/app/services/builtin_tools.py
index 11cad43..b3c657a 100644
--- a/backend/app/services/builtin_tools.py
+++ b/backend/app/services/builtin_tools.py
@@ -2571,67 +2571,123 @@ GIT_OPERATION_SCHEMA = {
# ── web_search ───────────────────────────────────────────────
-async def web_search_tool(query: str, max_results: int = 8) -> str:
- """搜索网页(使用 DuckDuckGo HTML 搜索,无需 API Key)。"""
- import asyncio as _asyncio
+async def _search_via_bing(query: str, max_results: int) -> str:
+ """通过 Bing Web Search API 搜索(需 BING_SEARCH_API_KEY)。"""
+ from app.core.config import settings
- try:
- url = f"https://html.duckduckgo.com/html/?q={httpx.URL(query).raw_path.decode() if hasattr(httpx.URL(query), 'raw_path') else query}"
- # 简单 URL 编码
- import urllib.parse
- safe_q = urllib.parse.quote(query, safe="")
- url = f"https://html.duckduckgo.com/html/?q={safe_q}"
+ api_key = settings.BING_SEARCH_API_KEY
+ if not api_key:
+ return json.dumps({"error": "Bing Search API Key 未配置"}, ensure_ascii=False)
- async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
- resp = await client.get(url, headers={
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
- })
- if resp.status_code != 200:
- return json.dumps({"error": f"搜索失败 HTTP {resp.status_code}"}, ensure_ascii=False)
+ proxy = settings.SEARCH_PROXY or None
+ client_kwargs = {"timeout": 15.0}
+ if proxy:
+ client_kwargs["proxy"] = proxy
- html = resp.text
-
- # 简单解析搜索结果
- results = []
- # 匹配每个结果块:标题、摘要、链接
- import re as _re
- # DuckDuckGo HTML 结果模式
- blocks = _re.findall(
-            r'<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>.*?<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
- html, _re.DOTALL | _re.IGNORECASE,
+ async with httpx.AsyncClient(**client_kwargs) as client:
+ resp = await client.get(
+ "https://api.bing.microsoft.com/v7.0/search",
+ headers={"Ocp-Apim-Subscription-Key": api_key},
+ params={"q": query, "count": min(max_results, 50), "mkt": "zh-CN", "textFormat": "Raw"},
)
- if not blocks:
- # 备选模式
- blocks = _re.findall(
-                r'<a[^>]*rel="nofollow"[^>]*class="result__url"[^>]*href="([^"]*)"[^>]*>.*?<a[^>]*class="result__a"[^>]*>(.*?)</a>.*?<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
- html, _re.DOTALL | _re.IGNORECASE,
- )
+ if resp.status_code != 200:
+ return json.dumps({"error": f"Bing 搜索失败 HTTP {resp.status_code}: {resp.text[:200]}"}, ensure_ascii=False)
- for i, (link, title, snippet) in enumerate(blocks[:max_results]):
- title_clean = _re.sub(r"<.*?>", "", title).strip()
- snippet_clean = _re.sub(r"<.*?>", "", snippet).strip()
+ data = resp.json()
+ web_pages = (data.get("webPages") or {}).get("value", [])
+ if not web_pages:
+ return json.dumps({"message": "Bing 未找到搜索结果,请尝试更具体的关键词", "results": []}, ensure_ascii=False)
+
+ results = []
+ for i, page in enumerate(web_pages[:max_results]):
results.append({
"index": i + 1,
- "title": title_clean,
- "url": link,
- "snippet": snippet_clean[:300],
+ "title": page.get("name", ""),
+ "url": page.get("url", ""),
+ "snippet": (page.get("snippet", "") or "")[:300],
})
+ return json.dumps({"results": results, "count": len(results), "source": "bing"}, ensure_ascii=False)
- if not results:
- return json.dumps({"message": "未找到搜索结果,请尝试更具体的关键词", "results": []}, ensure_ascii=False)
- return json.dumps({"results": results, "count": len(results)}, ensure_ascii=False)
+async def _search_via_duckduckgo(query: str, max_results: int) -> str:
+ """通过 DuckDuckGo HTML 搜索(无需 API Key,可配代理)。"""
+ import re as _re
+ import urllib.parse
+ from app.core.config import settings
+
+ safe_q = urllib.parse.quote(query, safe="")
+ url = f"https://html.duckduckgo.com/html/?q={safe_q}"
+
+ proxy = settings.SEARCH_PROXY or None
+ client_kwargs = {"timeout": 15.0, "follow_redirects": True}
+ if proxy:
+ client_kwargs["proxy"] = proxy
+
+ async with httpx.AsyncClient(**client_kwargs) as client:
+ resp = await client.get(url, headers={
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+ })
+ if resp.status_code != 200:
+ return json.dumps({"error": f"DuckDuckGo 搜索失败 HTTP {resp.status_code}"}, ensure_ascii=False)
+ html = resp.text
+
+ results = []
+ blocks = _re.findall(
+        r'<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>.*?<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
+ html, _re.DOTALL | _re.IGNORECASE,
+ )
+ if not blocks:
+ blocks = _re.findall(
+            r'<a[^>]*rel="nofollow"[^>]*class="result__url"[^>]*href="([^"]*)"[^>]*>.*?<a[^>]*class="result__a"[^>]*>(.*?)</a>.*?<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
+ html, _re.DOTALL | _re.IGNORECASE,
+ )
+
+ for i, (link, title, snippet) in enumerate(blocks[:max_results]):
+ title_clean = _re.sub(r"<.*?>", "", title).strip()
+ snippet_clean = _re.sub(r"<.*?>", "", snippet).strip()
+ results.append({
+ "index": i + 1,
+ "title": title_clean,
+ "url": link,
+ "snippet": snippet_clean[:300],
+ })
+
+ if not results:
+ return json.dumps({"message": "DuckDuckGo 未找到搜索结果,请尝试更具体的关键词", "results": []}, ensure_ascii=False)
+
+ return json.dumps({"results": results, "count": len(results), "source": "duckduckgo"}, ensure_ascii=False)
+
+
+async def web_search_tool(query: str, max_results: int = 8) -> str:
+ """搜索网页(Bing 优先,DuckDuckGo 降级,支持代理)。"""
+ from app.core.config import settings
+
+ # 1. 优先使用 Bing Search API(国内可用)
+ if settings.BING_SEARCH_API_KEY:
+ try:
+ result = await _search_via_bing(query, max_results)
+ parsed = json.loads(result)
+ if "results" in parsed and parsed["results"]:
+ return result
+ # Bing 返回空结果时继续降级
+ logger.info("Bing 搜索无结果,降级到 DuckDuckGo")
+ except Exception as e:
+ logger.warning("Bing 搜索异常,降级到 DuckDuckGo: %s", e)
+
+ # 2. 降级到 DuckDuckGo
+ try:
+ return await _search_via_duckduckgo(query, max_results)
except Exception as e:
- logger.error(f"web_search 失败: {e}")
- return json.dumps({"error": f"搜索失败: {e}"}, ensure_ascii=False)
+ logger.error(f"web_search 全部后端失败: {e}")
+        return json.dumps({"error": "搜索失败: 所有搜索后端均不可用(Bing/DuckDuckGo),请检查网络或配置 SEARCH_PROXY"}, ensure_ascii=False)
WEB_SEARCH_SCHEMA = {
"type": "function",
"function": {
"name": "web_search",
- "description": "搜索互联网获取网页信息,返回标题、摘要和 URL 列表。适合查找最新文档、技术方案、新闻资讯。",
+ "description": "搜索互联网获取网页信息(Bing 优先,DuckDuckGo 备用),返回标题、摘要和 URL 列表。适合查找最新文档、技术方案、新闻资讯。",
"parameters": {
"type": "object",
"properties": {