chore: remove langchain in tools (#3247)
This commit is contained in:
@@ -1,16 +1,187 @@
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from typing import Any
|
||||
|
||||
from langchain.tools import PubmedQueryRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
from core.tools.tool.builtin_tool import BuiltinTool
|
||||
|
||||
|
||||
class PubMedAPIWrapper(BaseModel):
    """
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored documents used for the PubMed tool
        load_max_docs: a limit to the number of loaded documents
        load_all_available_meta:
            if True: the `metadata` of the loaded Documents gets all available meta info
                (see https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch)
            if False: the `metadata` gets only the most informative fields.
    """

    # NCBI E-utilities endpoints: esearch finds UIDs, efetch retrieves records.
    base_url_esearch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?"
    base_url_efetch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    # Retry policy for HTTP 429 (Too Many Requests) responses.
    max_retry = 5
    sleep_time = 0.2

    # Default values for the parameters
    top_k_results: int = 3
    load_max_docs: int = 25
    ARXIV_MAX_QUERY_LENGTH = 300
    doc_content_chars_max: int = 2000
    load_all_available_meta: bool = False
    email: str = "your_email@example.com"

    def run(self, query: str) -> str:
        """
        Run PubMed search and get the article meta information.

        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        Returns a human-readable digest, or an error string on any failure
        (callers expect a string, never an exception).
        """
        try:
            # Retrieve the top-k results for the query (query length is capped).
            docs = [
                f"Published: {result['pub_date']}\nTitle: {result['title']}\n"
                f"Summary: {result['summary']}"
                for result in self.load(query[: self.ARXIV_MAX_QUERY_LENGTH])
            ]

            # Join the results and limit the character count
            return (
                "\n\n".join(docs)[: self.doc_content_chars_max]
                if docs
                else "No good PubMed Result was found"
            )
        except Exception as ex:
            return f"PubMed exception: {ex}"

    def load(self, query: str) -> list[dict]:
        """
        Search PubMed for documents matching the query.

        Return a list of dictionaries containing the document metadata
        (uid, title, summary, pub_date).
        """
        url = (
            self.base_url_esearch
            + "db=pubmed&term="
            # BUG FIX: the original used str({urllib.parse.quote(query)}),
            # which built a one-element set and stringified it, sending
            # "{'...'}" to the API instead of the percent-encoded query.
            + urllib.parse.quote(query)
            + f"&retmode=json&retmax={self.top_k_results}&usehistory=y"
        )
        result = urllib.request.urlopen(url)
        text = result.read().decode("utf-8")
        json_text = json.loads(text)

        # usehistory=y makes esearch return a WebEnv token that efetch reuses,
        # so each article fetch targets the same server-side result set.
        webenv = json_text["esearchresult"]["webenv"]
        return [
            self.retrieve_article(uid, webenv)
            for uid in json_text["esearchresult"]["idlist"]
        ]

    def retrieve_article(self, uid: str, webenv: str) -> dict:
        """
        Fetch a single article by uid via efetch and extract its title,
        abstract and publication date from the returned XML.

        Retries with exponential back-off on HTTP 429; any other HTTP
        error is re-raised to the caller.
        """
        url = (
            self.base_url_efetch
            + "db=pubmed&retmode=xml&id="
            + uid
            + "&webenv="
            + webenv
        )

        # BUG FIX: the original doubled self.sleep_time in place, so the
        # back-off grew permanently across unrelated calls (and mutating a
        # pydantic model attribute can raise). Use a local instead.
        sleep_time = self.sleep_time
        retry = 0
        while True:
            try:
                result = urllib.request.urlopen(url)
                break
            except urllib.error.HTTPError as e:
                if e.code == 429 and retry < self.max_retry:
                    # Too Many Requests error:
                    # wait for an exponentially increasing amount of time
                    print(f"Too Many Requests, waiting for {sleep_time:.2f} seconds...")
                    time.sleep(sleep_time)
                    sleep_time *= 2
                    retry += 1
                else:
                    raise e

        xml_text = result.read().decode("utf-8")

        def _extract(tag: str) -> str:
            # Naive first-occurrence tag extraction; returns "" when the
            # tag is absent (same behavior as the original inline copies).
            start_tag = f"<{tag}>"
            end_tag = f"</{tag}>"
            if start_tag in xml_text and end_tag in xml_text:
                return xml_text[
                    xml_text.index(start_tag) + len(start_tag) : xml_text.index(end_tag)
                ]
            return ""

        # Return article as dictionary
        return {
            "uid": uid,
            "title": _extract("ArticleTitle"),
            "summary": _extract("AbstractText"),
            "pub_date": _extract("PubDate"),
        }
||||
|
||||
|
||||
class PubmedQueryRun(BaseModel):
    """Tool that searches the PubMed API."""

    name = "PubMed"
    # BUG FIX: the original description was copy-pasted from the Arxiv tool
    # (Physics, Mathematics, Economics, ...). PubMed indexes biomedical
    # literature, so the description now reflects the actual corpus.
    description = (
        "A wrapper around PubMed.org "
        "Useful for when you need to answer questions about medicine, health, "
        "and biomedical topics "
        "from biomedical literature on PubMed.org. "
        "Input should be a search query."
    )
    # Delegate that performs the actual esearch/efetch calls.
    api_wrapper: PubMedAPIWrapper = Field(default_factory=PubMedAPIWrapper)

    def _run(
        self,
        query: str,
    ) -> str:
        """Use the PubMed tool: return a text digest of the top results."""
        return self.api_wrapper.run(query)
|
||||
|
||||
|
||||
class PubMedInput(BaseModel):
    """Argument schema for the PubMed search tool."""

    query: str = Field(..., description="Search query.")
|
||||
|
||||
|
||||
class PubMedSearchTool(BuiltinTool):
|
||||
"""
|
||||
Tool for performing a search using PubMed search engine.
|
||||
@@ -34,7 +205,7 @@ class PubMedSearchTool(BuiltinTool):
|
||||
|
||||
tool = PubmedQueryRun(args_schema=PubMedInput)
|
||||
|
||||
result = tool.run(query)
|
||||
result = tool._run(query)
|
||||
|
||||
return self.create_text_message(self.summary(user_id=user_id, content=result))
|
||||
|
||||
Reference in New Issue
Block a user