Feat/assistant app (#2086)
Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: Pascal M <11357019+perzeuss@users.noreply.github.com>
This commit is contained in:
222
api/core/tools/tool/api_tool.py
Normal file
222
api/core/tools/tool/api_tool.py
Normal file
@@ -0,0 +1,222 @@
|
||||
from typing import Any, Dict, List, Union
|
||||
from json import dumps
|
||||
|
||||
from core.tools.entities.tool_bundle import ApiBasedToolBundle
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage
|
||||
from core.tools.tool.tool import Tool
|
||||
from core.tools.errors import ToolProviderCredentialValidationError
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
class ApiTool(Tool):
|
||||
api_bundle: ApiBasedToolBundle
|
||||
|
||||
"""
|
||||
Api tool
|
||||
"""
|
||||
def fork_tool_runtime(self, meta: Dict[str, Any]) -> 'Tool':
|
||||
"""
|
||||
fork a new tool with meta data
|
||||
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
"""
|
||||
return self.__class__(
|
||||
identity=self.identity.copy() if self.identity else None,
|
||||
parameters=self.parameters.copy() if self.parameters else None,
|
||||
description=self.description.copy() if self.description else None,
|
||||
api_bundle=self.api_bundle.copy() if self.api_bundle else None,
|
||||
runtime=Tool.Runtime(**meta)
|
||||
)
|
||||
|
||||
def validate_credentials(self, credentails: Dict[str, Any], parameters: Dict[str, Any], format_only: bool = False) -> None:
|
||||
"""
|
||||
validate the credentials for Api tool
|
||||
"""
|
||||
# assemble validate request and request parameters
|
||||
headers = self.assembling_request(parameters)
|
||||
|
||||
if format_only:
|
||||
return
|
||||
|
||||
response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, parameters)
|
||||
# validate response
|
||||
self.validate_and_parse_response(response)
|
||||
|
||||
def assembling_request(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||
headers = {}
|
||||
credentials = self.runtime.credentials or {}
|
||||
|
||||
if 'auth_type' not in credentials:
|
||||
raise ToolProviderCredentialValidationError('Missing auth_type')
|
||||
|
||||
if credentials['auth_type'] == 'api_key':
|
||||
api_key_header = 'api_key'
|
||||
|
||||
if 'api_key_header' in credentials:
|
||||
api_key_header = credentials['api_key_header']
|
||||
|
||||
if 'api_key_value' not in credentials:
|
||||
raise ToolProviderCredentialValidationError('Missing api_key_value')
|
||||
|
||||
headers[api_key_header] = credentials['api_key_value']
|
||||
|
||||
needed_parameters = [parameter for parameter in self.api_bundle.parameters if parameter.required]
|
||||
for parameter in needed_parameters:
|
||||
if parameter.required and parameter.name not in parameters:
|
||||
raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter.name}")
|
||||
|
||||
if parameter.default is not None and parameter.name not in parameters:
|
||||
parameters[parameter.name] = parameter.default
|
||||
|
||||
return headers
|
||||
|
||||
def validate_and_parse_response(self, response: Union[httpx.Response, requests.Response]) -> str:
|
||||
"""
|
||||
validate the response
|
||||
"""
|
||||
if isinstance(response, httpx.Response):
|
||||
if response.status_code >= 400:
|
||||
raise ToolProviderCredentialValidationError(f"Request failed with status code {response.status_code}")
|
||||
return response.text
|
||||
elif isinstance(response, requests.Response):
|
||||
if not response.ok:
|
||||
raise ToolProviderCredentialValidationError(f"Request failed with status code {response.status_code}")
|
||||
return response.text
|
||||
else:
|
||||
raise ValueError(f'Invalid response type {type(response)}')
|
||||
|
||||
def do_http_request(self, url: str, method: str, headers: Dict[str, Any], parameters: Dict[str, Any]) -> httpx.Response:
|
||||
"""
|
||||
do http request depending on api bundle
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
params = {}
|
||||
path_params = {}
|
||||
body = {}
|
||||
cookies = {}
|
||||
|
||||
# check parameters
|
||||
for parameter in self.api_bundle.openapi.get('parameters', []):
|
||||
if parameter['in'] == 'path':
|
||||
value = ''
|
||||
if parameter['name'] in parameters:
|
||||
value = parameters[parameter['name']]
|
||||
elif parameter['required']:
|
||||
raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}")
|
||||
path_params[parameter['name']] = value
|
||||
|
||||
elif parameter['in'] == 'query':
|
||||
value = ''
|
||||
if parameter['name'] in parameters:
|
||||
value = parameters[parameter['name']]
|
||||
elif parameter['required']:
|
||||
raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}")
|
||||
params[parameter['name']] = value
|
||||
|
||||
elif parameter['in'] == 'cookie':
|
||||
value = ''
|
||||
if parameter['name'] in parameters:
|
||||
value = parameters[parameter['name']]
|
||||
elif parameter['required']:
|
||||
raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}")
|
||||
cookies[parameter['name']] = value
|
||||
|
||||
elif parameter['in'] == 'header':
|
||||
value = ''
|
||||
if parameter['name'] in parameters:
|
||||
value = parameters[parameter['name']]
|
||||
elif parameter['required']:
|
||||
raise ToolProviderCredentialValidationError(f"Missing required parameter {parameter['name']}")
|
||||
headers[parameter['name']] = value
|
||||
|
||||
# check if there is a request body and handle it
|
||||
if 'requestBody' in self.api_bundle.openapi and self.api_bundle.openapi['requestBody'] is not None:
|
||||
# handle json request body
|
||||
if 'content' in self.api_bundle.openapi['requestBody']:
|
||||
for content_type in self.api_bundle.openapi['requestBody']['content']:
|
||||
headers['Content-Type'] = content_type
|
||||
body_schema = self.api_bundle.openapi['requestBody']['content'][content_type]['schema']
|
||||
required = body_schema['required'] if 'required' in body_schema else []
|
||||
properties = body_schema['properties'] if 'properties' in body_schema else {}
|
||||
for name, property in properties.items():
|
||||
if name in parameters:
|
||||
# convert type
|
||||
try:
|
||||
value = parameters[name]
|
||||
if property['type'] == 'integer':
|
||||
value = int(value)
|
||||
elif property['type'] == 'number':
|
||||
# check if it is a float
|
||||
if '.' in value:
|
||||
value = float(value)
|
||||
else:
|
||||
value = int(value)
|
||||
elif property['type'] == 'boolean':
|
||||
value = bool(value)
|
||||
body[name] = value
|
||||
except ValueError as e:
|
||||
body[name] = parameters[name]
|
||||
elif name in required:
|
||||
raise ToolProviderCredentialValidationError(
|
||||
f"Missing required parameter {name} in operation {self.api_bundle.operation_id}"
|
||||
)
|
||||
elif 'default' in property:
|
||||
body[name] = property['default']
|
||||
else:
|
||||
body[name] = None
|
||||
break
|
||||
|
||||
# replace path parameters
|
||||
for name, value in path_params.items():
|
||||
url = url.replace(f'{{{name}}}', value)
|
||||
|
||||
# parse http body data if needed, for GET/HEAD/OPTIONS/TRACE, the body is ignored
|
||||
if 'Content-Type' in headers:
|
||||
if headers['Content-Type'] == 'application/json':
|
||||
body = dumps(body)
|
||||
else:
|
||||
body = body
|
||||
|
||||
# do http request
|
||||
if method == 'get':
|
||||
response = httpx.get(url, params=params, headers=headers, cookies=cookies, timeout=10, follow_redirects=True)
|
||||
elif method == 'post':
|
||||
response = httpx.post(url, params=params, headers=headers, cookies=cookies, data=body, timeout=10, follow_redirects=True)
|
||||
elif method == 'put':
|
||||
response = httpx.put(url, params=params, headers=headers, cookies=cookies, data=body, timeout=10, follow_redirects=True)
|
||||
elif method == 'delete':
|
||||
"""
|
||||
request body data is unsupported for DELETE method in standard http protocol
|
||||
however, OpenAPI 3.0 supports request body data for DELETE method, so we support it here by using requests
|
||||
"""
|
||||
response = requests.delete(url, params=params, headers=headers, cookies=cookies, data=body, timeout=10, allow_redirects=True)
|
||||
elif method == 'patch':
|
||||
response = httpx.patch(url, params=params, headers=headers, cookies=cookies, data=body, timeout=10, follow_redirects=True)
|
||||
elif method == 'head':
|
||||
response = httpx.head(url, params=params, headers=headers, cookies=cookies, timeout=10, follow_redirects=True)
|
||||
elif method == 'options':
|
||||
response = httpx.options(url, params=params, headers=headers, cookies=cookies, timeout=10, follow_redirects=True)
|
||||
else:
|
||||
raise ValueError(f'Invalid http method {method}')
|
||||
|
||||
return response
|
||||
|
||||
def _invoke(self, user_id: str, tool_paramters: Dict[str, Any]) -> ToolInvokeMessage | List[ToolInvokeMessage]:
|
||||
"""
|
||||
invoke http request
|
||||
"""
|
||||
# assemble request
|
||||
headers = self.assembling_request(tool_paramters)
|
||||
|
||||
# do http request
|
||||
response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, tool_paramters)
|
||||
|
||||
# validate response
|
||||
response = self.validate_and_parse_response(response)
|
||||
|
||||
# assemble invoke message
|
||||
return self.create_text_message(response)
|
||||
|
||||
140
api/core/tools/tool/builtin_tool.py
Normal file
140
api/core/tools/tool/builtin_tool.py
Normal file
@@ -0,0 +1,140 @@
|
||||
from core.tools.tool.tool import Tool
|
||||
from core.tools.model.tool_model_manager import ToolModelManager
|
||||
from core.model_runtime.entities.message_entities import PromptMessage
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage
|
||||
from core.tools.utils.web_reader_tool import get_url
|
||||
|
||||
from typing import List
|
||||
from enum import Enum
|
||||
|
||||
_SUMMARY_PROMPT = """You are a professional language researcher, you are interested in the language
|
||||
and you can quickly aimed at the main point of an webpage and reproduce it in your own words but
|
||||
retain the original meaning and keep the key points.
|
||||
however, the text you got is too long, what you got is possible a part of the text.
|
||||
Please summarize the text you got.
|
||||
"""
|
||||
|
||||
|
||||
class BuiltinTool(Tool):
|
||||
"""
|
||||
Builtin tool
|
||||
|
||||
:param meta: the meta data of a tool call processing
|
||||
"""
|
||||
|
||||
def invoke_model(
|
||||
self, user_id: str, prompt_messages: List[PromptMessage], stop: List[str]
|
||||
) -> LLMResult:
|
||||
"""
|
||||
invoke model
|
||||
|
||||
:param model_config: the model config
|
||||
:param prompt_messages: the prompt messages
|
||||
:param stop: the stop words
|
||||
:return: the model result
|
||||
"""
|
||||
# invoke model
|
||||
return ToolModelManager.invoke(
|
||||
user_id=user_id,
|
||||
tenant_id=self.runtime.tenant_id,
|
||||
tool_type='builtin',
|
||||
tool_name=self.identity.name,
|
||||
prompt_messages=prompt_messages,
|
||||
)
|
||||
|
||||
def get_max_tokens(self) -> int:
|
||||
"""
|
||||
get max tokens
|
||||
|
||||
:param model_config: the model config
|
||||
:return: the max tokens
|
||||
"""
|
||||
return ToolModelManager.get_max_llm_context_tokens(
|
||||
tenant_id=self.runtime.tenant_id,
|
||||
)
|
||||
|
||||
def get_prompt_tokens(self, prompt_messages: List[PromptMessage]) -> int:
|
||||
"""
|
||||
get prompt tokens
|
||||
|
||||
:param prompt_messages: the prompt messages
|
||||
:return: the tokens
|
||||
"""
|
||||
return ToolModelManager.calculate_tokens(
|
||||
tenant_id=self.runtime.tenant_id,
|
||||
prompt_messages=prompt_messages
|
||||
)
|
||||
|
||||
def summary(self, user_id: str, content: str) -> str:
|
||||
max_tokens = self.get_max_tokens()
|
||||
|
||||
if self.get_prompt_tokens(prompt_messages=[
|
||||
UserPromptMessage(content=content)
|
||||
]) < max_tokens * 0.6:
|
||||
return content
|
||||
|
||||
def get_prompt_tokens(content: str) -> int:
|
||||
return self.get_prompt_tokens(prompt_messages=[
|
||||
SystemPromptMessage(content=_SUMMARY_PROMPT),
|
||||
UserPromptMessage(content=content)
|
||||
])
|
||||
|
||||
def summarize(content: str) -> str:
|
||||
summary = self.invoke_model(user_id=user_id, prompt_messages=[
|
||||
SystemPromptMessage(content=_SUMMARY_PROMPT),
|
||||
UserPromptMessage(content=content)
|
||||
], stop=[])
|
||||
|
||||
return summary.message.content
|
||||
|
||||
lines = content.split('\n')
|
||||
new_lines = []
|
||||
# split long line into multiple lines
|
||||
for i in range(len(lines)):
|
||||
line = lines[i]
|
||||
if not line.strip():
|
||||
continue
|
||||
if len(line) < max_tokens * 0.5:
|
||||
new_lines.append(line)
|
||||
elif get_prompt_tokens(line) > max_tokens * 0.7:
|
||||
while get_prompt_tokens(line) > max_tokens * 0.7:
|
||||
new_lines.append(line[:int(max_tokens * 0.5)])
|
||||
line = line[int(max_tokens * 0.5):]
|
||||
new_lines.append(line)
|
||||
else:
|
||||
new_lines.append(line)
|
||||
|
||||
# merge lines into messages with max tokens
|
||||
messages: List[str] = []
|
||||
for i in new_lines:
|
||||
if len(messages) == 0:
|
||||
messages.append(i)
|
||||
else:
|
||||
if len(messages[-1]) + len(i) < max_tokens * 0.5:
|
||||
messages[-1] += i
|
||||
if get_prompt_tokens(messages[-1] + i) > max_tokens * 0.7:
|
||||
messages.append(i)
|
||||
else:
|
||||
messages[-1] += i
|
||||
|
||||
summaries = []
|
||||
for i in range(len(messages)):
|
||||
message = messages[i]
|
||||
summary = summarize(message)
|
||||
summaries.append(summary)
|
||||
|
||||
result = '\n'.join(summaries)
|
||||
|
||||
if self.get_prompt_tokens(prompt_messages=[
|
||||
UserPromptMessage(content=result)
|
||||
]) > max_tokens * 0.7:
|
||||
return self.summary(user_id=user_id, content=result)
|
||||
|
||||
return result
|
||||
|
||||
def get_url(self, url: str, user_agent: str = None) -> str:
|
||||
"""
|
||||
get url
|
||||
"""
|
||||
return get_url(url, user_agent=user_agent)
|
||||
@@ -0,0 +1,249 @@
|
||||
import json
|
||||
import threading
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler
|
||||
from core.embedding.cached_embedding import CacheEmbedding
|
||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||
from core.index.keyword_table_index.keyword_table_index import KeywordTableConfig, KeywordTableIndex
|
||||
from core.model_manager import ModelManager
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.rerank.rerank import RerankRunner
|
||||
from extensions.ext_database import db
|
||||
from flask import Flask, current_app
|
||||
from langchain.tools import BaseTool
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
from pydantic import BaseModel, Field
|
||||
from services.retrieval_service import RetrievalService
|
||||
|
||||
default_retrieval_model = {
|
||||
'search_method': 'semantic_search',
|
||||
'reranking_enable': False,
|
||||
'reranking_model': {
|
||||
'reranking_provider_name': '',
|
||||
'reranking_model_name': ''
|
||||
},
|
||||
'top_k': 2,
|
||||
'score_threshold_enabled': False
|
||||
}
|
||||
|
||||
|
||||
class DatasetMultiRetrieverToolInput(BaseModel):
|
||||
query: str = Field(..., description="dataset multi retriever and rerank")
|
||||
|
||||
|
||||
class DatasetMultiRetrieverTool(BaseTool):
|
||||
"""Tool for querying multi dataset."""
|
||||
name: str = "dataset-"
|
||||
args_schema: Type[BaseModel] = DatasetMultiRetrieverToolInput
|
||||
description: str = "dataset multi retriever and rerank. "
|
||||
tenant_id: str
|
||||
dataset_ids: List[str]
|
||||
top_k: int = 2
|
||||
score_threshold: Optional[float] = None
|
||||
reranking_provider_name: str
|
||||
reranking_model_name: str
|
||||
return_resource: bool
|
||||
retriever_from: str
|
||||
hit_callbacks: List[DatasetIndexToolCallbackHandler] = []
|
||||
|
||||
@classmethod
|
||||
def from_dataset(cls, dataset_ids: List[str], tenant_id: str, **kwargs):
|
||||
return cls(
|
||||
name=f'dataset-{tenant_id}',
|
||||
tenant_id=tenant_id,
|
||||
dataset_ids=dataset_ids,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
def _run(self, query: str) -> str:
|
||||
threads = []
|
||||
all_documents = []
|
||||
for dataset_id in self.dataset_ids:
|
||||
retrieval_thread = threading.Thread(target=self._retriever, kwargs={
|
||||
'flask_app': current_app._get_current_object(),
|
||||
'dataset_id': dataset_id,
|
||||
'query': query,
|
||||
'all_documents': all_documents,
|
||||
'hit_callbacks': self.hit_callbacks
|
||||
})
|
||||
threads.append(retrieval_thread)
|
||||
retrieval_thread.start()
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
# do rerank for searched documents
|
||||
model_manager = ModelManager()
|
||||
rerank_model_instance = model_manager.get_model_instance(
|
||||
tenant_id=self.tenant_id,
|
||||
provider=self.reranking_provider_name,
|
||||
model_type=ModelType.RERANK,
|
||||
model=self.reranking_model_name
|
||||
)
|
||||
|
||||
rerank_runner = RerankRunner(rerank_model_instance)
|
||||
all_documents = rerank_runner.run(query, all_documents, self.score_threshold, self.top_k)
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_tool_end(all_documents)
|
||||
|
||||
document_score_list = {}
|
||||
for item in all_documents:
|
||||
if 'score' in item.metadata and item.metadata['score']:
|
||||
document_score_list[item.metadata['doc_id']] = item.metadata['score']
|
||||
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata['doc_id'] for document in all_documents]
|
||||
segments = DocumentSegment.query.filter(
|
||||
DocumentSegment.dataset_id.in_(self.dataset_ids),
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == 'completed',
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids)
|
||||
).all()
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(segments,
|
||||
key=lambda segment: index_node_id_to_position.get(segment.index_node_id,
|
||||
float('inf')))
|
||||
for segment in sorted_segments:
|
||||
if segment.answer:
|
||||
document_context_list.append(f'question:{segment.content} answer:{segment.answer}')
|
||||
else:
|
||||
document_context_list.append(segment.content)
|
||||
if self.return_resource:
|
||||
context_list = []
|
||||
resource_number = 1
|
||||
for segment in sorted_segments:
|
||||
dataset = Dataset.query.filter_by(
|
||||
id=segment.dataset_id
|
||||
).first()
|
||||
document = Document.query.filter(Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
'position': resource_number,
|
||||
'dataset_id': dataset.id,
|
||||
'dataset_name': dataset.name,
|
||||
'document_id': document.id,
|
||||
'document_name': document.name,
|
||||
'data_source_type': document.data_source_type,
|
||||
'segment_id': segment.id,
|
||||
'retriever_from': self.retriever_from,
|
||||
'score': document_score_list.get(segment.index_node_id, None)
|
||||
}
|
||||
|
||||
if self.retriever_from == 'dev':
|
||||
source['hit_count'] = segment.hit_count
|
||||
source['word_count'] = segment.word_count
|
||||
source['segment_position'] = segment.position
|
||||
source['index_node_hash'] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source['content'] = f'question:{segment.content} \nanswer:{segment.answer}'
|
||||
else:
|
||||
source['content'] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
|
||||
return str("\n".join(document_context_list))
|
||||
|
||||
async def _arun(self, tool_input: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
def _retriever(self, flask_app: Flask, dataset_id: str, query: str, all_documents: List,
|
||||
hit_callbacks: List[DatasetIndexToolCallbackHandler]):
|
||||
with flask_app.app_context():
|
||||
dataset = db.session.query(Dataset).filter(
|
||||
Dataset.tenant_id == self.tenant_id,
|
||||
Dataset.id == dataset_id
|
||||
).first()
|
||||
|
||||
if not dataset:
|
||||
return []
|
||||
|
||||
for hit_callback in hit_callbacks:
|
||||
hit_callback.on_query(query, dataset.id)
|
||||
|
||||
# get retrieval model , if the model is not setting , using default
|
||||
retrieval_model = dataset.retrieval_model if dataset.retrieval_model else default_retrieval_model
|
||||
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
kw_table_index = KeywordTableIndex(
|
||||
dataset=dataset,
|
||||
config=KeywordTableConfig(
|
||||
max_keywords_per_chunk=5
|
||||
)
|
||||
)
|
||||
|
||||
documents = kw_table_index.search(query, search_kwargs={'k': self.top_k})
|
||||
if documents:
|
||||
all_documents.extend(documents)
|
||||
else:
|
||||
|
||||
try:
|
||||
model_manager = ModelManager()
|
||||
embedding_model = model_manager.get_model_instance(
|
||||
tenant_id=dataset.tenant_id,
|
||||
provider=dataset.embedding_model_provider,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model=dataset.embedding_model
|
||||
)
|
||||
except LLMBadRequestError:
|
||||
return []
|
||||
except ProviderTokenNotInitError:
|
||||
return []
|
||||
|
||||
embeddings = CacheEmbedding(embedding_model)
|
||||
|
||||
documents = []
|
||||
threads = []
|
||||
if self.top_k > 0:
|
||||
# retrieval_model source with semantic
|
||||
if retrieval_model['search_method'] == 'semantic_search' or retrieval_model[
|
||||
'search_method'] == 'hybrid_search':
|
||||
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
|
||||
'flask_app': current_app._get_current_object(),
|
||||
'dataset_id': str(dataset.id),
|
||||
'query': query,
|
||||
'top_k': self.top_k,
|
||||
'score_threshold': self.score_threshold,
|
||||
'reranking_model': None,
|
||||
'all_documents': documents,
|
||||
'search_method': 'hybrid_search',
|
||||
'embeddings': embeddings
|
||||
})
|
||||
threads.append(embedding_thread)
|
||||
embedding_thread.start()
|
||||
|
||||
# retrieval_model source with full text
|
||||
if retrieval_model['search_method'] == 'full_text_search' or retrieval_model[
|
||||
'search_method'] == 'hybrid_search':
|
||||
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search,
|
||||
kwargs={
|
||||
'flask_app': current_app._get_current_object(),
|
||||
'dataset_id': str(dataset.id),
|
||||
'query': query,
|
||||
'search_method': 'hybrid_search',
|
||||
'embeddings': embeddings,
|
||||
'score_threshold': retrieval_model[
|
||||
'score_threshold'] if retrieval_model[
|
||||
'score_threshold_enabled'] else None,
|
||||
'top_k': self.top_k,
|
||||
'reranking_model': retrieval_model[
|
||||
'reranking_model'] if retrieval_model[
|
||||
'reranking_enable'] else None,
|
||||
'all_documents': documents
|
||||
})
|
||||
threads.append(full_text_index_thread)
|
||||
full_text_index_thread.start()
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
all_documents.extend(documents)
|
||||
236
api/core/tools/tool/dataset_retriever/dataset_retriever_tool.py
Normal file
236
api/core/tools/tool/dataset_retriever/dataset_retriever_tool.py
Normal file
@@ -0,0 +1,236 @@
|
||||
import threading
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler
|
||||
from core.embedding.cached_embedding import CacheEmbedding
|
||||
from core.index.keyword_table_index.keyword_table_index import KeywordTableConfig, KeywordTableIndex
|
||||
from core.model_manager import ModelManager
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
from core.rerank.rerank import RerankRunner
|
||||
from extensions.ext_database import db
|
||||
from flask import current_app
|
||||
from langchain.tools import BaseTool
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
from pydantic import BaseModel, Field
|
||||
from services.retrieval_service import RetrievalService
|
||||
|
||||
default_retrieval_model = {
|
||||
'search_method': 'semantic_search',
|
||||
'reranking_enable': False,
|
||||
'reranking_model': {
|
||||
'reranking_provider_name': '',
|
||||
'reranking_model_name': ''
|
||||
},
|
||||
'top_k': 2,
|
||||
'score_threshold_enabled': False
|
||||
}
|
||||
|
||||
|
||||
class DatasetRetrieverToolInput(BaseModel):
|
||||
query: str = Field(..., description="Query for the dataset to be used to retrieve the dataset.")
|
||||
|
||||
|
||||
class DatasetRetrieverTool(BaseTool):
|
||||
"""Tool for querying a Dataset."""
|
||||
name: str = "dataset"
|
||||
args_schema: Type[BaseModel] = DatasetRetrieverToolInput
|
||||
description: str = "use this to retrieve a dataset. "
|
||||
|
||||
tenant_id: str
|
||||
dataset_id: str
|
||||
top_k: int = 2
|
||||
score_threshold: Optional[float] = None
|
||||
hit_callbacks: List[DatasetIndexToolCallbackHandler] = []
|
||||
return_resource: bool
|
||||
retriever_from: str
|
||||
|
||||
@classmethod
|
||||
def from_dataset(cls, dataset: Dataset, **kwargs):
|
||||
description = dataset.description
|
||||
if not description:
|
||||
description = 'useful for when you want to answer queries about the ' + dataset.name
|
||||
|
||||
description = description.replace('\n', '').replace('\r', '')
|
||||
return cls(
|
||||
name=f'dataset-{dataset.id}',
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
description=description,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
def _run(self, query: str) -> str:
|
||||
dataset = db.session.query(Dataset).filter(
|
||||
Dataset.tenant_id == self.tenant_id,
|
||||
Dataset.id == self.dataset_id
|
||||
).first()
|
||||
|
||||
if not dataset:
|
||||
return ''
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_query(query, dataset.id)
|
||||
|
||||
# get retrieval model , if the model is not setting , using default
|
||||
retrieval_model = dataset.retrieval_model if dataset.retrieval_model else default_retrieval_model
|
||||
if dataset.indexing_technique == "economy":
|
||||
# use keyword table query
|
||||
kw_table_index = KeywordTableIndex(
|
||||
dataset=dataset,
|
||||
config=KeywordTableConfig(
|
||||
max_keywords_per_chunk=5
|
||||
)
|
||||
)
|
||||
|
||||
documents = kw_table_index.search(query, search_kwargs={'k': self.top_k})
|
||||
return str("\n".join([document.page_content for document in documents]))
|
||||
else:
|
||||
# get embedding model instance
|
||||
try:
|
||||
model_manager = ModelManager()
|
||||
embedding_model = model_manager.get_model_instance(
|
||||
tenant_id=dataset.tenant_id,
|
||||
provider=dataset.embedding_model_provider,
|
||||
model_type=ModelType.TEXT_EMBEDDING,
|
||||
model=dataset.embedding_model
|
||||
)
|
||||
except InvokeAuthorizationError:
|
||||
return ''
|
||||
|
||||
embeddings = CacheEmbedding(embedding_model)
|
||||
|
||||
documents = []
|
||||
threads = []
|
||||
if self.top_k > 0:
|
||||
# retrieval source with semantic
|
||||
if retrieval_model['search_method'] == 'semantic_search' or retrieval_model['search_method'] == 'hybrid_search':
|
||||
embedding_thread = threading.Thread(target=RetrievalService.embedding_search, kwargs={
|
||||
'flask_app': current_app._get_current_object(),
|
||||
'dataset_id': str(dataset.id),
|
||||
'query': query,
|
||||
'top_k': self.top_k,
|
||||
'score_threshold': retrieval_model['score_threshold'] if retrieval_model[
|
||||
'score_threshold_enabled'] else None,
|
||||
'reranking_model': retrieval_model['reranking_model'] if retrieval_model[
|
||||
'reranking_enable'] else None,
|
||||
'all_documents': documents,
|
||||
'search_method': retrieval_model['search_method'],
|
||||
'embeddings': embeddings
|
||||
})
|
||||
threads.append(embedding_thread)
|
||||
embedding_thread.start()
|
||||
|
||||
# retrieval_model source with full text
|
||||
if retrieval_model['search_method'] == 'full_text_search' or retrieval_model['search_method'] == 'hybrid_search':
|
||||
full_text_index_thread = threading.Thread(target=RetrievalService.full_text_index_search, kwargs={
|
||||
'flask_app': current_app._get_current_object(),
|
||||
'dataset_id': str(dataset.id),
|
||||
'query': query,
|
||||
'search_method': retrieval_model['search_method'],
|
||||
'embeddings': embeddings,
|
||||
'score_threshold': retrieval_model['score_threshold'] if retrieval_model[
|
||||
'score_threshold_enabled'] else None,
|
||||
'top_k': self.top_k,
|
||||
'reranking_model': retrieval_model['reranking_model'] if retrieval_model[
|
||||
'reranking_enable'] else None,
|
||||
'all_documents': documents
|
||||
})
|
||||
threads.append(full_text_index_thread)
|
||||
full_text_index_thread.start()
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
# hybrid search: rerank after all documents have been searched
|
||||
if retrieval_model['search_method'] == 'hybrid_search':
|
||||
# get rerank model instance
|
||||
try:
|
||||
model_manager = ModelManager()
|
||||
rerank_model_instance = model_manager.get_model_instance(
|
||||
tenant_id=dataset.tenant_id,
|
||||
provider=retrieval_model['reranking_model']['reranking_provider_name'],
|
||||
model_type=ModelType.RERANK,
|
||||
model=retrieval_model['reranking_model']['reranking_model_name']
|
||||
)
|
||||
except InvokeAuthorizationError:
|
||||
return ''
|
||||
|
||||
rerank_runner = RerankRunner(rerank_model_instance)
|
||||
documents = rerank_runner.run(
|
||||
query=query,
|
||||
documents=documents,
|
||||
score_threshold=retrieval_model['score_threshold'] if retrieval_model[
|
||||
'score_threshold_enabled'] else None,
|
||||
top_n=self.top_k
|
||||
)
|
||||
else:
|
||||
documents = []
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.on_tool_end(documents)
|
||||
document_score_list = {}
|
||||
if dataset.indexing_technique != "economy":
|
||||
for item in documents:
|
||||
if 'score' in item.metadata and item.metadata['score']:
|
||||
document_score_list[item.metadata['doc_id']] = item.metadata['score']
|
||||
document_context_list = []
|
||||
index_node_ids = [document.metadata['doc_id'] for document in documents]
|
||||
segments = DocumentSegment.query.filter(DocumentSegment.dataset_id == self.dataset_id,
|
||||
DocumentSegment.completed_at.isnot(None),
|
||||
DocumentSegment.status == 'completed',
|
||||
DocumentSegment.enabled == True,
|
||||
DocumentSegment.index_node_id.in_(index_node_ids)
|
||||
).all()
|
||||
|
||||
if segments:
|
||||
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
|
||||
sorted_segments = sorted(segments,
|
||||
key=lambda segment: index_node_id_to_position.get(segment.index_node_id,
|
||||
float('inf')))
|
||||
for segment in sorted_segments:
|
||||
if segment.answer:
|
||||
document_context_list.append(f'question:{segment.content} answer:{segment.answer}')
|
||||
else:
|
||||
document_context_list.append(segment.content)
|
||||
if self.return_resource:
|
||||
context_list = []
|
||||
resource_number = 1
|
||||
for segment in sorted_segments:
|
||||
context = {}
|
||||
document = Document.query.filter(Document.id == segment.document_id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
).first()
|
||||
if dataset and document:
|
||||
source = {
|
||||
'position': resource_number,
|
||||
'dataset_id': dataset.id,
|
||||
'dataset_name': dataset.name,
|
||||
'document_id': document.id,
|
||||
'document_name': document.name,
|
||||
'data_source_type': document.data_source_type,
|
||||
'segment_id': segment.id,
|
||||
'retriever_from': self.retriever_from,
|
||||
'score': document_score_list.get(segment.index_node_id, None)
|
||||
|
||||
}
|
||||
if self.retriever_from == 'dev':
|
||||
source['hit_count'] = segment.hit_count
|
||||
source['word_count'] = segment.word_count
|
||||
source['segment_position'] = segment.position
|
||||
source['index_node_hash'] = segment.index_node_hash
|
||||
if segment.answer:
|
||||
source['content'] = f'question:{segment.content} \nanswer:{segment.answer}'
|
||||
else:
|
||||
source['content'] = segment.content
|
||||
context_list.append(source)
|
||||
resource_number += 1
|
||||
|
||||
for hit_callback in self.hit_callbacks:
|
||||
hit_callback.return_retriever_resource_info(context_list)
|
||||
|
||||
return str("\n".join(document_context_list))
|
||||
|
||||
async def _arun(self, tool_input: str) -> str:
|
||||
raise NotImplementedError()
|
||||
95
api/core/tools/tool/dataset_retriever_tool.py
Normal file
95
api/core/tools/tool/dataset_retriever_tool.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from typing import Any, Dict, List, Union
|
||||
from core.features.dataset_retrieval import DatasetRetrievalFeature
|
||||
from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParamter, ToolIdentity, ToolDescription
|
||||
from core.tools.tool.tool import Tool
|
||||
from core.tools.entities.common_entities import I18nObject
|
||||
from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler
|
||||
from core.entities.application_entities import DatasetRetrieveConfigEntity, InvokeFrom
|
||||
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
class DatasetRetrieverTool(Tool):
|
||||
langchain_tool: BaseTool
|
||||
|
||||
@staticmethod
|
||||
def get_dataset_tools(tenant_id: str,
|
||||
dataset_ids: list[str],
|
||||
retrieve_config: DatasetRetrieveConfigEntity,
|
||||
return_resource: bool,
|
||||
invoke_from: InvokeFrom,
|
||||
hit_callback: DatasetIndexToolCallbackHandler
|
||||
) -> List['DatasetRetrieverTool']:
|
||||
"""
|
||||
get dataset tool
|
||||
"""
|
||||
# check if retrieve_config is valid
|
||||
if dataset_ids is None or len(dataset_ids) == 0:
|
||||
return []
|
||||
if retrieve_config is None:
|
||||
return []
|
||||
|
||||
feature = DatasetRetrievalFeature()
|
||||
|
||||
# save original retrieve strategy, and set retrieve strategy to SINGLE
|
||||
# Agent only support SINGLE mode
|
||||
original_retriever_mode = retrieve_config.retrieve_strategy
|
||||
retrieve_config.retrieve_strategy = DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE
|
||||
langchain_tools = feature.to_dataset_retriever_tool(
|
||||
tenant_id=tenant_id,
|
||||
dataset_ids=dataset_ids,
|
||||
retrieve_config=retrieve_config,
|
||||
return_resource=return_resource,
|
||||
invoke_from=invoke_from,
|
||||
hit_callback=hit_callback
|
||||
)
|
||||
# restore retrieve strategy
|
||||
retrieve_config.retrieve_strategy = original_retriever_mode
|
||||
|
||||
# convert langchain tools to Tools
|
||||
tools = []
|
||||
for langchain_tool in langchain_tools:
|
||||
tool = DatasetRetrieverTool(
|
||||
langchain_tool=langchain_tool,
|
||||
identity=ToolIdentity(author='', name=langchain_tool.name, label=I18nObject(en_US='', zh_Hans='')),
|
||||
parameters=[],
|
||||
is_team_authorization=True,
|
||||
description=ToolDescription(
|
||||
human=I18nObject(en_US='', zh_Hans=''),
|
||||
llm=langchain_tool.description),
|
||||
runtime=DatasetRetrieverTool.Runtime()
|
||||
)
|
||||
|
||||
tools.append(tool)
|
||||
|
||||
return tools
|
||||
|
||||
def get_runtime_parameters(self) -> List[ToolParamter]:
|
||||
return [
|
||||
ToolParamter(name='query',
|
||||
label=I18nObject(en_US='', zh_Hans=''),
|
||||
human_description=I18nObject(en_US='', zh_Hans=''),
|
||||
type=ToolParamter.ToolParameterType.STRING,
|
||||
form=ToolParamter.ToolParameterForm.LLM,
|
||||
llm_description='Query for the dataset to be used to retrieve the dataset.',
|
||||
required=True,
|
||||
default=''),
|
||||
]
|
||||
|
||||
def _invoke(self, user_id: str, tool_paramters: Dict[str, Any]) -> ToolInvokeMessage | List[ToolInvokeMessage]:
|
||||
"""
|
||||
invoke dataset retriever tool
|
||||
"""
|
||||
query = tool_paramters.get('query', None)
|
||||
if not query:
|
||||
return self.create_text_message(text='please input query')
|
||||
|
||||
# invoke dataset retriever tool
|
||||
result = self.langchain_tool._run(query=query)
|
||||
|
||||
return self.create_text_message(text=result)
|
||||
|
||||
def validate_credentials(self, credentails: Dict[str, Any], parameters: Dict[str, Any]) -> None:
|
||||
"""
|
||||
validate the credentials for dataset retriever tool
|
||||
"""
|
||||
pass
|
||||
302
api/core/tools/tool/tool.py
Normal file
302
api/core/tools/tool/tool.py
Normal file
@@ -0,0 +1,302 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
from typing import List, Dict, Any, Union, Optional
|
||||
from abc import abstractmethod, ABC
|
||||
from enum import Enum
|
||||
|
||||
from core.tools.entities.tool_entities import ToolIdentity, ToolInvokeMessage,\
|
||||
ToolParamter, ToolDescription, ToolRuntimeVariablePool, ToolRuntimeVariable, ToolRuntimeImageVariable
|
||||
from core.tools.tool_file_manager import ToolFileManager
|
||||
from core.callback_handler.agent_tool_callback_handler import DifyAgentCallbackHandler
|
||||
|
||||
class Tool(BaseModel, ABC):
|
||||
identity: ToolIdentity = None
|
||||
parameters: Optional[List[ToolParamter]] = None
|
||||
description: ToolDescription = None
|
||||
is_team_authorization: bool = False
|
||||
agent_callback: Optional[DifyAgentCallbackHandler] = None
|
||||
use_callback: bool = False
|
||||
|
||||
class Runtime(BaseModel):
|
||||
"""
|
||||
Meta data of a tool call processing
|
||||
"""
|
||||
def __init__(self, **data: Any):
|
||||
super().__init__(**data)
|
||||
if not self.runtime_parameters:
|
||||
self.runtime_parameters = {}
|
||||
|
||||
tenant_id: str = None
|
||||
tool_id: str = None
|
||||
credentials: Dict[str, Any] = None
|
||||
runtime_parameters: Dict[str, Any] = None
|
||||
|
||||
runtime: Runtime = None
|
||||
variables: ToolRuntimeVariablePool = None
|
||||
|
||||
def __init__(self, **data: Any):
|
||||
super().__init__(**data)
|
||||
|
||||
if not self.agent_callback:
|
||||
self.use_callback = False
|
||||
else:
|
||||
self.use_callback = True
|
||||
|
||||
class VARIABLE_KEY(Enum):
|
||||
IMAGE = 'image'
|
||||
|
||||
def fork_tool_runtime(self, meta: Dict[str, Any], agent_callback: DifyAgentCallbackHandler = None) -> 'Tool':
|
||||
"""
|
||||
fork a new tool with meta data
|
||||
|
||||
:param meta: the meta data of a tool call processing, tenant_id is required
|
||||
:return: the new tool
|
||||
"""
|
||||
return self.__class__(
|
||||
identity=self.identity.copy() if self.identity else None,
|
||||
parameters=self.parameters.copy() if self.parameters else None,
|
||||
description=self.description.copy() if self.description else None,
|
||||
runtime=Tool.Runtime(**meta),
|
||||
agent_callback=agent_callback
|
||||
)
|
||||
|
||||
def load_variables(self, variables: ToolRuntimeVariablePool):
|
||||
"""
|
||||
load variables from database
|
||||
|
||||
:param conversation_id: the conversation id
|
||||
"""
|
||||
self.variables = variables
|
||||
|
||||
def set_image_variable(self, variable_name: str, image_key: str) -> None:
|
||||
"""
|
||||
set an image variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return
|
||||
|
||||
self.variables.set_file(self.identity.name, variable_name, image_key)
|
||||
|
||||
def set_text_variable(self, variable_name: str, text: str) -> None:
|
||||
"""
|
||||
set a text variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return
|
||||
|
||||
self.variables.set_text(self.identity.name, variable_name, text)
|
||||
|
||||
def get_variable(self, name: Union[str, Enum]) -> Optional[ToolRuntimeVariable]:
|
||||
"""
|
||||
get a variable
|
||||
|
||||
:param name: the name of the variable
|
||||
:return: the variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return None
|
||||
|
||||
if isinstance(name, Enum):
|
||||
name = name.value
|
||||
|
||||
for variable in self.variables.pool:
|
||||
if variable.name == name:
|
||||
return variable
|
||||
|
||||
return None
|
||||
|
||||
def get_default_image_variable(self) -> Optional[ToolRuntimeVariable]:
|
||||
"""
|
||||
get the default image variable
|
||||
|
||||
:return: the image variable
|
||||
"""
|
||||
if not self.variables:
|
||||
return None
|
||||
|
||||
return self.get_variable(self.VARIABLE_KEY.IMAGE)
|
||||
|
||||
def get_variable_file(self, name: Union[str, Enum]) -> Optional[bytes]:
|
||||
"""
|
||||
get a variable file
|
||||
|
||||
:param name: the name of the variable
|
||||
:return: the variable file
|
||||
"""
|
||||
variable = self.get_variable(name)
|
||||
if not variable:
|
||||
return None
|
||||
|
||||
if not isinstance(variable, ToolRuntimeImageVariable):
|
||||
return None
|
||||
|
||||
message_file_id = variable.value
|
||||
# get file binary
|
||||
file_binary = ToolFileManager.get_file_binary_by_message_file_id(message_file_id)
|
||||
if not file_binary:
|
||||
return None
|
||||
|
||||
return file_binary[0]
|
||||
|
||||
def list_variables(self) -> List[ToolRuntimeVariable]:
|
||||
"""
|
||||
list all variables
|
||||
|
||||
:return: the variables
|
||||
"""
|
||||
if not self.variables:
|
||||
return []
|
||||
|
||||
return self.variables.pool
|
||||
|
||||
def list_default_image_variables(self) -> List[ToolRuntimeVariable]:
|
||||
"""
|
||||
list all image variables
|
||||
|
||||
:return: the image variables
|
||||
"""
|
||||
if not self.variables:
|
||||
return []
|
||||
|
||||
result = []
|
||||
|
||||
for variable in self.variables.pool:
|
||||
if variable.name.startswith(self.VARIABLE_KEY.IMAGE.value):
|
||||
result.append(variable)
|
||||
|
||||
return result
|
||||
|
||||
def invoke(self, user_id: str, tool_paramters: Dict[str, Any]) -> List[ToolInvokeMessage]:
|
||||
# update tool_paramters
|
||||
if self.runtime.runtime_parameters:
|
||||
tool_paramters.update(self.runtime.runtime_parameters)
|
||||
|
||||
# hit callback
|
||||
if self.use_callback:
|
||||
self.agent_callback.on_tool_start(
|
||||
tool_name=self.identity.name,
|
||||
tool_inputs=tool_paramters
|
||||
)
|
||||
|
||||
try:
|
||||
result = self._invoke(
|
||||
user_id=user_id,
|
||||
tool_paramters=tool_paramters,
|
||||
)
|
||||
except Exception as e:
|
||||
if self.use_callback:
|
||||
self.agent_callback.on_tool_error(e)
|
||||
raise e
|
||||
|
||||
if not isinstance(result, list):
|
||||
result = [result]
|
||||
|
||||
# hit callback
|
||||
if self.use_callback:
|
||||
self.agent_callback.on_tool_end(
|
||||
tool_name=self.identity.name,
|
||||
tool_inputs=tool_paramters,
|
||||
tool_outputs=self._convert_tool_response_to_str(result)
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def _convert_tool_response_to_str(self, tool_response: List[ToolInvokeMessage]) -> str:
|
||||
"""
|
||||
Handle tool response
|
||||
"""
|
||||
result = ''
|
||||
for response in tool_response:
|
||||
if response.type == ToolInvokeMessage.MessageType.TEXT:
|
||||
result += response.message
|
||||
elif response.type == ToolInvokeMessage.MessageType.LINK:
|
||||
result += f"result link: {response.message}. please dirct user to check it."
|
||||
elif response.type == ToolInvokeMessage.MessageType.IMAGE_LINK or \
|
||||
response.type == ToolInvokeMessage.MessageType.IMAGE:
|
||||
result += f"image has been created and sent to user already, you should tell user to check it now."
|
||||
elif response.type == ToolInvokeMessage.MessageType.BLOB:
|
||||
if len(response.message) > 114:
|
||||
result += str(response.message[:114]) + '...'
|
||||
else:
|
||||
result += str(response.message)
|
||||
else:
|
||||
result += f"tool response: {response.message}."
|
||||
|
||||
return result
|
||||
|
||||
@abstractmethod
|
||||
def _invoke(self, user_id: str, tool_paramters: Dict[str, Any]) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]:
|
||||
pass
|
||||
|
||||
def validate_credentials(self, credentails: Dict[str, Any], parameters: Dict[str, Any]) -> None:
|
||||
"""
|
||||
validate the credentials
|
||||
|
||||
:param credentails: the credentials
|
||||
:param parameters: the parameters
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_runtime_parameters(self) -> List[ToolParamter]:
|
||||
"""
|
||||
get the runtime parameters
|
||||
|
||||
interface for developer to dynamic change the parameters of a tool depends on the variables pool
|
||||
|
||||
:return: the runtime parameters
|
||||
"""
|
||||
return self.parameters
|
||||
|
||||
def is_tool_avaliable(self) -> bool:
|
||||
"""
|
||||
check if the tool is avaliable
|
||||
|
||||
:return: if the tool is avaliable
|
||||
"""
|
||||
return True
|
||||
|
||||
def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessage:
|
||||
"""
|
||||
create an image message
|
||||
|
||||
:param image: the url of the image
|
||||
:return: the image message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
|
||||
message=image,
|
||||
save_as=save_as)
|
||||
|
||||
def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage:
|
||||
"""
|
||||
create a link message
|
||||
|
||||
:param link: the url of the link
|
||||
:return: the link message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
|
||||
message=link,
|
||||
save_as=save_as)
|
||||
|
||||
def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage:
|
||||
"""
|
||||
create a text message
|
||||
|
||||
:param text: the text
|
||||
:return: the text message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT,
|
||||
message=text,
|
||||
save_as=save_as
|
||||
)
|
||||
|
||||
def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
|
||||
"""
|
||||
create a blob message
|
||||
|
||||
:param blob: the blob
|
||||
:return: the blob message
|
||||
"""
|
||||
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
|
||||
message=blob, meta=meta,
|
||||
save_as=save_as
|
||||
)
|
||||
Reference in New Issue
Block a user