Feat/huggingface embedding support (#1211)

Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
2023-09-22 13:59:02 +08:00
parent 32d9b6181c
commit e409895c02
10 changed files with 416 additions and 28 deletions
--- a/api/core/model_providers/models/embedding/huggingface_embedding.py
+++ b/api/core/model_providers/models/embedding/huggingface_embedding.py
@@ -0,0 +1,22 @@
+from core.model_providers.error import LLMBadRequestError
+from core.model_providers.providers.base import BaseModelProvider
+from core.third_party.langchain.embeddings.huggingface_hub_embedding import HuggingfaceHubEmbeddings
+from core.model_providers.models.embedding.base import BaseEmbedding
+
+
+class HuggingfaceEmbedding(BaseEmbedding):
+    """Embedding model whose client is a ``HuggingfaceHubEmbeddings`` instance."""
+
+    def __init__(self, model_provider: BaseModelProvider, name: str):
+        """
+        Build the embeddings client from the credentials stored for this model.
+
+        :param model_provider: provider asked for the model credentials
+        :param name: model name, also forwarded to the client as ``model``
+        """
+        # ``self.type`` is not defined in this class; presumably it comes from BaseEmbedding.
+        model_credentials = model_provider.get_model_credentials(model_name=name, model_type=self.type)
+        embeddings_client = HuggingfaceHubEmbeddings(model=name, **model_credentials)
+
+        super().__init__(model_provider, embeddings_client, name)
+
+    def handle_exceptions(self, ex: Exception) -> Exception:
+        """
+        Convert any exception into an ``LLMBadRequestError`` carrying its message.
+
+        :param ex: the exception to convert
+        :return: the new error instance (returned, not raised)
+        """
+        message = f"Huggingface embedding: {str(ex)}"
+        return LLMBadRequestError(message)
--- a/api/core/model_providers/providers/huggingface_hub_provider.py
+++ b/api/core/model_providers/providers/huggingface_hub_provider.py
@@ -1,5 +1,6 @@
 import json
 from typing import Type
+import requests

 from huggingface_hub import HfApi

@@ -10,8 +11,12 @@ from core.model_providers.providers.base import BaseModelProvider, CredentialsVa

 from core.model_providers.models.base import BaseProviderModel
 from core.third_party.langchain.llms.huggingface_endpoint_llm import HuggingFaceEndpointLLM
+from core.third_party.langchain.embeddings.huggingface_hub_embedding import HuggingfaceHubEmbeddings
+from core.model_providers.models.embedding.huggingface_embedding import HuggingfaceEmbedding
 from models.provider import ProviderType

+HUGGINGFACE_ENDPOINT_API = 'https://api.endpoints.huggingface.cloud/v2/endpoint/'
+

 class HuggingfaceHubProvider(BaseModelProvider):
    @property
@@ -33,6 +38,8 @@ class HuggingfaceHubProvider(BaseModelProvider):
        """
        if model_type == ModelType.TEXT_GENERATION:
            model_class = HuggingfaceHubModel
+        elif model_type == ModelType.EMBEDDINGS:
+            model_class = HuggingfaceEmbedding
        else:
            raise NotImplementedError

@@ -63,7 +70,7 @@ class HuggingfaceHubProvider(BaseModelProvider):
        :param model_type:
        :param credentials:
        """
-        if model_type != ModelType.TEXT_GENERATION:
+        if model_type not in [ModelType.TEXT_GENERATION, ModelType.EMBEDDINGS]:
            raise NotImplementedError

        if 'huggingfacehub_api_type' not in credentials \
@@ -88,19 +95,15 @@ class HuggingfaceHubProvider(BaseModelProvider):
            if 'task_type' not in credentials:
                raise CredentialsValidateFailedError('Task Type must be provided.')

-            if credentials['task_type'] not in ("text2text-generation", "text-generation", "summarization"):
+            if credentials['task_type'] not in ("text2text-generation", "text-generation", 'feature-extraction'):
                raise CredentialsValidateFailedError('Task Type must be one of text2text-generation, '
-                                                     'text-generation, summarization.')
+                                                     'text-generation, feature-extraction.')

            try:
-                llm = HuggingFaceEndpointLLM(
-                    endpoint_url=credentials['huggingfacehub_endpoint_url'],
-                    task=credentials['task_type'],
-                    model_kwargs={"temperature": 0.5, "max_new_tokens": 200},
-                    huggingfacehub_api_token=credentials['huggingfacehub_api_token']
-                )
-
-                llm("ping")
+                if credentials['task_type'] == 'feature-extraction':
+                    cls.check_embedding_valid(credentials, model_name)
+                else:
+                    cls.check_llm_valid(credentials)    
            except Exception as e:
                raise CredentialsValidateFailedError(f"{e.__class__.__name__}:{str(e)}")
        else:
@@ -112,13 +115,64 @@ class HuggingfaceHubProvider(BaseModelProvider):
                if 'inference' in model_info.cardData and not model_info.cardData['inference']:
                    raise ValueError(f'Inference API has been turned off for this model {model_name}.')

-                VALID_TASKS = ("text2text-generation", "text-generation", "summarization")
+                VALID_TASKS = ("text2text-generation", "text-generation", "feature-extraction")
                if model_info.pipeline_tag not in VALID_TASKS:
                    raise ValueError(f"Model {model_name} is not a valid task, "
                                     f"must be one of {VALID_TASKS}.")
            except Exception as e:
                raise CredentialsValidateFailedError(f"{e.__class__.__name__}:{str(e)}")

+    @classmethod
+    def check_llm_valid(cls, credentials: dict):
+        """
+        Send a test prompt to the configured inference endpoint.
+
+        Nothing is returned; any exception raised while building the client or
+        while calling it propagates to the caller.
+
+        :param credentials: must contain ``huggingfacehub_endpoint_url``,
+            ``task_type`` and ``huggingfacehub_api_token``
+        """
+        endpoint_kwargs = {
+            'endpoint_url': credentials['huggingfacehub_endpoint_url'],
+            'task': credentials['task_type'],
+            'model_kwargs': {"temperature": 0.5, "max_new_tokens": 200},
+            'huggingfacehub_api_token': credentials['huggingfacehub_api_token'],
+        }
+
+        # The reply is discarded; only a successful round trip matters.
+        HuggingFaceEndpointLLM(**endpoint_kwargs)("ping")
+
+    @classmethod
+    def check_embedding_valid(cls, credentials: dict, model_name: str):
+        """
+        Validate credentials for an embedding model.
+
+        First checks that the endpoint URL belongs to ``model_name``, then embeds
+        a test query. Nothing is returned; any exception propagates to the caller.
+
+        :param credentials: passed to the endpoint check and, as keyword
+            arguments, to ``HuggingfaceHubEmbeddings``
+        :param model_name: model name to validate
+        """
+        cls.check_endpoint_url_model_repository_name(credentials, model_name)
+
+        # The embedding itself is discarded; only a successful round trip matters.
+        HuggingfaceHubEmbeddings(model=model_name, **credentials).embed_query("ping")
+
+    @classmethod
+    def check_endpoint_url_model_repository_name(cls, credentials: dict, model_name: str):
+        """
+        Check that the configured endpoint URL serves the repository ``model_name``.
+
+        Lists the endpoints of ``huggingface_namespace`` through the Hugging Face
+        endpoints API, finds the one whose status URL equals
+        ``huggingfacehub_endpoint_url`` and compares its model repository with
+        ``model_name``.
+
+        :param credentials: must contain ``huggingface_namespace``,
+            ``huggingfacehub_api_token`` and ``huggingfacehub_endpoint_url``
+        :param model_name: repository name the endpoint is expected to serve
+        :raises ValueError: on every failure (missing credential key, network
+            error or timeout, non-200 response, unknown endpoint URL, or a
+            repository name that differs from ``model_name``)
+        """
+        try:
+            url = f'{HUGGINGFACE_ENDPOINT_API}{credentials["huggingface_namespace"]}'
+            headers = {
+                'Authorization': f'Bearer {credentials["huggingfacehub_api_token"]}',
+                'Content-Type': 'application/json'
+            }
+
+            # requests waits forever by default; bound connect/read time so that
+            # credential validation cannot hang on an unresponsive API.
+            response = requests.get(url=url, headers=headers, timeout=(10, 60))
+
+            if response.status_code != 200:
+                raise ValueError('User Name or Organization Name is invalid.')
+
+            # Stays empty when no endpoint matches, which then fails the comparison below.
+            model_repository_name = ''
+
+            for item in response.json().get("items", []):
+                if item.get("status", {}).get("url") == credentials['huggingfacehub_endpoint_url']:
+                    model_repository_name = item.get("model", {}).get("repository")
+                    break
+
+            if model_repository_name != model_name:
+                raise ValueError(f'Model Name {model_name} is invalid. Please check it on the inference endpoints console.')
+
+        except Exception as e:
+            # Normalise every failure to ValueError while keeping the original cause attached.
+            raise ValueError(str(e)) from e
+        
+
    @classmethod
    def encrypt_model_credentials(cls, tenant_id: str, model_name: str, model_type: ModelType,
                                  credentials: dict) -> dict:
--- a/api/core/third_party/langchain/embeddings/huggingface_hub_embedding.py
+++ b/api/core/third_party/langchain/embeddings/huggingface_hub_embedding.py
@@ -0,0 +1,74 @@
+from typing import Any, Dict, List, Optional
+import json
+import numpy as np
+
+from pydantic import BaseModel, Extra, root_validator
+
+from langchain.embeddings.base import Embeddings
+from langchain.utils import get_from_dict_or_env
+from huggingface_hub import InferenceClient
+
+HOSTED_INFERENCE_API = 'hosted_inference_api'
+INFERENCE_ENDPOINTS = 'inference_endpoints'
+
+
+class HuggingfaceHubEmbeddings(BaseModel, Embeddings):
+    """
+    LangChain ``Embeddings`` implementation that posts texts through
+    ``huggingface_hub.InferenceClient``.
+
+    Requests target ``model`` when ``huggingfacehub_api_type`` equals
+    ``HOSTED_INFERENCE_API`` and ``huggingfacehub_endpoint_url`` otherwise.
+    """
+    # Overwritten with an InferenceClient by validate_environment().
+    client: Any
+    model: str
+
+    # huggingface_namespace and task_type are declared but not read in this class.
+    huggingface_namespace: Optional[str] = None
+    task_type: Optional[str] = None
+    huggingfacehub_api_type: Optional[str] = None
+    huggingfacehub_api_token: Optional[str] = None
+    huggingfacehub_endpoint_url: Optional[str] = None
+
+    class Config:
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """
+        Resolve the API token and create the inference client.
+
+        The token is taken from ``huggingfacehub_api_token`` or, failing that,
+        from the ``HUGGINGFACEHUB_API_TOKEN`` environment variable.
+        """
+        values['huggingfacehub_api_token'] = get_from_dict_or_env(
+            values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN"
+        )
+
+        values['client'] = InferenceClient(token=values['huggingfacehub_api_token'])
+
+        return values
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """
+        Embed a list of texts.
+
+        :param texts: texts to embed
+        :return: one embedding vector per text; an empty list for empty input
+        """
+        # Nothing to embed: skip the network round trip entirely.
+        if not texts:
+            return []
+
+        if self.huggingfacehub_api_type == HOSTED_INFERENCE_API:
+            model = self.model
+        else:
+            model = self.huggingfacehub_endpoint_url
+
+        output = self.client.post(
+            json={
+                "inputs": texts,
+                "options": {
+                    "wait_for_model": False,
+                    "use_cache": False
+                }
+            }, model=model)
+
+        embeddings = json.loads(output.decode())
+        return self.mean_pooling(embeddings)
+
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Embed a single text.
+
+        :param text: text to embed
+        :return: the embedding vector of ``text``
+        """
+        return self.embed_documents([text])[0]
+
+    # https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+    # Returned values are a list of floats, or a list of lists of floats
+    # (depending on whether you sent a string or a list of strings,
+    # and whether the automatic reduction, usually mean pooling, was applied for you or not.
+    # This should be explained in the model's README.)
+    def mean_pooling(self, embeddings: List) -> List[List[float]]:
+        """
+        Reduce the raw response to one vector per input text.
+
+        :param embeddings: decoded JSON response of the feature-extraction call
+        :return: ``embeddings`` unchanged when its items are already flat
+            vectors, otherwise the per-item mean; an empty list for empty input
+        """
+        if not embeddings:
+            return []
+
+        # The model already reduced the output (List[List[float]]): nothing to pool.
+        if not isinstance(embeddings[0][0], list):
+            return embeddings
+
+        # Deeper nesting: average ``embedding[0]`` along its first axis.
+        # NOTE(review): this presumably expects every item to be shaped
+        # [1][tokens][dim] -- confirm against the actual endpoint response.
+        sentence_embeddings = [np.mean(embedding[0], axis=0).tolist() for embedding in embeddings]
+        return sentence_embeddings
--- a/api/core/third_party/langchain/llms/huggingface_hub_llm.py
+++ b/api/core/third_party/langchain/llms/huggingface_hub_llm.py
@@ -16,7 +16,7 @@ class HuggingFaceHubLLM(HuggingFaceHub):
    environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass
    it as a named parameter to the constructor.

-    Only supports `text-generation`, `text2text-generation` and `summarization` for now.
+    Only supports `text-generation` and `text2text-generation` for now.

    Example:
        .. code-block:: python