feat: add jina embedding (#1647)

Co-authored-by: takatost <takatost@gmail.com>
This commit is contained in:
zxhlyh
2023-11-29 14:58:11 +08:00
committed by GitHub
parent 454577c6b1
commit 451af66be0
22 changed files with 662 additions and 4 deletions

View File

@@ -53,4 +53,7 @@ OPENLLM_SERVER_URL=
LOCALAI_SERVER_URL=
# Cohere Credentials
COHERE_API_KEY=
COHERE_API_KEY=
# Jina Credentials
JINA_API_KEY=

View File

@@ -0,0 +1,42 @@
import json
import os
from unittest.mock import patch
from core.model_providers.models.embedding.jina_embedding import JinaEmbedding
from core.model_providers.providers.jina_provider import JinaProvider
from models.provider import Provider, ProviderType
def get_mock_provider(valid_api_key):
    """Build a custom-type ``jina`` Provider carrying the given API key.

    The key is serialized as-is into ``encrypted_config``; nothing is
    encrypted here, so callers are expected to patch decryption accordingly.

    :param valid_api_key: API key to place under ``api_key`` in the config.
    :return: a ``Provider`` marked as valid.
    """
    config = json.dumps({'api_key': valid_api_key})
    return Provider(
        id='provider_id',
        tenant_id='tenant_id',
        provider_name='jina',
        provider_type=ProviderType.CUSTOM.value,
        encrypted_config=config,
        is_valid=True,
    )
def get_mock_embedding_model():
    """Create a ``JinaEmbedding`` for ``jina-embeddings-v2-small-en``.

    The API key is read from the ``JINA_API_KEY`` environment variable; a
    missing variable raises ``KeyError``.

    :return: a ``JinaEmbedding`` backed by a mock ``JinaProvider``.
    """
    api_key = os.environ['JINA_API_KEY']
    jina_provider = JinaProvider(provider=get_mock_provider(api_key))
    return JinaEmbedding(
        model_provider=jina_provider,
        name='jina-embeddings-v2-small-en',
    )
def decrypt_side_effect(tenant_id, encrypted_api_key):
    """Pass-through stand-in for ``decrypt_token``.

    :param tenant_id: ignored; present to match the patched call signature.
    :param encrypted_api_key: value to return unchanged.
    :return: ``encrypted_api_key`` as given.
    """
    return encrypted_api_key
@patch('core.helper.encrypter.decrypt_token', side_effect=decrypt_side_effect)
def test_embedding(mock_decrypt):
    """Embed a short query and check the shape of the returned vector.

    ``decrypt_token`` is patched with a pass-through so the plain key in the
    mock provider config is used unchanged.
    """
    embedding_model = get_mock_embedding_model()
    rst = embedding_model.client.embed_query('test')

    assert isinstance(rst, list)
    # The test expects a 512-element vector from this model.
    assert len(rst) == 512

View File

@@ -0,0 +1,88 @@
import pytest
from unittest.mock import patch
import json
from core.model_providers.providers.base import CredentialsValidateFailedError
from core.model_providers.providers.jina_provider import JinaProvider
from models.provider import ProviderType, Provider
# Value passed as provider_name when the tests build Provider objects.
PROVIDER_NAME = 'jina'
# Provider class exercised by the tests in this module.
MODEL_PROVIDER_CLASS = JinaProvider
# Sample plain-text credentials; tests copy this dict before mutating it.
VALIDATE_CREDENTIAL = {'api_key': 'valid_key'}
def encrypt_side_effect(tenant_id, encrypt_key):
    """Fake ``encrypt_token``: mark a value as encrypted with a fixed prefix.

    :param tenant_id: ignored; present to match the patched call signature.
    :param encrypt_key: plain value to "encrypt".
    :return: ``encrypt_key`` prefixed with ``encrypted_``.
    """
    return f'encrypted_{encrypt_key}'
def decrypt_side_effect(tenant_id, encrypted_key):
    """Fake ``decrypt_token``: strip the leading ``encrypted_`` marker.

    Only a leading marker is removed, so a key whose own text contains
    ``encrypted_`` is returned intact. Values without the marker are returned
    unchanged.

    :param tenant_id: ignored; present to match the patched call signature.
    :param encrypted_key: value produced by the fake encryption.
    :return: the value with one leading ``encrypted_`` prefix removed.
    """
    prefix = 'encrypted_'
    if encrypted_key.startswith(prefix):
        return encrypted_key[len(prefix):]
    return encrypted_key
def test_is_provider_credentials_valid_or_raise_valid(mocker):
    """Validation accepts the sample credentials when ``embed_query`` is stubbed."""
    mocker.patch(
        'core.third_party.langchain.embeddings.jina_embedding.JinaEmbeddings.embed_query',
        return_value=[1, 2],
    )

    # The test passes as long as this call does not raise.
    MODEL_PROVIDER_CLASS.is_provider_credentials_valid_or_raise(VALIDATE_CREDENTIAL)
def test_is_provider_credentials_valid_or_raise_invalid():
    """Validation rejects missing credentials and an invalid API key."""
    # raise CredentialsValidateFailedError if api_key is not in credentials
    with pytest.raises(CredentialsValidateFailedError):
        MODEL_PROVIDER_CLASS.is_provider_credentials_valid_or_raise({})

    credential = VALIDATE_CREDENTIAL.copy()
    credential['api_key'] = 'invalid_key'

    # raise CredentialsValidateFailedError if api_key is invalid
    # NOTE(review): embed_query is not patched in this test, so this check
    # presumably depends on the real service rejecting the key - confirm.
    with pytest.raises(CredentialsValidateFailedError):
        MODEL_PROVIDER_CLASS.is_provider_credentials_valid_or_raise(credential)
@patch('core.helper.encrypter.encrypt_token', side_effect=encrypt_side_effect)
def test_encrypt_credentials(mock_encrypt):
    """``encrypt_provider_credentials`` encrypts ``api_key`` via ``encrypt_token``."""
    api_key = 'valid_key'
    result = MODEL_PROVIDER_CLASS.encrypt_provider_credentials(
        'tenant_id',
        VALIDATE_CREDENTIAL.copy(),
    )

    mock_encrypt.assert_called_with('tenant_id', api_key)
    assert result['api_key'] == f'encrypted_{api_key}'
@patch('core.helper.encrypter.decrypt_token', side_effect=decrypt_side_effect)
def test_get_credentials_custom(mock_decrypt):
    """``get_provider_credentials`` returns the decrypted API key for a custom provider."""
    encrypted_credential = VALIDATE_CREDENTIAL.copy()
    encrypted_credential['api_key'] = 'encrypted_' + encrypted_credential['api_key']

    provider = Provider(
        id='provider_id',
        tenant_id='tenant_id',
        provider_name=PROVIDER_NAME,
        provider_type=ProviderType.CUSTOM.value,
        encrypted_config=json.dumps(encrypted_credential),
        is_valid=True,
    )
    model_provider = MODEL_PROVIDER_CLASS(provider=provider)

    result = model_provider.get_provider_credentials()
    assert result['api_key'] == 'valid_key'
@patch('core.helper.encrypter.decrypt_token', side_effect=decrypt_side_effect)
def test_get_credentials_obfuscated(mock_decrypt):
    """``get_provider_credentials(obfuscated=True)`` masks the middle of the API key."""
    encrypted_credential = VALIDATE_CREDENTIAL.copy()
    encrypted_credential['api_key'] = 'encrypted_' + encrypted_credential['api_key']

    provider = Provider(
        id='provider_id',
        tenant_id='tenant_id',
        provider_name=PROVIDER_NAME,
        provider_type=ProviderType.CUSTOM.value,
        encrypted_config=json.dumps(encrypted_credential),
        is_valid=True,
    )
    model_provider = MODEL_PROVIDER_CLASS(provider=provider)

    result = model_provider.get_provider_credentials(obfuscated=True)

    # Drop the first 6 and last 2 characters; what remains must be all '*'.
    # NOTE(review): presumably those 8 characters are the part the
    # obfuscation leaves visible - confirm against the provider code.
    middle_token = result['api_key'][6:-2]
    assert len(middle_token) == max(len(VALIDATE_CREDENTIAL['api_key']) - 8, 0)
    assert all(char == '*' for char in middle_token)