Model Runtime (#1858)

Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: Garfield Dai <dai.hai@foxmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn>
2024-01-02 23:42:00 +08:00
parent e91dd28a76
commit d069c668f8
807 changed files with 171310 additions and 23806 deletions
--- a/api/tests/integration_tests/model_runtime/__mock/anthropic.py
+++ b/api/tests/integration_tests/model_runtime/__mock/anthropic.py
@@ -0,0 +1,68 @@
+import anthropic
+from anthropic import Anthropic
+from anthropic.resources.completions import Completions
+from anthropic.types import completion_create_params, Completion
+from anthropic._types import NOT_GIVEN, NotGiven, Headers, Query, Body
+
+from _pytest.monkeypatch import MonkeyPatch
+
+from typing import List, Union, Literal, Any, Generator
+from time import sleep
+
+import pytest
+import os
+
+# Mocks are installed only when MOCK_SWITCH is "true"; the comparison is
+# case-insensitive, like the same switch in the huggingface and openai mock modules.
+MOCK = os.getenv('MOCK_SWITCH', 'false').lower() == 'true'
+
+class MockAnthropicClass(object):
+    """
+        canned replacements for the anthropic completions API
+    """
+    @staticmethod
+    def mocked_anthropic_chat_create_sync(model: str) -> Completion:
+        """
+            build the fixed completion returned for non-streaming calls
+
+            :param model: model name echoed back in the response
+            :return: a finished completion
+        """
+        return Completion(
+            completion='hello, I\'m a chatbot from anthropic',
+            model=model,
+            stop_reason='stop_sequence'
+        )
+
+    @staticmethod
+    def mocked_anthropic_chat_create_stream(model: str) -> Generator[Completion, None, None]:
+        """
+            stream the fixed text one character per chunk, then a closing chunk
+
+            :param model: model name echoed back in every chunk
+            :return: generator of completion chunks
+        """
+        full_response_text = "hello, I'm a chatbot from anthropic"
+
+        # each chunk is preceded by a 0.1s pause
+        for character in full_response_text:
+            sleep(0.1)
+            yield Completion(
+                completion=character,
+                model=model,
+                stop_reason=''
+            )
+
+        # closing chunk: empty text, carries the stop reason
+        sleep(0.1)
+        yield Completion(
+            completion='',
+            model=model,
+            stop_reason='stop_sequence'
+        )
+
+    def mocked_anthropic(self: Completions, *,
+        max_tokens_to_sample: int,
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
+        prompt: str,
+        stream: bool = False,
+        **kwargs: Any
+    ) -> Union[Completion, Generator[Completion, None, None]]:
+        """
+            replacement for `Completions.create`
+
+            :param max_tokens_to_sample: accepted for signature compatibility, unused
+            :param model: model name echoed back in the response
+            :param prompt: accepted for signature compatibility, unused
+            :param stream: return a chunk generator when truthy, a single completion
+                           otherwise; defaults to non-streaming so callers may omit it
+            :return: a completion, or a generator of completion chunks
+            :raises anthropic.AuthenticationError: when the client's api key is shorter than 18 characters
+        """
+        if len(self._client.api_key) < 18:
+            # NOTE(review): confirm the installed anthropic SDK allows building
+            # AuthenticationError from a message alone
+            raise anthropic.AuthenticationError('Invalid API key')
+
+        if stream:
+            return MockAnthropicClass.mocked_anthropic_chat_create_stream(model=model)
+        else:
+            return MockAnthropicClass.mocked_anthropic_chat_create_sync(model=model)
+
+@pytest.fixture
+def setup_anthropic_mock(request, monkeypatch: MonkeyPatch):
+    """
+        replace `Completions.create` with the mock while a test runs
+
+        Nothing is patched when the module-level MOCK switch is off.
+    """
+    if not MOCK:
+        # mocking disabled: just hand control to the test
+        yield
+        return
+
+    monkeypatch.setattr(Completions, 'create', MockAnthropicClass.mocked_anthropic)
+    yield
+    monkeypatch.undo()
--- a/api/tests/integration_tests/model_runtime/__mock/google.py
+++ b/api/tests/integration_tests/model_runtime/__mock/google.py
@@ -0,0 +1,127 @@
+from google.generativeai import GenerativeModel
+from google.generativeai.types import GenerateContentResponse
+from google.generativeai.types.generation_types import BaseGenerateContentResponse
+import google.generativeai.types.generation_types as generation_config_types
+import google.generativeai.types.content_types as content_types
+import google.generativeai.types.safety_types as safety_types
+from google.generativeai.client import _ClientManager, configure
+
+from google.ai import generativelanguage as glm
+
+from typing import Generator, List
+from _pytest.monkeypatch import MonkeyPatch
+
+import pytest
+
+current_api_key = ''
+
+class MockGoogleResponseClass(object):
+    """
+        iterable stand-in for a streamed generate_content response
+    """
+    # set to True on the instance right before the final chunk is yielded
+    _done = False
+
+    def __iter__(self):
+        """
+            yield one unfinished chunk per character of the canned text, then a finished one
+
+            Every chunk wraps an empty `glm.GenerateContentResponse`; the characters
+            themselves are not placed in the chunks.
+        """
+        def build_chunk(finished: bool) -> GenerateContentResponse:
+            return GenerateContentResponse(
+                done=finished,
+                iterator=None,
+                result=glm.GenerateContentResponse({
+
+                }),
+                chunks=[]
+            )
+
+        full_response_text = 'it\'s google!'
+
+        for _ in full_response_text:
+            yield build_chunk(False)
+
+        self._done = True
+        yield build_chunk(True)
+
+class MockGoogleResponseCandidateClass(object):
+    """
+        minimal candidate stub exposing only a fixed `finish_reason`
+    """
+    finish_reason = 'stop'
+
+class MockGoogleClass(object):
+    """
+        replacement attributes that get patched onto the google.generativeai classes
+    """
+    @staticmethod
+    def generate_content_sync() -> GenerateContentResponse:
+        """
+            build the finished, empty response returned for non-streaming calls
+        """
+        return GenerateContentResponse(
+            done=True,
+            iterator=None,
+            result=glm.GenerateContentResponse({
+
+            }),
+            chunks=[]
+        )
+
+    @staticmethod
+    def generate_content_stream() -> Generator[GenerateContentResponse, None, None]:
+        """
+            return the iterable used for streaming calls (a `MockGoogleResponseClass`)
+        """
+        return MockGoogleResponseClass()
+
+    def generate_content(self: GenerativeModel,
+        contents: content_types.ContentsType,
+        *,
+        generation_config: generation_config_types.GenerationConfigType | None = None,
+        safety_settings: safety_types.SafetySettingOptions | None = None,
+        stream: bool = False,
+        **kwargs,
+    ) -> GenerateContentResponse:
+        """
+            replacement for `GenerativeModel.generate_content`
+
+            :param contents: accepted for signature compatibility, unused
+            :param generation_config: accepted for signature compatibility, unused
+            :param safety_settings: accepted for signature compatibility, unused
+            :param stream: return the streaming iterable when truthy
+            :return: the canned response (or iterable of responses when streaming)
+            :raises Exception: when the api key recorded by `make_client` is shorter than 16 characters
+        """
+        global current_api_key
+
+        if len(current_api_key) < 16:
+            raise Exception('Invalid API key')
+
+        if stream:
+            return MockGoogleClass.generate_content_stream()
+
+        return MockGoogleClass.generate_content_sync()
+
+    @property
+    def generative_response_text(self) -> str:
+        """
+            fixed text, patched in as the response's `text` property
+        """
+        return 'it\'s google!'
+
+    @property
+    def generative_response_candidates(self) -> List[MockGoogleResponseCandidateClass]:
+        """
+            single stub candidate, patched in as the response's `candidates` property
+        """
+        return [MockGoogleResponseCandidateClass()]
+
+    def make_client(self: _ClientManager, name: str):
+        """
+            replacement for `_ClientManager.make_client`
+
+            Records the configured api key in the module-level `current_api_key` and
+            creates the service client with its `__init__` temporarily disabled.
+
+            :param name: client name; a trailing "_async" selects the async client class
+            :return: the created client
+        """
+        global current_api_key
+
+        if name.endswith("_async"):
+            name = name.split("_")[0]
+            cls = getattr(glm, name.title() + "ServiceAsyncClient")
+        else:
+            cls = getattr(glm, name.title() + "ServiceClient")
+
+        # Attempt to configure using defaults.
+        if not self.client_config:
+            configure()
+
+        client_options = self.client_config.get("client_options", None)
+        if client_options:
+            current_api_key = client_options.api_key
+
+        def nop(self, *args, **kwargs):
+            pass
+
+        # swap in a no-op __init__ for the instantiation only, and always restore it
+        original_init = cls.__init__
+        cls.__init__ = nop
+        try:
+            client: glm.GenerativeServiceClient = cls(**self.client_config)
+        finally:
+            cls.__init__ = original_init
+
+        # return the client whether or not default metadata is configured;
+        # previously a configured default_metadata made this method return None
+        return client
+    
+@pytest.fixture
+def setup_google_mock(request, monkeypatch: MonkeyPatch):
+    """
+        patch the google.generativeai classes with the mocks while a test runs
+    """
+    # (patched class, attribute name, replacement), applied in this order
+    replacements = (
+        (BaseGenerateContentResponse, "text", MockGoogleClass.generative_response_text),
+        (BaseGenerateContentResponse, "candidates", MockGoogleClass.generative_response_candidates),
+        (GenerativeModel, "generate_content", MockGoogleClass.generate_content),
+        (_ClientManager, "make_client", MockGoogleClass.make_client),
+    )
+    for target, attribute, replacement in replacements:
+        monkeypatch.setattr(target, attribute, replacement)
+
+    yield
+
+    monkeypatch.undo()
--- a/api/tests/integration_tests/model_runtime/__mock/huggingface.py
+++ b/api/tests/integration_tests/model_runtime/__mock/huggingface.py
@@ -0,0 +1,21 @@
+from tests.integration_tests.model_runtime.__mock.huggingface_chat import MockHuggingfaceChatClass
+
+from huggingface_hub import InferenceClient
+
+from _pytest.monkeypatch import MonkeyPatch
+from typing import List, Dict, Any
+
+import pytest
+import os
+
+MOCK = os.getenv('MOCK_SWITCH', 'false').lower() == 'true'
+
+@pytest.fixture
+def setup_huggingface_mock(request, monkeypatch: MonkeyPatch):
+    """
+        replace `InferenceClient.text_generation` with the mock while a test runs
+
+        Nothing is patched when the module-level MOCK switch is off.
+    """
+    if not MOCK:
+        # mocking disabled: just hand control to the test
+        yield
+        return
+
+    monkeypatch.setattr(InferenceClient, "text_generation", MockHuggingfaceChatClass.text_generation)
+    yield
+    monkeypatch.undo()
--- a/api/tests/integration_tests/model_runtime/__mock/huggingface_chat.py
+++ b/api/tests/integration_tests/model_runtime/__mock/huggingface_chat.py
@@ -0,0 +1,54 @@
+from huggingface_hub import InferenceClient
+from huggingface_hub.inference._text_generation import TextGenerationResponse, TextGenerationStreamResponse, Details, StreamDetails, Token
+from huggingface_hub.utils import BadRequestError
+
+from typing import Literal, Optional, List, Generator, Union, Any
+from _pytest.monkeypatch import MonkeyPatch
+
+import re
+
+class MockHuggingfaceChatClass(object):
+    """
+        canned replacements for `InferenceClient.text_generation`
+    """
+    @staticmethod
+    def generate_create_sync(model: str) -> TextGenerationResponse:
+        """
+            build the fixed response returned for non-streaming calls
+
+            :param model: accepted for symmetry with the streaming variant, unused
+            :return: response with fixed text and six identical placeholder tokens
+        """
+        response = TextGenerationResponse(
+            generated_text="You can call me Miku Miku o~e~o~",
+            details=Details(
+                finish_reason="length",
+                generated_tokens=6,
+                tokens=[
+                    Token(id=0, text="You", logprob=0.0, special=False) for _ in range(6)
+                ]
+            )
+        )
+
+        return response
+
+    @staticmethod
+    def generate_create_stream(model: str) -> Generator[TextGenerationStreamResponse, None, None]:
+        """
+            stream the fixed text one character per response
+
+            :param model: accepted for symmetry with the sync variant, unused
+            :return: generator of stream responses, each carrying one character
+        """
+        full_text = "You can call me Miku Miku o~e~o~"
+
+        for index, character in enumerate(full_text):
+            response = TextGenerationStreamResponse(
+                token = Token(id=index, text=character, logprob=0.0, special=False),
+            )
+            response.generated_text = character
+            response.details = StreamDetails(finish_reason='stop_sequence', generated_tokens=1)
+
+            yield response
+
+    def text_generation(self: InferenceClient, prompt: str, *,
+        stream: bool = False,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> Union[TextGenerationResponse, Generator[TextGenerationStreamResponse, None, None]]:
+        """
+            replacement for `InferenceClient.text_generation`
+
+            :param prompt: accepted for signature compatibility, unused
+            :param stream: return a generator of stream responses when truthy; defaults to
+                           False (the former `...` default was truthy and forced streaming)
+            :param model: model name, must not be None
+            :return: a full response, or a generator of stream responses
+            :raises BadRequestError: when the authorization header is missing or malformed,
+                                     or when no model is given
+        """
+        # check if key is valid; a missing header counts as an invalid key
+        authorization = self.headers.get('authorization') or ''
+        if not re.match(r'Bearer\shf\-[a-zA-Z0-9]{16,}', authorization):
+            raise BadRequestError('Invalid API key')
+
+        if model is None:
+            raise BadRequestError('Invalid model')
+
+        if stream:
+            return MockHuggingfaceChatClass.generate_create_stream(model)
+        return MockHuggingfaceChatClass.generate_create_sync(model)
+
+
--- a/api/tests/integration_tests/model_runtime/__mock/openai.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai.py
@@ -0,0 +1,63 @@
+from tests.integration_tests.model_runtime.__mock.openai_completion import MockCompletionsClass
+from tests.integration_tests.model_runtime.__mock.openai_chat import MockChatClass
+from tests.integration_tests.model_runtime.__mock.openai_remote import MockModelClass
+from tests.integration_tests.model_runtime.__mock.openai_moderation import MockModerationClass
+from tests.integration_tests.model_runtime.__mock.openai_speech2text import MockSpeech2TextClass
+from tests.integration_tests.model_runtime.__mock.openai_embeddings import MockEmbeddingsClass
+from openai.resources.completions import Completions
+from openai.resources.chat import Completions as ChatCompletions
+from openai.resources.models import Models
+from openai.resources.moderations import Moderations
+from openai.resources.audio.transcriptions import Transcriptions
+from openai.resources.embeddings import Embeddings
+
+# import monkeypatch
+from _pytest.monkeypatch import MonkeyPatch
+from typing import Literal, Callable, List
+
+import os
+import pytest
+
+def mock_openai(monkeypatch: MonkeyPatch, methods: List[Literal["completion", "chat", "remote", "moderation", "speech2text", "text_embedding"]]) -> Callable[[], None]:
+    """
+        install the requested openai mocks
+
+        :param monkeypatch: pytest monkeypatch fixture
+        :param methods: names of the API groups to replace; groups not listed stay untouched
+        :return: unpatch function, which calls `monkeypatch.undo()`
+    """
+    # (method name, patched resource, patched attribute, mock holder, mock attribute);
+    # the mock attribute is looked up only when its method was requested
+    patch_table = (
+        ("completion", Completions, "create", MockCompletionsClass, "completion_create"),
+        ("chat", ChatCompletions, "create", MockChatClass, "chat_create"),
+        ("remote", Models, "list", MockModelClass, "list"),
+        ("moderation", Moderations, "create", MockModerationClass, "moderation_create"),
+        ("speech2text", Transcriptions, "create", MockSpeech2TextClass, "speech2text_create"),
+        ("text_embedding", Embeddings, "create", MockEmbeddingsClass, "create_embeddings"),
+    )
+
+    for method, resource, attribute, mock_holder, mock_attribute in patch_table:
+        if method in methods:
+            monkeypatch.setattr(resource, attribute, getattr(mock_holder, mock_attribute))
+
+    def unpatch() -> None:
+        monkeypatch.undo()
+
+    return unpatch
+
+
+MOCK = os.getenv('MOCK_SWITCH', 'false').lower() == 'true'
+
+@pytest.fixture
+def setup_openai_mock(request, monkeypatch):
+    """
+        install the openai mocks named by the fixture parameter while a test runs
+
+        The method names come from `request.param` (none when the fixture is not
+        parametrized). Nothing is patched when the module-level MOCK switch is off.
+    """
+    methods = getattr(request, 'param', [])
+    if not MOCK:
+        # mocking disabled: just hand control to the test
+        yield
+        return
+
+    unpatch = mock_openai(monkeypatch, methods=methods)
+    yield
+    unpatch()
--- a/api/tests/integration_tests/model_runtime/__mock/openai_chat.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai_chat.py
@@ -0,0 +1,235 @@
+from openai import OpenAI
+from openai.types import Completion as CompletionMessage
+from openai._types import NotGiven, NOT_GIVEN
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam, \
+    ChatCompletionToolChoiceOptionParam, ChatCompletionToolParam, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall, ChoiceDeltaFunctionCall,\
+    Choice, ChoiceDelta, ChoiceDeltaToolCallFunction
+from openai.types.chat.chat_completion import Choice as _ChatCompletionChoice, ChatCompletion as _ChatCompletion
+from openai.types.chat.chat_completion_message import FunctionCall, ChatCompletionMessage
+from openai.types.chat.chat_completion_message_tool_call import Function
+from openai.types.completion_usage import CompletionUsage
+from openai.resources.chat.completions import Completions
+from openai import AzureOpenAI
+
+import openai.types.chat.completion_create_params as completion_create_params
+
+# import monkeypatch
+from typing import List, Any, Generator, Union, Optional, Literal
+from time import time, sleep
+from json import dumps, loads
+
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
+
+import re
+
+class MockChatClass(object):
+    """
+        mock implementations for openai chat completions
+    """
+    @staticmethod
+    def generate_function_call(
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+    ) -> Optional[FunctionCall]:
+        """
+            fabricate a call to the first declared function
+
+            Only required parameters of type string/integer/number/boolean get a value;
+            a string parameter with a non-empty `enum` uses its first entry.
+
+            :param functions: function definitions passed to the chat API, or NOT_GIVEN
+            :return: the fabricated call, or None when no function is given or the
+                     parameter schema is not of type `object`
+        """
+        # covers both NOT_GIVEN and an empty list
+        if not functions:
+            return None
+        function: completion_create_params.Function = functions[0]
+        function_name = function['name']
+        function_parameters = function['parameters']
+        function_parameters_type = function_parameters['type']
+        if function_parameters_type != 'object':
+            return None
+        # `properties` and `required` may be absent from a schema; treat them as empty
+        function_parameters_properties = function_parameters.get('properties', {})
+        function_parameters_required = function_parameters.get('required', [])
+        parameters = {}
+        for parameter_name, parameter in function_parameters_properties.items():
+            if parameter_name not in function_parameters_required:
+                continue
+            parameter_type = parameter['type']
+            if parameter_type == 'string':
+                if 'enum' in parameter:
+                    if len(parameter['enum']) == 0:
+                        continue
+                    parameters[parameter_name] = parameter['enum'][0]
+                else:
+                    parameters[parameter_name] = 'kawaii'
+            elif parameter_type == 'integer':
+                parameters[parameter_name] = 114514
+            elif parameter_type == 'number':
+                parameters[parameter_name] = 1919810.0
+            elif parameter_type == 'boolean':
+                parameters[parameter_name] = True
+
+        return FunctionCall(name=function_name, arguments=dumps(parameters))
+
+    @staticmethod
+    def generate_tool_calls(
+        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+    ) -> Optional[List[ChatCompletionMessageToolCall]]:
+        """
+            fabricate a single tool call for the first declared tool
+
+            :param tools: tool definitions passed to the chat API, or NOT_GIVEN
+            :return: a one-element list, or None when no tool is given, the first tool is
+                     not of type `function`, or no function call could be fabricated
+        """
+        # covers both NOT_GIVEN and an empty list
+        if not tools:
+            return None
+        tool: ChatCompletionToolParam = tools[0]
+
+        # inspect the selected tool, not the list it was taken from
+        if tool['type'] != 'function':
+            return None
+
+        function = tool['function']
+
+        function_call = MockChatClass.generate_function_call(functions=[function])
+        if function_call is None:
+            return None
+
+        return [
+            ChatCompletionMessageToolCall(
+                id='sakurajima-mai',
+                function=Function(
+                    name=function_call.name,
+                    arguments=function_call.arguments,
+                ),
+                type='function'
+            )
+        ]
+
+    @staticmethod
+    def mocked_openai_chat_create_sync(
+        model: str,
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+    ) -> _ChatCompletion:
+        """
+            build the fixed chat completion returned for non-streaming calls
+
+            :param model: model name echoed back in the response
+            :param functions: function definitions, used to fabricate a function call
+            :param tools: tool definitions, consulted only when no function call was fabricated
+            :return: chat completion with one assistant message
+        """
+        tool_calls = []
+        function_call = MockChatClass.generate_function_call(functions=functions)
+        if not function_call:
+            tool_calls = MockChatClass.generate_tool_calls(tools=tools)
+
+        sleep(1)
+        return _ChatCompletion(
+            id='cmpl-3QJQa5jXJ5Z5X',
+            choices=[
+                _ChatCompletionChoice(
+                    finish_reason='content_filter',
+                    index=0,
+                    message=ChatCompletionMessage(
+                        content='elaina',
+                        role='assistant',
+                        function_call=function_call,
+                        tool_calls=tool_calls
+                    )
+                )
+            ],
+            created=int(time()),
+            model=model,
+            object='chat.completion',
+            system_fingerprint='',
+            usage=CompletionUsage(
+                prompt_tokens=2,
+                completion_tokens=1,
+                total_tokens=3,
+            )
+        )
+
+    @staticmethod
+    def mocked_openai_chat_create_stream(
+        model: str,
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+    ) -> Generator[ChatCompletionChunk, None, None]:
+        """
+            stream the fixed text one character per chunk, then a closing chunk
+
+            The closing chunk has empty content and carries the fabricated function
+            call / tool call (if any) together with the usage figures.
+
+            :param model: model name echoed back in every chunk
+            :param functions: function definitions, used to fabricate a function call
+            :param tools: tool definitions, consulted only when no function call was fabricated
+            :return: generator of chat completion chunks
+        """
+        tool_calls = []
+        function_call = MockChatClass.generate_function_call(functions=functions)
+        if not function_call:
+            tool_calls = MockChatClass.generate_tool_calls(tools=tools)
+
+        full_text = "Hello, world!\n\n```python\nprint('Hello, world!')\n```"
+
+        # each chunk is preceded by a 0.1s pause
+        for character in full_text:
+            sleep(0.1)
+            yield ChatCompletionChunk(
+                id='cmpl-3QJQa5jXJ5Z5X',
+                choices=[
+                    Choice(
+                        delta=ChoiceDelta(
+                            content=character,
+                            role='assistant',
+                        ),
+                        finish_reason='content_filter',
+                        index=0,
+                    )
+                ],
+                created=int(time()),
+                model=model,
+                object='chat.completion.chunk',
+                system_fingerprint='',
+            )
+
+        sleep(0.1)
+        yield ChatCompletionChunk(
+            id='cmpl-3QJQa5jXJ5Z5X',
+            choices=[
+                Choice(
+                    delta=ChoiceDelta(
+                        content='',
+                        function_call=ChoiceDeltaFunctionCall(
+                            name=function_call.name,
+                            arguments=function_call.arguments,
+                        ) if function_call else None,
+                        role='assistant',
+                        tool_calls=[
+                            ChoiceDeltaToolCall(
+                                index=0,
+                                id='misaka-mikoto',
+                                function=ChoiceDeltaToolCallFunction(
+                                    name=tool_calls[0].function.name,
+                                    arguments=tool_calls[0].function.arguments,
+                                ),
+                                type='function'
+                            )
+                        ] if tool_calls and len(tool_calls) > 0 else None
+                    ),
+                    finish_reason='function_call',
+                    index=0,
+                )
+            ],
+            created=int(time()),
+            model=model,
+            object='chat.completion.chunk',
+            system_fingerprint='',
+            usage=CompletionUsage(
+                prompt_tokens=2,
+                completion_tokens=17,
+                total_tokens=19,
+            ),
+        )
+
+    def chat_create(self: Completions, *,
+        messages: List[ChatCompletionMessageParam],
+        model: Union[str,Literal[
+            "gpt-4-1106-preview", "gpt-4-vision-preview", "gpt-4", "gpt-4-0314", "gpt-4-0613",
+            "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
+            "gpt-3.5-turbo-1106", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0301",
+            "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613"],
+        ],
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+        **kwargs: Any,
+    ):
+        """
+            replacement for the chat `Completions.create`
+
+            :param messages: accepted for signature compatibility, unused
+            :param model: model name; the api key is validated only for the known
+                          openai / azure openai model names listed below
+            :param functions: function definitions forwarded to the response builders
+            :param response_format: accepted for signature compatibility, unused
+            :param stream: return a chunk generator when truthy
+            :param tools: tool definitions forwarded to the response builders
+            :return: a chat completion, or a generator of chat completion chunks
+            :raises InvokeAuthorizationError: on a malformed base url or an invalid api key
+        """
+        openai_models = [
+            "gpt-4-1106-preview", "gpt-4-vision-preview", "gpt-4", "gpt-4-0314", "gpt-4-0613",
+            "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
+            "gpt-3.5-turbo-1106", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0301",
+            "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613",
+        ]
+        azure_openai_models = [
+            "gpt35", "gpt-4v", "gpt-35-turbo"
+        ]
+        if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', str(self._client.base_url)):
+            raise InvokeAuthorizationError('Invalid base url')
+        if model in openai_models + azure_openai_models:
+            # exact type checks (not isinstance) keep the two key rules apart per client class
+            if not re.match(r'sk-[a-zA-Z0-9]{24,}$', self._client.api_key) and type(self._client) is OpenAI:
+                # sometime, provider use OpenAI compatible API will not have api key or have different api key format
+                # so we only check if model is in openai_models
+                raise InvokeAuthorizationError('Invalid api key')
+            if len(self._client.api_key) < 18 and type(self._client) is AzureOpenAI:
+                raise InvokeAuthorizationError('Invalid api key')
+        if stream:
+            return MockChatClass.mocked_openai_chat_create_stream(model=model, functions=functions, tools=tools)
+
+        return MockChatClass.mocked_openai_chat_create_sync(model=model, functions=functions, tools=tools)
--- a/api/tests/integration_tests/model_runtime/__mock/openai_completion.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai_completion.py
@@ -0,0 +1,121 @@
+from openai import BadRequestError, OpenAI, AzureOpenAI
+from openai.types import Completion as CompletionMessage
+from openai._types import NotGiven, NOT_GIVEN
+from openai.types.completion import CompletionChoice
+from openai.types.completion_usage import CompletionUsage
+from openai.resources.completions import Completions
+
+# import monkeypatch
+from typing import List, Any, Generator, Union, Optional, Literal
+from time import time, sleep
+
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
+
+import re
+
+class MockCompletionsClass(object):
+    """
+        mock implementations for openai text completions
+    """
+    @staticmethod
+    def mocked_openai_completion_create_sync(
+        model: str
+    ) -> CompletionMessage:
+        """
+            build the fixed completion returned for non-streaming calls
+
+            :param model: model name echoed back in the response
+            :return: completion with the single choice "mock"
+        """
+        sleep(1)
+        only_choice = CompletionChoice(
+            text="mock",
+            index=0,
+            logprobs=None,
+            finish_reason="stop",
+        )
+        token_usage = CompletionUsage(
+            prompt_tokens=2,
+            completion_tokens=1,
+            total_tokens=3,
+        )
+        return CompletionMessage(
+            id="cmpl-3QJQa5jXJ5Z5X",
+            object="text_completion",
+            created=int(time()),
+            model=model,
+            system_fingerprint="",
+            choices=[only_choice],
+            usage=token_usage
+        )
+
+    @staticmethod
+    def mocked_openai_completion_create_stream(
+        model: str
+    ) -> Generator[CompletionMessage, None, None]:
+        """
+            stream the fixed text one character per chunk, then a closing chunk with usage
+
+            :param model: model name echoed back in every chunk
+            :return: generator of completion chunks
+        """
+        full_text = "Hello, world!\n\n```python\nprint('Hello, world!')\n```"
+
+        # each chunk is preceded by a 0.1s pause
+        for character in full_text:
+            sleep(0.1)
+            yield CompletionMessage(
+                id="cmpl-3QJQa5jXJ5Z5X",
+                object="text_completion",
+                created=int(time()),
+                model=model,
+                system_fingerprint="",
+                choices=[
+                    CompletionChoice(
+                        text=character,
+                        index=0,
+                        logprobs=None,
+                        finish_reason="content_filter"
+                    )
+                ],
+            )
+
+        # closing chunk: empty text, "stop", and the usage figures
+        sleep(0.1)
+        yield CompletionMessage(
+            id="cmpl-3QJQa5jXJ5Z5X",
+            object="text_completion",
+            created=int(time()),
+            model=model,
+            system_fingerprint="",
+            choices=[
+                CompletionChoice(
+                    text="",
+                    index=0,
+                    logprobs=None,
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=2,
+                completion_tokens=17,
+                total_tokens=19,
+            ),
+        )
+
+    def completion_create(self: Completions, *, model: Union[
+            str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo-instruct",
+                "text-davinci-003", "text-davinci-002", "text-davinci-001",
+                "code-davinci-002", "text-curie-001", "text-babbage-001",
+                "text-ada-001"],
+        ],
+        prompt: Union[str, List[str], List[int], List[List[int]], None],
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        **kwargs: Any
+    ):
+        """
+            replacement for the text `Completions.create`
+
+            :param model: model name; the api key is validated only for the known
+                          openai / azure openai model names listed below
+            :param prompt: must be non-empty
+            :param stream: return a chunk generator when truthy
+            :return: a completion, or a generator of completion chunks
+            :raises InvokeAuthorizationError: on a malformed base url or an invalid api key
+        """
+        openai_models = [
+            "babbage-002", "davinci-002", "gpt-3.5-turbo-instruct", "text-davinci-003", "text-davinci-002", "text-davinci-001",
+            "code-davinci-002", "text-curie-001", "text-babbage-001", "text-ada-001",
+        ]
+        azure_openai_models = [
+            "gpt-35-turbo-instruct"
+        ]
+        client = self._client
+
+        if re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', str(client.base_url)) is None:
+            raise InvokeAuthorizationError('Invalid base url')
+
+        if model in openai_models + azure_openai_models:
+            # exact type checks (not isinstance) keep the two key rules apart per client class
+            if not re.match(r'sk-[a-zA-Z0-9]{24,}$', client.api_key) and type(client) is OpenAI:
+                # OpenAI-compatible providers may have no key or another key format,
+                # so the key is only checked for the known model names
+                raise InvokeAuthorizationError('Invalid api key')
+            if len(client.api_key) < 18 and type(client) is AzureOpenAI:
+                raise InvokeAuthorizationError('Invalid api key')
+
+        if not prompt:
+            # NOTE(review): confirm the installed openai SDK allows building
+            # BadRequestError from a message alone
+            raise BadRequestError('Invalid prompt')
+
+        if stream:
+            return MockCompletionsClass.mocked_openai_completion_create_stream(model=model)
+
+        return MockCompletionsClass.mocked_openai_completion_create_sync(model=model)
--- a/api/tests/integration_tests/model_runtime/__mock/openai_embeddings.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai_embeddings.py
--- a/api/tests/integration_tests/model_runtime/__mock/openai_moderation.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai_moderation.py
@@ -0,0 +1,67 @@
+from openai.resources.moderations import Moderations
+from openai.types import ModerationCreateResponse
+from openai.types.moderation import Moderation, Categories, CategoryScores
+from openai._types import NotGiven, NOT_GIVEN
+
+from typing import Union, List, Literal, Any
+
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
+
+import re
+
+class MockModerationClass(object):
+    """
+        mock implementation for openai moderations
+    """
+    def moderation_create(self: Moderations,*,
+        input: Union[str, List[str]],
+        model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN,
+        **kwargs: Any
+    ) -> ModerationCreateResponse:
+        """
+            replacement for `Moderations.create`
+
+            A text is flagged when it contains the substring "kill"; flagged texts get
+            a score of 1.0 in every category, all others 0.0. The boolean category
+            flags are always False.
+
+            :param input: one text or a list of texts
+            :param model: model name echoed back in the response
+            :return: response holding one moderation result per input text, in order
+            :raises InvokeAuthorizationError: on a malformed base url or an api key
+                                              shorter than 18 characters
+        """
+        if isinstance(input, str):
+            input = [input]
+
+        if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', str(self._client.base_url)):
+            raise InvokeAuthorizationError('Invalid base url')
+
+        if len(self._client.api_key) < 18:
+            raise InvokeAuthorizationError('Invalid API key')
+
+        category_names = [
+            'harassment', 'harassment/threatening', 'hate', 'hate/threatening',
+            'self-harm', 'self-harm/instructions', 'self-harm/intent', 'sexual',
+            'sexual/minors', 'violence', 'violence/graphic'
+        ]
+
+        # created once, outside the loop, so results of all inputs are kept and an
+        # empty input list yields an empty result instead of a NameError
+        result = []
+        for text in input:
+            flagged = 'kill' in text
+            score = 1.0 if flagged else 0.0
+            moderation_categories = {name: False for name in category_names}
+            moderation_categories_scores = {name: score for name in category_names}
+
+            result.append(Moderation(
+                flagged=flagged,
+                categories=Categories(**moderation_categories),
+                category_scores=CategoryScores(**moderation_categories_scores)
+            ))
+
+        return ModerationCreateResponse(
+            id='shiroii kuloko',
+            model=model,
+            results=result
+        )
--- a/api/tests/integration_tests/model_runtime/__mock/openai_remote.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai_remote.py
@@ -0,0 +1,22 @@
+from openai.resources.models import Models
+from openai.types.model import Model
+
+from typing import List
+from time import time
+
+class MockModelClass(object):
+    """
+        mock class for openai.models.Models
+    """
+    def list(
+        self,
+        **kwargs,
+    ) -> List[Model]:
+        return [
+            Model(
+                id='ft:gpt-3.5-turbo-0613:personal::8GYJLPDQ',
+                created=int(time()),
+                object='model',
+                owned_by='organization:org-123',
+            )
+        ]
--- a/api/tests/integration_tests/model_runtime/__mock/openai_speech2text.py
+++ b/api/tests/integration_tests/model_runtime/__mock/openai_speech2text.py
@@ -0,0 +1,30 @@
+from openai.resources.audio.transcriptions import Transcriptions
+from openai._types import NotGiven, NOT_GIVEN, FileTypes
+from openai.types.audio.transcription import Transcription
+
+from typing import Union, List, Literal, Any
+
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
+
+import re
+
+class MockSpeech2TextClass(object):
+    def speech2text_create(self: Transcriptions,
+        *,
+        file: FileTypes,
+        model: Union[str, Literal["whisper-1"]],
+        language: str | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        **kwargs: Any
+    ) -> Transcription:
+        if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._client.base_url.__str__()):
+            raise InvokeAuthorizationError('Invalid base url')
+        
+        if len(self._client.api_key) < 18:
+            raise InvokeAuthorizationError('Invalid API key')
+        
+        return Transcription(
+            text='1, 2, 3, 4, 5, 6, 7, 8, 9, 10'
+        )
--- a/api/tests/integration_tests/model_runtime/__mock/xinference.py
+++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py
@@ -0,0 +1,142 @@
+from xinference_client.client.restful.restful_client import Client, \
+    RESTfulChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatglmCppChatModelHandle, \
+    RESTfulEmbeddingModelHandle, RESTfulRerankModelHandle
+from xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage
+
+from requests.sessions import Session
+from requests import Response
+from requests.exceptions import ConnectionError
+from typing import Union, List
+
+from _pytest.monkeypatch import MonkeyPatch
+import pytest
+import os
+import re
+
+class MockXinferenceClass(object):
+    def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
+        if not re.match(r'https?:\/\/[^\s\/$.?#].[^\s]*$', self.base_url):
+            raise RuntimeError('404 Not Found')
+        
+        if 'generate' == model_uid:
+            return RESTfulGenerateModelHandle(model_uid, base_url=self.base_url)
+        if 'chat' == model_uid:
+            return RESTfulChatModelHandle(model_uid, base_url=self.base_url)
+        if 'embedding' == model_uid:
+            return RESTfulEmbeddingModelHandle(model_uid, base_url=self.base_url)
+        if 'rerank' == model_uid:
+            return RESTfulRerankModelHandle(model_uid, base_url=self.base_url)
+        raise RuntimeError('404 Not Found')
+        
+    def get(self: Session, url: str, **kwargs):
+        if '/v1/models/' in url:
+            response = Response()
+            
+            # get model uid
+            model_uid = url.split('/')[-1]
+            if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \
+                model_uid not in ['generate', 'chat', 'embedding', 'rerank']:
+                response.status_code = 404
+                raise ConnectionError('404 Not Found')
+
+            # check if url is valid
+            if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url):
+                response.status_code = 404
+                raise ConnectionError('404 Not Found')
+
+            response.status_code = 200
+            response._content = b'''{
+    "model_type": "LLM",
+    "address": "127.0.0.1:43877",
+    "accelerators": [
+        "0",
+        "1"
+    ],
+    "model_name": "chatglm3-6b",
+    "model_lang": [
+        "en"
+    ],
+    "model_ability": [
+        "generate",
+        "chat"
+    ],
+    "model_description": "latest chatglm3",
+    "model_format": "pytorch",
+    "model_size_in_billions": 7,
+    "quantization": "none",
+    "model_hub": "huggingface",
+    "revision": null,
+    "context_length": 2048,
+    "replica": 1
+}'''
+            return response
+        
+    def rerank(self: RESTfulRerankModelHandle, documents: List[str], query: str, top_n: int) -> dict:
+        # check if self._model_uid is a valid uuid
+        if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', self._model_uid) and \
+            self._model_uid != 'rerank':
+            raise RuntimeError('404 Not Found')
+        
+        if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._base_url):
+            raise RuntimeError('404 Not Found')
+
+        if top_n is None:
+            top_n = 1
+
+        return {
+            'results': [
+                {
+                    'index': i,
+                    'document': doc,
+                    'relevance_score': 0.9
+                }
+                for i, doc in enumerate(documents[:top_n])
+            ]
+        }
+        
+    def create_embedding(
+        self: RESTfulGenerateModelHandle,
+        input: Union[str, List[str]],
+        **kwargs
+    ) -> dict:
+        # check if self._model_uid is a valid uuid
+        if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', self._model_uid) and \
+            self._model_uid != 'embedding':
+            raise RuntimeError('404 Not Found')
+
+        if isinstance(input, str):
+            input = [input]
+        ipt_len = len(input)
+
+        embedding = Embedding(
+            object="list",
+            model=self._model_uid,
+            data=[
+                EmbeddingData(
+                    index=i,
+                    object="embedding",
+                    embedding=[1919.810 for _ in range(768)]
+                )
+                for i in range(ipt_len)
+            ],
+            usage=EmbeddingUsage(
+                prompt_tokens=ipt_len,
+                total_tokens=ipt_len
+            )
+        )
+
+        return embedding
+
+MOCK = os.getenv('MOCK_SWITCH', 'false').lower() == 'true'
+
+@pytest.fixture
+def setup_xinference_mock(request, monkeypatch: MonkeyPatch):
+    if MOCK:
+        monkeypatch.setattr(Client, 'get_model', MockXinferenceClass.get_chat_model)
+        monkeypatch.setattr(Session, 'get', MockXinferenceClass.get)
+        monkeypatch.setattr(RESTfulEmbeddingModelHandle, 'create_embedding', MockXinferenceClass.create_embedding)
+        monkeypatch.setattr(RESTfulRerankModelHandle, 'rerank', MockXinferenceClass.rerank)
+    yield
+
+    if MOCK:
+        monkeypatch.undo()