feat: support LLM process document file (#10966)

Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
非法操作
2024-11-22 19:32:44 +08:00
committed by GitHub
parent 556de444e8
commit 08ac36812b
37 changed files with 233 additions and 88 deletions

View File

@@ -3,7 +3,6 @@ from typing import Optional
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.file import file_manager
from core.file.models import FileType
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
AssistantPromptMessage,
@@ -103,12 +102,11 @@ class TokenBufferMemory:
prompt_message_contents: list[PromptMessageContent] = []
prompt_message_contents.append(TextPromptMessageContent(data=message.query))
for file in file_objs:
if file.type in {FileType.IMAGE, FileType.AUDIO}:
prompt_message = file_manager.to_prompt_message_content(
file,
image_detail_config=detail,
)
prompt_message_contents.append(prompt_message)
prompt_message = file_manager.to_prompt_message_content(
file,
image_detail_config=detail,
)
prompt_message_contents.append(prompt_message)
prompt_messages.append(UserPromptMessage(content=prompt_message_contents))

View File

@@ -49,7 +49,7 @@ class PromptMessageFunction(BaseModel):
function: PromptMessageTool
class PromptMessageContentType(Enum):
class PromptMessageContentType(str, Enum):
"""
Enum class for prompt message content type.
"""

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152

View File

@@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 32767

View File

@@ -16,6 +16,7 @@ from PIL import Image
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
DocumentPromptMessageContent,
ImagePromptMessageContent,
PromptMessage,
PromptMessageContentType,
@@ -35,6 +36,21 @@ from core.model_runtime.errors.invoke import (
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
# MIME types accepted for DOCUMENT prompt content. The DOCUMENT branch of the
# message conversion raises ValueError for any mime type not listed here
# before building the inline_data part.
GOOGLE_AVAILABLE_MIMETYPE = [
    "application/pdf",
    "application/x-javascript",
    "text/javascript",
    "application/x-python",
    "text/x-python",
    "text/plain",
    "text/html",
    "text/css",
    "text/md",
    "text/csv",
    "text/xml",
    "text/rtf",
]
class GoogleLargeLanguageModel(LargeLanguageModel):
def _invoke(
@@ -370,6 +386,12 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
blob = {"inline_data": {"mime_type": mime_type, "data": base64_data}}
glm_content["parts"].append(blob)
elif c.type == PromptMessageContentType.DOCUMENT:
message_content = cast(DocumentPromptMessageContent, c)
if message_content.mime_type not in GOOGLE_AVAILABLE_MIMETYPE:
raise ValueError(f"Unsupported mime type {message_content.mime_type}")
blob = {"inline_data": {"mime_type": message_content.mime_type, "data": message_content.data}}
glm_content["parts"].append(blob)
return glm_content
elif isinstance(message, AssistantPromptMessage):

View File

@@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32000

View File

@@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32000

View File

@@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32768

View File

@@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 8000

View File

@@ -6,6 +6,7 @@ model_properties:
mode: chat
features:
- vision
- video
parameter_rules:
- name: temperature
use_template: temperature

View File

@@ -26,9 +26,15 @@ class NoPromptFoundError(LLMNodeError):
"""Raised when no prompt is found in the LLM configuration."""
class NotSupportedPromptTypeError(LLMNodeError):
    """Error signalling that the given prompt type is not supported."""
class TemplateTypeNotSupportError(LLMNodeError):
    """Raised when the prompt template is of a type that is not supported.

    Args:
        type_name: Textual description of the unsupported template type; it is
            embedded verbatim in the error message.
    """

    def __init__(self, *, type_name: str) -> None:
        super().__init__(f"Prompt type {type_name} is not supported.")
class MemoryRolePrefixRequiredError(LLMNodeError):
    """Error signalling that a completion model needs a memory role prefix."""
class FileTypeNotSupportError(LLMNodeError):
    """Raised when a prompt content item has a type the model cannot accept.

    Args:
        type_name: Name of the unsupported content type. Callers may pass
            either a plain string or an enum member (the LLM node passes
            ``content_item.type``); enum members are reduced to their value
            so the message is stable.
    """

    def __init__(self, *, type_name: str) -> None:
        # Depending on the Python version, formatting an Enum member in an
        # f-string may render as "ClassName.MEMBER" rather than its value.
        # Normalise to the underlying value; plain strings have no ``value``
        # attribute and pass through unchanged.
        display_name = getattr(type_name, "value", type_name)
        super().__init__(f"{display_name} type is not supported by this model")

View File

@@ -65,6 +65,7 @@ from .entities import (
ModelConfig,
)
from .exc import (
FileTypeNotSupportError,
InvalidContextStructureError,
InvalidVariableTypeError,
LLMModeRequiredError,
@@ -72,7 +73,7 @@ from .exc import (
MemoryRolePrefixRequiredError,
ModelNotExistError,
NoPromptFoundError,
NotSupportedPromptTypeError,
TemplateTypeNotSupportError,
VariableNotFoundError,
)
@@ -621,9 +622,7 @@ class LLMNode(BaseNode[LLMNodeData]):
prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
prompt_messages[0].content = prompt_content
else:
errmsg = f"Prompt type {type(prompt_template)} is not supported"
logger.warning(errmsg)
raise NotSupportedPromptTypeError(errmsg)
raise TemplateTypeNotSupportError(type_name=str(type(prompt_template)))
if vision_enabled and user_files:
file_prompts = []
@@ -671,7 +670,7 @@ class LLMNode(BaseNode[LLMNodeData]):
and ModelFeature.AUDIO not in model_config.model_schema.features
)
):
continue
raise FileTypeNotSupportError(type_name=content_item.type)
prompt_message_content.append(content_item)
if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
prompt_message.content = prompt_message_content[0].data