feat: support LLM process document file (#10966)
Co-authored-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
@@ -3,7 +3,6 @@ from typing import Optional
|
||||
|
||||
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
|
||||
from core.file import file_manager
|
||||
from core.file.models import FileType
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.entities import (
|
||||
AssistantPromptMessage,
|
||||
@@ -103,12 +102,11 @@ class TokenBufferMemory:
|
||||
prompt_message_contents: list[PromptMessageContent] = []
|
||||
prompt_message_contents.append(TextPromptMessageContent(data=message.query))
|
||||
for file in file_objs:
|
||||
if file.type in {FileType.IMAGE, FileType.AUDIO}:
|
||||
prompt_message = file_manager.to_prompt_message_content(
|
||||
file,
|
||||
image_detail_config=detail,
|
||||
)
|
||||
prompt_message_contents.append(prompt_message)
|
||||
prompt_message = file_manager.to_prompt_message_content(
|
||||
file,
|
||||
image_detail_config=detail,
|
||||
)
|
||||
prompt_message_contents.append(prompt_message)
|
||||
|
||||
prompt_messages.append(UserPromptMessage(content=prompt_message_contents))
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ class PromptMessageFunction(BaseModel):
|
||||
function: PromptMessageTool
|
||||
|
||||
|
||||
class PromptMessageContentType(Enum):
|
||||
class PromptMessageContentType(str, Enum):
|
||||
"""
|
||||
Enum class for prompt message content type.
|
||||
"""
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
|
||||
@@ -7,6 +7,7 @@ features:
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
|
||||
@@ -16,6 +16,7 @@ from PIL import Image
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
DocumentPromptMessageContent,
|
||||
ImagePromptMessageContent,
|
||||
PromptMessage,
|
||||
PromptMessageContentType,
|
||||
@@ -35,6 +36,21 @@ from core.model_runtime.errors.invoke import (
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
|
||||
|
||||
GOOGLE_AVAILABLE_MIMETYPE = [
|
||||
"application/pdf",
|
||||
"application/x-javascript",
|
||||
"text/javascript",
|
||||
"application/x-python",
|
||||
"text/x-python",
|
||||
"text/plain",
|
||||
"text/html",
|
||||
"text/css",
|
||||
"text/md",
|
||||
"text/csv",
|
||||
"text/xml",
|
||||
"text/rtf",
|
||||
]
|
||||
|
||||
|
||||
class GoogleLargeLanguageModel(LargeLanguageModel):
|
||||
def _invoke(
|
||||
@@ -370,6 +386,12 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
|
||||
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
|
||||
blob = {"inline_data": {"mime_type": mime_type, "data": base64_data}}
|
||||
glm_content["parts"].append(blob)
|
||||
elif c.type == PromptMessageContentType.DOCUMENT:
|
||||
message_content = cast(DocumentPromptMessageContent, c)
|
||||
if message_content.mime_type not in GOOGLE_AVAILABLE_MIMETYPE:
|
||||
raise ValueError(f"Unsupported mime type {message_content.mime_type}")
|
||||
blob = {"inline_data": {"mime_type": message_content.mime_type, "data": message_content.data}}
|
||||
glm_content["parts"].append(blob)
|
||||
|
||||
return glm_content
|
||||
elif isinstance(message, AssistantPromptMessage):
|
||||
|
||||
@@ -6,6 +6,7 @@ model_type: llm
|
||||
features:
|
||||
- vision
|
||||
- agent-thought
|
||||
- video
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
|
||||
@@ -6,6 +6,7 @@ model_type: llm
|
||||
features:
|
||||
- vision
|
||||
- agent-thought
|
||||
- video
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
|
||||
@@ -6,6 +6,7 @@ model_type: llm
|
||||
features:
|
||||
- vision
|
||||
- agent-thought
|
||||
- video
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
|
||||
@@ -6,6 +6,7 @@ model_type: llm
|
||||
features:
|
||||
- vision
|
||||
- agent-thought
|
||||
- video
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8000
|
||||
|
||||
@@ -6,6 +6,7 @@ model_properties:
|
||||
mode: chat
|
||||
features:
|
||||
- vision
|
||||
- video
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
|
||||
@@ -26,9 +26,15 @@ class NoPromptFoundError(LLMNodeError):
|
||||
"""Raised when no prompt is found in the LLM configuration."""
|
||||
|
||||
|
||||
class NotSupportedPromptTypeError(LLMNodeError):
|
||||
"""Raised when the prompt type is not supported."""
|
||||
class TemplateTypeNotSupportError(LLMNodeError):
|
||||
def __init__(self, *, type_name: str):
|
||||
super().__init__(f"Prompt type {type_name} is not supported.")
|
||||
|
||||
|
||||
class MemoryRolePrefixRequiredError(LLMNodeError):
|
||||
"""Raised when memory role prefix is required for completion model."""
|
||||
|
||||
|
||||
class FileTypeNotSupportError(LLMNodeError):
|
||||
def __init__(self, *, type_name: str):
|
||||
super().__init__(f"{type_name} type is not supported by this model")
|
||||
|
||||
@@ -65,6 +65,7 @@ from .entities import (
|
||||
ModelConfig,
|
||||
)
|
||||
from .exc import (
|
||||
FileTypeNotSupportError,
|
||||
InvalidContextStructureError,
|
||||
InvalidVariableTypeError,
|
||||
LLMModeRequiredError,
|
||||
@@ -72,7 +73,7 @@ from .exc import (
|
||||
MemoryRolePrefixRequiredError,
|
||||
ModelNotExistError,
|
||||
NoPromptFoundError,
|
||||
NotSupportedPromptTypeError,
|
||||
TemplateTypeNotSupportError,
|
||||
VariableNotFoundError,
|
||||
)
|
||||
|
||||
@@ -621,9 +622,7 @@ class LLMNode(BaseNode[LLMNodeData]):
|
||||
prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
|
||||
prompt_messages[0].content = prompt_content
|
||||
else:
|
||||
errmsg = f"Prompt type {type(prompt_template)} is not supported"
|
||||
logger.warning(errmsg)
|
||||
raise NotSupportedPromptTypeError(errmsg)
|
||||
raise TemplateTypeNotSupportError(type_name=str(type(prompt_template)))
|
||||
|
||||
if vision_enabled and user_files:
|
||||
file_prompts = []
|
||||
@@ -671,7 +670,7 @@ class LLMNode(BaseNode[LLMNodeData]):
|
||||
and ModelFeature.AUDIO not in model_config.model_schema.features
|
||||
)
|
||||
):
|
||||
continue
|
||||
raise FileTypeNotSupportError(type_name=content_item.type)
|
||||
prompt_message_content.append(content_item)
|
||||
if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
|
||||
prompt_message.content = prompt_message_content[0].data
|
||||
|
||||
Reference in New Issue
Block a user