refactor(api): Separate SegmentType for Integer/Float to Enable Pydantic Serialization (#22025)

refactor(api): Separate SegmentType for Integer/Float to Enable Pydantic Serialization (#22025) This PR addresses serialization issues in the VariablePool model by separating the `value_type` tags for `IntegerSegment`/`FloatSegment` and `IntegerVariable`/`FloatVariable`. Previously, both Integer and Float types shared the same `SegmentType.NUMBER` tag, causing conflicts during serialization. Key changes: - Introduce distinct `value_type` tags for Integer and Float segments/variables - Add `VariableUnion` and `SegmentUnion` types for proper type discrimination - Leverage Pydantic's discriminated union feature for seamless serialization/deserialization - Enable accurate serialization of data structures containing these types Closes #22024.
2025-07-16 12:31:37 +08:00
parent 229b4d621e
commit 2c1ab4879f
58 changed files with 2325 additions and 328 deletions
--- a/api/core/app/apps/advanced_chat/app_runner.py
+++ b/api/core/app/apps/advanced_chat/app_runner.py
@@ -16,9 +16,10 @@ from core.app.entities.queue_entities import (
    QueueTextChunkEvent,
 )
 from core.moderation.base import ModerationError
+from core.variables.variables import VariableUnion
 from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
 from core.workflow.entities.variable_pool import VariablePool
-from core.workflow.enums import SystemVariableKey
+from core.workflow.system_variable import SystemVariable
 from core.workflow.variable_loader import VariableLoader
 from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
@@ -64,7 +65,7 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
        if not workflow:
            raise ValueError("Workflow not initialized")

-        user_id = None
+        user_id: str | None = None
        if self.application_generate_entity.invoke_from in {InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API}:
            end_user = db.session.query(EndUser).filter(EndUser.id == self.application_generate_entity.user_id).first()
            if end_user:
@@ -136,23 +137,25 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
                session.commit()

            # Create a variable pool.
-            system_inputs = {
-                SystemVariableKey.QUERY: query,
-                SystemVariableKey.FILES: files,
-                SystemVariableKey.CONVERSATION_ID: self.conversation.id,
-                SystemVariableKey.USER_ID: user_id,
-                SystemVariableKey.DIALOGUE_COUNT: self._dialogue_count,
-                SystemVariableKey.APP_ID: app_config.app_id,
-                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
-                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_run_id,
-            }
+            system_inputs = SystemVariable(
+                query=query,
+                files=files,
+                conversation_id=self.conversation.id,
+                user_id=user_id,
+                dialogue_count=self._dialogue_count,
+                app_id=app_config.app_id,
+                workflow_id=app_config.workflow_id,
+                workflow_execution_id=self.application_generate_entity.workflow_run_id,
+            )

            # init variable pool
            variable_pool = VariablePool(
                system_variables=system_inputs,
                user_inputs=inputs,
                environment_variables=workflow.environment_variables,
-                conversation_variables=conversation_variables,
+                # Based on the definition of `VariableUnion`,
+                # `list[Variable]` can be safely used as `list[VariableUnion]` since they are compatible.
+                conversation_variables=cast(list[VariableUnion], conversation_variables),
            )

            # init graph
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@@ -61,12 +61,12 @@ from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.workflow.entities.workflow_execution import WorkflowExecutionStatus, WorkflowType
-from core.workflow.enums import SystemVariableKey
 from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState
 from core.workflow.nodes import NodeType
 from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
+from core.workflow.system_variable import SystemVariable
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
 from events.message_event import message_was_created
 from extensions.ext_database import db
@@ -116,16 +116,16 @@ class AdvancedChatAppGenerateTaskPipeline:

        self._workflow_cycle_manager = WorkflowCycleManager(
            application_generate_entity=application_generate_entity,
-            workflow_system_variables={
-                SystemVariableKey.QUERY: message.query,
-                SystemVariableKey.FILES: application_generate_entity.files,
-                SystemVariableKey.CONVERSATION_ID: conversation.id,
-                SystemVariableKey.USER_ID: user_session_id,
-                SystemVariableKey.DIALOGUE_COUNT: dialogue_count,
-                SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id,
-                SystemVariableKey.WORKFLOW_ID: workflow.id,
-                SystemVariableKey.WORKFLOW_EXECUTION_ID: application_generate_entity.workflow_run_id,
-            },
+            workflow_system_variables=SystemVariable(
+                query=message.query,
+                files=application_generate_entity.files,
+                conversation_id=conversation.id,
+                user_id=user_session_id,
+                dialogue_count=dialogue_count,
+                app_id=application_generate_entity.app_config.app_id,
+                workflow_id=workflow.id,
+                workflow_execution_id=application_generate_entity.workflow_run_id,
+            ),
            workflow_info=CycleManagerWorkflowInfo(
                workflow_id=workflow.id,
                workflow_type=WorkflowType(workflow.type),
--- a/api/core/app/apps/workflow/app_runner.py
+++ b/api/core/app/apps/workflow/app_runner.py
@@ -11,7 +11,7 @@ from core.app.entities.app_invoke_entities import (
 )
 from core.workflow.callbacks import WorkflowCallback, WorkflowLoggingCallback
 from core.workflow.entities.variable_pool import VariablePool
-from core.workflow.enums import SystemVariableKey
+from core.workflow.system_variable import SystemVariable
 from core.workflow.variable_loader import VariableLoader
 from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
@@ -95,13 +95,14 @@ class WorkflowAppRunner(WorkflowBasedAppRunner):
            files = self.application_generate_entity.files

            # Create a variable pool.
-            system_inputs = {
-                SystemVariableKey.FILES: files,
-                SystemVariableKey.USER_ID: user_id,
-                SystemVariableKey.APP_ID: app_config.app_id,
-                SystemVariableKey.WORKFLOW_ID: app_config.workflow_id,
-                SystemVariableKey.WORKFLOW_EXECUTION_ID: self.application_generate_entity.workflow_execution_id,
-            }
+
+            system_inputs = SystemVariable(
+                files=files,
+                user_id=user_id,
+                app_id=app_config.app_id,
+                workflow_id=app_config.workflow_id,
+                workflow_execution_id=self.application_generate_entity.workflow_execution_id,
+            )

            variable_pool = VariablePool(
                system_variables=system_inputs,
--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@@ -54,10 +54,10 @@ from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTas
 from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
-from core.workflow.enums import SystemVariableKey
 from core.workflow.repositories.draft_variable_repository import DraftVariableSaverFactory
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
+from core.workflow.system_variable import SystemVariable
 from core.workflow.workflow_cycle_manager import CycleManagerWorkflowInfo, WorkflowCycleManager
 from extensions.ext_database import db
 from models.account import Account
@@ -107,13 +107,13 @@ class WorkflowAppGenerateTaskPipeline:

        self._workflow_cycle_manager = WorkflowCycleManager(
            application_generate_entity=application_generate_entity,
-            workflow_system_variables={
-                SystemVariableKey.FILES: application_generate_entity.files,
-                SystemVariableKey.USER_ID: user_session_id,
-                SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id,
-                SystemVariableKey.WORKFLOW_ID: workflow.id,
-                SystemVariableKey.WORKFLOW_EXECUTION_ID: application_generate_entity.workflow_execution_id,
-            },
+            workflow_system_variables=SystemVariable(
+                files=application_generate_entity.files,
+                user_id=user_session_id,
+                app_id=application_generate_entity.app_config.app_id,
+                workflow_id=workflow.id,
+                workflow_execution_id=application_generate_entity.workflow_execution_id,
+            ),
            workflow_info=CycleManagerWorkflowInfo(
                workflow_id=workflow.id,
                workflow_type=WorkflowType(workflow.type),
--- a/api/core/app/apps/workflow_app_runner.py
+++ b/api/core/app/apps/workflow_app_runner.py
@@ -62,6 +62,7 @@ from core.workflow.graph_engine.entities.event import (
 from core.workflow.graph_engine.entities.graph import Graph
 from core.workflow.nodes import NodeType
 from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
+from core.workflow.system_variable import SystemVariable
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool
 from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
@@ -166,7 +167,7 @@ class WorkflowBasedAppRunner(AppRunner):

        # init variable pool
        variable_pool = VariablePool(
-            system_variables={},
+            system_variables=SystemVariable.empty(),
            user_inputs={},
            environment_variables=workflow.environment_variables,
        )
@@ -263,7 +264,7 @@ class WorkflowBasedAppRunner(AppRunner):

        # init variable pool
        variable_pool = VariablePool(
-            system_variables={},
+            system_variables=SystemVariable.empty(),
            user_inputs={},
            environment_variables=workflow.environment_variables,
        )
--- a/api/core/prompt/advanced_prompt_transform.py
+++ b/api/core/prompt/advanced_prompt_transform.py
@@ -158,7 +158,7 @@ class AdvancedPromptTransform(PromptTransform):

            if prompt_item.edition_type == "basic" or not prompt_item.edition_type:
                if self.with_variable_tmpl:
-                    vp = VariablePool()
+                    vp = VariablePool.empty()
                    for k, v in inputs.items():
                        if k.startswith("#"):
                            vp.add(k[1:-1].split("."), v)
--- a/api/core/variables/segments.py
+++ b/api/core/variables/segments.py
@@ -1,9 +1,9 @@
 import json
 import sys
 from collections.abc import Mapping, Sequence
-from typing import Any
+from typing import Annotated, Any, TypeAlias

-from pydantic import BaseModel, ConfigDict, field_validator
+from pydantic import BaseModel, ConfigDict, Discriminator, Tag, field_validator

 from core.file import File

@@ -11,6 +11,11 @@ from .types import SegmentType


 class Segment(BaseModel):
+    """Segment is runtime type used during the execution of workflow.
+
+    Note: this class is abstract, you should use subclasses of this class instead.
+    """
+
    model_config = ConfigDict(frozen=True)

    value_type: SegmentType
@@ -73,7 +78,7 @@ class StringSegment(Segment):


 class FloatSegment(Segment):
-    value_type: SegmentType = SegmentType.NUMBER
+    value_type: SegmentType = SegmentType.FLOAT
    value: float
    # NOTE(QuantumGhost): seems that the equality for FloatSegment with `NaN` value has some problems.
    # The following tests cannot pass.
@@ -92,7 +97,7 @@ class FloatSegment(Segment):


 class IntegerSegment(Segment):
-    value_type: SegmentType = SegmentType.NUMBER
+    value_type: SegmentType = SegmentType.INTEGER
    value: int


@@ -181,3 +186,46 @@ class ArrayFileSegment(ArraySegment):
    @property
    def text(self) -> str:
        return ""
+
+
+def get_segment_discriminator(v: Any) -> SegmentType | None:
+    """Return the discriminator tag used to select a union variant for `v`.
+
+    `Segment` instances report their own `value_type`. Dicts are resolved
+    through their `"value_type"` entry. `None` is returned whenever no tag can
+    be determined: the entry is missing, its value is not a known
+    `SegmentType`, or `v` is neither a `Segment` nor a dict.
+    """
+    if isinstance(v, Segment):
+        return v.value_type
+    if not isinstance(v, dict):
+        # return None if the discriminator value isn't found
+        return None
+
+    raw_tag = v.get("value_type")
+    if raw_tag is None:
+        return None
+    try:
+        return SegmentType(raw_tag)
+    except ValueError:
+        # Unknown tag value: treated the same as a missing discriminator.
+        return None
+
+
+# The `SegmentUnion` type is used to enable serialization and deserialization with Pydantic.
+# Use `Segment` for type hinting when serialization is not required.
+#
+# Note:
+# - All variants in `SegmentUnion` must inherit from the `Segment` class.
+# - The union must include all non-abstract subclasses of `Segment`, except:
+#   - `SegmentGroup`, which is not added to the variable pool.
+#   - `Variable` and its subclasses, which are handled by `VariableUnion`.
+SegmentUnion: TypeAlias = Annotated[
+    (
+        Annotated[NoneSegment, Tag(SegmentType.NONE)]
+        | Annotated[StringSegment, Tag(SegmentType.STRING)]
+        | Annotated[FloatSegment, Tag(SegmentType.FLOAT)]
+        | Annotated[IntegerSegment, Tag(SegmentType.INTEGER)]
+        | Annotated[ObjectSegment, Tag(SegmentType.OBJECT)]
+        | Annotated[FileSegment, Tag(SegmentType.FILE)]
+        | Annotated[ArrayAnySegment, Tag(SegmentType.ARRAY_ANY)]
+        | Annotated[ArrayStringSegment, Tag(SegmentType.ARRAY_STRING)]
+        | Annotated[ArrayNumberSegment, Tag(SegmentType.ARRAY_NUMBER)]
+        | Annotated[ArrayObjectSegment, Tag(SegmentType.ARRAY_OBJECT)]
+        | Annotated[ArrayFileSegment, Tag(SegmentType.ARRAY_FILE)]
+    ),
+    Discriminator(get_segment_discriminator),
+]
--- a/api/core/variables/types.py
+++ b/api/core/variables/types.py
@@ -1,8 +1,27 @@
+from collections.abc import Mapping
 from enum import StrEnum
+from typing import Any, Optional
+
+from core.file.models import File
+
+
+class ArrayValidation(StrEnum):
+    """Strategy for validating the elements of an array value."""
+
+    # Skip element validation (only check that the value is an array container).
+    NONE = "none"
+
+    # Validate only the first element (if the array is non-empty).
+    FIRST = "first"
+
+    # Validate all elements in the array.
+    ALL = "all"


 class SegmentType(StrEnum):
    NUMBER = "number"
+    INTEGER = "integer"
+    FLOAT = "float"
    STRING = "string"
    OBJECT = "object"
    SECRET = "secret"
@@ -19,16 +38,141 @@ class SegmentType(StrEnum):

    GROUP = "group"

-    def is_array_type(self):
+    def is_array_type(self) -> bool:
        return self in _ARRAY_TYPES

+    @classmethod
+    def infer_segment_type(cls, value: Any) -> Optional["SegmentType"]:
+        """
+        Attempt to infer the `SegmentType` based on the Python type of the `value` parameter.
+
+        Lists are inferred from their elements: a list whose elements all share one
+        scalar type maps to the matching `ARRAY_*` type, a list mixing only numeric
+        types maps to `ARRAY_NUMBER`, and any other mix (or a list of lists) maps to
+        `ARRAY_ANY`. `bool` values are deliberately not treated as integers.
+
+        Returns `None` if no appropriate `SegmentType` can be determined for the given `value`
+        (or for any element of a list).
+        For example, this may occur if the input is a generic Python object of type `object`.
+
+        Raises `ValueError` if a homogeneous list has an element type without an array counterpart.
+        """
+
+        if isinstance(value, list):
+            elem_types: set[SegmentType] = set()
+            for i in value:
+                segment_type = cls.infer_segment_type(i)
+                if segment_type is None:
+                    return None
+
+                elem_types.add(segment_type)
+
+            if len(elem_types) != 1:
+                # NOTE(review): for an empty list `elem_types` is the empty set, which is a
+                # subset of `_NUMERICAL_TYPES`, so `[]` is inferred as `ARRAY_NUMBER` - confirm intended.
+                if elem_types.issubset(_NUMERICAL_TYPES):
+                    return SegmentType.ARRAY_NUMBER
+                return SegmentType.ARRAY_ANY
+            elif all(i.is_array_type() for i in elem_types):
+                # Nested arrays have no dedicated array type.
+                return SegmentType.ARRAY_ANY
+            match elem_types.pop():
+                case SegmentType.STRING:
+                    return SegmentType.ARRAY_STRING
+                case SegmentType.NUMBER | SegmentType.INTEGER | SegmentType.FLOAT:
+                    return SegmentType.ARRAY_NUMBER
+                case SegmentType.OBJECT:
+                    return SegmentType.ARRAY_OBJECT
+                case SegmentType.FILE:
+                    return SegmentType.ARRAY_FILE
+                case SegmentType.NONE:
+                    return SegmentType.ARRAY_ANY
+                case _:
+                    # This should be unreachable.
+                    raise ValueError(f"not supported value {value}")
+        if value is None:
+            return SegmentType.NONE
+        elif isinstance(value, int) and not isinstance(value, bool):
+            return SegmentType.INTEGER
+        elif isinstance(value, float):
+            return SegmentType.FLOAT
+        elif isinstance(value, str):
+            return SegmentType.STRING
+        elif isinstance(value, dict):
+            return SegmentType.OBJECT
+        elif isinstance(value, File):
+            return SegmentType.FILE
+        else:
+            return None
+
+    def _validate_array(self, value: Any, array_validation: ArrayValidation) -> bool:
+        """Check whether `value` is a list acceptable for this array segment type.
+
+        Non-list values are rejected. Empty lists and `ARRAY_ANY` are accepted
+        without inspecting any element. Otherwise the elements are checked against
+        the element type of this array type according to `array_validation`:
+        none of them, only the first one, or all of them.
+        """
+        if not isinstance(value, list):
+            return False
+        # Skip element validation if array is empty
+        if len(value) == 0:
+            return True
+        if self == SegmentType.ARRAY_ANY:
+            return True
+        element_type = _ARRAY_ELEMENT_TYPES_MAPPING[self]
+
+        if array_validation == ArrayValidation.NONE:
+            return True
+        elif array_validation == ArrayValidation.FIRST:
+            return element_type.is_valid(value[0])
+        else:
+            # Feed the bare booleans to `all()`: wrapping each result in a list would make
+            # every item truthy and the check would accept any array.
+            return all(element_type.is_valid(i, array_validation=ArrayValidation.NONE) for i in value)
+
+    def is_valid(self, value: Any, array_validation: ArrayValidation = ArrayValidation.FIRST) -> bool:
+        """
+        Check if a value matches the segment type.
+        Users of `SegmentType` should call this method, instead of using
+        `isinstance` manually.
+
+        Args:
+            value: The value to validate
+            array_validation: Validation strategy for array types (ignored for non-array types)
+
+        Returns:
+            True if the value matches the type under the given validation strategy
+
+        Raises:
+            AssertionError: if this segment type has no validation rule defined here.
+        """
+        if self.is_array_type():
+            return self._validate_array(value, array_validation)
+        elif self in (SegmentType.NUMBER, SegmentType.INTEGER, SegmentType.FLOAT):
+            # `INTEGER` and `FLOAT` are validated like `NUMBER` (the type the frontend sees)
+            # instead of falling through to the "unreachable" branch below.
+            return isinstance(value, (int, float))
+        elif self == SegmentType.STRING:
+            return isinstance(value, str)
+        elif self == SegmentType.OBJECT:
+            return isinstance(value, dict)
+        elif self == SegmentType.SECRET:
+            return isinstance(value, str)
+        elif self == SegmentType.FILE:
+            return isinstance(value, File)
+        elif self == SegmentType.NONE:
+            return value is None
+        else:
+            raise AssertionError("this statement should be unreachable.")
+
+    def exposed_type(self) -> "SegmentType":
+        """Return the segment type as it is presented to the frontend.
+
+        `INTEGER` and `FLOAT` are collapsed into `NUMBER`, because the frontend
+        treats both of them as `NUMBER`; every other type is returned unchanged.
+        """
+        is_numeric_subtype = self == SegmentType.INTEGER or self == SegmentType.FLOAT
+        return SegmentType.NUMBER if is_numeric_subtype else self
+
+
+_ARRAY_ELEMENT_TYPES_MAPPING: Mapping[SegmentType, SegmentType] = {
+    # ARRAY_ANY does not have a corresponding element type.
+    SegmentType.ARRAY_STRING: SegmentType.STRING,
+    SegmentType.ARRAY_NUMBER: SegmentType.NUMBER,
+    SegmentType.ARRAY_OBJECT: SegmentType.OBJECT,
+    SegmentType.ARRAY_FILE: SegmentType.FILE,
+}

 _ARRAY_TYPES = frozenset(
-    [
+    list(_ARRAY_ELEMENT_TYPES_MAPPING.keys())
+    + [
        SegmentType.ARRAY_ANY,
-        SegmentType.ARRAY_STRING,
-        SegmentType.ARRAY_NUMBER,
-        SegmentType.ARRAY_OBJECT,
-        SegmentType.ARRAY_FILE,
+    ]
+)
+
+
+_NUMERICAL_TYPES = frozenset(
+    [
+        SegmentType.NUMBER,
+        SegmentType.INTEGER,
+        SegmentType.FLOAT,
    ]
 )
--- a/api/core/variables/variables.py
+++ b/api/core/variables/variables.py
@@ -1,8 +1,8 @@
 from collections.abc import Sequence
-from typing import cast
+from typing import Annotated, TypeAlias, cast
 from uuid import uuid4

-from pydantic import Field
+from pydantic import Discriminator, Field, Tag

 from core.helper import encrypter

@@ -20,6 +20,7 @@ from .segments import (
    ObjectSegment,
    Segment,
    StringSegment,
+    get_segment_discriminator,
 )
 from .types import SegmentType

@@ -27,6 +28,10 @@ from .types import SegmentType
 class Variable(Segment):
    """
    A variable is a segment that has a name.
+
+    It is mainly used to store segments and their selector in VariablePool.
+
+    Note: this class is abstract, you should use subclasses of this class instead.
    """

    id: str = Field(
@@ -93,3 +98,28 @@ class FileVariable(FileSegment, Variable):

 class ArrayFileVariable(ArrayFileSegment, ArrayVariable):
    pass
+
+
+# The `VariableUnion` type is used to enable serialization and deserialization with Pydantic.
+# Use `Variable` for type hinting when serialization is not required.
+#
+# Note:
+# - All variants in `VariableUnion` must inherit from the `Variable` class.
+# - The union should include all non-abstract subclasses of `Variable`.
+VariableUnion: TypeAlias = Annotated[
+    (
+        Annotated[NoneVariable, Tag(SegmentType.NONE)]
+        | Annotated[StringVariable, Tag(SegmentType.STRING)]
+        | Annotated[FloatVariable, Tag(SegmentType.FLOAT)]
+        | Annotated[IntegerVariable, Tag(SegmentType.INTEGER)]
+        | Annotated[ObjectVariable, Tag(SegmentType.OBJECT)]
+        | Annotated[FileVariable, Tag(SegmentType.FILE)]
+        | Annotated[ArrayAnyVariable, Tag(SegmentType.ARRAY_ANY)]
+        | Annotated[ArrayStringVariable, Tag(SegmentType.ARRAY_STRING)]
+        | Annotated[ArrayNumberVariable, Tag(SegmentType.ARRAY_NUMBER)]
+        | Annotated[ArrayObjectVariable, Tag(SegmentType.ARRAY_OBJECT)]
+        | Annotated[ArrayFileVariable, Tag(SegmentType.ARRAY_FILE)]
+        | Annotated[SecretVariable, Tag(SegmentType.SECRET)]
+    ),
+    Discriminator(get_segment_discriminator),
+]
--- a/api/core/workflow/entities/variable_pool.py
+++ b/api/core/workflow/entities/variable_pool.py
@@ -1,7 +1,7 @@
 import re
 from collections import defaultdict
 from collections.abc import Mapping, Sequence
-from typing import Any, Union
+from typing import Annotated, Any, Union, cast

 from pydantic import BaseModel, Field

@@ -9,8 +9,9 @@ from core.file import File, FileAttribute, file_manager
 from core.variables import Segment, SegmentGroup, Variable
 from core.variables.consts import MIN_SELECTORS_LENGTH
 from core.variables.segments import FileSegment, NoneSegment
+from core.variables.variables import VariableUnion
 from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
-from core.workflow.enums import SystemVariableKey
+from core.workflow.system_variable import SystemVariable
 from factories import variable_factory

 VariableValue = Union[str, int, float, dict, list, File]
@@ -23,31 +24,31 @@ class VariablePool(BaseModel):
    # The first element of the selector is the node id, it's the first-level key in the dictionary.
    # Other elements of the selector are the keys in the second-level dictionary. To get the key, we hash the
    # elements of the selector except the first one.
-    variable_dictionary: dict[str, dict[int, Segment]] = Field(
+    variable_dictionary: defaultdict[str, Annotated[dict[int, VariableUnion], Field(default_factory=dict)]] = Field(
        description="Variables mapping",
        default=defaultdict(dict),
    )
-    # TODO: This user inputs is not used for pool.
+
+    # The `user_inputs` is used only when constructing the inputs for the `StartNode`. It's not used elsewhere.
    user_inputs: Mapping[str, Any] = Field(
        description="User inputs",
        default_factory=dict,
    )
-    system_variables: Mapping[SystemVariableKey, Any] = Field(
+    system_variables: SystemVariable = Field(
        description="System variables",
-        default_factory=dict,
    )
-    environment_variables: Sequence[Variable] = Field(
+    environment_variables: Sequence[VariableUnion] = Field(
        description="Environment variables.",
        default_factory=list,
    )
-    conversation_variables: Sequence[Variable] = Field(
+    conversation_variables: Sequence[VariableUnion] = Field(
        description="Conversation variables.",
        default_factory=list,
    )

    def model_post_init(self, context: Any, /) -> None:
-        for key, value in self.system_variables.items():
-            self.add((SYSTEM_VARIABLE_NODE_ID, key.value), value)
+        # Register the system variables in the pool (fields that are `None` are skipped).
+        self._add_system_variables(self.system_variables)
        # Add environment variables to the variable pool
        for var in self.environment_variables:
            self.add((ENVIRONMENT_VARIABLE_NODE_ID, var.name), var)
@@ -83,8 +84,22 @@ class VariablePool(BaseModel):
            segment = variable_factory.build_segment(value)
            variable = variable_factory.segment_to_variable(segment=segment, selector=selector)

-        hash_key = hash(tuple(selector[1:]))
-        self.variable_dictionary[selector[0]][hash_key] = variable
+        key, hash_key = self._selector_to_keys(selector)
+        # Based on the definition of `VariableUnion`,
+        # a `Variable` can be safely used as a `VariableUnion` since they are compatible.
+        self.variable_dictionary[key][hash_key] = cast(VariableUnion, variable)
+
+    @classmethod
+    def _selector_to_keys(cls, selector: Sequence[str]) -> tuple[str, int]:
+        """Split a selector into the two keys used by `variable_dictionary`.
+
+        The first element is used as the first-level key as-is; the remaining
+        elements are hashed together to form the second-level key.
+        """
+        first_level_key = selector[0]
+        remaining_parts = tuple(selector[1:])
+        return first_level_key, hash(remaining_parts)
+
+    def _has(self, selector: Sequence[str]) -> bool:
+        """Return True if a variable is stored under `selector`.
+
+        Membership is tested with `in` at both levels, so the lookup never
+        creates an entry in `variable_dictionary`.
+        """
+        first_level_key, second_level_key = self._selector_to_keys(selector)
+        return (
+            first_level_key in self.variable_dictionary
+            and second_level_key in self.variable_dictionary[first_level_key]
+        )

    def get(self, selector: Sequence[str], /) -> Segment | None:
        """
@@ -102,8 +117,8 @@ class VariablePool(BaseModel):
        if len(selector) < MIN_SELECTORS_LENGTH:
            return None

-        hash_key = hash(tuple(selector[1:]))
-        value = self.variable_dictionary[selector[0]].get(hash_key)
+        key, hash_key = self._selector_to_keys(selector)
+        value: Segment | None = self.variable_dictionary[key].get(hash_key)

        if value is None:
            selector, attr = selector[:-1], selector[-1]
@@ -136,8 +151,9 @@ class VariablePool(BaseModel):
        if len(selector) == 1:
            self.variable_dictionary[selector[0]] = {}
            return
+        key, hash_key = self._selector_to_keys(selector)
        hash_key = hash(tuple(selector[1:]))
-        self.variable_dictionary[selector[0]].pop(hash_key, None)
+        self.variable_dictionary[key].pop(hash_key, None)

    def convert_template(self, template: str, /):
        parts = VARIABLE_PATTERN.split(template)
@@ -154,3 +170,20 @@ class VariablePool(BaseModel):
        if isinstance(segment, FileSegment):
            return segment
        return None
+
+    def _add_system_variables(self, system_variable: SystemVariable):
+        """Add every non-`None` system variable to the pool under the system node id.
+
+        Entries that already exist in the pool are left untouched, which keeps
+        the ids of previously stored system variables intact.
+        """
+        for name, val in system_variable.to_dict().items():
+            selector = (SYSTEM_VARIABLE_NODE_ID, name)
+            # Unset values are skipped; existing entries are never overwritten.
+            if val is None or self._has(selector):
+                continue
+            self.add(selector, val)  # type: ignore
+
+    @classmethod
+    def empty(cls) -> "VariablePool":
+        """Build a variable pool whose system variables are `SystemVariable.empty()`."""
+        blank_system_variables = SystemVariable.empty()
+        return cls(system_variables=blank_system_variables)
--- a/api/core/workflow/graph_engine/entities/graph_runtime_state.py
+++ b/api/core/workflow/graph_engine/entities/graph_runtime_state.py
@@ -17,8 +17,12 @@ class GraphRuntimeState(BaseModel):
    """total tokens"""
    llm_usage: LLMUsage = LLMUsage.empty_usage()
    """llm usage info"""
+
+    # The `outputs` field stores the final output values generated by executing workflows or chatflows.
+    #
+    # Note: Since the type of this field is `dict[str, Any]`, its values may not remain consistent
+    # after a serialization and deserialization round trip.
    outputs: dict[str, Any] = {}
-    """outputs"""

    node_run_steps: int = 0
    """node run steps"""
--- a/api/core/workflow/nodes/loop/entities.py
+++ b/api/core/workflow/nodes/loop/entities.py
@@ -1,11 +1,29 @@
 from collections.abc import Mapping
-from typing import Any, Literal, Optional
+from typing import Annotated, Any, Literal, Optional

-from pydantic import BaseModel, Field
+from pydantic import AfterValidator, BaseModel, Field

+from core.variables.types import SegmentType
 from core.workflow.nodes.base import BaseLoopNodeData, BaseLoopState, BaseNodeData
 from core.workflow.utils.condition.entities import Condition

+# Segment types accepted by `_is_valid_var_type`.
+_VALID_VAR_TYPE = frozenset(
+    [
+        SegmentType.STRING,
+        SegmentType.NUMBER,
+        SegmentType.OBJECT,
+        SegmentType.ARRAY_STRING,
+        SegmentType.ARRAY_NUMBER,
+        SegmentType.ARRAY_OBJECT,
+    ]
+)
+
+
+def _is_valid_var_type(seg_type: SegmentType) -> SegmentType:
+    """Validate that `seg_type` is one of the types in `_VALID_VAR_TYPE`.
+
+    Returns `seg_type` unchanged so the function can be used as a Pydantic
+    `AfterValidator`.
+
+    Raises:
+        ValueError: if `seg_type` is not in `_VALID_VAR_TYPE`.
+    """
+    if seg_type not in _VALID_VAR_TYPE:
+        # Give a real message instead of passing the `Ellipsis` placeholder to the exception.
+        allowed = ", ".join(sorted(_VALID_VAR_TYPE))
+        raise ValueError(f"invalid loop variable type '{seg_type}', expected one of: {allowed}")
+    return seg_type
+

 class LoopVariableData(BaseModel):
    """
@@ -13,7 +31,7 @@ class LoopVariableData(BaseModel):
    """

    label: str
-    var_type: Literal["string", "number", "object", "array[string]", "array[number]", "array[object]"]
+    var_type: Annotated[SegmentType, AfterValidator(_is_valid_var_type)]
    value_type: Literal["variable", "constant"]
    value: Optional[Any | list[str]] = None

--- a/api/core/workflow/nodes/loop/loop_node.py
+++ b/api/core/workflow/nodes/loop/loop_node.py
@@ -7,14 +7,9 @@ from typing import TYPE_CHECKING, Any, Literal, cast

 from configs import dify_config
 from core.variables import (
-    ArrayNumberSegment,
-    ArrayObjectSegment,
-    ArrayStringSegment,
    IntegerSegment,
-    ObjectSegment,
    Segment,
    SegmentType,
-    StringSegment,
 )
 from core.workflow.entities.node_entities import NodeRunResult
 from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
@@ -39,6 +34,7 @@ from core.workflow.nodes.enums import NodeType
 from core.workflow.nodes.event import NodeEvent, RunCompletedEvent
 from core.workflow.nodes.loop.entities import LoopNodeData
 from core.workflow.utils.condition.processor import ConditionProcessor
+from factories.variable_factory import TypeMismatchError, build_segment_with_type

 if TYPE_CHECKING:
    from core.workflow.entities.variable_pool import VariablePool
@@ -505,23 +501,21 @@ class LoopNode(BaseNode[LoopNodeData]):
        return variable_mapping

    @staticmethod
-    def _get_segment_for_constant(var_type: str, value: Any) -> Segment:
+    def _get_segment_for_constant(var_type: SegmentType, value: Any) -> Segment:
        """Get the appropriate segment type for a constant value."""
-        segment_mapping: dict[str, tuple[type[Segment], SegmentType]] = {
-            "string": (StringSegment, SegmentType.STRING),
-            "number": (IntegerSegment, SegmentType.NUMBER),
-            "object": (ObjectSegment, SegmentType.OBJECT),
-            "array[string]": (ArrayStringSegment, SegmentType.ARRAY_STRING),
-            "array[number]": (ArrayNumberSegment, SegmentType.ARRAY_NUMBER),
-            "array[object]": (ArrayObjectSegment, SegmentType.ARRAY_OBJECT),
-        }
        if var_type in ["array[string]", "array[number]", "array[object]"]:
-            if value:
+            if value and isinstance(value, str):
                value = json.loads(value)
            else:
                value = []
-        segment_info = segment_mapping.get(var_type)
-        if not segment_info:
-            raise ValueError(f"Invalid variable type: {var_type}")
-        segment_class, value_type = segment_info
-        return segment_class(value=value, value_type=value_type)
+        try:
+            return build_segment_with_type(var_type, value)
+        except TypeMismatchError as type_exc:
+            # Attempt to parse the value as a JSON-encoded string, if applicable.
+            if not isinstance(value, str):
+                raise
+            try:
+                value = json.loads(value)
+            except ValueError:
+                raise type_exc
+            return build_segment_with_type(var_type, value)
--- a/api/core/workflow/nodes/start/start_node.py
+++ b/api/core/workflow/nodes/start/start_node.py
@@ -16,7 +16,7 @@ class StartNode(BaseNode[StartNodeData]):

    def _run(self) -> NodeRunResult:
        node_inputs = dict(self.graph_runtime_state.variable_pool.user_inputs)
-        system_inputs = self.graph_runtime_state.variable_pool.system_variables
+        system_inputs = self.graph_runtime_state.variable_pool.system_variables.to_dict()

        # TODO: System variables should be directly accessible, no need for special handling
        # Set system variables as node outputs.
--- a/api/core/workflow/nodes/variable_assigner/v1/node.py
+++ b/api/core/workflow/nodes/variable_assigner/v1/node.py
@@ -130,6 +130,7 @@ class VariableAssignerNode(BaseNode[VariableAssignerData]):


 def get_zero_value(t: SegmentType):
+    # TODO(QuantumGhost): this should be a method of `SegmentType`.
    match t:
        case SegmentType.ARRAY_OBJECT | SegmentType.ARRAY_STRING | SegmentType.ARRAY_NUMBER:
            return variable_factory.build_segment([])
@@ -137,6 +138,10 @@ def get_zero_value(t: SegmentType):
            return variable_factory.build_segment({})
        case SegmentType.STRING:
            return variable_factory.build_segment("")
+        case SegmentType.INTEGER:
+            return variable_factory.build_segment(0)
+        case SegmentType.FLOAT:
+            return variable_factory.build_segment(0.0)
        case SegmentType.NUMBER:
            return variable_factory.build_segment(0)
        case _:
--- a/api/core/workflow/nodes/variable_assigner/v2/constants.py
+++ b/api/core/workflow/nodes/variable_assigner/v2/constants.py
@@ -1,5 +1,6 @@
 from core.variables import SegmentType

+# NOTE: This mapping duplicates `get_zero_value` (variable_assigner/v1/node.py). Consider refactoring to avoid redundancy.
 EMPTY_VALUE_MAPPING = {
    SegmentType.STRING: "",
    SegmentType.NUMBER: 0,
--- a/api/core/workflow/nodes/variable_assigner/v2/helpers.py
+++ b/api/core/workflow/nodes/variable_assigner/v2/helpers.py
@@ -10,10 +10,16 @@ def is_operation_supported(*, variable_type: SegmentType, operation: Operation):
        case Operation.OVER_WRITE | Operation.CLEAR:
            return True
        case Operation.SET:
-            return variable_type in {SegmentType.OBJECT, SegmentType.STRING, SegmentType.NUMBER}
+            return variable_type in {
+                SegmentType.OBJECT,
+                SegmentType.STRING,
+                SegmentType.NUMBER,
+                SegmentType.INTEGER,
+                SegmentType.FLOAT,
+            }
        case Operation.ADD | Operation.SUBTRACT | Operation.MULTIPLY | Operation.DIVIDE:
            # Only number variable can be added, subtracted, multiplied or divided
-            return variable_type == SegmentType.NUMBER
+            return variable_type in {SegmentType.NUMBER, SegmentType.INTEGER, SegmentType.FLOAT}
        case Operation.APPEND | Operation.EXTEND:
            # Only array variable can be appended or extended
            return variable_type in {
@@ -46,7 +52,7 @@ def is_constant_input_supported(*, variable_type: SegmentType, operation: Operat
    match variable_type:
        case SegmentType.STRING | SegmentType.OBJECT:
            return operation in {Operation.OVER_WRITE, Operation.SET}
-        case SegmentType.NUMBER:
+        case SegmentType.NUMBER | SegmentType.INTEGER | SegmentType.FLOAT:
            return operation in {
                Operation.OVER_WRITE,
                Operation.SET,
@@ -66,7 +72,7 @@ def is_input_value_valid(*, variable_type: SegmentType, operation: Operation, va
        case SegmentType.STRING:
            return isinstance(value, str)

-        case SegmentType.NUMBER:
+        case SegmentType.NUMBER | SegmentType.INTEGER | SegmentType.FLOAT:
            if not isinstance(value, int | float):
                return False
            if operation == Operation.DIVIDE and value == 0:
--- a/api/core/workflow/system_variable.py
+++ b/api/core/workflow/system_variable.py
@@ -0,0 +1,87 @@
+from collections.abc import Sequence
+from typing import Any
+
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator
+
+from core.file.models import File
+from core.workflow.enums import SystemVariableKey
+
+
+class SystemVariable(BaseModel):
+    """Typed container for the system variables of a workflow run.
+
+    A field whose value is `None` is treated as absent and is not included
+    in the variable pool.
+    """
+
+    model_config = ConfigDict(
+        extra="forbid",
+        serialize_by_alias=True,
+        validate_by_alias=True,
+    )
+
+    user_id: str | None = None
+
+    # `app_id` and `workflow_id` would ideally be required, but some call sites in
+    # the codebase do not set them, so they stay optional for compatibility.
+    app_id: str | None = None
+    workflow_id: str | None = None
+
+    files: Sequence[File] = Field(default_factory=list)
+
+    # NOTE: `workflow_execution_id` used to be called `workflow_run_id`. Existing
+    # workflows still rely on the old name, so the field is serialized as
+    # `workflow_run_id` (dicts / JSON) and referenced as `workflow_run_id` in the
+    # variable pool, while validation accepts either name.
+    workflow_execution_id: str | None = Field(
+        validation_alias=AliasChoices("workflow_execution_id", "workflow_run_id"),
+        serialization_alias="workflow_run_id",
+        default=None,
+    )
+    # Chatflow related fields.
+    query: str | None = None
+    conversation_id: str | None = None
+    dialogue_count: int | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_json_fields(cls, data):
+        """Drop `workflow_execution_id` when `workflow_run_id` is supplied alongside it.
+
+        Any other input (non-dict data, or a dict carrying at most one of the two
+        names) is returned unchanged.
+        """
+        if not isinstance(data, dict):
+            return data
+        if "workflow_execution_id" in data and "workflow_run_id" in data:
+            # Work on a copy so the caller's mapping is left untouched.
+            deduplicated = data.copy()
+            deduplicated.pop("workflow_execution_id")
+            return deduplicated
+        return data
+
+    @classmethod
+    def empty(cls) -> "SystemVariable":
+        """Return an instance with every field left at its default."""
+        return cls()
+
+    def to_dict(self) -> dict[SystemVariableKey, Any]:
+        """Convert to a `SystemVariableKey`-keyed dict, omitting fields that are `None`.
+
+        NOTE: Provided for compatibility with legacy code. New code should use the
+        `SystemVariable` object directly: the dictionary loses the per-key type
+        information, which makes static analysis more difficult.
+        """
+        optional_entries = (
+            (SystemVariableKey.USER_ID, self.user_id),
+            (SystemVariableKey.APP_ID, self.app_id),
+            (SystemVariableKey.WORKFLOW_ID, self.workflow_id),
+            (SystemVariableKey.WORKFLOW_EXECUTION_ID, self.workflow_execution_id),
+            (SystemVariableKey.QUERY, self.query),
+            (SystemVariableKey.CONVERSATION_ID, self.conversation_id),
+            (SystemVariableKey.DIALOGUE_COUNT, self.dialogue_count),
+        )
+        # `files` is always present; the remaining keys appear only when set.
+        result: dict[SystemVariableKey, Any] = {SystemVariableKey.FILES: self.files}
+        result.update((key, value) for key, value in optional_entries if value is not None)
+        return result
--- a/api/core/workflow/workflow_cycle_manager.py
+++ b/api/core/workflow/workflow_cycle_manager.py
@@ -26,6 +26,7 @@ from core.workflow.entities.workflow_node_execution import (
 from core.workflow.enums import SystemVariableKey
 from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
+from core.workflow.system_variable import SystemVariable
 from core.workflow.workflow_entry import WorkflowEntry
 from libs.datetime_utils import naive_utc_now

@@ -43,7 +44,7 @@ class WorkflowCycleManager:
        self,
        *,
        application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity],
-        workflow_system_variables: dict[SystemVariableKey, Any],
+        workflow_system_variables: SystemVariable,
        workflow_info: CycleManagerWorkflowInfo,
        workflow_execution_repository: WorkflowExecutionRepository,
        workflow_node_execution_repository: WorkflowNodeExecutionRepository,
@@ -56,17 +57,22 @@ class WorkflowCycleManager:
 
     def handle_workflow_run_start(self) -> WorkflowExecution:
         inputs = {**self._application_generate_entity.inputs}
-        for key, value in (self._workflow_system_variables or {}).items():
-            if key.value == "conversation":
-                continue
-            inputs[f"sys.{key.value}"] = value
+
+        # Copy the entries of `to_dict()` into `inputs` under `sys.`-prefixed keys, skipping the conversation id.
+        if self._workflow_system_variables:
+            for field_name, value in self._workflow_system_variables.to_dict().items():
+                if field_name == SystemVariableKey.CONVERSATION_ID:
+                    continue
+                inputs[f"sys.{field_name}"] = value
 
         # handle special values
         inputs = dict(WorkflowEntry.handle_special_values(inputs) or {})
 
         # init workflow run
         # TODO: This workflow_run_id should always not be None, maybe we can use a more elegant way to handle this
-        execution_id = str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_EXECUTION_ID) or uuid4())
+        # Fall back before `str()`: `str(None)` is the truthy string "None", which would skip `uuid4()`.
+        system_variables = self._workflow_system_variables
+        execution_id = str((system_variables.workflow_execution_id if system_variables else None) or uuid4())
         execution = WorkflowExecution.new(
             id_=execution_id,
             workflow_id=self._workflow_info.workflow_id,
--- a/api/core/workflow/workflow_entry.py
+++ b/api/core/workflow/workflow_entry.py
@@ -21,6 +21,7 @@ from core.workflow.nodes import NodeType
 from core.workflow.nodes.base import BaseNode
 from core.workflow.nodes.event import NodeEvent
 from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
+from core.workflow.system_variable import SystemVariable
 from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool
 from factories import file_factory
 from models.enums import UserFrom
@@ -254,7 +255,7 @@ class WorkflowEntry:

        # init variable pool
        variable_pool = VariablePool(
-            system_variables={},
+            system_variables=SystemVariable.empty(),
            user_inputs={},
            environment_variables=[],
        )