feat(graph_engine): Support pausing workflow graph executions (#26585)

Signed-off-by: -LAN- <laipz8200@outlook.com>
2025-10-19 21:33:41 +08:00
parent 9a5f214623
commit 578247ffbc
112 changed files with 3766 additions and 2415 deletions
--- a/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_conversation_variables.py
+++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_conversation_variables.py
@@ -99,6 +99,8 @@ class TestAdvancedChatAppRunnerConversationVariables:
            workflow=mock_workflow,
            system_user_id=str(uuid4()),
            app=MagicMock(),
+            workflow_execution_repository=MagicMock(),
+            workflow_node_execution_repository=MagicMock(),
        )

        # Mock database session
@@ -237,6 +239,8 @@ class TestAdvancedChatAppRunnerConversationVariables:
            workflow=mock_workflow,
            system_user_id=str(uuid4()),
            app=MagicMock(),
+            workflow_execution_repository=MagicMock(),
+            workflow_node_execution_repository=MagicMock(),
        )

        # Mock database session
@@ -390,6 +394,8 @@ class TestAdvancedChatAppRunnerConversationVariables:
            workflow=mock_workflow,
            system_user_id=str(uuid4()),
            app=MagicMock(),
+            workflow_execution_repository=MagicMock(),
+            workflow_node_execution_repository=MagicMock(),
        )

        # Mock database session
--- a/api/tests/unit_tests/core/app/apps/common/test_graph_runtime_state_support.py
+++ b/api/tests/unit_tests/core/app/apps/common/test_graph_runtime_state_support.py
@@ -0,0 +1,63 @@
+from types import SimpleNamespace
+
+import pytest
+
+from core.app.apps.common.graph_runtime_state_support import GraphRuntimeStateSupport
+from core.workflow.runtime import GraphRuntimeState
+from core.workflow.runtime.variable_pool import VariablePool
+from core.workflow.system_variable import SystemVariable
+
+
+def _make_state(workflow_run_id: str | None) -> GraphRuntimeState:  # test helper: state keyed by a run id
+    variable_pool = VariablePool(system_variables=SystemVariable(workflow_execution_id=workflow_run_id))
+    return GraphRuntimeState(variable_pool=variable_pool, start_at=0.0)  # start_at is a fixed constant
+
+
+class _StubPipeline(GraphRuntimeStateSupport):  # test double; sets only the two attributes below
+    def __init__(self, *, cached_state: GraphRuntimeState | None, queue_state: GraphRuntimeState | None):
+        self._graph_runtime_state = cached_state  # value the pipeline starts with as its cached state
+        self._base_task_pipeline = SimpleNamespace(queue_manager=SimpleNamespace(graph_runtime_state=queue_state))
+
+
+def test_ensure_graph_runtime_initialized_caches_explicit_state():
+    explicit_state = _make_state("run-explicit")
+    pipeline = _StubPipeline(cached_state=None, queue_state=None)  # nothing cached, nothing on the queue manager
+
+    resolved = pipeline._ensure_graph_runtime_initialized(explicit_state)
+
+    assert resolved is explicit_state  # identity check: the passed-in object itself is returned
+    assert pipeline._graph_runtime_state is explicit_state  # and it is stored on the pipeline
+
+
+def test_resolve_graph_runtime_state_reads_from_queue_when_cache_empty():
+    queued_state = _make_state("run-queue")
+    pipeline = _StubPipeline(cached_state=None, queue_state=queued_state)  # state exists only on the queue manager
+
+    resolved = pipeline._resolve_graph_runtime_state()
+
+    assert resolved is queued_state  # identity check: the queue manager's object is returned
+    assert pipeline._graph_runtime_state is queued_state  # and it is now cached on the pipeline
+
+
+def test_resolve_graph_runtime_state_raises_when_no_state_available():
+    pipeline = _StubPipeline(cached_state=None, queue_state=None)  # neither source holds a state
+
+    with pytest.raises(ValueError):  # resolving with no state anywhere must fail loudly
+        pipeline._resolve_graph_runtime_state()
+
+
+def test_extract_workflow_run_id_returns_value():
+    state = _make_state("run-identifier")  # run id is carried as the workflow_execution_id system variable
+    pipeline = _StubPipeline(cached_state=state, queue_state=None)
+
+    run_id = pipeline._extract_workflow_run_id(state)
+
+    assert run_id == "run-identifier"  # same string that was passed to _make_state
+
+
+def test_extract_workflow_run_id_raises_when_missing():
+    state = _make_state(None)  # workflow_execution_id is None in the system variables
+    pipeline = _StubPipeline(cached_state=state, queue_state=None)
+
+    with pytest.raises(ValueError):  # a missing run id is expected to raise
+        pipeline._extract_workflow_run_id(state)
--- a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py
+++ b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py
@@ -3,8 +3,7 @@ Unit tests for WorkflowResponseConverter focusing on process_data truncation fun
 """

 import uuid
-from dataclasses import dataclass
-from datetime import datetime
+from collections.abc import Mapping
 from typing import Any
 from unittest.mock import Mock

@@ -12,24 +11,17 @@ import pytest

 from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter
 from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity
-from core.app.entities.queue_entities import QueueNodeRetryEvent, QueueNodeSucceededEvent
-from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution, WorkflowNodeExecutionStatus
+from core.app.entities.queue_entities import (
+    QueueNodeRetryEvent,
+    QueueNodeStartedEvent,
+    QueueNodeSucceededEvent,
+)
 from core.workflow.enums import NodeType
+from core.workflow.system_variable import SystemVariable
 from libs.datetime_utils import naive_utc_now
 from models import Account


-@dataclass
-class ProcessDataResponseScenario:
-    """Test scenario for process_data in responses."""
-
-    name: str
-    original_process_data: dict[str, Any] | None
-    truncated_process_data: dict[str, Any] | None
-    expected_response_data: dict[str, Any] | None
-    expected_truncated_flag: bool
-
-
 class TestWorkflowResponseConverterCenarios:
    """Test process_data truncation in WorkflowResponseConverter."""

@@ -39,6 +31,7 @@ class TestWorkflowResponseConverterCenarios:
        mock_app_config = Mock()
        mock_app_config.tenant_id = "test-tenant-id"
        mock_entity.app_config = mock_app_config
+        mock_entity.inputs = {}
        return mock_entity

    def create_workflow_response_converter(self) -> WorkflowResponseConverter:
@@ -50,54 +43,59 @@ class TestWorkflowResponseConverterCenarios:
        mock_user.name = "Test User"
        mock_user.email = "test@example.com"

-        return WorkflowResponseConverter(application_generate_entity=mock_entity, user=mock_user)
-
-    def create_workflow_node_execution(
-        self,
-        process_data: dict[str, Any] | None = None,
-        truncated_process_data: dict[str, Any] | None = None,
-        execution_id: str = "test-execution-id",
-    ) -> WorkflowNodeExecution:
-        """Create a WorkflowNodeExecution for testing."""
-        execution = WorkflowNodeExecution(
-            id=execution_id,
-            workflow_id="test-workflow-id",
-            workflow_execution_id="test-run-id",
-            index=1,
-            node_id="test-node-id",
-            node_type=NodeType.LLM,
-            title="Test Node",
-            process_data=process_data,
-            status=WorkflowNodeExecutionStatus.SUCCEEDED,
-            created_at=datetime.now(),
-            finished_at=datetime.now(),
+        system_variables = SystemVariable(workflow_id="wf-id", workflow_execution_id="initial-run-id")
+        return WorkflowResponseConverter(
+            application_generate_entity=mock_entity,
+            user=mock_user,
+            system_variables=system_variables,
        )

-        if truncated_process_data is not None:
-            execution.set_truncated_process_data(truncated_process_data)
+    def create_node_started_event(self, *, node_execution_id: str | None = None) -> QueueNodeStartedEvent:
+        """Create a QueueNodeStartedEvent for testing."""
+        return QueueNodeStartedEvent(
+            node_execution_id=node_execution_id or str(uuid.uuid4()),  # falsy id (None or "") -> random UUID
+            node_id="test-node-id",
+            node_title="Test Node",
+            node_type=NodeType.CODE,
+            start_at=naive_utc_now(),
+            predecessor_node_id=None,
+            in_iteration_id=None,  # None for both container ids: no iteration or loop parent is set
+            in_loop_id=None,
+            provider_type="built-in",
+            provider_id="code",
+        )

-        return execution
-
-    def create_node_succeeded_event(self) -> QueueNodeSucceededEvent:
+    def create_node_succeeded_event(
+        self,
+        *,
+        node_execution_id: str,
+        process_data: Mapping[str, Any] | None = None,
+    ) -> QueueNodeSucceededEvent:
        """Create a QueueNodeSucceededEvent for testing."""
        return QueueNodeSucceededEvent(
            node_id="test-node-id",
            node_type=NodeType.CODE,
-            node_execution_id=str(uuid.uuid4()),
+            node_execution_id=node_execution_id,
            start_at=naive_utc_now(),
-            parallel_id=None,
-            parallel_start_node_id=None,
-            parent_parallel_id=None,
-            parent_parallel_start_node_id=None,
            in_iteration_id=None,
            in_loop_id=None,
+            inputs={},
+            process_data=process_data or {},
+            outputs={},
+            execution_metadata={},
        )

-    def create_node_retry_event(self) -> QueueNodeRetryEvent:
+    def create_node_retry_event(
+        self,
+        *,
+        node_execution_id: str,
+        process_data: Mapping[str, Any] | None = None,
+    ) -> QueueNodeRetryEvent:
        """Create a QueueNodeRetryEvent for testing."""
        return QueueNodeRetryEvent(
            inputs={"data": "inputs"},
            outputs={"data": "outputs"},
+            process_data=process_data or {},
            error="oops",
            retry_index=1,
            node_id="test-node-id",
@@ -105,12 +103,8 @@ class TestWorkflowResponseConverterCenarios:
            node_title="test code",
            provider_type="built-in",
            provider_id="code",
-            node_execution_id=str(uuid.uuid4()),
+            node_execution_id=node_execution_id,
            start_at=naive_utc_now(),
-            parallel_id=None,
-            parallel_start_node_id=None,
-            parent_parallel_id=None,
-            parent_parallel_start_node_id=None,
            in_iteration_id=None,
            in_loop_id=None,
        )
@@ -122,15 +116,28 @@ class TestWorkflowResponseConverterCenarios:
        original_data = {"large_field": "x" * 10000, "metadata": "info"}
        truncated_data = {"large_field": "[TRUNCATED]", "metadata": "info"}

-        execution = self.create_workflow_node_execution(
-            process_data=original_data, truncated_process_data=truncated_data
+        converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id")
+        start_event = self.create_node_started_event()
+        converter.workflow_node_start_to_stream_response(
+            event=start_event,
+            task_id="test-task-id",
        )
-        event = self.create_node_succeeded_event()
+
+        event = self.create_node_succeeded_event(
+            node_execution_id=start_event.node_execution_id,
+            process_data=original_data,
+        )
+
+        def fake_truncate(mapping):
+            if mapping == dict(original_data):
+                return truncated_data, True
+            return mapping, False
+
+        converter._truncator.truncate_variable_mapping = fake_truncate  # type: ignore[assignment]

        response = converter.workflow_node_finish_to_stream_response(
            event=event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
        )

        # Response should use truncated data, not original
@@ -145,13 +152,26 @@ class TestWorkflowResponseConverterCenarios:

        original_data = {"small": "data"}

-        execution = self.create_workflow_node_execution(process_data=original_data)
-        event = self.create_node_succeeded_event()
+        converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id")
+        start_event = self.create_node_started_event()
+        converter.workflow_node_start_to_stream_response(
+            event=start_event,
+            task_id="test-task-id",
+        )
+
+        event = self.create_node_succeeded_event(
+            node_execution_id=start_event.node_execution_id,
+            process_data=original_data,
+        )
+
+        def fake_truncate(mapping):
+            return mapping, False
+
+        converter._truncator.truncate_variable_mapping = fake_truncate  # type: ignore[assignment]

        response = converter.workflow_node_finish_to_stream_response(
            event=event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
        )

        # Response should use original data
@@ -163,18 +183,31 @@ class TestWorkflowResponseConverterCenarios:
        """Test node finish response when process_data is None."""
        converter = self.create_workflow_response_converter()

-        execution = self.create_workflow_node_execution(process_data=None)
-        event = self.create_node_succeeded_event()
+        converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id")
+        start_event = self.create_node_started_event()
+        converter.workflow_node_start_to_stream_response(
+            event=start_event,
+            task_id="test-task-id",
+        )
+
+        event = self.create_node_succeeded_event(
+            node_execution_id=start_event.node_execution_id,
+            process_data=None,
+        )
+
+        def fake_truncate(mapping):
+            return mapping, False
+
+        converter._truncator.truncate_variable_mapping = fake_truncate  # type: ignore[assignment]

        response = converter.workflow_node_finish_to_stream_response(
            event=event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
        )

-        # Response should have None process_data
+        # Response should normalize missing process_data to an empty mapping
        assert response is not None
-        assert response.data.process_data is None
+        assert response.data.process_data == {}
        assert response.data.process_data_truncated is False

    def test_workflow_node_retry_response_uses_truncated_process_data(self):
@@ -184,15 +217,28 @@ class TestWorkflowResponseConverterCenarios:
        original_data = {"large_field": "x" * 10000, "metadata": "info"}
        truncated_data = {"large_field": "[TRUNCATED]", "metadata": "info"}

-        execution = self.create_workflow_node_execution(
-            process_data=original_data, truncated_process_data=truncated_data
+        converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id")
+        start_event = self.create_node_started_event()
+        converter.workflow_node_start_to_stream_response(
+            event=start_event,
+            task_id="test-task-id",
        )
-        event = self.create_node_retry_event()
+
+        event = self.create_node_retry_event(
+            node_execution_id=start_event.node_execution_id,
+            process_data=original_data,
+        )
+
+        def fake_truncate(mapping):
+            if mapping == dict(original_data):
+                return truncated_data, True
+            return mapping, False
+
+        converter._truncator.truncate_variable_mapping = fake_truncate  # type: ignore[assignment]

        response = converter.workflow_node_retry_to_stream_response(
            event=event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
        )

        # Response should use truncated data, not original
@@ -207,224 +253,72 @@ class TestWorkflowResponseConverterCenarios:

        original_data = {"small": "data"}

-        execution = self.create_workflow_node_execution(process_data=original_data)
-        event = self.create_node_retry_event()
+        converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id")
+        start_event = self.create_node_started_event()
+        converter.workflow_node_start_to_stream_response(
+            event=start_event,
+            task_id="test-task-id",
+        )
+
+        event = self.create_node_retry_event(
+            node_execution_id=start_event.node_execution_id,
+            process_data=original_data,
+        )
+
+        def fake_truncate(mapping):
+            return mapping, False
+
+        converter._truncator.truncate_variable_mapping = fake_truncate  # type: ignore[assignment]

        response = converter.workflow_node_retry_to_stream_response(
            event=event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
        )

-        # Response should use original data
        assert response is not None
        assert response.data.process_data == original_data
        assert response.data.process_data_truncated is False

    def test_iteration_and_loop_nodes_return_none(self):
-        """Test that iteration and loop nodes return None (no change from existing behavior)."""
+        """Test that iteration and loop nodes return None (no streaming events)."""
        converter = self.create_workflow_response_converter()

-        # Test iteration node
-        iteration_execution = self.create_workflow_node_execution(process_data={"test": "data"})
-        iteration_execution.node_type = NodeType.ITERATION
-
-        event = self.create_node_succeeded_event()
-
-        response = converter.workflow_node_finish_to_stream_response(
-            event=event,
-            task_id="test-task-id",
-            workflow_node_execution=iteration_execution,
-        )
-
-        # Should return None for iteration nodes
-        assert response is None
-
-        # Test loop node
-        loop_execution = self.create_workflow_node_execution(process_data={"test": "data"})
-        loop_execution.node_type = NodeType.LOOP
-
-        response = converter.workflow_node_finish_to_stream_response(
-            event=event,
-            task_id="test-task-id",
-            workflow_node_execution=loop_execution,
-        )
-
-        # Should return None for loop nodes
-        assert response is None
-
-    def test_execution_without_workflow_execution_id_returns_none(self):
-        """Test that executions without workflow_execution_id return None."""
-        converter = self.create_workflow_response_converter()
-
-        execution = self.create_workflow_node_execution(process_data={"test": "data"})
-        execution.workflow_execution_id = None  # Single-step debugging
-
-        event = self.create_node_succeeded_event()
-
-        response = converter.workflow_node_finish_to_stream_response(
-            event=event,
-            task_id="test-task-id",
-            workflow_node_execution=execution,
-        )
-
-        # Should return None for single-step debugging
-        assert response is None
-
-    @staticmethod
-    def get_process_data_response_scenarios() -> list[ProcessDataResponseScenario]:
-        """Create test scenarios for process_data responses."""
-        return [
-            ProcessDataResponseScenario(
-                name="none_process_data",
-                original_process_data=None,
-                truncated_process_data=None,
-                expected_response_data=None,
-                expected_truncated_flag=False,
-            ),
-            ProcessDataResponseScenario(
-                name="small_process_data_no_truncation",
-                original_process_data={"small": "data"},
-                truncated_process_data=None,
-                expected_response_data={"small": "data"},
-                expected_truncated_flag=False,
-            ),
-            ProcessDataResponseScenario(
-                name="large_process_data_with_truncation",
-                original_process_data={"large": "x" * 10000, "metadata": "info"},
-                truncated_process_data={"large": "[TRUNCATED]", "metadata": "info"},
-                expected_response_data={"large": "[TRUNCATED]", "metadata": "info"},
-                expected_truncated_flag=True,
-            ),
-            ProcessDataResponseScenario(
-                name="empty_process_data",
-                original_process_data={},
-                truncated_process_data=None,
-                expected_response_data={},
-                expected_truncated_flag=False,
-            ),
-            ProcessDataResponseScenario(
-                name="complex_data_with_truncation",
-                original_process_data={
-                    "logs": ["entry"] * 1000,  # Large array
-                    "config": {"setting": "value"},
-                    "status": "processing",
-                },
-                truncated_process_data={
-                    "logs": "[TRUNCATED: 1000 items]",
-                    "config": {"setting": "value"},
-                    "status": "processing",
-                },
-                expected_response_data={
-                    "logs": "[TRUNCATED: 1000 items]",
-                    "config": {"setting": "value"},
-                    "status": "processing",
-                },
-                expected_truncated_flag=True,
-            ),
-        ]
-
-    @pytest.mark.parametrize(
-        "scenario",
-        get_process_data_response_scenarios(),
-        ids=[scenario.name for scenario in get_process_data_response_scenarios()],
-    )
-    def test_node_finish_response_scenarios(self, scenario: ProcessDataResponseScenario):
-        """Test various scenarios for node finish responses."""
-
-        mock_user = Mock(spec=Account)
-        mock_user.id = "test-user-id"
-        mock_user.name = "Test User"
-        mock_user.email = "test@example.com"
-
-        converter = WorkflowResponseConverter(
-            application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")),
-            user=mock_user,
-        )
-
-        execution = WorkflowNodeExecution(
-            id="test-execution-id",
-            workflow_id="test-workflow-id",
-            workflow_execution_id="test-run-id",
-            index=1,
-            node_id="test-node-id",
-            node_type=NodeType.LLM,
-            title="Test Node",
-            process_data=scenario.original_process_data,
-            status=WorkflowNodeExecutionStatus.SUCCEEDED,
-            created_at=datetime.now(),
-            finished_at=datetime.now(),
-        )
-
-        if scenario.truncated_process_data is not None:
-            execution.set_truncated_process_data(scenario.truncated_process_data)
-
-        event = QueueNodeSucceededEvent(
-            node_id="test-node-id",
-            node_type=NodeType.CODE,
+        iteration_event = QueueNodeSucceededEvent(
+            node_id="iteration-node",
+            node_type=NodeType.ITERATION,
            node_execution_id=str(uuid.uuid4()),
            start_at=naive_utc_now(),
-            parallel_id=None,
-            parallel_start_node_id=None,
-            parent_parallel_id=None,
-            parent_parallel_start_node_id=None,
            in_iteration_id=None,
            in_loop_id=None,
+            inputs={},
+            process_data={},
+            outputs={},
+            execution_metadata={},
        )

        response = converter.workflow_node_finish_to_stream_response(
-            event=event,
+            event=iteration_event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
        )
+        assert response is None

-        assert response is not None
-        assert response.data.process_data == scenario.expected_response_data
-        assert response.data.process_data_truncated == scenario.expected_truncated_flag
-
-    @pytest.mark.parametrize(
-        "scenario",
-        get_process_data_response_scenarios(),
-        ids=[scenario.name for scenario in get_process_data_response_scenarios()],
-    )
-    def test_node_retry_response_scenarios(self, scenario: ProcessDataResponseScenario):
-        """Test various scenarios for node retry responses."""
-
-        mock_user = Mock(spec=Account)
-        mock_user.id = "test-user-id"
-        mock_user.name = "Test User"
-        mock_user.email = "test@example.com"
-
-        converter = WorkflowResponseConverter(
-            application_generate_entity=Mock(spec=WorkflowAppGenerateEntity, app_config=Mock(tenant_id="test-tenant")),
-            user=mock_user,
-        )
-
-        execution = WorkflowNodeExecution(
-            id="test-execution-id",
-            workflow_id="test-workflow-id",
-            workflow_execution_id="test-run-id",
-            index=1,
-            node_id="test-node-id",
-            node_type=NodeType.LLM,
-            title="Test Node",
-            process_data=scenario.original_process_data,
-            status=WorkflowNodeExecutionStatus.FAILED,  # Retry scenario
-            created_at=datetime.now(),
-            finished_at=datetime.now(),
-        )
-
-        if scenario.truncated_process_data is not None:
-            execution.set_truncated_process_data(scenario.truncated_process_data)
-
-        event = self.create_node_retry_event()
-
-        response = converter.workflow_node_retry_to_stream_response(
-            event=event,
+        loop_event = iteration_event.model_copy(update={"node_type": NodeType.LOOP})
+        response = converter.workflow_node_finish_to_stream_response(
+            event=loop_event,
            task_id="test-task-id",
-            workflow_node_execution=execution,
+        )
+        assert response is None
+
+    def test_finish_without_start_raises(self):
+        """Ensure finish responses require a prior workflow start."""
+        converter = self.create_workflow_response_converter()
+        event = self.create_node_succeeded_event(
+            node_execution_id=str(uuid.uuid4()),  # random id; no start event was sent for it
+            process_data={},
         )

-        assert response is not None
-        assert response.data.process_data == scenario.expected_response_data
-        assert response.data.process_data_truncated == scenario.expected_truncated_flag
+        with pytest.raises(ValueError):  # workflow_start_to_stream_response was never called on this converter
+            converter.workflow_node_finish_to_stream_response(
+                event=event,
+                task_id="test-task-id",
+            )