feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -0,0 +1,212 @@
"""
Unit tests for WorkflowNodeExecutionModel and its WorkflowNodeExecutionOffload records, focusing on process_data truncation functionality.
"""
from unittest.mock import Mock
import pytest
from models.model import UploadFile
from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload
class TestWorkflowNodeExecutionModel:
    """Exercise the process_data truncation behaviour of WorkflowNodeExecutionModel."""

    @staticmethod
    def _build_offload(offload_type, file_id: str) -> WorkflowNodeExecutionOffload:
        """Return a real offload record carrying the given type and file id."""
        record = WorkflowNodeExecutionOffload()
        record.type_ = offload_type
        record.file_id = file_id
        return record

    def create_mock_offload_data(
        self,
        inputs_file_id: str | None = None,
        outputs_file_id: str | None = None,
        process_data_file_id: str | None = None,
    ) -> WorkflowNodeExecutionOffload:
        """Build a Mock specced to WorkflowNodeExecutionOffload.

        For each of inputs / outputs / process_data the ``<name>_file_id``
        attribute is set to the supplied id, and ``<name>_file`` is set to a
        ``Mock(spec=UploadFile)`` when that id is truthy, otherwise ``None``.
        """
        mock_offload = Mock(spec=WorkflowNodeExecutionOffload)
        id_by_prefix = (
            ("inputs", inputs_file_id),
            ("outputs", outputs_file_id),
            ("process_data", process_data_file_id),
        )
        for prefix, file_id in id_by_prefix:
            setattr(mock_offload, f"{prefix}_file_id", file_id)
            # A file object is only attached when an id was provided.
            setattr(mock_offload, f"{prefix}_file", Mock(spec=UploadFile) if file_id else None)
        return mock_offload

    def test_process_data_truncated_property_false_when_no_offload_data(self):
        """process_data_truncated is False when offload_data is empty."""
        node_execution = WorkflowNodeExecutionModel()
        node_execution.offload_data = []

        assert node_execution.process_data_truncated is False

    def test_process_data_truncated_property_false_when_no_process_data_file(self):
        """process_data_truncated is False when only inputs/outputs are offloaded."""
        from models.enums import ExecutionOffLoadType

        node_execution = WorkflowNodeExecutionModel()
        # Real offload records for inputs and outputs, none for process_data.
        node_execution.offload_data = [
            self._build_offload(ExecutionOffLoadType.INPUTS, "inputs-file"),
            self._build_offload(ExecutionOffLoadType.OUTPUTS, "outputs-file"),
        ]

        assert node_execution.process_data_truncated is False

    def test_process_data_truncated_property_true_when_process_data_file_exists(self):
        """process_data_truncated is True when a process_data offload record exists."""
        from models.enums import ExecutionOffLoadType

        node_execution = WorkflowNodeExecutionModel()
        node_execution.offload_data = [
            self._build_offload(ExecutionOffLoadType.PROCESS_DATA, "process-data-file-id"),
        ]

        assert node_execution.process_data_truncated is True

    def test_load_full_process_data_with_no_offload_data(self):
        """load_full_process_data returns the parsed inline value when nothing is offloaded."""
        node_execution = WorkflowNodeExecutionModel()
        node_execution.offload_data = []
        node_execution.process_data = '{"test": "data"}'

        # Session and storage are stand-ins.
        session_stub, storage_stub = Mock(), Mock()

        loaded = node_execution.load_full_process_data(session_stub, storage_stub)

        assert loaded == {"test": "data"}

    def test_load_full_process_data_with_no_file(self):
        """load_full_process_data returns the inline value when only inputs are offloaded."""
        from models.enums import ExecutionOffLoadType

        node_execution = WorkflowNodeExecutionModel()
        # Only the inputs payload has an offload record.
        node_execution.offload_data = [
            self._build_offload(ExecutionOffLoadType.INPUTS, "inputs-file"),
        ]
        node_execution.process_data = '{"test": "data"}'

        session_stub, storage_stub = Mock(), Mock()

        loaded = node_execution.load_full_process_data(session_stub, storage_stub)

        assert loaded == {"test": "data"}

    def test_load_full_process_data_with_file(self):
        """load_full_process_data returns what _load_full_content yields for the offloaded file."""
        from models.enums import ExecutionOffLoadType

        node_execution = WorkflowNodeExecutionModel()
        node_execution.offload_data = [
            self._build_offload(ExecutionOffLoadType.PROCESS_DATA, "file-id"),
        ]
        node_execution.process_data = '{"truncated": "data"}'

        mock_session = Mock()
        mock_storage = Mock()

        # Payload the patched loader hands back in place of the truncated inline value.
        expected_payload = {"full": "data", "large_field": "x" * 10000}

        def mock_load_full_content(session, file_id, storage):
            # Parameter names are kept as-is in case the caller passes keywords.
            assert session == mock_session
            assert file_id == "file-id"
            assert storage == mock_storage
            return expected_payload

        with pytest.MonkeyPatch.context() as patcher:
            patcher.setattr(node_execution, "_load_full_content", mock_load_full_content)

            loaded = node_execution.load_full_process_data(mock_session, mock_storage)

            assert loaded == expected_payload

    def test_consistency_with_inputs_outputs_truncation(self):
        """All three *_truncated properties are True when each payload has an offload record."""
        from models.enums import ExecutionOffLoadType

        node_execution = WorkflowNodeExecutionModel()
        node_execution.offload_data = [
            self._build_offload(ExecutionOffLoadType.INPUTS, "inputs-file"),
            self._build_offload(ExecutionOffLoadType.OUTPUTS, "outputs-file"),
            self._build_offload(ExecutionOffLoadType.PROCESS_DATA, "process-data-file"),
        ]

        assert node_execution.inputs_truncated is True
        assert node_execution.outputs_truncated is True
        assert node_execution.process_data_truncated is True

    def test_mixed_truncation_states(self):
        """Only process_data_truncated is True when only process_data is offloaded."""
        from models.enums import ExecutionOffLoadType

        node_execution = WorkflowNodeExecutionModel()
        node_execution.offload_data = [
            self._build_offload(ExecutionOffLoadType.PROCESS_DATA, "process-data-file"),
        ]

        assert node_execution.inputs_truncated is False
        assert node_execution.outputs_truncated is False
        assert node_execution.process_data_truncated is True

    def test_preload_offload_data_and_files_method_exists(self):
        """preload_offload_data_and_files is callable and returns a new statement object."""
        # Only callability and a changed statement object are checked here;
        # the generated SQL itself is left to integration tests.
        from sqlalchemy import select

        base_stmt = select(WorkflowNodeExecutionModel)

        # Must not raise.
        preloaded_stmt = WorkflowNodeExecutionModel.preload_offload_data_and_files(base_stmt)

        # A modified statement is a different object from the one passed in.
        assert preloaded_stmt is not base_stmt