feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -3,6 +3,7 @@ Unit tests for the SQLAlchemy implementation of WorkflowNodeExecutionRepository.
"""
import json
import uuid
from datetime import datetime
from decimal import Decimal
from unittest.mock import MagicMock, PropertyMock
@@ -13,12 +14,14 @@ from sqlalchemy.orm import Session, sessionmaker
from core.model_runtime.utils.encoders import jsonable_encoder
from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
from core.workflow.entities.workflow_node_execution import (
from core.workflow.entities import (
WorkflowNodeExecution,
)
from core.workflow.enums import (
NodeType,
WorkflowNodeExecutionMetadataKey,
WorkflowNodeExecutionStatus,
)
from core.workflow.nodes.enums import NodeType
from core.workflow.repositories.workflow_node_execution_repository import OrderConfig
from models.account import Account, Tenant
from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom
@@ -85,7 +88,7 @@ def test_save(repository, session):
"""Test save method."""
session_obj, _ = session
# Create a mock execution
execution = MagicMock(spec=WorkflowNodeExecutionModel)
execution = MagicMock(spec=WorkflowNodeExecution)
execution.id = "test-id"
execution.node_execution_id = "test-node-execution-id"
execution.tenant_id = None
@@ -94,13 +97,14 @@ def test_save(repository, session):
execution.process_data = None
execution.outputs = None
execution.metadata = None
execution.workflow_id = str(uuid.uuid4())
# Mock the to_db_model method to return the execution itself
# This simulates the behavior of setting tenant_id and app_id
db_model = MagicMock(spec=WorkflowNodeExecutionModel)
db_model.id = "test-id"
db_model.node_execution_id = "test-node-execution-id"
repository.to_db_model = MagicMock(return_value=db_model)
repository._to_db_model = MagicMock(return_value=db_model)
# Mock session.get to return None (no existing record)
session_obj.get.return_value = None
@@ -109,7 +113,7 @@ def test_save(repository, session):
repository.save(execution)
# Assert to_db_model was called with the execution
repository.to_db_model.assert_called_once_with(execution)
repository._to_db_model.assert_called_once_with(execution)
# Assert session.get was called to check for existing record
session_obj.get.assert_called_once_with(WorkflowNodeExecutionModel, db_model.id)
@@ -150,7 +154,7 @@ def test_save_with_existing_tenant_id(repository, session):
}
# Mock the to_db_model method to return the modified execution
repository.to_db_model = MagicMock(return_value=modified_execution)
repository._to_db_model = MagicMock(return_value=modified_execution)
# Mock session.get to return an existing record
existing_model = MagicMock(spec=WorkflowNodeExecutionModel)
@@ -160,7 +164,7 @@ def test_save_with_existing_tenant_id(repository, session):
repository.save(execution)
# Assert to_db_model was called with the execution
repository.to_db_model.assert_called_once_with(execution)
repository._to_db_model.assert_called_once_with(execution)
# Assert session.get was called to check for existing record
session_obj.get.assert_called_once_with(WorkflowNodeExecutionModel, modified_execution.id)
@@ -177,10 +181,19 @@ def test_get_by_workflow_run(repository, session, mocker: MockerFixture):
session_obj, _ = session
# Set up mock
mock_select = mocker.patch("core.repositories.sqlalchemy_workflow_node_execution_repository.select")
mock_asc = mocker.patch("core.repositories.sqlalchemy_workflow_node_execution_repository.asc")
mock_desc = mocker.patch("core.repositories.sqlalchemy_workflow_node_execution_repository.desc")
mock_WorkflowNodeExecutionModel = mocker.patch(
"core.repositories.sqlalchemy_workflow_node_execution_repository.WorkflowNodeExecutionModel"
)
mock_stmt = mocker.MagicMock()
mock_select.return_value = mock_stmt
mock_stmt.where.return_value = mock_stmt
mock_stmt.order_by.return_value = mock_stmt
mock_asc.return_value = mock_stmt
mock_desc.return_value = mock_stmt
mock_WorkflowNodeExecutionModel.preload_offload_data_and_files.return_value = mock_stmt
# Create a properly configured mock execution
mock_execution = mocker.MagicMock(spec=WorkflowNodeExecutionModel)
@@ -199,6 +212,7 @@ def test_get_by_workflow_run(repository, session, mocker: MockerFixture):
# Assert select was called with correct parameters
mock_select.assert_called_once()
session_obj.scalars.assert_called_once_with(mock_stmt)
mock_WorkflowNodeExecutionModel.preload_offload_data_and_files.assert_called_once_with(mock_stmt)
# Assert _to_domain_model was called with the mock execution
repository._to_domain_model.assert_called_once_with(mock_execution)
# Assert the result contains our mock domain model
@@ -234,7 +248,7 @@ def test_to_db_model(repository):
)
# Convert to DB model
db_model = repository.to_db_model(domain_model)
db_model = repository._to_db_model(domain_model)
# Assert DB model has correct values
assert isinstance(db_model, WorkflowNodeExecutionModel)

View File

@@ -0,0 +1,106 @@
"""
Unit tests for SQLAlchemyWorkflowNodeExecutionRepository, focusing on process_data truncation functionality.
"""
from datetime import datetime
from typing import Any
from unittest.mock import MagicMock, Mock
from sqlalchemy.orm import sessionmaker
from core.repositories.sqlalchemy_workflow_node_execution_repository import (
SQLAlchemyWorkflowNodeExecutionRepository,
)
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution
from core.workflow.enums import NodeType
from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom
class TestSQLAlchemyWorkflowNodeExecutionRepositoryProcessData:
"""Test process_data truncation functionality in SQLAlchemyWorkflowNodeExecutionRepository."""
def create_mock_account(self) -> Account:
"""Create a mock Account for testing."""
account = Mock(spec=Account)
account.id = "test-user-id"
account.tenant_id = "test-tenant-id"
return account
def create_mock_session_factory(self) -> sessionmaker:
"""Create a mock session factory for testing."""
mock_session = MagicMock()
mock_session_factory = MagicMock(spec=sessionmaker)
mock_session_factory.return_value.__enter__.return_value = mock_session
mock_session_factory.return_value.__exit__.return_value = None
return mock_session_factory
def create_repository(self, mock_file_service=None) -> SQLAlchemyWorkflowNodeExecutionRepository:
"""Create a repository instance for testing."""
mock_account = self.create_mock_account()
mock_session_factory = self.create_mock_session_factory()
repository = SQLAlchemyWorkflowNodeExecutionRepository(
session_factory=mock_session_factory,
user=mock_account,
app_id="test-app-id",
triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
)
if mock_file_service:
repository._file_service = mock_file_service
return repository
def create_workflow_node_execution(
self,
process_data: dict[str, Any] | None = None,
execution_id: str = "test-execution-id",
) -> WorkflowNodeExecution:
"""Create a WorkflowNodeExecution instance for testing."""
return WorkflowNodeExecution(
id=execution_id,
workflow_id="test-workflow-id",
index=1,
node_id="test-node-id",
node_type=NodeType.LLM,
title="Test Node",
process_data=process_data,
created_at=datetime.now(),
)
def test_to_domain_model_without_offload_data(self):
"""Test _to_domain_model without offload data."""
repository = self.create_repository()
# Create mock database model without offload data
db_model = Mock(spec=WorkflowNodeExecutionModel)
db_model.id = "test-execution-id"
db_model.node_execution_id = "test-node-execution-id"
db_model.workflow_id = "test-workflow-id"
db_model.workflow_run_id = None
db_model.index = 1
db_model.predecessor_node_id = None
db_model.node_id = "test-node-id"
db_model.node_type = "llm"
db_model.title = "Test Node"
db_model.status = "succeeded"
db_model.error = None
db_model.elapsed_time = 1.5
db_model.created_at = datetime.now()
db_model.finished_at = None
process_data = {"normal": "data"}
db_model.process_data_dict = process_data
db_model.inputs_dict = None
db_model.outputs_dict = None
db_model.execution_metadata_dict = {}
db_model.offload_data = None
domain_model = repository._to_domain_model(db_model)
# Domain model should have the data from database
assert domain_model.process_data == process_data
# Should not be truncated
assert domain_model.process_data_truncated is False
assert domain_model.get_truncated_process_data() is None