feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -3,16 +3,27 @@ import unittest
import uuid
import pytest
from sqlalchemy import delete
from sqlalchemy.orm import Session
from core.variables.segments import StringSegment
from core.variables.types import SegmentType
from core.variables.variables import StringVariable
from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
from core.workflow.nodes import NodeType
from extensions.ext_database import db
from extensions.ext_storage import storage
from factories.variable_factory import build_segment
from libs import datetime_utils
from models import db
from models.workflow import Workflow, WorkflowDraftVariable, WorkflowNodeExecutionModel
from services.workflow_draft_variable_service import DraftVarLoader, VariableResetError, WorkflowDraftVariableService
from models.enums import CreatorUserRole
from models.model import UploadFile
from models.workflow import Workflow, WorkflowDraftVariable, WorkflowDraftVariableFile, WorkflowNodeExecutionModel
from services.workflow_draft_variable_service import (
DraftVariableSaver,
DraftVarLoader,
VariableResetError,
WorkflowDraftVariableService,
)
@pytest.mark.usefixtures("flask_req_ctx")
@@ -175,6 +186,23 @@ class TestDraftVariableLoader(unittest.TestCase):
_node1_id = "test_loader_node_1"
_node_exec_id = str(uuid.uuid4())
# @pytest.fixture
# def test_app_id(self):
# return str(uuid.uuid4())
# @pytest.fixture
# def test_tenant_id(self):
# return str(uuid.uuid4())
# @pytest.fixture
# def session(self):
# with Session(bind=db.engine, expire_on_commit=False) as session:
# yield session
# @pytest.fixture
# def node_var(self, session):
# pass
def setUp(self):
self._test_app_id = str(uuid.uuid4())
self._test_tenant_id = str(uuid.uuid4())
@@ -241,6 +269,246 @@ class TestDraftVariableLoader(unittest.TestCase):
node1_var = next(v for v in variables if v.selector[0] == self._node1_id)
assert node1_var.id == self._node_var_id
@pytest.fixture
def _bind_setup_account(self, setup_account):
    """Store the ``setup_account`` fixture value on the test instance.

    pytest does not inject fixtures as arguments into ``unittest.TestCase``
    test methods, so a test that needs the account requests this fixture via
    ``usefixtures`` and reads ``self._setup_account`` instead.
    """
    self._setup_account = setup_account

@pytest.mark.usefixtures("_bind_setup_account")
def test_load_offloaded_variable_string_type_integration(self, setup_account=None):
    """Test _load_offloaded_variable with string type using DraftVariableSaver for data creation.

    A large string output is saved through ``DraftVariableSaver`` and then read
    back through ``DraftVarLoader.load_variables``; the loaded value must equal
    the original content.

    Args:
        setup_account: Optional explicit account. When omitted (the normal case
            under a ``unittest.TestCase`` runner, which calls test methods
            without arguments) the account bound by ``_bind_setup_account`` is
            used.
    """
    # A TestCase test method is invoked with no arguments, so fall back to the
    # value stashed on the instance by the binder fixture.
    account = setup_account if setup_account is not None else self._setup_account

    # Create a large string that will be offloaded
    test_content = "x" * 15000  # Create a string larger than LARGE_VARIABLE_THRESHOLD (10KB)
    large_string_segment = StringSegment(value=test_content)

    node_execution_id = str(uuid.uuid4())

    try:
        with Session(bind=db.engine, expire_on_commit=False) as session:
            # Use DraftVariableSaver to create offloaded variable (this mimics production)
            saver = DraftVariableSaver(
                session=session,
                app_id=self._test_app_id,
                node_id="test_offload_node",
                node_type=NodeType.LLM,  # Use a real node type
                node_execution_id=node_execution_id,
                user=account,
            )

            # Save the variable - this will trigger offloading due to large size
            saver.save(outputs={"offloaded_string_var": large_string_segment})
            session.commit()

            # Now test loading using DraftVarLoader
            var_loader = DraftVarLoader(engine=db.engine, app_id=self._test_app_id, tenant_id=self._test_tenant_id)

            # Load the variable using the standard workflow
            variables = var_loader.load_variables([["test_offload_node", "offloaded_string_var"]])

            # Verify results
            assert len(variables) == 1
            loaded_variable = variables[0]
            assert loaded_variable.name == "offloaded_string_var"
            assert loaded_variable.selector == ["test_offload_node", "offloaded_string_var"]
            assert isinstance(loaded_variable.value, StringSegment)
            assert loaded_variable.value.value == test_content

    finally:
        # Clean up - delete all draft variables for this app
        with Session(bind=db.engine) as session:
            service = WorkflowDraftVariableService(session)
            service.delete_workflow_variables(self._test_app_id)
            session.commit()
def test_load_offloaded_variable_object_type_integration(self):
    """Test _load_offloaded_variable with object type using real storage and service.

    The full JSON object is written to storage and referenced through an
    ``UploadFile`` / ``WorkflowDraftVariableFile`` pair, while the draft
    variable row itself only carries a placeholder value. The loader is
    expected to return the full object.
    """
    import logging

    # Create a test object
    test_object = {"key1": "value1", "key2": 42, "nested": {"inner": "data"}}
    test_json = json.dumps(test_object, ensure_ascii=False, separators=(",", ":"))
    content_bytes = test_json.encode()

    # Create an upload file record
    upload_file = UploadFile(
        tenant_id=self._test_tenant_id,
        storage_type="local",
        key=f"test_offload_{uuid.uuid4()}.json",
        name="test_offload.json",
        size=len(content_bytes),
        extension="json",
        mime_type="application/json",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=str(uuid.uuid4()),
        created_at=datetime_utils.naive_utc_now(),
        used=True,
        used_by=str(uuid.uuid4()),
        used_at=datetime_utils.naive_utc_now(),
    )

    # Create a variable file record
    variable_file = WorkflowDraftVariableFile(
        upload_file_id=upload_file.id,
        value_type=SegmentType.OBJECT,
        tenant_id=self._test_tenant_id,
        app_id=self._test_app_id,
        user_id=str(uuid.uuid4()),
        size=len(content_bytes),
        created_at=datetime_utils.naive_utc_now(),
    )

    # Bound before ``try`` so the cleanup below never hits an unbound name
    # (which would mask the real failure) when setup fails early.
    offloaded_var = None

    try:
        # Store the content in storage. Done inside ``try`` so the blob is
        # removed by the cleanup even if a later step raises.
        storage.save(upload_file.key, content_bytes)

        with Session(bind=db.engine, expire_on_commit=False) as session:
            # Add upload file and variable file first to get their IDs
            session.add_all([upload_file, variable_file])
            session.flush()  # This generates the IDs

            # Now create the offloaded draft variable with the correct file_id
            offloaded_var = WorkflowDraftVariable.new_node_variable(
                app_id=self._test_app_id,
                node_id="test_offload_node",
                name="offloaded_object_var",
                value=build_segment({"truncated": True}),
                visible=True,
                node_execution_id=str(uuid.uuid4()),
            )
            offloaded_var.file_id = variable_file.id

            session.add(offloaded_var)
            session.flush()
            session.commit()

            # Use the service method that properly preloads relationships
            service = WorkflowDraftVariableService(session)
            draft_vars = service.get_draft_variables_by_selectors(
                self._test_app_id, [["test_offload_node", "offloaded_object_var"]]
            )

            assert len(draft_vars) == 1
            loaded_var = draft_vars[0]
            assert loaded_var.is_truncated()

            # Create DraftVarLoader and test loading
            var_loader = DraftVarLoader(engine=db.engine, app_id=self._test_app_id, tenant_id=self._test_tenant_id)

            # Test the _load_offloaded_variable method
            selector_tuple, variable = var_loader._load_offloaded_variable(loaded_var)

            # Verify the results
            assert selector_tuple == ("test_offload_node", "offloaded_object_var")
            assert variable.id == loaded_var.id
            assert variable.name == "offloaded_object_var"
            assert variable.value.value == test_object

    finally:
        # Clean up
        with Session(bind=db.engine) as session:
            # Query and delete by ID to ensure they're tracked in this session
            if offloaded_var is not None:
                session.query(WorkflowDraftVariable).filter_by(id=offloaded_var.id).delete()
            session.query(WorkflowDraftVariableFile).filter_by(id=variable_file.id).delete()
            session.query(UploadFile).filter_by(id=upload_file.id).delete()
            session.commit()
        # Clean up storage. Best-effort: a failure here must not fail the test,
        # but it is logged rather than silently discarded.
        try:
            storage.delete(upload_file.key)
        except Exception:
            logging.getLogger(__name__).warning(
                "Failed to delete test blob %s from storage", upload_file.key, exc_info=True
            )
def test_load_variables_with_offloaded_variables_integration(self):
    """Test load_variables method with mix of regular and offloaded variables using real storage.

    One selector points at the regular system variable and the other at a
    draft variable whose full content lives in storage; ``load_variables``
    must return both, with the offloaded one carrying the stored content.
    """
    import logging

    # Create a regular variable (already exists from setUp)
    # Create offloaded variable content
    test_content = "This is offloaded content for integration test"
    content_bytes = test_content.encode()

    # Create upload file record
    upload_file = UploadFile(
        tenant_id=self._test_tenant_id,
        storage_type="local",
        key=f"test_integration_{uuid.uuid4()}.txt",
        name="test_integration.txt",
        size=len(content_bytes),
        extension="txt",
        mime_type="text/plain",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=str(uuid.uuid4()),
        created_at=datetime_utils.naive_utc_now(),
        used=True,
        used_by=str(uuid.uuid4()),
        used_at=datetime_utils.naive_utc_now(),
    )

    # Create variable file
    variable_file = WorkflowDraftVariableFile(
        upload_file_id=upload_file.id,
        value_type=SegmentType.STRING,
        tenant_id=self._test_tenant_id,
        app_id=self._test_app_id,
        user_id=str(uuid.uuid4()),
        size=len(content_bytes),
        created_at=datetime_utils.naive_utc_now(),
    )

    # Bound before ``try`` so the cleanup below never hits an unbound name
    # (which would mask the real failure) when setup fails early.
    offloaded_var = None

    try:
        # Store the content. Done inside ``try`` so the blob is removed by the
        # cleanup even if a later step raises.
        storage.save(upload_file.key, content_bytes)

        with Session(bind=db.engine, expire_on_commit=False) as session:
            # Add upload file and variable file first to get their IDs
            session.add_all([upload_file, variable_file])
            session.flush()  # This generates the IDs

            # Now create the offloaded draft variable with the correct file_id
            offloaded_var = WorkflowDraftVariable.new_node_variable(
                app_id=self._test_app_id,
                node_id="test_integration_node",
                name="offloaded_integration_var",
                value=build_segment("truncated"),
                visible=True,
                node_execution_id=str(uuid.uuid4()),
            )
            offloaded_var.file_id = variable_file.id

            session.add(offloaded_var)
            session.flush()
            session.commit()

            # Test load_variables with both regular and offloaded variables
            # This method should handle the relationship preloading internally
            var_loader = DraftVarLoader(engine=db.engine, app_id=self._test_app_id, tenant_id=self._test_tenant_id)

            variables = var_loader.load_variables(
                [
                    [SYSTEM_VARIABLE_NODE_ID, "sys_var"],  # Regular variable from setUp
                    ["test_integration_node", "offloaded_integration_var"],  # Offloaded variable
                ]
            )

            # Verify results
            assert len(variables) == 2

            # Find regular variable
            regular_var = next(v for v in variables if v.selector[0] == SYSTEM_VARIABLE_NODE_ID)
            assert regular_var.id == self._sys_var_id
            assert regular_var.value == "sys_value"

            # Find offloaded variable
            offloaded_loaded_var = next(v for v in variables if v.selector[0] == "test_integration_node")
            assert offloaded_loaded_var.id == offloaded_var.id
            assert offloaded_loaded_var.value == test_content

    finally:
        # Clean up
        with Session(bind=db.engine) as session:
            # Query and delete by ID to ensure they're tracked in this session
            if offloaded_var is not None:
                session.query(WorkflowDraftVariable).filter_by(id=offloaded_var.id).delete()
            session.query(WorkflowDraftVariableFile).filter_by(id=variable_file.id).delete()
            session.query(UploadFile).filter_by(id=upload_file.id).delete()
            session.commit()
        # Clean up storage. Best-effort: a failure here must not fail the test,
        # but it is logged rather than silently discarded.
        try:
            storage.delete(upload_file.key)
        except Exception:
            logging.getLogger(__name__).warning(
                "Failed to delete test blob %s from storage", upload_file.key, exc_info=True
            )
@pytest.mark.usefixtures("flask_req_ctx")
class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase):
@@ -272,7 +540,7 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase):
triggered_from="workflow-run",
workflow_run_id=str(uuid.uuid4()),
index=1,
node_execution_id=self._node_exec_id,
node_execution_id=str(uuid.uuid4()),
node_id=self._node_id,
node_type=NodeType.LLM.value,
title="Test Node",
@@ -281,7 +549,7 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase):
outputs='{"test_var": "output_value", "other_var": "other_output"}',
status="succeeded",
elapsed_time=1.5,
created_by_role="account",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=str(uuid.uuid4()),
)
@@ -336,10 +604,14 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase):
)
self._conv_var.last_edited_at = datetime_utils.naive_utc_now()
with Session(db.engine, expire_on_commit=False) as persistent_session, persistent_session.begin():
persistent_session.add(
self._workflow_node_execution,
)
# Add all to database
db.session.add_all(
[
self._workflow_node_execution,
self._node_var_with_exec,
self._node_var_without_exec,
self._node_var_missing_exec,
@@ -354,6 +626,14 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase):
self._node_var_missing_exec_id = self._node_var_missing_exec.id
self._conv_var_id = self._conv_var.id
def tearDown(self):
    """Roll back the test session, then delete this test's node execution row.

    The delete runs in its own short-lived session and transaction and targets
    the ``WorkflowNodeExecutionModel`` row whose id matches
    ``self._workflow_node_execution``.
    """
    self._session.rollback()

    execution_id = self._workflow_node_execution.id
    purge_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id == execution_id)
    with Session(db.engine) as cleanup_session, cleanup_session.begin():
        cleanup_session.execute(purge_stmt)
def _get_test_srv(self) -> WorkflowDraftVariableService:
    """Build a ``WorkflowDraftVariableService`` bound to this test's session."""
    service = WorkflowDraftVariableService(session=self._session)
    return service
@@ -377,12 +657,10 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase):
created_by=str(uuid.uuid4()),
environment_variables=[],
conversation_variables=conversation_vars,
rag_pipeline_variables=[],
)
return workflow
def tearDown(self):
    """Roll back any uncommitted work on the test session."""
    test_session = self._session
    test_session.rollback()
def test_reset_node_variable_with_valid_execution_record(self):
"""Test resetting a node variable with valid execution record - should restore from execution"""
srv = self._get_test_srv()