feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,12 +1,15 @@
import uuid
from unittest.mock import patch
import pytest
from sqlalchemy import delete
from core.variables.segments import StringSegment
from models import Tenant, db
from models.model import App
from models.workflow import WorkflowDraftVariable
from extensions.ext_database import db
from models import Tenant
from models.enums import CreatorUserRole
from models.model import App, UploadFile
from models.workflow import WorkflowDraftVariable, WorkflowDraftVariableFile
from tasks.remove_app_and_related_data_task import _delete_draft_variables, delete_draft_variables_batch
@@ -212,3 +215,256 @@ class TestDeleteDraftVariablesIntegration:
.execution_options(synchronize_session=False)
)
db.session.execute(query)
class TestDeleteDraftVariablesWithOffloadIntegration:
"""Integration tests for draft variable deletion with Offload data."""
@pytest.fixture
def setup_offload_test_data(self, app_and_tenant):
"""Create test data with draft variables that have associated Offload files."""
tenant, app = app_and_tenant
# Create UploadFile records
from libs.datetime_utils import naive_utc_now
upload_file1 = UploadFile(
tenant_id=tenant.id,
storage_type="local",
key="test/file1.json",
name="file1.json",
size=1024,
extension="json",
mime_type="application/json",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=str(uuid.uuid4()),
created_at=naive_utc_now(),
used=False,
)
upload_file2 = UploadFile(
tenant_id=tenant.id,
storage_type="local",
key="test/file2.json",
name="file2.json",
size=2048,
extension="json",
mime_type="application/json",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=str(uuid.uuid4()),
created_at=naive_utc_now(),
used=False,
)
db.session.add(upload_file1)
db.session.add(upload_file2)
db.session.flush()
# Create WorkflowDraftVariableFile records
from core.variables.types import SegmentType
var_file1 = WorkflowDraftVariableFile(
tenant_id=tenant.id,
app_id=app.id,
user_id=str(uuid.uuid4()),
upload_file_id=upload_file1.id,
size=1024,
length=10,
value_type=SegmentType.STRING,
)
var_file2 = WorkflowDraftVariableFile(
tenant_id=tenant.id,
app_id=app.id,
user_id=str(uuid.uuid4()),
upload_file_id=upload_file2.id,
size=2048,
length=20,
value_type=SegmentType.OBJECT,
)
db.session.add(var_file1)
db.session.add(var_file2)
db.session.flush()
# Create WorkflowDraftVariable records with file associations
draft_var1 = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id="node_1",
name="large_var_1",
value=StringSegment(value="truncated..."),
node_execution_id=str(uuid.uuid4()),
file_id=var_file1.id,
)
draft_var2 = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id="node_2",
name="large_var_2",
value=StringSegment(value="truncated..."),
node_execution_id=str(uuid.uuid4()),
file_id=var_file2.id,
)
# Create a regular variable without Offload data
draft_var3 = WorkflowDraftVariable.new_node_variable(
app_id=app.id,
node_id="node_3",
name="regular_var",
value=StringSegment(value="regular_value"),
node_execution_id=str(uuid.uuid4()),
)
db.session.add(draft_var1)
db.session.add(draft_var2)
db.session.add(draft_var3)
db.session.commit()
yield {
"app": app,
"tenant": tenant,
"upload_files": [upload_file1, upload_file2],
"variable_files": [var_file1, var_file2],
"draft_variables": [draft_var1, draft_var2, draft_var3],
}
# Cleanup
db.session.rollback()
# Clean up any remaining records
for table, ids in [
(WorkflowDraftVariable, [v.id for v in [draft_var1, draft_var2, draft_var3]]),
(WorkflowDraftVariableFile, [vf.id for vf in [var_file1, var_file2]]),
(UploadFile, [uf.id for uf in [upload_file1, upload_file2]]),
]:
cleanup_query = delete(table).where(table.id.in_(ids)).execution_options(synchronize_session=False)
db.session.execute(cleanup_query)
db.session.commit()
@patch("extensions.ext_storage.storage")
def test_delete_draft_variables_with_offload_data(self, mock_storage, setup_offload_test_data):
"""Test that deleting draft variables also cleans up associated Offload data."""
data = setup_offload_test_data
app_id = data["app"].id
# Mock storage deletion to succeed
mock_storage.delete.return_value = None
# Verify initial state
draft_vars_before = db.session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
var_files_before = db.session.query(WorkflowDraftVariableFile).count()
upload_files_before = db.session.query(UploadFile).count()
assert draft_vars_before == 3 # 2 with files + 1 regular
assert var_files_before == 2
assert upload_files_before == 2
# Delete draft variables
deleted_count = delete_draft_variables_batch(app_id, batch_size=10)
# Verify results
assert deleted_count == 3
# Check that all draft variables are deleted
draft_vars_after = db.session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert draft_vars_after == 0
# Check that associated Offload data is cleaned up
var_files_after = db.session.query(WorkflowDraftVariableFile).count()
upload_files_after = db.session.query(UploadFile).count()
assert var_files_after == 0 # All variable files should be deleted
assert upload_files_after == 0 # All upload files should be deleted
# Verify storage deletion was called for both files
assert mock_storage.delete.call_count == 2
storage_keys_deleted = [call.args[0] for call in mock_storage.delete.call_args_list]
assert "test/file1.json" in storage_keys_deleted
assert "test/file2.json" in storage_keys_deleted
@patch("extensions.ext_storage.storage")
def test_delete_draft_variables_storage_failure_continues_cleanup(self, mock_storage, setup_offload_test_data):
"""Test that database cleanup continues even when storage deletion fails."""
data = setup_offload_test_data
app_id = data["app"].id
# Mock storage deletion to fail for first file, succeed for second
mock_storage.delete.side_effect = [Exception("Storage error"), None]
# Delete draft variables
deleted_count = delete_draft_variables_batch(app_id, batch_size=10)
# Verify that all draft variables are still deleted
assert deleted_count == 3
draft_vars_after = db.session.query(WorkflowDraftVariable).filter_by(app_id=app_id).count()
assert draft_vars_after == 0
# Database cleanup should still succeed even with storage errors
var_files_after = db.session.query(WorkflowDraftVariableFile).count()
upload_files_after = db.session.query(UploadFile).count()
assert var_files_after == 0
assert upload_files_after == 0
# Verify storage deletion was attempted for both files
assert mock_storage.delete.call_count == 2
@patch("extensions.ext_storage.storage")
def test_delete_draft_variables_partial_offload_data(self, mock_storage, setup_offload_test_data):
"""Test deletion with mix of variables with and without Offload data."""
data = setup_offload_test_data
app_id = data["app"].id
# Create additional app with only regular variables (no offload data)
tenant = data["tenant"]
app2 = App(
tenant_id=tenant.id,
name="Test App 2",
mode="workflow",
enable_site=True,
enable_api=True,
)
db.session.add(app2)
db.session.flush()
# Add regular variables to app2
regular_vars = []
for i in range(3):
var = WorkflowDraftVariable.new_node_variable(
app_id=app2.id,
node_id=f"node_{i}",
name=f"var_{i}",
value=StringSegment(value="regular_value"),
node_execution_id=str(uuid.uuid4()),
)
db.session.add(var)
regular_vars.append(var)
db.session.commit()
try:
# Mock storage deletion
mock_storage.delete.return_value = None
# Delete variables for app2 (no offload data)
deleted_count_app2 = delete_draft_variables_batch(app2.id, batch_size=10)
assert deleted_count_app2 == 3
# Verify storage wasn't called for app2 (no offload files)
mock_storage.delete.assert_not_called()
# Delete variables for original app (with offload data)
deleted_count_app1 = delete_draft_variables_batch(app_id, batch_size=10)
assert deleted_count_app1 == 3
# Now storage should be called for the offload files
assert mock_storage.delete.call_count == 2
finally:
# Cleanup app2 and its variables
cleanup_vars_query = (
delete(WorkflowDraftVariable)
.where(WorkflowDraftVariable.app_id == app2.id)
.execution_options(synchronize_session=False)
)
db.session.execute(cleanup_vars_query)
app2_obj = db.session.get(App, app2.id)
if app2_obj:
db.session.delete(app2_obj)
db.session.commit()