feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -1,14 +1,18 @@
|
||||
from unittest.mock import ANY, MagicMock, call, patch
|
||||
|
||||
import pytest
|
||||
import sqlalchemy as sa
|
||||
|
||||
from tasks.remove_app_and_related_data_task import _delete_draft_variables, delete_draft_variables_batch
|
||||
from tasks.remove_app_and_related_data_task import (
|
||||
_delete_draft_variable_offload_data,
|
||||
_delete_draft_variables,
|
||||
delete_draft_variables_batch,
|
||||
)
|
||||
|
||||
|
||||
class TestDeleteDraftVariablesBatch:
|
||||
@patch("tasks.remove_app_and_related_data_task._delete_draft_variable_offload_data")
|
||||
@patch("tasks.remove_app_and_related_data_task.db")
|
||||
def test_delete_draft_variables_batch_success(self, mock_db):
|
||||
def test_delete_draft_variables_batch_success(self, mock_db, mock_offload_cleanup):
|
||||
"""Test successful deletion of draft variables in batches."""
|
||||
app_id = "test-app-id"
|
||||
batch_size = 100
|
||||
@@ -24,13 +28,19 @@ class TestDeleteDraftVariablesBatch:
|
||||
mock_engine.begin.return_value = mock_context_manager
|
||||
|
||||
# Mock two batches of results, then empty
|
||||
batch1_ids = [f"var-{i}" for i in range(100)]
|
||||
batch2_ids = [f"var-{i}" for i in range(100, 150)]
|
||||
batch1_data = [(f"var-{i}", f"file-{i}" if i % 2 == 0 else None) for i in range(100)]
|
||||
batch2_data = [(f"var-{i}", f"file-{i}" if i % 3 == 0 else None) for i in range(100, 150)]
|
||||
|
||||
batch1_ids = [row[0] for row in batch1_data]
|
||||
batch1_file_ids = [row[1] for row in batch1_data if row[1] is not None]
|
||||
|
||||
batch2_ids = [row[0] for row in batch2_data]
|
||||
batch2_file_ids = [row[1] for row in batch2_data if row[1] is not None]
|
||||
|
||||
# Setup side effects for execute calls in the correct order:
|
||||
# 1. SELECT (returns batch1_ids)
|
||||
# 1. SELECT (returns batch1_data with id, file_id)
|
||||
# 2. DELETE (returns result with rowcount=100)
|
||||
# 3. SELECT (returns batch2_ids)
|
||||
# 3. SELECT (returns batch2_data)
|
||||
# 4. DELETE (returns result with rowcount=50)
|
||||
# 5. SELECT (returns empty, ends loop)
|
||||
|
||||
@@ -41,14 +51,14 @@ class TestDeleteDraftVariablesBatch:
|
||||
|
||||
# First SELECT result
|
||||
select_result1 = MagicMock()
|
||||
select_result1.__iter__.return_value = iter([(id_,) for id_ in batch1_ids])
|
||||
select_result1.__iter__.return_value = iter(batch1_data)
|
||||
|
||||
# First DELETE result
|
||||
delete_result1 = MockResult(rowcount=100)
|
||||
|
||||
# Second SELECT result
|
||||
select_result2 = MagicMock()
|
||||
select_result2.__iter__.return_value = iter([(id_,) for id_ in batch2_ids])
|
||||
select_result2.__iter__.return_value = iter(batch2_data)
|
||||
|
||||
# Second DELETE result
|
||||
delete_result2 = MockResult(rowcount=50)
|
||||
@@ -66,6 +76,9 @@ class TestDeleteDraftVariablesBatch:
|
||||
select_result3, # Third SELECT (empty)
|
||||
]
|
||||
|
||||
# Mock offload data cleanup
|
||||
mock_offload_cleanup.side_effect = [len(batch1_file_ids), len(batch2_file_ids)]
|
||||
|
||||
# Execute the function
|
||||
result = delete_draft_variables_batch(app_id, batch_size)
|
||||
|
||||
@@ -75,65 +88,18 @@ class TestDeleteDraftVariablesBatch:
|
||||
# Verify database calls
|
||||
assert mock_conn.execute.call_count == 5 # 3 selects + 2 deletes
|
||||
|
||||
# Verify the expected calls in order:
|
||||
# 1. SELECT, 2. DELETE, 3. SELECT, 4. DELETE, 5. SELECT
|
||||
expected_calls = [
|
||||
# First SELECT
|
||||
call(
|
||||
sa.text("""
|
||||
SELECT id FROM workflow_draft_variables
|
||||
WHERE app_id = :app_id
|
||||
LIMIT :batch_size
|
||||
"""),
|
||||
{"app_id": app_id, "batch_size": batch_size},
|
||||
),
|
||||
# First DELETE
|
||||
call(
|
||||
sa.text("""
|
||||
DELETE FROM workflow_draft_variables
|
||||
WHERE id IN :ids
|
||||
"""),
|
||||
{"ids": tuple(batch1_ids)},
|
||||
),
|
||||
# Second SELECT
|
||||
call(
|
||||
sa.text("""
|
||||
SELECT id FROM workflow_draft_variables
|
||||
WHERE app_id = :app_id
|
||||
LIMIT :batch_size
|
||||
"""),
|
||||
{"app_id": app_id, "batch_size": batch_size},
|
||||
),
|
||||
# Second DELETE
|
||||
call(
|
||||
sa.text("""
|
||||
DELETE FROM workflow_draft_variables
|
||||
WHERE id IN :ids
|
||||
"""),
|
||||
{"ids": tuple(batch2_ids)},
|
||||
),
|
||||
# Third SELECT (empty result)
|
||||
call(
|
||||
sa.text("""
|
||||
SELECT id FROM workflow_draft_variables
|
||||
WHERE app_id = :app_id
|
||||
LIMIT :batch_size
|
||||
"""),
|
||||
{"app_id": app_id, "batch_size": batch_size},
|
||||
),
|
||||
]
|
||||
# Verify offload cleanup was called for both batches with file_ids
|
||||
expected_offload_calls = [call(mock_conn, batch1_file_ids), call(mock_conn, batch2_file_ids)]
|
||||
mock_offload_cleanup.assert_has_calls(expected_offload_calls)
|
||||
|
||||
# Check that all calls were made correctly
|
||||
actual_calls = mock_conn.execute.call_args_list
|
||||
assert len(actual_calls) == len(expected_calls)
|
||||
|
||||
# Simplified verification - just check that the right number of calls were made
|
||||
# Simplified verification - check that the right number of calls were made
|
||||
# and that the SQL queries contain the expected patterns
|
||||
actual_calls = mock_conn.execute.call_args_list
|
||||
for i, actual_call in enumerate(actual_calls):
|
||||
if i % 2 == 0: # SELECT calls (even indices: 0, 2, 4)
|
||||
# Verify it's a SELECT query
|
||||
# Verify it's a SELECT query that now includes file_id
|
||||
sql_text = str(actual_call[0][0])
|
||||
assert "SELECT id FROM workflow_draft_variables" in sql_text
|
||||
assert "SELECT id, file_id FROM workflow_draft_variables" in sql_text
|
||||
assert "WHERE app_id = :app_id" in sql_text
|
||||
assert "LIMIT :batch_size" in sql_text
|
||||
else: # DELETE calls (odd indices: 1, 3)
|
||||
@@ -142,8 +108,9 @@ class TestDeleteDraftVariablesBatch:
|
||||
assert "DELETE FROM workflow_draft_variables" in sql_text
|
||||
assert "WHERE id IN :ids" in sql_text
|
||||
|
||||
@patch("tasks.remove_app_and_related_data_task._delete_draft_variable_offload_data")
|
||||
@patch("tasks.remove_app_and_related_data_task.db")
|
||||
def test_delete_draft_variables_batch_empty_result(self, mock_db):
|
||||
def test_delete_draft_variables_batch_empty_result(self, mock_db, mock_offload_cleanup):
|
||||
"""Test deletion when no draft variables exist for the app."""
|
||||
app_id = "nonexistent-app-id"
|
||||
batch_size = 1000
|
||||
@@ -167,6 +134,7 @@ class TestDeleteDraftVariablesBatch:
|
||||
|
||||
assert result == 0
|
||||
assert mock_conn.execute.call_count == 1 # Only one select query
|
||||
mock_offload_cleanup.assert_not_called() # No files to clean up
|
||||
|
||||
def test_delete_draft_variables_batch_invalid_batch_size(self):
|
||||
"""Test that invalid batch size raises ValueError."""
|
||||
@@ -178,9 +146,10 @@ class TestDeleteDraftVariablesBatch:
|
||||
with pytest.raises(ValueError, match="batch_size must be positive"):
|
||||
delete_draft_variables_batch(app_id, 0)
|
||||
|
||||
@patch("tasks.remove_app_and_related_data_task._delete_draft_variable_offload_data")
|
||||
@patch("tasks.remove_app_and_related_data_task.db")
|
||||
@patch("tasks.remove_app_and_related_data_task.logger")
|
||||
def test_delete_draft_variables_batch_logs_progress(self, mock_logging, mock_db):
|
||||
def test_delete_draft_variables_batch_logs_progress(self, mock_logging, mock_db, mock_offload_cleanup):
|
||||
"""Test that batch deletion logs progress correctly."""
|
||||
app_id = "test-app-id"
|
||||
batch_size = 50
|
||||
@@ -196,10 +165,13 @@ class TestDeleteDraftVariablesBatch:
|
||||
mock_engine.begin.return_value = mock_context_manager
|
||||
|
||||
# Mock one batch then empty
|
||||
batch_ids = [f"var-{i}" for i in range(30)]
|
||||
batch_data = [(f"var-{i}", f"file-{i}" if i % 3 == 0 else None) for i in range(30)]
|
||||
batch_ids = [row[0] for row in batch_data]
|
||||
batch_file_ids = [row[1] for row in batch_data if row[1] is not None]
|
||||
|
||||
# Create properly configured mocks
|
||||
select_result = MagicMock()
|
||||
select_result.__iter__.return_value = iter([(id_,) for id_ in batch_ids])
|
||||
select_result.__iter__.return_value = iter(batch_data)
|
||||
|
||||
# Create simple object with rowcount attribute
|
||||
class MockResult:
|
||||
@@ -220,10 +192,17 @@ class TestDeleteDraftVariablesBatch:
|
||||
empty_result,
|
||||
]
|
||||
|
||||
# Mock offload cleanup
|
||||
mock_offload_cleanup.return_value = len(batch_file_ids)
|
||||
|
||||
result = delete_draft_variables_batch(app_id, batch_size)
|
||||
|
||||
assert result == 30
|
||||
|
||||
# Verify offload cleanup was called with file_ids
|
||||
if batch_file_ids:
|
||||
mock_offload_cleanup.assert_called_once_with(mock_conn, batch_file_ids)
|
||||
|
||||
# Verify logging calls
|
||||
assert mock_logging.info.call_count == 2
|
||||
mock_logging.info.assert_any_call(
|
||||
@@ -241,3 +220,118 @@ class TestDeleteDraftVariablesBatch:
|
||||
|
||||
assert result == expected_return
|
||||
mock_batch_delete.assert_called_once_with(app_id, batch_size=1000)
|
||||
|
||||
|
||||
class TestDeleteDraftVariableOffloadData:
|
||||
"""Test the Offload data cleanup functionality."""
|
||||
|
||||
@patch("extensions.ext_storage.storage")
|
||||
def test_delete_draft_variable_offload_data_success(self, mock_storage):
|
||||
"""Test successful deletion of offload data."""
|
||||
|
||||
# Mock connection
|
||||
mock_conn = MagicMock()
|
||||
file_ids = ["file-1", "file-2", "file-3"]
|
||||
|
||||
# Mock query results: (variable_file_id, storage_key, upload_file_id)
|
||||
query_results = [
|
||||
("file-1", "storage/key/1", "upload-1"),
|
||||
("file-2", "storage/key/2", "upload-2"),
|
||||
("file-3", "storage/key/3", "upload-3"),
|
||||
]
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter(query_results)
|
||||
mock_conn.execute.return_value = mock_result
|
||||
|
||||
# Execute function
|
||||
result = _delete_draft_variable_offload_data(mock_conn, file_ids)
|
||||
|
||||
# Verify return value
|
||||
assert result == 3
|
||||
|
||||
# Verify storage deletion calls
|
||||
expected_storage_calls = [call("storage/key/1"), call("storage/key/2"), call("storage/key/3")]
|
||||
mock_storage.delete.assert_has_calls(expected_storage_calls, any_order=True)
|
||||
|
||||
# Verify database calls - should be 3 calls total
|
||||
assert mock_conn.execute.call_count == 3
|
||||
|
||||
# Verify the queries were called
|
||||
actual_calls = mock_conn.execute.call_args_list
|
||||
|
||||
# First call should be the SELECT query
|
||||
select_call_sql = str(actual_calls[0][0][0])
|
||||
assert "SELECT wdvf.id, uf.key, uf.id as upload_file_id" in select_call_sql
|
||||
assert "FROM workflow_draft_variable_files wdvf" in select_call_sql
|
||||
assert "JOIN upload_files uf ON wdvf.upload_file_id = uf.id" in select_call_sql
|
||||
assert "WHERE wdvf.id IN :file_ids" in select_call_sql
|
||||
|
||||
# Second call should be DELETE upload_files
|
||||
delete_upload_call_sql = str(actual_calls[1][0][0])
|
||||
assert "DELETE FROM upload_files" in delete_upload_call_sql
|
||||
assert "WHERE id IN :upload_file_ids" in delete_upload_call_sql
|
||||
|
||||
# Third call should be DELETE workflow_draft_variable_files
|
||||
delete_variable_files_call_sql = str(actual_calls[2][0][0])
|
||||
assert "DELETE FROM workflow_draft_variable_files" in delete_variable_files_call_sql
|
||||
assert "WHERE id IN :file_ids" in delete_variable_files_call_sql
|
||||
|
||||
def test_delete_draft_variable_offload_data_empty_file_ids(self):
|
||||
"""Test handling of empty file_ids list."""
|
||||
mock_conn = MagicMock()
|
||||
|
||||
result = _delete_draft_variable_offload_data(mock_conn, [])
|
||||
|
||||
assert result == 0
|
||||
mock_conn.execute.assert_not_called()
|
||||
|
||||
@patch("extensions.ext_storage.storage")
|
||||
@patch("tasks.remove_app_and_related_data_task.logging")
|
||||
def test_delete_draft_variable_offload_data_storage_failure(self, mock_logging, mock_storage):
|
||||
"""Test handling of storage deletion failures."""
|
||||
mock_conn = MagicMock()
|
||||
file_ids = ["file-1", "file-2"]
|
||||
|
||||
# Mock query results
|
||||
query_results = [
|
||||
("file-1", "storage/key/1", "upload-1"),
|
||||
("file-2", "storage/key/2", "upload-2"),
|
||||
]
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.__iter__.return_value = iter(query_results)
|
||||
mock_conn.execute.return_value = mock_result
|
||||
|
||||
# Make storage.delete fail for the first file
|
||||
mock_storage.delete.side_effect = [Exception("Storage error"), None]
|
||||
|
||||
# Execute function
|
||||
result = _delete_draft_variable_offload_data(mock_conn, file_ids)
|
||||
|
||||
# Should still return 2 (both files processed, even if one storage delete failed)
|
||||
assert result == 1 # Only one storage deletion succeeded
|
||||
|
||||
# Verify warning was logged
|
||||
mock_logging.exception.assert_called_once_with("Failed to delete storage object %s", "storage/key/1")
|
||||
|
||||
# Verify both database cleanup calls still happened
|
||||
assert mock_conn.execute.call_count == 3
|
||||
|
||||
@patch("tasks.remove_app_and_related_data_task.logging")
|
||||
def test_delete_draft_variable_offload_data_database_failure(self, mock_logging):
|
||||
"""Test handling of database operation failures."""
|
||||
mock_conn = MagicMock()
|
||||
file_ids = ["file-1"]
|
||||
|
||||
# Make execute raise an exception
|
||||
mock_conn.execute.side_effect = Exception("Database error")
|
||||
|
||||
# Execute function - should not raise, but log error
|
||||
result = _delete_draft_variable_offload_data(mock_conn, file_ids)
|
||||
|
||||
# Should return 0 when error occurs
|
||||
assert result == 0
|
||||
|
||||
# Verify error was logged
|
||||
mock_logging.exception.assert_called_once_with("Error deleting draft variable offload data:")
|
||||
|
||||
Reference in New Issue
Block a user