feat: enable tenant isolation on duplicate document indexing tasks (#29080)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
hj24
2025-12-08 17:54:57 +08:00
committed by GitHub
parent e6d504558a
commit 3cb944f318
18 changed files with 2097 additions and 158 deletions

View File

@@ -117,7 +117,7 @@ import pytest
from core.entities.document_task import DocumentTask
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from services.document_indexing_task_proxy import DocumentIndexingTaskProxy
from services.document_indexing_proxy.document_indexing_task_proxy import DocumentIndexingTaskProxy
# ============================================================================
# Test Data Factory
@@ -370,7 +370,7 @@ class TestDocumentIndexingTaskProxy:
# Features Property Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_features_property(self, mock_feature_service):
"""
Test cached_property features.
@@ -400,7 +400,7 @@ class TestDocumentIndexingTaskProxy:
mock_feature_service.get_features.assert_called_once_with("tenant-123")
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_features_property_with_different_tenants(self, mock_feature_service):
"""
Test features property with different tenant IDs.
@@ -438,7 +438,7 @@ class TestDocumentIndexingTaskProxy:
# Direct Queue Routing Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_direct_queue(self, mock_task):
"""
Test _send_to_direct_queue method.
@@ -460,7 +460,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
mock_task.delay.assert_called_once_with(tenant_id=tenant_id, dataset_id=dataset_id, document_ids=document_ids)
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.priority_document_indexing_task")
def test_send_to_direct_queue_with_priority_task(self, mock_task):
"""
Test _send_to_direct_queue with priority task function.
@@ -481,7 +481,7 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"]
)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_direct_queue_with_single_document(self, mock_task):
"""
Test _send_to_direct_queue with single document ID.
@@ -502,7 +502,7 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1"]
)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_direct_queue_with_empty_documents(self, mock_task):
"""
Test _send_to_direct_queue with empty document_ids list.
@@ -525,7 +525,7 @@ class TestDocumentIndexingTaskProxy:
# Tenant Queue Routing Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_with_existing_task_key(self, mock_task):
"""
Test _send_to_tenant_queue when task key exists.
@@ -564,7 +564,7 @@ class TestDocumentIndexingTaskProxy:
mock_task.delay.assert_not_called()
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_without_task_key(self, mock_task):
"""
Test _send_to_tenant_queue when no task key exists.
@@ -594,7 +594,7 @@ class TestDocumentIndexingTaskProxy:
proxy._tenant_isolated_task_queue.push_tasks.assert_not_called()
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.priority_document_indexing_task")
def test_send_to_tenant_queue_with_priority_task(self, mock_task):
"""
Test _send_to_tenant_queue with priority task function.
@@ -621,7 +621,7 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"]
)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_document_task_serialization(self, mock_task):
"""
Test DocumentTask serialization in _send_to_tenant_queue.
@@ -659,7 +659,7 @@ class TestDocumentIndexingTaskProxy:
# Queue Type Selection Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_default_tenant_queue(self, mock_task):
"""
Test _send_to_default_tenant_queue method.
@@ -678,7 +678,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_tenant_queue.assert_called_once_with(mock_task)
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.priority_document_indexing_task")
def test_send_to_priority_tenant_queue(self, mock_task):
"""
Test _send_to_priority_tenant_queue method.
@@ -697,7 +697,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_tenant_queue.assert_called_once_with(mock_task)
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.priority_document_indexing_task")
def test_send_to_priority_direct_queue(self, mock_task):
"""
Test _send_to_priority_direct_queue method.
@@ -720,7 +720,7 @@ class TestDocumentIndexingTaskProxy:
# Dispatch Logic Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_with_billing_enabled_sandbox_plan(self, mock_feature_service):
"""
Test _dispatch method when billing is enabled with SANDBOX plan.
@@ -745,7 +745,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_default_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_with_billing_enabled_team_plan(self, mock_feature_service):
"""
Test _dispatch method when billing is enabled with TEAM plan.
@@ -770,7 +770,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_priority_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_with_billing_enabled_professional_plan(self, mock_feature_service):
"""
Test _dispatch method when billing is enabled with PROFESSIONAL plan.
@@ -795,7 +795,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_priority_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_with_billing_disabled(self, mock_feature_service):
"""
Test _dispatch method when billing is disabled.
@@ -818,7 +818,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_priority_direct_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_edge_case_empty_plan(self, mock_feature_service):
"""
Test _dispatch method with empty plan string.
@@ -842,7 +842,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_priority_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_edge_case_none_plan(self, mock_feature_service):
"""
Test _dispatch method with None plan.
@@ -870,7 +870,7 @@ class TestDocumentIndexingTaskProxy:
# Delay Method Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_delay_method(self, mock_feature_service):
"""
Test delay method integration.
@@ -895,7 +895,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_default_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_delay_method_with_team_plan(self, mock_feature_service):
"""
Test delay method with TEAM plan.
@@ -920,7 +920,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_priority_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_delay_method_with_billing_disabled(self, mock_feature_service):
"""
Test delay method with billing disabled.
@@ -1021,7 +1021,7 @@ class TestDocumentIndexingTaskProxy:
# Batch Operations Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_batch_operation_with_multiple_documents(self, mock_task):
"""
Test batch operation with multiple documents.
@@ -1044,7 +1044,7 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=document_ids
)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_batch_operation_with_large_batch(self, mock_task):
"""
Test batch operation with large batch of documents.
@@ -1073,7 +1073,7 @@ class TestDocumentIndexingTaskProxy:
# Error Handling Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_direct_queue_task_delay_failure(self, mock_task):
"""
Test _send_to_direct_queue when task.delay() raises an exception.
@@ -1090,7 +1090,7 @@ class TestDocumentIndexingTaskProxy:
with pytest.raises(Exception, match="Task delay failed"):
proxy._send_to_direct_queue(mock_task)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_push_tasks_failure(self, mock_task):
"""
Test _send_to_tenant_queue when push_tasks raises an exception.
@@ -1111,7 +1111,7 @@ class TestDocumentIndexingTaskProxy:
with pytest.raises(Exception, match="Push tasks failed"):
proxy._send_to_tenant_queue(mock_task)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_set_waiting_time_failure(self, mock_task):
"""
Test _send_to_tenant_queue when set_task_waiting_time raises an exception.
@@ -1132,7 +1132,7 @@ class TestDocumentIndexingTaskProxy:
with pytest.raises(Exception, match="Set waiting time failed"):
proxy._send_to_tenant_queue(mock_task)
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
def test_dispatch_feature_service_failure(self, mock_feature_service):
"""
Test _dispatch when FeatureService.get_features raises an exception.
@@ -1153,8 +1153,8 @@ class TestDocumentIndexingTaskProxy:
# Integration Tests
# ========================================================================
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_full_flow_sandbox_plan(self, mock_task, mock_feature_service):
"""
Test full flow for SANDBOX plan with tenant queue.
@@ -1187,8 +1187,8 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"]
)
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.priority_document_indexing_task")
def test_full_flow_team_plan(self, mock_task, mock_feature_service):
"""
Test full flow for TEAM plan with priority tenant queue.
@@ -1221,8 +1221,8 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"]
)
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.priority_document_indexing_task")
def test_full_flow_billing_disabled(self, mock_task, mock_feature_service):
"""
Test full flow for billing disabled (self-hosted/enterprise).

View File

@@ -3,7 +3,7 @@ from unittest.mock import Mock, patch
from core.entities.document_task import DocumentTask
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from services.document_indexing_task_proxy import DocumentIndexingTaskProxy
from services.document_indexing_proxy.document_indexing_task_proxy import DocumentIndexingTaskProxy
class DocumentIndexingTaskProxyTestDataFactory:
@@ -59,7 +59,7 @@ class TestDocumentIndexingTaskProxy:
assert proxy._tenant_isolated_task_queue._tenant_id == tenant_id
assert proxy._tenant_isolated_task_queue._unique_key == "document_indexing"
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_features_property(self, mock_feature_service):
"""Test cached_property features."""
# Arrange
@@ -77,7 +77,7 @@ class TestDocumentIndexingTaskProxy:
assert features1 is features2 # Should be the same instance due to caching
mock_feature_service.get_features.assert_called_once_with("tenant-123")
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_direct_queue(self, mock_task):
"""Test _send_to_direct_queue method."""
# Arrange
@@ -92,7 +92,7 @@ class TestDocumentIndexingTaskProxy:
tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"]
)
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_with_existing_task_key(self, mock_task):
"""Test _send_to_tenant_queue when task key exists."""
# Arrange
@@ -115,7 +115,7 @@ class TestDocumentIndexingTaskProxy:
assert pushed_tasks[0]["document_ids"] == ["doc-1", "doc-2", "doc-3"]
mock_task.delay.assert_not_called()
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
@patch("services.document_indexing_proxy.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_tenant_queue_without_task_key(self, mock_task):
"""Test _send_to_tenant_queue when no task key exists."""
# Arrange
@@ -135,8 +135,7 @@ class TestDocumentIndexingTaskProxy:
)
proxy._tenant_isolated_task_queue.push_tasks.assert_not_called()
@patch("services.document_indexing_task_proxy.normal_document_indexing_task")
def test_send_to_default_tenant_queue(self, mock_task):
def test_send_to_default_tenant_queue(self):
"""Test _send_to_default_tenant_queue method."""
# Arrange
proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy()
@@ -146,10 +145,9 @@ class TestDocumentIndexingTaskProxy:
proxy._send_to_default_tenant_queue()
# Assert
proxy._send_to_tenant_queue.assert_called_once_with(mock_task)
proxy._send_to_tenant_queue.assert_called_once_with(proxy.NORMAL_TASK_FUNC)
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
def test_send_to_priority_tenant_queue(self, mock_task):
def test_send_to_priority_tenant_queue(self):
"""Test _send_to_priority_tenant_queue method."""
# Arrange
proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy()
@@ -159,10 +157,9 @@ class TestDocumentIndexingTaskProxy:
proxy._send_to_priority_tenant_queue()
# Assert
proxy._send_to_tenant_queue.assert_called_once_with(mock_task)
proxy._send_to_tenant_queue.assert_called_once_with(proxy.PRIORITY_TASK_FUNC)
@patch("services.document_indexing_task_proxy.priority_document_indexing_task")
def test_send_to_priority_direct_queue(self, mock_task):
def test_send_to_priority_direct_queue(self):
"""Test _send_to_priority_direct_queue method."""
# Arrange
proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy()
@@ -172,9 +169,9 @@ class TestDocumentIndexingTaskProxy:
proxy._send_to_priority_direct_queue()
# Assert
proxy._send_to_direct_queue.assert_called_once_with(mock_task)
proxy._send_to_direct_queue.assert_called_once_with(proxy.PRIORITY_TASK_FUNC)
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_billing_enabled_sandbox_plan(self, mock_feature_service):
"""Test _dispatch method when billing is enabled with sandbox plan."""
# Arrange
@@ -191,7 +188,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_default_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_billing_enabled_non_sandbox_plan(self, mock_feature_service):
"""Test _dispatch method when billing is enabled with non-sandbox plan."""
# Arrange
@@ -208,7 +205,7 @@ class TestDocumentIndexingTaskProxy:
# If billing enabled with non sandbox plan, should send to priority tenant queue
proxy._send_to_priority_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_billing_disabled(self, mock_feature_service):
"""Test _dispatch method when billing is disabled."""
# Arrange
@@ -223,7 +220,7 @@ class TestDocumentIndexingTaskProxy:
# If billing disabled, for example: self-hosted or enterprise, should send to priority direct queue
proxy._send_to_priority_direct_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_delay_method(self, mock_feature_service):
"""Test delay method integration."""
# Arrange
@@ -256,7 +253,7 @@ class TestDocumentIndexingTaskProxy:
assert task.dataset_id == dataset_id
assert task.document_ids == document_ids
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_edge_case_empty_plan(self, mock_feature_service):
"""Test _dispatch method with empty plan string."""
# Arrange
@@ -271,7 +268,7 @@ class TestDocumentIndexingTaskProxy:
# Assert
proxy._send_to_priority_tenant_queue.assert_called_once()
@patch("services.document_indexing_task_proxy.FeatureService")
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_edge_case_none_plan(self, mock_feature_service):
"""Test _dispatch method with None plan."""
# Arrange

View File

@@ -0,0 +1,363 @@
from unittest.mock import Mock, patch
from core.entities.document_task import DocumentTask
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from services.document_indexing_proxy.duplicate_document_indexing_task_proxy import (
DuplicateDocumentIndexingTaskProxy,
)
class DuplicateDocumentIndexingTaskProxyTestDataFactory:
    """Factory class for creating test data and mock objects for DuplicateDocumentIndexingTaskProxy tests."""

    @staticmethod
    def create_mock_features(
        billing_enabled: bool = False, plan: CloudPlan | str | None = CloudPlan.SANDBOX
    ) -> Mock:
        """Create mock features with billing configuration.

        Args:
            billing_enabled: Value exposed as ``features.billing.enabled``.
            plan: Value exposed as ``features.billing.subscription.plan``. The
                edge-case tests in this file pass ``""`` and ``None`` in addition
                to ``CloudPlan`` members, so the annotation admits those as well.

        Returns:
            A ``Mock`` with ``billing.enabled`` and ``billing.subscription.plan`` set.
        """
        features = Mock()
        features.billing = Mock()
        features.billing.enabled = billing_enabled
        features.billing.subscription = Mock()
        features.billing.subscription.plan = plan
        return features

    @staticmethod
    def create_mock_tenant_queue(has_task_key: bool = False) -> Mock:
        """Create mock TenantIsolatedTaskQueue.

        Args:
            has_task_key: When true, ``get_task_key()`` returns ``"task_key"``;
                otherwise it returns ``None``.

        Returns:
            A ``Mock`` specced on ``TenantIsolatedTaskQueue`` whose ``push_tasks``
            and ``set_task_waiting_time`` are plain mocks that record calls.
        """
        queue = Mock(spec=TenantIsolatedTaskQueue)
        queue.get_task_key.return_value = "task_key" if has_task_key else None
        # Assigned explicitly so call assertions work on plain, unspecced mocks.
        queue.push_tasks = Mock()
        queue.set_task_waiting_time = Mock()
        return queue

    @staticmethod
    def create_duplicate_document_task_proxy(
        tenant_id: str = "tenant-123", dataset_id: str = "dataset-456", document_ids: list[str] | None = None
    ) -> DuplicateDocumentIndexingTaskProxy:
        """Create DuplicateDocumentIndexingTaskProxy instance for testing.

        Args:
            tenant_id: Tenant identifier passed to the proxy constructor.
            dataset_id: Dataset identifier passed to the proxy constructor.
            document_ids: Document identifiers passed to the proxy constructor;
                ``None`` selects ``["doc-1", "doc-2", "doc-3"]``.

        Returns:
            A real ``DuplicateDocumentIndexingTaskProxy`` built from the arguments.
        """
        # A fresh list per call avoids sharing a mutable default between tests.
        if document_ids is None:
            document_ids = ["doc-1", "doc-2", "doc-3"]
        return DuplicateDocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids)
class TestDuplicateDocumentIndexingTaskProxy:
"""Test cases for DuplicateDocumentIndexingTaskProxy class."""
def test_initialization(self):
    """Constructor keeps its arguments and builds a tenant-isolated queue."""
    # Arrange
    expected_tenant = "tenant-123"
    expected_dataset = "dataset-456"
    expected_docs = ["doc-1", "doc-2", "doc-3"]

    # Act
    task_proxy = DuplicateDocumentIndexingTaskProxy(expected_tenant, expected_dataset, expected_docs)

    # Assert: the constructor arguments are stored on the proxy
    assert task_proxy._tenant_id == expected_tenant
    assert task_proxy._dataset_id == expected_dataset
    assert task_proxy._document_ids == expected_docs

    # Assert: the queue is bound to the tenant and the duplicate-indexing key
    tenant_queue = task_proxy._tenant_isolated_task_queue
    assert isinstance(tenant_queue, TenantIsolatedTaskQueue)
    assert tenant_queue._tenant_id == expected_tenant
    assert tenant_queue._unique_key == "duplicate_document_indexing"
def test_queue_name(self):
    """QUEUE_NAME on the proxy is the duplicate-indexing queue name."""
    # Arrange & Act
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()

    # Assert
    assert task_proxy.QUEUE_NAME == "duplicate_document_indexing"
def test_task_functions(self):
    """NORMAL_TASK_FUNC and PRIORITY_TASK_FUNC carry the expected task names."""
    # Arrange & Act
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()

    # Assert: checked in the same order as listed (normal first, then priority)
    expected_names = {
        "NORMAL_TASK_FUNC": "normal_duplicate_document_indexing_task",
        "PRIORITY_TASK_FUNC": "priority_duplicate_document_indexing_task",
    }
    for attribute, task_name in expected_names.items():
        assert getattr(task_proxy, attribute).__name__ == task_name
@patch("services.document_indexing_proxy.base.FeatureService")
def test_features_property(self, mock_feature_service):
    """Reading ``features`` twice yields one object and a single FeatureService lookup."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features()
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()

    # Act
    first_read = task_proxy.features
    second_read = task_proxy.features  # expected to reuse the first result

    # Assert
    assert first_read == stub_features
    assert second_read == stub_features
    assert first_read is second_read  # identical instance, not merely equal
    mock_feature_service.get_features.assert_called_once_with("tenant-123")
@patch(
    "services.document_indexing_proxy.duplicate_document_indexing_task_proxy.normal_duplicate_document_indexing_task"
)
def test_send_to_direct_queue(self, mock_task):
    """``_send_to_direct_queue`` calls ``delay`` on the task with the proxy's ids."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()
    mock_task.delay = Mock()

    # Act
    task_proxy._send_to_direct_queue(mock_task)

    # Assert
    expected_kwargs = {
        "tenant_id": "tenant-123",
        "dataset_id": "dataset-456",
        "document_ids": ["doc-1", "doc-2", "doc-3"],
    }
    mock_task.delay.assert_called_once_with(**expected_kwargs)
@patch(
    "services.document_indexing_proxy.duplicate_document_indexing_task_proxy.normal_duplicate_document_indexing_task"
)
def test_send_to_tenant_queue_with_existing_task_key(self, mock_task):
    """When a task key exists, the task is pushed to the queue and ``delay`` is not called."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()
    stub_queue = factory.create_mock_tenant_queue(has_task_key=True)
    task_proxy._tenant_isolated_task_queue = stub_queue
    mock_task.delay = Mock()

    # Act
    task_proxy._send_to_tenant_queue(mock_task)

    # Assert: exactly one serialized task was pushed
    task_proxy._tenant_isolated_task_queue.push_tasks.assert_called_once()
    queued = task_proxy._tenant_isolated_task_queue.push_tasks.call_args.args[0]
    assert len(queued) == 1

    # Assert: the payload can rebuild a DocumentTask and carries the proxy's ids
    payload = queued[0]
    assert isinstance(DocumentTask(**payload), DocumentTask)
    assert payload["tenant_id"] == "tenant-123"
    assert payload["dataset_id"] == "dataset-456"
    assert payload["document_ids"] == ["doc-1", "doc-2", "doc-3"]
    mock_task.delay.assert_not_called()
@patch(
    "services.document_indexing_proxy.duplicate_document_indexing_task_proxy.normal_duplicate_document_indexing_task"
)
def test_send_to_tenant_queue_without_task_key(self, mock_task):
    """Without a task key, the waiting time is set and ``delay`` is called; nothing is pushed."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()
    stub_queue = factory.create_mock_tenant_queue(has_task_key=False)
    task_proxy._tenant_isolated_task_queue = stub_queue
    mock_task.delay = Mock()

    # Act
    task_proxy._send_to_tenant_queue(mock_task)

    # Assert
    task_proxy._tenant_isolated_task_queue.set_task_waiting_time.assert_called_once()
    expected_kwargs = {
        "tenant_id": "tenant-123",
        "dataset_id": "dataset-456",
        "document_ids": ["doc-1", "doc-2", "doc-3"],
    }
    mock_task.delay.assert_called_once_with(**expected_kwargs)
    task_proxy._tenant_isolated_task_queue.push_tasks.assert_not_called()
def test_send_to_default_tenant_queue(self):
    """``_send_to_default_tenant_queue`` forwards NORMAL_TASK_FUNC to ``_send_to_tenant_queue``."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()
    tenant_route = Mock()
    task_proxy._send_to_tenant_queue = tenant_route

    # Act
    task_proxy._send_to_default_tenant_queue()

    # Assert
    tenant_route.assert_called_once_with(task_proxy.NORMAL_TASK_FUNC)
def test_send_to_priority_tenant_queue(self):
    """``_send_to_priority_tenant_queue`` forwards PRIORITY_TASK_FUNC to ``_send_to_tenant_queue``."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()
    tenant_route = Mock()
    task_proxy._send_to_tenant_queue = tenant_route

    # Act
    task_proxy._send_to_priority_tenant_queue()

    # Assert
    tenant_route.assert_called_once_with(task_proxy.PRIORITY_TASK_FUNC)
def test_send_to_priority_direct_queue(self):
    """``_send_to_priority_direct_queue`` forwards PRIORITY_TASK_FUNC to ``_send_to_direct_queue``."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    task_proxy = factory.create_duplicate_document_task_proxy()
    direct_route = Mock()
    task_proxy._send_to_direct_queue = direct_route

    # Act
    task_proxy._send_to_priority_direct_queue()

    # Assert
    direct_route.assert_called_once_with(task_proxy.PRIORITY_TASK_FUNC)
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_billing_enabled_sandbox_plan(self, mock_feature_service):
    """Billing enabled on the SANDBOX plan: ``_dispatch`` uses the default tenant queue."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features(billing_enabled=True, plan=CloudPlan.SANDBOX)
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    default_route = Mock()
    task_proxy._send_to_default_tenant_queue = default_route

    # Act
    task_proxy._dispatch()

    # Assert
    default_route.assert_called_once()
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_billing_enabled_non_sandbox_plan(self, mock_feature_service):
    """Billing enabled on the TEAM plan: ``_dispatch`` uses the priority tenant queue."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features(billing_enabled=True, plan=CloudPlan.TEAM)
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    priority_route = Mock()
    task_proxy._send_to_priority_tenant_queue = priority_route

    # Act
    task_proxy._dispatch()

    # Assert
    # A billed, non-sandbox plan is expected to land on the priority tenant queue
    priority_route.assert_called_once()
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_billing_disabled(self, mock_feature_service):
    """Billing disabled: ``_dispatch`` uses the priority direct queue."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features(billing_enabled=False)
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    direct_route = Mock()
    task_proxy._send_to_priority_direct_queue = direct_route

    # Act
    task_proxy._dispatch()

    # Assert
    # Billing disabled (for example self-hosted or enterprise) should use the priority direct queue
    direct_route.assert_called_once()
@patch("services.document_indexing_proxy.base.FeatureService")
def test_delay_method(self, mock_feature_service):
    """``delay`` with billing enabled on SANDBOX ends up on the default tenant queue."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features(billing_enabled=True, plan=CloudPlan.SANDBOX)
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    default_route = Mock()
    task_proxy._send_to_default_tenant_queue = default_route

    # Act
    task_proxy.delay()

    # Assert
    # Billing enabled with the sandbox plan should use the default tenant queue
    default_route.assert_called_once()
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_edge_case_empty_plan(self, mock_feature_service):
    """Billing enabled with an empty plan string: ``_dispatch`` uses the priority tenant queue."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features(billing_enabled=True, plan="")
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    priority_route = Mock()
    task_proxy._send_to_priority_tenant_queue = priority_route

    # Act
    task_proxy._dispatch()

    # Assert
    priority_route.assert_called_once()
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_edge_case_none_plan(self, mock_feature_service):
    """Billing enabled with a ``None`` plan: ``_dispatch`` uses the priority tenant queue."""
    # Arrange
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory
    stub_features = factory.create_mock_features(billing_enabled=True, plan=None)
    mock_feature_service.get_features.return_value = stub_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    priority_route = Mock()
    task_proxy._send_to_priority_tenant_queue = priority_route

    # Act
    task_proxy._dispatch()

    # Assert
    priority_route.assert_called_once()
def test_initialization_with_empty_document_ids(self):
    """Check that the constructor keeps its arguments when document_ids is empty."""
    # Arrange
    expected_tenant, expected_dataset, expected_documents = "tenant-123", "dataset-456", []

    # Act
    task_proxy = DuplicateDocumentIndexingTaskProxy(expected_tenant, expected_dataset, expected_documents)

    # Assert: each private field matches what was passed in.
    assert task_proxy._tenant_id == expected_tenant
    assert task_proxy._dataset_id == expected_dataset
    assert task_proxy._document_ids == expected_documents
def test_initialization_with_single_document_id(self):
    """Check that the constructor keeps its arguments for a one-element document list."""
    # Arrange
    expected_tenant, expected_dataset, expected_documents = "tenant-123", "dataset-456", ["doc-1"]

    # Act
    task_proxy = DuplicateDocumentIndexingTaskProxy(expected_tenant, expected_dataset, expected_documents)

    # Assert: each private field matches what was passed in.
    assert task_proxy._tenant_id == expected_tenant
    assert task_proxy._dataset_id == expected_dataset
    assert task_proxy._document_ids == expected_documents
def test_initialization_with_large_batch(self):
    """Check that the constructor keeps all 100 IDs of a large document batch."""
    # Arrange
    expected_tenant = "tenant-123"
    expected_dataset = "dataset-456"
    expected_documents = [f"doc-{i}" for i in range(100)]

    # Act
    task_proxy = DuplicateDocumentIndexingTaskProxy(expected_tenant, expected_dataset, expected_documents)

    # Assert: fields match the inputs and no ID was dropped.
    assert task_proxy._tenant_id == expected_tenant
    assert task_proxy._dataset_id == expected_dataset
    assert task_proxy._document_ids == expected_documents
    assert len(task_proxy._document_ids) == 100
@patch("services.document_indexing_proxy.base.FeatureService")
def test_dispatch_with_professional_plan(self, mock_feature_service):
    """Check _dispatch for a billed tenant on the professional plan."""
    factory = DuplicateDocumentIndexingTaskProxyTestDataFactory

    # Arrange: billing enabled, professional subscription.
    professional_features = factory.create_mock_features(billing_enabled=True, plan=CloudPlan.PROFESSIONAL)
    mock_feature_service.get_features.return_value = professional_features
    task_proxy = factory.create_duplicate_document_task_proxy()
    priority_tenant_sender = Mock()
    task_proxy._send_to_priority_tenant_queue = priority_tenant_sender

    # Act
    task_proxy._dispatch()

    # Assert: the priority tenant queue sender is invoked exactly once.
    priority_tenant_sender.assert_called_once()

View File

@@ -19,7 +19,7 @@ from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from extensions.ext_redis import redis_client
from models.dataset import Dataset, Document
from services.document_indexing_task_proxy import DocumentIndexingTaskProxy
from services.document_indexing_proxy.document_indexing_task_proxy import DocumentIndexingTaskProxy
from tasks.document_indexing_task import (
_document_indexing,
_document_indexing_with_tenant_queue,
@@ -138,7 +138,9 @@ class TestTaskEnqueuing:
with patch.object(DocumentIndexingTaskProxy, "features") as mock_features:
mock_features.billing.enabled = False
with patch("services.document_indexing_task_proxy.priority_document_indexing_task") as mock_task:
# Mock the class variable directly
mock_task = Mock()
with patch.object(DocumentIndexingTaskProxy, "PRIORITY_TASK_FUNC", mock_task):
proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids)
# Act
@@ -163,7 +165,9 @@ class TestTaskEnqueuing:
mock_features.billing.enabled = True
mock_features.billing.subscription.plan = CloudPlan.SANDBOX
with patch("services.document_indexing_task_proxy.normal_document_indexing_task") as mock_task:
# Mock the class variable directly
mock_task = Mock()
with patch.object(DocumentIndexingTaskProxy, "NORMAL_TASK_FUNC", mock_task):
proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids)
# Act
@@ -187,7 +191,9 @@ class TestTaskEnqueuing:
mock_features.billing.enabled = True
mock_features.billing.subscription.plan = CloudPlan.PROFESSIONAL
with patch("services.document_indexing_task_proxy.priority_document_indexing_task") as mock_task:
# Mock the class variable directly
mock_task = Mock()
with patch.object(DocumentIndexingTaskProxy, "PRIORITY_TASK_FUNC", mock_task):
proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids)
# Act
@@ -211,7 +217,9 @@ class TestTaskEnqueuing:
mock_features.billing.enabled = True
mock_features.billing.subscription.plan = CloudPlan.PROFESSIONAL
with patch("services.document_indexing_task_proxy.priority_document_indexing_task") as mock_task:
# Mock the class variable directly
mock_task = Mock()
with patch.object(DocumentIndexingTaskProxy, "PRIORITY_TASK_FUNC", mock_task):
proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids)
# Act
@@ -1493,7 +1501,9 @@ class TestEdgeCases:
mock_features.billing.enabled = True
mock_features.billing.subscription.plan = CloudPlan.PROFESSIONAL
with patch("services.document_indexing_task_proxy.priority_document_indexing_task") as mock_task:
# Mock the class variable directly
mock_task = Mock()
with patch.object(DocumentIndexingTaskProxy, "PRIORITY_TASK_FUNC", mock_task):
# Act - Enqueue multiple tasks rapidly
for doc_ids in document_ids_list:
proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, doc_ids)
@@ -1898,7 +1908,7 @@ class TestRobustness:
- Error is propagated appropriately
"""
# Arrange
with patch("services.document_indexing_task_proxy.FeatureService.get_features") as mock_get_features:
with patch("services.document_indexing_proxy.base.FeatureService.get_features") as mock_get_features:
# Simulate FeatureService failure
mock_get_features.side_effect = Exception("Feature service unavailable")

View File

@@ -0,0 +1,567 @@
"""
Unit tests for duplicate document indexing tasks.
This module tests the duplicate document indexing task functionality including:
- Task enqueuing to different queues (normal, priority, tenant-isolated)
- Batch processing of multiple duplicate documents
- Progress tracking through task lifecycle
- Error handling and retry mechanisms
- Cleanup of old document data before re-indexing
"""
import uuid
from unittest.mock import MagicMock, Mock, patch
import pytest
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan
from models.dataset import Dataset, Document, DocumentSegment
from tasks.duplicate_document_indexing_task import (
_duplicate_document_indexing_task,
_duplicate_document_indexing_task_with_tenant_queue,
duplicate_document_indexing_task,
normal_duplicate_document_indexing_task,
priority_duplicate_document_indexing_task,
)
# ============================================================================
# Fixtures
# ============================================================================
@pytest.fixture
def tenant_id():
    """Provide a freshly generated UUID4 string to act as the tenant ID."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
@pytest.fixture
def dataset_id():
    """Provide a freshly generated UUID4 string to act as the dataset ID."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
@pytest.fixture
def document_ids():
    """Provide a list of three freshly generated UUID4 strings as document IDs."""
    generated_ids = []
    for _ in range(3):
        generated_ids.append(str(uuid.uuid4()))
    return generated_ids
@pytest.fixture
def mock_dataset(dataset_id, tenant_id):
    """Build a Mock constrained to the Dataset spec, carrying the fixture IDs and embedding settings."""
    fake_dataset = Mock(spec=Dataset)

    # Identity fields come from the sibling fixtures.
    fake_dataset.id = dataset_id
    fake_dataset.tenant_id = tenant_id

    # Fixed indexing / embedding configuration.
    fake_dataset.indexing_technique = "high_quality"
    fake_dataset.embedding_model_provider = "openai"
    fake_dataset.embedding_model = "text-embedding-ada-002"

    return fake_dataset
@pytest.fixture
def mock_documents(document_ids, dataset_id):
    """Build one Document-spec'd Mock per document ID, each starting in the "waiting" state."""

    def _build_document(doc_id):
        # A single mock document with no error, no stop time and no start time yet.
        fake_doc = Mock(spec=Document)
        fake_doc.id = doc_id
        fake_doc.dataset_id = dataset_id
        fake_doc.indexing_status = "waiting"
        fake_doc.error = None
        fake_doc.stopped_at = None
        fake_doc.processing_started_at = None
        fake_doc.doc_form = "text_model"
        return fake_doc

    return [_build_document(doc_id) for doc_id in document_ids]
@pytest.fixture
def mock_document_segments(document_ids):
    """Build three DocumentSegment-spec'd Mocks for every document ID, in document order."""

    def _build_segment(doc_id, i):
        # Each segment gets a random ID and a node ID derived from its document and position.
        fake_segment = Mock(spec=DocumentSegment)
        fake_segment.id = str(uuid.uuid4())
        fake_segment.document_id = doc_id
        fake_segment.index_node_id = f"node-{doc_id}-{i}"
        return fake_segment

    return [_build_segment(doc_id, i) for doc_id in document_ids for i in range(3)]
@pytest.fixture
def mock_db_session():
    """Patch the task module's db.session and yield the patched session mock."""
    with patch("tasks.duplicate_document_indexing_task.db.session") as patched_session:
        chainable_query = MagicMock()
        # where() hands back the same query mock, so query().where() resolves to one object.
        chainable_query.where.return_value = chainable_query
        patched_session.query.return_value = chainable_query
        patched_session.scalars.return_value = MagicMock()
        yield patched_session
@pytest.fixture
def mock_indexing_runner():
    """Patch IndexingRunner in the task module and yield the instance its constructor returns."""
    with patch("tasks.duplicate_document_indexing_task.IndexingRunner") as patched_runner_cls:
        runner_instance = MagicMock(spec=IndexingRunner)
        # Any IndexingRunner() call inside the task now produces this instance.
        patched_runner_cls.return_value = runner_instance
        yield runner_instance
@pytest.fixture
def mock_feature_service():
    """Patch FeatureService in the task module; get_features returns billing-off features."""
    with patch("tasks.duplicate_document_indexing_task.FeatureService") as patched_service:
        # Defaults: billing disabled, vector space at 0 of 1000.
        billing_stub = Mock(enabled=False)
        vector_space_stub = Mock(size=0, limit=1000)
        features_stub = Mock(billing=billing_stub, vector_space=vector_space_stub)

        patched_service.get_features.return_value = features_stub
        yield patched_service
@pytest.fixture
def mock_index_processor_factory():
    """Patch IndexProcessorFactory in the task module and yield the patched factory class."""
    with patch("tasks.duplicate_document_indexing_task.IndexProcessorFactory") as patched_factory:
        processor_stub = MagicMock()
        processor_stub.clean = Mock()
        # IndexProcessorFactory(...).init_index_processor() resolves to the stub processor.
        factory_instance = patched_factory.return_value
        factory_instance.init_index_processor.return_value = processor_stub
        yield patched_factory
@pytest.fixture
def mock_tenant_isolated_queue():
    """Patch TenantIsolatedTaskQueue in the task module and yield the queue instance mock."""
    with patch("tasks.duplicate_document_indexing_task.TenantIsolatedTaskQueue") as patched_queue_cls:
        queue_instance = MagicMock(spec=TenantIsolatedTaskQueue)
        # By default there is nothing waiting in the queue.
        queue_instance.pull_tasks.return_value = []
        queue_instance.delete_task_key = Mock()
        queue_instance.set_task_waiting_time = Mock()
        patched_queue_cls.return_value = queue_instance
        yield queue_instance
# ============================================================================
# Tests for deprecated duplicate_document_indexing_task
# ============================================================================
class TestDuplicateDocumentIndexingTask:
    """Covers the deprecated duplicate_document_indexing_task entry point."""

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task")
    def test_duplicate_document_indexing_task_calls_core_function(
        self,
        mock_core_func,
        dataset_id,
        document_ids,
    ):
        """The task forwards its dataset ID and document IDs to the core function once."""
        # Act
        duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert
        mock_core_func.assert_called_once_with(dataset_id, document_ids)

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task")
    def test_duplicate_document_indexing_task_with_empty_document_ids(self, mock_core_func, dataset_id):
        """An empty document_ids list is still forwarded to the core function."""
        # Arrange
        no_documents = []

        # Act
        duplicate_document_indexing_task(dataset_id, no_documents)

        # Assert
        mock_core_func.assert_called_once_with(dataset_id, no_documents)
# ============================================================================
# Tests for _duplicate_document_indexing_task core function
# ============================================================================
class TestDuplicateDocumentIndexingTaskCore:
    """Covers the _duplicate_document_indexing_task core function."""

    def test_successful_duplicate_document_indexing(
        self,
        mock_db_session,
        mock_indexing_runner,
        mock_feature_service,
        mock_index_processor_factory,
        mock_dataset,
        mock_documents,
        mock_document_segments,
        dataset_id,
        document_ids,
    ):
        """Happy path: the runner is invoked and every document is moved to "parsing"."""
        # Arrange: first() yields the dataset, then each document in turn.
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.side_effect = [mock_dataset, *mock_documents]
        mock_db_session.scalars.return_value.all.return_value = mock_document_segments

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert: the indexing runner ran exactly once.
        mock_indexing_runner.run.assert_called_once()

        # Assert: every document was flipped to parsing and given a start time.
        for document in mock_documents:
            assert document.indexing_status == "parsing"
            assert document.processing_started_at is not None

        # Assert: the session was committed and closed.
        assert mock_db_session.commit.called
        assert mock_db_session.close.called

    def test_duplicate_document_indexing_dataset_not_found(self, mock_db_session, dataset_id, document_ids):
        """When the dataset lookup returns None the session is still closed."""
        # Arrange
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.return_value = None

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert: close() was called at least once.
        assert mock_db_session.close.called

    def test_duplicate_document_indexing_with_billing_enabled_sandbox_plan(
        self,
        mock_db_session,
        mock_feature_service,
        mock_dataset,
        dataset_id,
        document_ids,
    ):
        """Billing enabled on the sandbox plan with several documents: the session is committed."""
        # Arrange
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.return_value = mock_dataset
        billing = mock_feature_service.get_features.return_value.billing
        billing.enabled = True
        billing.subscription.plan = CloudPlan.SANDBOX

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert: a sandbox plan with multiple documents should fail, which commits the session.
        mock_db_session.commit.assert_called()

    def test_duplicate_document_indexing_with_billing_limit_exceeded(
        self,
        mock_db_session,
        mock_feature_service,
        mock_dataset,
        mock_documents,
        dataset_id,
        document_ids,
    ):
        """Team plan with vector space at 990 of 1000: the session is committed and closed."""
        # Arrange
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.side_effect = [mock_dataset, *mock_documents]
        mock_db_session.scalars.return_value.all.return_value = []  # nothing to clean up
        features = mock_feature_service.get_features.return_value
        features.billing.enabled = True
        features.billing.subscription.plan = CloudPlan.TEAM
        features.vector_space.size = 990
        features.vector_space.limit = 1000

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert
        assert mock_db_session.commit.called
        assert mock_db_session.close.called

    def test_duplicate_document_indexing_runner_error(
        self,
        mock_db_session,
        mock_indexing_runner,
        mock_feature_service,
        mock_index_processor_factory,
        mock_dataset,
        mock_documents,
        dataset_id,
        document_ids,
    ):
        """A generic exception from IndexingRunner.run still leaves the session closed once."""
        # Arrange
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.side_effect = [mock_dataset, *mock_documents]
        mock_db_session.scalars.return_value.all.return_value = []
        mock_indexing_runner.run.side_effect = Exception("Indexing error")

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert: the session is closed exactly once despite the error.
        mock_db_session.close.assert_called_once()

    def test_duplicate_document_indexing_document_is_paused(
        self,
        mock_db_session,
        mock_indexing_runner,
        mock_feature_service,
        mock_index_processor_factory,
        mock_dataset,
        mock_documents,
        dataset_id,
        document_ids,
    ):
        """DocumentIsPausedError from the runner does not escape; the session is closed once."""
        # Arrange
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.side_effect = [mock_dataset, *mock_documents]
        mock_db_session.scalars.return_value.all.return_value = []
        mock_indexing_runner.run.side_effect = DocumentIsPausedError("Document paused")

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert
        mock_db_session.close.assert_called_once()

    def test_duplicate_document_indexing_cleans_old_segments(
        self,
        mock_db_session,
        mock_indexing_runner,
        mock_feature_service,
        mock_index_processor_factory,
        mock_dataset,
        mock_documents,
        mock_document_segments,
        dataset_id,
        document_ids,
    ):
        """Existing segments are cleaned per document and deleted from the session."""
        # Arrange
        first_lookup = mock_db_session.query.return_value.where.return_value.first
        first_lookup.side_effect = [mock_dataset, *mock_documents]
        mock_db_session.scalars.return_value.all.return_value = mock_document_segments
        index_processor = mock_index_processor_factory.return_value.init_index_processor.return_value

        # Act
        _duplicate_document_indexing_task(dataset_id, document_ids)

        # Assert: clean() ran once per document.
        assert index_processor.clean.call_count == len(mock_documents)

        # Assert: every segment was passed to session.delete().
        for stale_segment in mock_document_segments:
            mock_db_session.delete.assert_any_call(stale_segment)
# ============================================================================
# Tests for tenant queue wrapper function
# ============================================================================
class TestDuplicateDocumentIndexingTaskWithTenantQueue:
    """Covers the _duplicate_document_indexing_task_with_tenant_queue wrapper."""

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task")
    def test_tenant_queue_wrapper_calls_core_function(
        self,
        mock_core_func,
        mock_tenant_isolated_queue,
        tenant_id,
        dataset_id,
        document_ids,
    ):
        """The wrapper invokes the core function once with the dataset and document IDs."""
        # Arrange
        follow_up_task = Mock()

        # Act
        _duplicate_document_indexing_task_with_tenant_queue(tenant_id, dataset_id, document_ids, follow_up_task)

        # Assert
        mock_core_func.assert_called_once_with(dataset_id, document_ids)

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task")
    def test_tenant_queue_wrapper_deletes_key_when_no_tasks(
        self,
        mock_core_func,
        mock_tenant_isolated_queue,
        tenant_id,
        dataset_id,
        document_ids,
    ):
        """With an empty queue the wrapper deletes the task key exactly once."""
        # Arrange
        follow_up_task = Mock()
        mock_tenant_isolated_queue.pull_tasks.return_value = []

        # Act
        _duplicate_document_indexing_task_with_tenant_queue(tenant_id, dataset_id, document_ids, follow_up_task)

        # Assert
        mock_tenant_isolated_queue.delete_task_key.assert_called_once()

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task")
    def test_tenant_queue_wrapper_processes_next_tasks(
        self,
        mock_core_func,
        mock_tenant_isolated_queue,
        tenant_id,
        dataset_id,
        document_ids,
    ):
        """A pulled task is re-dispatched through task_func.delay after the waiting time is set."""
        # Arrange: the queue holds one pending task for the same tenant.
        follow_up_task = Mock()
        queued_payload = {
            "tenant_id": tenant_id,
            "dataset_id": dataset_id,
            "document_ids": document_ids,
        }
        mock_tenant_isolated_queue.pull_tasks.return_value = [queued_payload]

        # Act
        _duplicate_document_indexing_task_with_tenant_queue(tenant_id, dataset_id, document_ids, follow_up_task)

        # Assert
        mock_tenant_isolated_queue.set_task_waiting_time.assert_called_once()
        follow_up_task.delay.assert_called_once_with(
            tenant_id=tenant_id,
            dataset_id=dataset_id,
            document_ids=document_ids,
        )

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task")
    def test_tenant_queue_wrapper_handles_core_function_error(
        self,
        mock_core_func,
        mock_tenant_isolated_queue,
        tenant_id,
        dataset_id,
        document_ids,
    ):
        """An exception from the core function does not stop the wrapper from pulling next tasks."""
        # Arrange
        follow_up_task = Mock()
        mock_core_func.side_effect = Exception("Core function error")

        # Act
        _duplicate_document_indexing_task_with_tenant_queue(tenant_id, dataset_id, document_ids, follow_up_task)

        # Assert: the queue is still polled once after the failure.
        mock_tenant_isolated_queue.pull_tasks.assert_called_once()
# ============================================================================
# Tests for normal_duplicate_document_indexing_task
# ============================================================================
class TestNormalDuplicateDocumentIndexingTask:
    """Covers normal_duplicate_document_indexing_task."""

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task_with_tenant_queue")
    def test_normal_task_calls_tenant_queue_wrapper(
        self,
        mock_wrapper_func,
        tenant_id,
        dataset_id,
        document_ids,
    ):
        """The normal task delegates to the tenant queue wrapper, passing itself as the task function."""
        # Act
        normal_duplicate_document_indexing_task(tenant_id, dataset_id, document_ids)

        # Assert
        expected_args = (tenant_id, dataset_id, document_ids, normal_duplicate_document_indexing_task)
        mock_wrapper_func.assert_called_once_with(*expected_args)

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task_with_tenant_queue")
    def test_normal_task_with_empty_document_ids(
        self,
        mock_wrapper_func,
        tenant_id,
        dataset_id,
    ):
        """An empty document_ids list is still handed to the tenant queue wrapper."""
        # Arrange
        no_documents = []

        # Act
        normal_duplicate_document_indexing_task(tenant_id, dataset_id, no_documents)

        # Assert
        expected_args = (tenant_id, dataset_id, no_documents, normal_duplicate_document_indexing_task)
        mock_wrapper_func.assert_called_once_with(*expected_args)
# ============================================================================
# Tests for priority_duplicate_document_indexing_task
# ============================================================================
class TestPriorityDuplicateDocumentIndexingTask:
    """Covers priority_duplicate_document_indexing_task."""

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task_with_tenant_queue")
    def test_priority_task_calls_tenant_queue_wrapper(
        self,
        mock_wrapper_func,
        tenant_id,
        dataset_id,
        document_ids,
    ):
        """The priority task delegates to the tenant queue wrapper, passing itself as the task function."""
        # Act
        priority_duplicate_document_indexing_task(tenant_id, dataset_id, document_ids)

        # Assert
        expected_args = (tenant_id, dataset_id, document_ids, priority_duplicate_document_indexing_task)
        mock_wrapper_func.assert_called_once_with(*expected_args)

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task_with_tenant_queue")
    def test_priority_task_with_single_document(
        self,
        mock_wrapper_func,
        tenant_id,
        dataset_id,
    ):
        """A one-element document list is handed to the tenant queue wrapper unchanged."""
        # Arrange
        single_document = ["doc-1"]

        # Act
        priority_duplicate_document_indexing_task(tenant_id, dataset_id, single_document)

        # Assert
        expected_args = (tenant_id, dataset_id, single_document, priority_duplicate_document_indexing_task)
        mock_wrapper_func.assert_called_once_with(*expected_args)

    @patch("tasks.duplicate_document_indexing_task._duplicate_document_indexing_task_with_tenant_queue")
    def test_priority_task_with_large_batch(
        self,
        mock_wrapper_func,
        tenant_id,
        dataset_id,
    ):
        """A 100-element document list is handed to the tenant queue wrapper unchanged."""
        # Arrange
        large_batch = [f"doc-{i}" for i in range(100)]

        # Act
        priority_duplicate_document_indexing_task(tenant_id, dataset_id, large_batch)

        # Assert
        expected_args = (tenant_id, dataset_id, large_batch, priority_duplicate_document_indexing_task)
        mock_wrapper_func.assert_called_once_with(*expected_args)