feat: enable tenant isolation on duplicate document indexing tasks (#29080)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
11
api/services/document_indexing_proxy/__init__.py
Normal file
11
api/services/document_indexing_proxy/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .base import DocumentTaskProxyBase
|
||||
from .batch_indexing_base import BatchDocumentIndexingProxy
|
||||
from .document_indexing_task_proxy import DocumentIndexingTaskProxy
|
||||
from .duplicate_document_indexing_task_proxy import DuplicateDocumentIndexingTaskProxy
|
||||
|
||||
# Public API of the package: the names exported by a star-import.
__all__ = [
    "BatchDocumentIndexingProxy",
    "DocumentIndexingTaskProxy",
    "DocumentTaskProxyBase",
    "DuplicateDocumentIndexingTaskProxy",
]
|
||||
111
api/services/document_indexing_proxy/base.py
Normal file
111
api/services/document_indexing_proxy/base.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from functools import cached_property
|
||||
from typing import Any, ClassVar
|
||||
|
||||
from enums.cloud_plan import CloudPlan
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DocumentTaskProxyBase(ABC):
    """
    Base proxy for all document processing tasks.

    Handles common logic:
    - Feature/billing checks
    - Dispatch routing based on plan

    Subclasses must define:
    - QUEUE_NAME: Redis queue identifier
    - NORMAL_TASK_FUNC: Task function for normal priority
    - PRIORITY_TASK_FUNC: Task function for high priority

    Subclasses must also implement ``_send_to_direct_queue`` and
    ``_send_to_tenant_queue``; the class cannot be instantiated otherwise.
    """

    QUEUE_NAME: ClassVar[str]
    NORMAL_TASK_FUNC: ClassVar[Callable[..., Any]]
    PRIORITY_TASK_FUNC: ClassVar[Callable[..., Any]]

    def __init__(self, tenant_id: str, dataset_id: str) -> None:
        """
        Initialize with minimal required parameters.

        Args:
            tenant_id: Tenant identifier for billing/features
            dataset_id: Dataset identifier for logging
        """
        self._tenant_id = tenant_id
        self._dataset_id = dataset_id

    @cached_property
    def features(self):
        """Features for this tenant; fetched on first access, then cached on the instance."""
        return FeatureService.get_features(self._tenant_id)

    @abstractmethod
    def _send_to_direct_queue(self, task_func: Callable[..., Any]) -> None:
        """
        Send task directly to Celery queue without tenant isolation.

        Subclasses implement this to pass task-specific parameters.

        Args:
            task_func: The Celery task function to call
        """

    @abstractmethod
    def _send_to_tenant_queue(self, task_func: Callable[..., Any]) -> None:
        """
        Send task to tenant-isolated queue.

        Subclasses implement this to handle queue management.

        Args:
            task_func: The Celery task function to call
        """

    def _send_to_default_tenant_queue(self) -> None:
        """Route to normal priority with tenant isolation."""
        self._send_to_tenant_queue(self.NORMAL_TASK_FUNC)

    def _send_to_priority_tenant_queue(self) -> None:
        """Route to priority queue with tenant isolation."""
        self._send_to_tenant_queue(self.PRIORITY_TASK_FUNC)

    def _send_to_priority_direct_queue(self) -> None:
        """Route to priority queue without tenant isolation."""
        self._send_to_direct_queue(self.PRIORITY_TASK_FUNC)

    def _dispatch(self) -> None:
        """
        Dispatch task based on billing plan.

        Routing logic:
        - Sandbox plan → normal queue + tenant isolation
        - Paid plans → priority queue + tenant isolation
        - Self-hosted → priority queue, no isolation
        """
        # Resolve the billing section once; it is read for both logging and routing.
        billing = self.features.billing
        logger.info(
            "dispatch args: %s - %s - %s",
            self._tenant_id,
            billing.enabled,
            billing.subscription.plan,
        )

        if not billing.enabled:
            # Billing disabled (e.g. self-hosted or enterprise): priority queue,
            # no tenant isolation.
            self._send_to_priority_direct_queue()
            return

        # Billing enabled: always tenant-isolated; the plan only picks the priority.
        if billing.subscription.plan == CloudPlan.SANDBOX:
            # Sandbox plan: normal pipeline queue with the tenant's own sub queue.
            self._send_to_default_tenant_queue()
        else:
            # Any other plan: priority pipeline queue with the tenant's own sub queue.
            self._send_to_priority_tenant_queue()

    def delay(self) -> None:
        """Public API: Queue the task asynchronously."""
        self._dispatch()
|
||||
76
api/services/document_indexing_proxy/batch_indexing_base.py
Normal file
76
api/services/document_indexing_proxy/batch_indexing_base.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import logging
|
||||
from collections.abc import Callable, Sequence
|
||||
from dataclasses import asdict
|
||||
from typing import Any
|
||||
|
||||
from core.entities.document_task import DocumentTask
|
||||
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
||||
|
||||
from .base import DocumentTaskProxyBase
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BatchDocumentIndexingProxy(DocumentTaskProxyBase):
    """
    Base proxy for batch document indexing tasks (document_ids in plural).

    Adds:
    - Tenant isolated queue management
    - Batch document handling
    """

    def __init__(self, tenant_id: str, dataset_id: str, document_ids: Sequence[str]) -> None:
        """
        Initialize with batch documents.

        Args:
            tenant_id: Tenant identifier
            dataset_id: Dataset identifier
            document_ids: List of document IDs to process
        """
        super().__init__(tenant_id, dataset_id)
        self._document_ids = document_ids
        # QUEUE_NAME comes from the concrete subclass.
        self._tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, self.QUEUE_NAME)

    def _task_kwargs(self) -> dict[str, Any]:
        """Build the keyword payload shared by the Celery call and the queued DocumentTask."""
        return {
            "tenant_id": self._tenant_id,
            "dataset_id": self._dataset_id,
            "document_ids": self._document_ids,
        }

    def _send_to_direct_queue(self, task_func: Callable[[str, str, Sequence[str]], Any]) -> None:
        """
        Send batch task to direct queue.

        Args:
            task_func: The Celery task function to call with (tenant_id, dataset_id, document_ids)
        """
        logger.info("tenant %s send documents %s to direct queue", self._tenant_id, self._document_ids)
        task_func.delay(**self._task_kwargs())  # type: ignore

    def _send_to_tenant_queue(self, task_func: Callable[[str, str, Sequence[str]], Any]) -> None:
        """
        Send batch task to tenant-isolated queue.

        If the tenant queue reports a task key, the batch is pushed onto the
        waiting queue; otherwise the waiting-time flag is set and the task is
        sent to Celery right away.

        Args:
            task_func: The Celery task function to call with (tenant_id, dataset_id, document_ids)
        """
        logger.info(
            "tenant %s send documents %s to tenant queue %s", self._tenant_id, self._document_ids, self.QUEUE_NAME
        )
        tenant_queue = self._tenant_isolated_task_queue

        # NOTE(review): the key check and the flag set below are two separate
        # calls; confirm whether concurrent dispatchers for the same tenant can
        # both reach the "init" branch.
        if tenant_queue.get_task_key():
            # Add to waiting queue using List operations (lpush)
            tenant_queue.push_tasks([asdict(DocumentTask(**self._task_kwargs()))])
            logger.info("tenant %s push tasks: %s - %s", self._tenant_id, self._dataset_id, self._document_ids)
        else:
            # Set flag and execute task
            tenant_queue.set_task_waiting_time()
            task_func.delay(**self._task_kwargs())  # type: ignore
            logger.info("tenant %s init tasks: %s - %s", self._tenant_id, self._dataset_id, self._document_ids)
|
||||
@@ -0,0 +1,12 @@
|
||||
from typing import ClassVar
|
||||
|
||||
from services.document_indexing_proxy.batch_indexing_base import BatchDocumentIndexingProxy
|
||||
from tasks.document_indexing_task import normal_document_indexing_task, priority_document_indexing_task
|
||||
|
||||
|
||||
class DocumentIndexingTaskProxy(BatchDocumentIndexingProxy):
    """Proxy for document indexing tasks."""

    # Name handed to the tenant-isolated task queue by the batch base class.
    QUEUE_NAME: ClassVar[str] = "document_indexing"
    # Task used for the normal-priority route.
    NORMAL_TASK_FUNC = normal_document_indexing_task
    # Task used for the priority routes.
    PRIORITY_TASK_FUNC = priority_document_indexing_task
|
||||
@@ -0,0 +1,15 @@
|
||||
from typing import ClassVar
|
||||
|
||||
from services.document_indexing_proxy.batch_indexing_base import BatchDocumentIndexingProxy
|
||||
from tasks.duplicate_document_indexing_task import (
|
||||
normal_duplicate_document_indexing_task,
|
||||
priority_duplicate_document_indexing_task,
|
||||
)
|
||||
|
||||
|
||||
class DuplicateDocumentIndexingTaskProxy(BatchDocumentIndexingProxy):
    """Proxy for duplicate document indexing tasks."""

    # Name handed to the tenant-isolated task queue by the batch base class.
    QUEUE_NAME: ClassVar[str] = "duplicate_document_indexing"
    # Task used for the normal-priority route.
    NORMAL_TASK_FUNC = normal_duplicate_document_indexing_task
    # Task used for the priority routes.
    PRIORITY_TASK_FUNC = priority_duplicate_document_indexing_task
|
||||
Reference in New Issue
Block a user