Feat/support multimodal embedding (#29115)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Jyong
2025-12-09 14:41:46 +08:00
committed by GitHub
parent 77cf8f6c27
commit 9affc546c6
78 changed files with 3230 additions and 713 deletions

View File

@@ -3,7 +3,7 @@ from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
@@ -95,7 +95,7 @@ class TestAddDocumentToIndexTask:
created_by=account.id,
indexing_status="completed",
enabled=True,
doc_form=IndexType.PARAGRAPH_INDEX,
doc_form=IndexStructureType.PARAGRAPH_INDEX,
)
db.session.add(document)
db.session.commit()
@@ -172,7 +172,9 @@ class TestAddDocumentToIndexTask:
# Assert: Verify the expected outcomes
# Verify index processor was called correctly
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.PARAGRAPH_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify database state changes
@@ -204,7 +206,7 @@ class TestAddDocumentToIndexTask:
)
# Update document to use different index type
document.doc_form = IndexType.QA_INDEX
document.doc_form = IndexStructureType.QA_INDEX
db.session.commit()
# Refresh dataset to ensure doc_form property reflects the updated document
@@ -221,7 +223,9 @@ class TestAddDocumentToIndexTask:
add_document_to_index_task(document.id)
# Assert: Verify different index type handling
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.QA_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with correct parameters
@@ -360,7 +364,7 @@ class TestAddDocumentToIndexTask:
)
# Update document to use parent-child index type
document.doc_form = IndexType.PARENT_CHILD_INDEX
document.doc_form = IndexStructureType.PARENT_CHILD_INDEX
db.session.commit()
# Refresh dataset to ensure doc_form property reflects the updated document
@@ -391,7 +395,7 @@ class TestAddDocumentToIndexTask:
# Assert: Verify parent-child index processing
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexType.PARENT_CHILD_INDEX
IndexStructureType.PARENT_CHILD_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
@@ -465,8 +469,10 @@ class TestAddDocumentToIndexTask:
# Act: Execute the task
add_document_to_index_task(document.id)
# Assert: Verify index processing occurred with all completed segments
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
# Assert: Verify index processing occurred with all completed segments
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.PARAGRAPH_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with all completed segments
@@ -532,7 +538,9 @@ class TestAddDocumentToIndexTask:
assert len(remaining_logs) == 0
# Verify index processing occurred normally
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.PARAGRAPH_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify segments were enabled
@@ -699,7 +707,9 @@ class TestAddDocumentToIndexTask:
add_document_to_index_task(document.id)
# Assert: Verify only eligible segments were processed
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.PARAGRAPH_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with correct parameters

View File

@@ -12,7 +12,7 @@ from unittest.mock import MagicMock, patch
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from models import Account, Dataset, Document, DocumentSegment, Tenant
from tasks.delete_segment_from_index_task import delete_segment_from_index_task
@@ -164,7 +164,7 @@ class TestDeleteSegmentFromIndexTask:
document.updated_at = fake.date_time_this_year()
document.doc_type = kwargs.get("doc_type", "text")
document.doc_metadata = kwargs.get("doc_metadata", {})
document.doc_form = kwargs.get("doc_form", IndexType.PARAGRAPH_INDEX)
document.doc_form = kwargs.get("doc_form", IndexStructureType.PARAGRAPH_INDEX)
document.doc_language = kwargs.get("doc_language", "en")
db_session_with_containers.add(document)
@@ -244,8 +244,11 @@ class TestDeleteSegmentFromIndexTask:
mock_processor = MagicMock()
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
# Extract segment IDs for the task
segment_ids = [segment.id for segment in segments]
# Execute the task
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed successfully
assert result is None # Task should return None on success
@@ -279,7 +282,7 @@ class TestDeleteSegmentFromIndexTask:
index_node_ids = [f"node_{fake.uuid4()}" for _ in range(3)]
# Execute the task with non-existent dataset
result = delete_segment_from_index_task(index_node_ids, non_existent_dataset_id, non_existent_document_id)
result = delete_segment_from_index_task(index_node_ids, non_existent_dataset_id, non_existent_document_id, [])
# Verify the task completed without exceptions
assert result is None # Task should return None when dataset not found
@@ -305,7 +308,7 @@ class TestDeleteSegmentFromIndexTask:
index_node_ids = [f"node_{fake.uuid4()}" for _ in range(3)]
# Execute the task with non-existent document
result = delete_segment_from_index_task(index_node_ids, dataset.id, non_existent_document_id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, non_existent_document_id, [])
# Verify the task completed without exceptions
assert result is None # Task should return None when document not found
@@ -330,9 +333,10 @@ class TestDeleteSegmentFromIndexTask:
segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
index_node_ids = [segment.index_node_id for segment in segments]
segment_ids = [segment.id for segment in segments]
# Execute the task with disabled document
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed without exceptions
assert result is None # Task should return None when document is disabled
@@ -357,9 +361,10 @@ class TestDeleteSegmentFromIndexTask:
segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
index_node_ids = [segment.index_node_id for segment in segments]
segment_ids = [segment.id for segment in segments]
# Execute the task with archived document
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed without exceptions
assert result is None # Task should return None when document is archived
@@ -386,9 +391,10 @@ class TestDeleteSegmentFromIndexTask:
segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
index_node_ids = [segment.index_node_id for segment in segments]
segment_ids = [segment.id for segment in segments]
# Execute the task with incomplete indexing
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed without exceptions
assert result is None # Task should return None when indexing is not completed
@@ -409,7 +415,11 @@ class TestDeleteSegmentFromIndexTask:
fake = Faker()
# Test different document forms
document_forms = [IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX, IndexType.PARENT_CHILD_INDEX]
document_forms = [
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.QA_INDEX,
IndexStructureType.PARENT_CHILD_INDEX,
]
for doc_form in document_forms:
# Create test data for each document form
@@ -420,13 +430,14 @@ class TestDeleteSegmentFromIndexTask:
segments = self._create_test_document_segments(db_session_with_containers, document, account, 2, fake)
index_node_ids = [segment.index_node_id for segment in segments]
segment_ids = [segment.id for segment in segments]
# Mock the index processor
mock_processor = MagicMock()
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
# Execute the task
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed successfully
assert result is None
@@ -469,6 +480,7 @@ class TestDeleteSegmentFromIndexTask:
segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
index_node_ids = [segment.index_node_id for segment in segments]
segment_ids = [segment.id for segment in segments]
# Mock the index processor to raise an exception
mock_processor = MagicMock()
@@ -476,7 +488,7 @@ class TestDeleteSegmentFromIndexTask:
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
# Execute the task - should not raise exception
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed without raising exceptions
assert result is None # Task should return None even when exceptions occur
@@ -518,7 +530,7 @@ class TestDeleteSegmentFromIndexTask:
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
# Execute the task
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, [])
# Verify the task completed successfully
assert result is None
@@ -555,13 +567,14 @@ class TestDeleteSegmentFromIndexTask:
# Create large number of segments
segments = self._create_test_document_segments(db_session_with_containers, document, account, 50, fake)
index_node_ids = [segment.index_node_id for segment in segments]
segment_ids = [segment.id for segment in segments]
# Mock the index processor
mock_processor = MagicMock()
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
# Execute the task
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id, segment_ids)
# Verify the task completed successfully
assert result is None

View File

@@ -3,7 +3,7 @@ from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
@@ -95,7 +95,7 @@ class TestEnableSegmentsToIndexTask:
created_by=account.id,
indexing_status="completed",
enabled=True,
doc_form=IndexType.PARAGRAPH_INDEX,
doc_form=IndexStructureType.PARAGRAPH_INDEX,
)
db.session.add(document)
db.session.commit()
@@ -166,7 +166,7 @@ class TestEnableSegmentsToIndexTask:
)
# Update document to use different index type
document.doc_form = IndexType.QA_INDEX
document.doc_form = IndexStructureType.QA_INDEX
db.session.commit()
# Refresh dataset to ensure doc_form property reflects the updated document
@@ -185,7 +185,9 @@ class TestEnableSegmentsToIndexTask:
enable_segments_to_index_task(segment_ids, dataset.id, document.id)
# Assert: Verify different index type handling
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.QA_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()
# Verify the load method was called with correct parameters
@@ -328,7 +330,9 @@ class TestEnableSegmentsToIndexTask:
enable_segments_to_index_task(non_existent_segment_ids, dataset.id, document.id)
# Assert: Verify index processor was created but load was not called
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexStructureType.PARAGRAPH_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_not_called()
def test_enable_segments_to_index_with_parent_child_structure(
@@ -350,7 +354,7 @@ class TestEnableSegmentsToIndexTask:
)
# Update document to use parent-child index type
document.doc_form = IndexType.PARENT_CHILD_INDEX
document.doc_form = IndexStructureType.PARENT_CHILD_INDEX
db.session.commit()
# Refresh dataset to ensure doc_form property reflects the updated document
@@ -383,7 +387,7 @@ class TestEnableSegmentsToIndexTask:
# Assert: Verify parent-child index processing
mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
IndexType.PARENT_CHILD_INDEX
IndexStructureType.PARENT_CHILD_INDEX
)
mock_external_service_dependencies["index_processor"].load.assert_called_once()