chore: tablestore full-text search supports score normalization (#23255)

Co-authored-by: xiaozhiqing.xzq <xiaozhiqing.xzq@alibaba-inc.com>
This commit is contained in:
wanttobeamaster
2025-08-01 14:14:11 +08:00
committed by GitHub
parent c33741a5e9
commit da5c003f97
6 changed files with 63 additions and 7 deletions

View File

@@ -2,6 +2,7 @@ import os
import uuid
import tablestore
from _pytest.python_api import approx
from core.rag.datasource.vdb.tablestore.tablestore_vector import (
TableStoreConfig,
@@ -16,7 +17,7 @@ from tests.integration_tests.vdb.test_vector_store import (
class TableStoreVectorTest(AbstractVectorTest):
def __init__(self):
def __init__(self, normalize_full_text_score: bool = False):
super().__init__()
self.vector = TableStoreVector(
collection_name=self.collection_name,
@@ -25,6 +26,7 @@ class TableStoreVectorTest(AbstractVectorTest):
instance_name=os.getenv("TABLESTORE_INSTANCE_NAME"),
access_key_id=os.getenv("TABLESTORE_ACCESS_KEY_ID"),
access_key_secret=os.getenv("TABLESTORE_ACCESS_KEY_SECRET"),
normalize_full_text_bm25_score=normalize_full_text_score,
),
)
@@ -64,7 +66,21 @@ class TableStoreVectorTest(AbstractVectorTest):
docs = self.vector.search_by_full_text(get_example_text(), document_ids_filter=[self.example_doc_id])
assert len(docs) == 1
assert docs[0].metadata["doc_id"] == self.example_doc_id
assert not hasattr(docs[0], "score")
if self.vector._config.normalize_full_text_bm25_score:
assert docs[0].metadata["score"] == approx(0.1214, abs=1e-3)
else:
assert docs[0].metadata.get("score") is None
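# Expect an empty result when normalize_full_text_score is True and score_threshold (0.5) exceeds the
# normalized score (~0.12); without normalization the document is still expected, with no score in metadata.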
docs = self.vector.search_by_full_text(
get_example_text(), document_ids_filter=[self.example_doc_id], score_threshold=0.5
)
if self.vector._config.normalize_full_text_bm25_score:
assert len(docs) == 0
else:
assert len(docs) == 1
assert docs[0].metadata["doc_id"] == self.example_doc_id
assert docs[0].metadata.get("score") is None
docs = self.vector.search_by_full_text(get_example_text(), document_ids_filter=[str(uuid.uuid4())])
assert len(docs) == 0
@@ -80,3 +96,5 @@ class TableStoreVectorTest(AbstractVectorTest):
def test_tablestore_vector(setup_mock_redis):
TableStoreVectorTest().run_all_tests()
TableStoreVectorTest(normalize_full_text_score=True).run_all_tests()
TableStoreVectorTest(normalize_full_text_score=False).run_all_tests()