Make logging calls use lazy %-style arguments instead of f-strings, and convert remaining str.format() calls to f-strings (#22882)
This commit is contained in:
@@ -203,9 +203,9 @@ class BaiduVector(BaseVector):
|
||||
|
||||
def _create_table(self, dimension: int) -> None:
|
||||
# Try to grab distributed lock and create table
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=60):
|
||||
table_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
table_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(table_exist_cache_key):
|
||||
return
|
||||
|
||||
|
||||
@@ -57,9 +57,9 @@ class ChromaVector(BaseVector):
|
||||
self.add_texts(texts, embeddings, **kwargs)
|
||||
|
||||
def create_collection(self, collection_name: str):
|
||||
lock_name = "vector_indexing_lock_{}".format(collection_name)
|
||||
lock_name = f"vector_indexing_lock_{collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
self._client.get_or_create_collection(collection_name)
|
||||
|
||||
@@ -74,9 +74,9 @@ class CouchbaseVector(BaseVector):
|
||||
self.add_texts(texts, embeddings)
|
||||
|
||||
def _create_collection(self, vector_length: int, uuid: str):
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
if self._collection_exists(self._collection_name):
|
||||
@@ -242,7 +242,7 @@ class CouchbaseVector(BaseVector):
|
||||
try:
|
||||
self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to delete documents, ids: {ids}")
|
||||
logger.exception("Failed to delete documents, ids: %s", ids)
|
||||
|
||||
def delete_by_document_id(self, document_id: str):
|
||||
query = f"""
|
||||
|
||||
@@ -29,7 +29,7 @@ class ElasticSearchJaVector(ElasticSearchVector):
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
logger.info(f"Collection {self._collection_name} already exists.")
|
||||
logger.info("Collection %s already exists.", self._collection_name)
|
||||
return
|
||||
|
||||
if not self._client.indices.exists(index=self._collection_name):
|
||||
|
||||
@@ -186,7 +186,7 @@ class ElasticSearchVector(BaseVector):
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
logger.info(f"Collection {self._collection_name} already exists.")
|
||||
logger.info("Collection %s already exists.", self._collection_name)
|
||||
return
|
||||
|
||||
if not self._client.indices.exists(index=self._collection_name):
|
||||
|
||||
@@ -164,7 +164,7 @@ class HuaweiCloudVector(BaseVector):
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
logger.info(f"Collection {self._collection_name} already exists.")
|
||||
logger.info("Collection %s already exists.", self._collection_name)
|
||||
return
|
||||
|
||||
if not self._client.indices.exists(index=self._collection_name):
|
||||
|
||||
@@ -89,7 +89,7 @@ class LindormVectorStore(BaseVector):
|
||||
timeout: int = 60,
|
||||
**kwargs,
|
||||
):
|
||||
logger.info(f"Total documents to add: {len(documents)}")
|
||||
logger.info("Total documents to add: %s", len(documents))
|
||||
uuids = self._get_uuids(documents)
|
||||
|
||||
total_docs = len(documents)
|
||||
@@ -147,7 +147,7 @@ class LindormVectorStore(BaseVector):
|
||||
time.sleep(0.5)
|
||||
|
||||
except Exception:
|
||||
logger.exception(f"Failed to process batch {batch_num + 1}")
|
||||
logger.exception("Failed to process batch %s", batch_num + 1)
|
||||
raise
|
||||
|
||||
def get_ids_by_metadata_field(self, key: str, value: str):
|
||||
@@ -180,7 +180,7 @@ class LindormVectorStore(BaseVector):
|
||||
|
||||
# 1. First check if collection exists
|
||||
if not self._client.indices.exists(index=self._collection_name):
|
||||
logger.warning(f"Collection {self._collection_name} does not exist")
|
||||
logger.warning("Collection %s does not exist", self._collection_name)
|
||||
return
|
||||
|
||||
# 2. Batch process deletions
|
||||
@@ -196,7 +196,7 @@ class LindormVectorStore(BaseVector):
|
||||
}
|
||||
)
|
||||
else:
|
||||
logger.warning(f"DELETE BY ID: ID {id} does not exist in the index.")
|
||||
logger.warning("DELETE BY ID: ID %s does not exist in the index.", id)
|
||||
|
||||
# 3. Perform bulk deletion if there are valid documents to delete
|
||||
if actions:
|
||||
@@ -209,9 +209,9 @@ class LindormVectorStore(BaseVector):
|
||||
doc_id = delete_error.get("_id")
|
||||
|
||||
if status == 404:
|
||||
logger.warning(f"Document not found for deletion: {doc_id}")
|
||||
logger.warning("Document not found for deletion: %s", doc_id)
|
||||
else:
|
||||
logger.exception(f"Error deleting document: {error}")
|
||||
logger.exception("Error deleting document: %s", error)
|
||||
|
||||
def delete(self) -> None:
|
||||
if self._using_ugc:
|
||||
@@ -225,7 +225,7 @@ class LindormVectorStore(BaseVector):
|
||||
self._client.indices.delete(index=self._collection_name, params={"timeout": 60})
|
||||
logger.info("Delete index success")
|
||||
else:
|
||||
logger.warning(f"Index '{self._collection_name}' does not exist. No deletion performed.")
|
||||
logger.warning("Index '%s' does not exist. No deletion performed.", self._collection_name)
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
try:
|
||||
@@ -257,7 +257,7 @@ class LindormVectorStore(BaseVector):
|
||||
params["routing"] = self._routing # type: ignore
|
||||
response = self._client.search(index=self._collection_name, body=query, params=params)
|
||||
except Exception:
|
||||
logger.exception(f"Error executing vector search, query: {query}")
|
||||
logger.exception("Error executing vector search, query: %s", query)
|
||||
raise
|
||||
|
||||
docs_and_scores = []
|
||||
@@ -324,10 +324,10 @@ class LindormVectorStore(BaseVector):
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
logger.info(f"Collection {self._collection_name} already exists.")
|
||||
logger.info("Collection %s already exists.", self._collection_name)
|
||||
return
|
||||
if self._client.indices.exists(index=self._collection_name):
|
||||
logger.info(f"{self._collection_name.lower()} already exists.")
|
||||
logger.info("%s already exists.", self._collection_name.lower())
|
||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||
return
|
||||
if len(self.kwargs) == 0 and len(kwargs) != 0:
|
||||
|
||||
@@ -103,7 +103,7 @@ class MilvusVector(BaseVector):
|
||||
# For standard Milvus installations, check version number
|
||||
return version.parse(milvus_version).base_version >= version.parse("2.5.0").base_version
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to check Milvus version: {str(e)}. Disabling hybrid search.")
|
||||
logger.warning("Failed to check Milvus version: %s. Disabling hybrid search.", str(e))
|
||||
return False
|
||||
|
||||
def get_type(self) -> str:
|
||||
@@ -289,9 +289,9 @@ class MilvusVector(BaseVector):
|
||||
"""
|
||||
Create a new collection in Milvus with the specified schema and index parameters.
|
||||
"""
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
# Grab the existing collection if it exists
|
||||
|
||||
@@ -53,7 +53,7 @@ class MyScaleVector(BaseVector):
|
||||
return self.add_texts(documents=texts, embeddings=embeddings, **kwargs)
|
||||
|
||||
def _create_collection(self, dimension: int):
|
||||
logging.info(f"create MyScale collection {self._collection_name} with dimension {dimension}")
|
||||
logging.info("create MyScale collection %s with dimension %s", self._collection_name, dimension)
|
||||
self._client.command(f"CREATE DATABASE IF NOT EXISTS {self._config.database}")
|
||||
fts_params = f"('{self._config.fts_params}')" if self._config.fts_params else ""
|
||||
sql = f"""
|
||||
@@ -151,7 +151,7 @@ class MyScaleVector(BaseVector):
|
||||
for r in self._client.query(sql).named_results()
|
||||
]
|
||||
except Exception as e:
|
||||
logging.exception(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m") # noqa:TRY401
|
||||
logging.exception("\033[91m\033[1m%s\033[0m \033[95m%s\033[0m", type(e), str(e)) # noqa:TRY401
|
||||
return []
|
||||
|
||||
def delete(self) -> None:
|
||||
|
||||
@@ -147,7 +147,7 @@ class OceanBaseVector(BaseVector):
|
||||
logger.debug("Current OceanBase version is %s", ob_version)
|
||||
return version.parse(ob_version).base_version >= version.parse("4.3.5.1").base_version
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to check OceanBase version: {str(e)}. Disabling hybrid search.")
|
||||
logger.warning("Failed to check OceanBase version: %s. Disabling hybrid search.", str(e))
|
||||
return False
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
@@ -229,7 +229,7 @@ class OceanBaseVector(BaseVector):
|
||||
|
||||
return docs
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fulltext search: {str(e)}.")
|
||||
logger.warning("Failed to fulltext search: %s.", str(e))
|
||||
return []
|
||||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
|
||||
|
||||
@@ -131,7 +131,7 @@ class OpenSearchVector(BaseVector):
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
index_name = self._collection_name.lower()
|
||||
if not self._client.indices.exists(index=index_name):
|
||||
logger.warning(f"Index {index_name} does not exist")
|
||||
logger.warning("Index %s does not exist", index_name)
|
||||
return
|
||||
|
||||
# Obtaining All Actual Documents_ID
|
||||
@@ -142,7 +142,7 @@ class OpenSearchVector(BaseVector):
|
||||
if es_ids:
|
||||
actual_ids.extend(es_ids)
|
||||
else:
|
||||
logger.warning(f"Document with metadata doc_id {doc_id} not found for deletion")
|
||||
logger.warning("Document with metadata doc_id %s not found for deletion", doc_id)
|
||||
|
||||
if actual_ids:
|
||||
actions = [{"_op_type": "delete", "_index": index_name, "_id": es_id} for es_id in actual_ids]
|
||||
@@ -155,9 +155,9 @@ class OpenSearchVector(BaseVector):
|
||||
doc_id = delete_error.get("_id")
|
||||
|
||||
if status == 404:
|
||||
logger.warning(f"Document not found for deletion: {doc_id}")
|
||||
logger.warning("Document not found for deletion: %s", doc_id)
|
||||
else:
|
||||
logger.exception(f"Error deleting document: {error}")
|
||||
logger.exception("Error deleting document: %s", error)
|
||||
|
||||
def delete(self) -> None:
|
||||
self._client.indices.delete(index=self._collection_name.lower())
|
||||
@@ -198,7 +198,7 @@ class OpenSearchVector(BaseVector):
|
||||
try:
|
||||
response = self._client.search(index=self._collection_name.lower(), body=query)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error executing vector search, query: {query}")
|
||||
logger.exception("Error executing vector search, query: %s", query)
|
||||
raise
|
||||
|
||||
docs = []
|
||||
@@ -242,7 +242,7 @@ class OpenSearchVector(BaseVector):
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name.lower()}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
logger.info(f"Collection {self._collection_name.lower()} already exists.")
|
||||
logger.info("Collection %s already exists.", self._collection_name.lower())
|
||||
return
|
||||
|
||||
if not self._client.indices.exists(index=self._collection_name.lower()):
|
||||
@@ -272,7 +272,7 @@ class OpenSearchVector(BaseVector):
|
||||
},
|
||||
}
|
||||
|
||||
logger.info(f"Creating OpenSearch index {self._collection_name.lower()}")
|
||||
logger.info("Creating OpenSearch index %s", self._collection_name.lower())
|
||||
self._client.indices.create(index=self._collection_name.lower(), body=index_body)
|
||||
|
||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||
|
||||
@@ -82,9 +82,9 @@ class PGVectoRS(BaseVector):
|
||||
self.add_texts(texts, embeddings)
|
||||
|
||||
def create_collection(self, dimension: int):
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
index_name = f"{self._collection_name}_embedding_index"
|
||||
|
||||
@@ -155,7 +155,7 @@ class PGVector(BaseVector):
|
||||
cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s", (tuple(ids),))
|
||||
except psycopg2.errors.UndefinedTable:
|
||||
# table not exists
|
||||
logging.warning(f"Table {self.table_name} not found, skipping delete operation.")
|
||||
logging.warning("Table %s not found, skipping delete operation.", self.table_name)
|
||||
return
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
@@ -95,9 +95,9 @@ class QdrantVector(BaseVector):
|
||||
self.add_texts(texts, embeddings, **kwargs)
|
||||
|
||||
def create_collection(self, collection_name: str, vector_size: int):
|
||||
lock_name = "vector_indexing_lock_{}".format(collection_name)
|
||||
lock_name = f"vector_indexing_lock_{collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
collection_name = collection_name or uuid.uuid4().hex
|
||||
|
||||
@@ -70,9 +70,9 @@ class RelytVector(BaseVector):
|
||||
self.add_texts(texts, embeddings)
|
||||
|
||||
def create_collection(self, dimension: int):
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
index_name = f"{self._collection_name}_embedding_index"
|
||||
|
||||
@@ -142,7 +142,7 @@ class TableStoreVector(BaseVector):
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
logging.info(f"Collection {self._collection_name} already exists.")
|
||||
logging.info("Collection %s already exists.", self._collection_name)
|
||||
return
|
||||
|
||||
self._create_table_if_not_exist()
|
||||
|
||||
@@ -92,9 +92,9 @@ class TencentVector(BaseVector):
|
||||
|
||||
def _create_collection(self, dimension: int) -> None:
|
||||
self._dimension = dimension
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
|
||||
|
||||
@@ -104,9 +104,9 @@ class TidbOnQdrantVector(BaseVector):
|
||||
self.add_texts(texts, embeddings, **kwargs)
|
||||
|
||||
def create_collection(self, collection_name: str, vector_size: int):
|
||||
lock_name = "vector_indexing_lock_{}".format(collection_name)
|
||||
lock_name = f"vector_indexing_lock_{collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
collection_name = collection_name or uuid.uuid4().hex
|
||||
|
||||
@@ -91,9 +91,9 @@ class TiDBVector(BaseVector):
|
||||
|
||||
def _create_collection(self, dimension: int):
|
||||
logger.info("_create_collection, collection_name " + self._collection_name)
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
tidb_dist_func = self._get_distance_func()
|
||||
@@ -192,7 +192,7 @@ class TiDBVector(BaseVector):
|
||||
query_vector_str = ", ".join(format(x) for x in query_vector)
|
||||
query_vector_str = "[" + query_vector_str + "]"
|
||||
logger.debug(
|
||||
f"_collection_name: {self._collection_name}, score_threshold: {score_threshold}, distance: {distance}"
|
||||
"_collection_name: %s, score_threshold: %s, distance: %s", self._collection_name, score_threshold, distance
|
||||
)
|
||||
|
||||
docs = []
|
||||
|
||||
@@ -178,19 +178,19 @@ class Vector:
|
||||
def create(self, texts: Optional[list] = None, **kwargs):
    """Embed ``texts`` in fixed-size batches and store each batch.

    Each batch is embedded with ``self._embeddings.embed_documents`` and then
    handed to ``self._vector_processor.create``. Progress and timing are
    logged per batch and for the whole run. Does nothing when ``texts`` is
    empty or ``None``.

    Args:
        texts: Documents to index; each item's ``page_content`` is embedded.
        **kwargs: Forwarded unchanged to ``self._vector_processor.create``.
    """
    if not texts:
        return

    start = time.time()
    logger.info("start embedding %s texts %s", len(texts), start)
    batch_size = 1000
    # Ceiling division: this is the number of batches, not the number of texts.
    total_batches = (len(texts) + batch_size - 1) // batch_size
    for batch_num, offset in enumerate(range(0, len(texts), batch_size), start=1):
        batch = texts[offset : offset + batch_size]
        batch_start = time.time()
        logger.info("Processing batch %s/%s (%s texts)", batch_num, total_batches, len(batch))
        batch_embeddings = self._embeddings.embed_documents([document.page_content for document in batch])
        # %.3f keeps the millisecond precision the f-string version had.
        logger.info("Embedding batch %s/%s took %.3fs", batch_num, total_batches, time.time() - batch_start)
        self._vector_processor.create(texts=batch, embeddings=batch_embeddings, **kwargs)
    logger.info("Embedding %s texts took %.3fs", len(texts), time.time() - start)
|
||||
|
||||
def add_texts(self, documents: list[Document], **kwargs):
|
||||
if kwargs.get("duplicate_check", False):
|
||||
@@ -219,7 +219,7 @@ class Vector:
|
||||
self._vector_processor.delete()
|
||||
# delete collection redis cache
|
||||
if self._vector_processor.collection_name:
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._vector_processor.collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._vector_processor.collection_name}"
|
||||
redis_client.delete(collection_exist_cache_key)
|
||||
|
||||
def _get_embeddings(self) -> Embeddings:
|
||||
|
||||
@@ -92,9 +92,9 @@ class WeaviateVector(BaseVector):
|
||||
self.add_texts(texts, embeddings)
|
||||
|
||||
def _create_collection(self):
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
lock_name = f"vector_indexing_lock_{self._collection_name}"
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
schema = self._default_schema(self._collection_name)
|
||||
|
||||
Reference in New Issue
Block a user