feat: couchbase integration (#6165)
Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: Elliot Scribner <elliot.scribner@couchbase.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Bowen Liang <bowenliang@apache.org>
This commit is contained in:
@@ -120,7 +120,7 @@ SUPABASE_URL=your-server-url
|
||||
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||
|
||||
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, vikingdb, upstash
|
||||
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash
|
||||
VECTOR_STORE=weaviate
|
||||
|
||||
# Weaviate configuration
|
||||
@@ -136,6 +136,13 @@ QDRANT_CLIENT_TIMEOUT=20
|
||||
QDRANT_GRPC_ENABLED=false
|
||||
QDRANT_GRPC_PORT=6334
|
||||
|
||||
#Couchbase configuration
|
||||
COUCHBASE_CONNECTION_STRING=127.0.0.1
|
||||
COUCHBASE_USER=Administrator
|
||||
COUCHBASE_PASSWORD=password
|
||||
COUCHBASE_BUCKET_NAME=Embeddings
|
||||
COUCHBASE_SCOPE_NAME=_default
|
||||
|
||||
# Milvus configuration
|
||||
MILVUS_URI=http://127.0.0.1:19530
|
||||
MILVUS_TOKEN=
|
||||
|
||||
@@ -278,6 +278,7 @@ def migrate_knowledge_vector_database():
|
||||
VectorType.BAIDU,
|
||||
VectorType.VIKINGDB,
|
||||
VectorType.UPSTASH,
|
||||
VectorType.COUCHBASE,
|
||||
}
|
||||
page = 1
|
||||
while True:
|
||||
|
||||
@@ -17,6 +17,7 @@ from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCO
|
||||
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
|
||||
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
|
||||
from configs.middleware.vdb.chroma_config import ChromaConfig
|
||||
from configs.middleware.vdb.couchbase_config import CouchbaseConfig
|
||||
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
|
||||
from configs.middleware.vdb.milvus_config import MilvusConfig
|
||||
from configs.middleware.vdb.myscale_config import MyScaleConfig
|
||||
@@ -251,6 +252,7 @@ class MiddlewareConfig(
|
||||
TiDBVectorConfig,
|
||||
WeaviateConfig,
|
||||
ElasticsearchConfig,
|
||||
CouchbaseConfig,
|
||||
InternalTestConfig,
|
||||
VikingDBConfig,
|
||||
UpstashConfig,
|
||||
|
||||
34
api/configs/middleware/vdb/couchbase_config.py
Normal file
34
api/configs/middleware/vdb/couchbase_config.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class CouchbaseConfig(BaseModel):
|
||||
"""
|
||||
Couchbase configs
|
||||
"""
|
||||
|
||||
COUCHBASE_CONNECTION_STRING: Optional[str] = Field(
|
||||
description="COUCHBASE connection string",
|
||||
default=None,
|
||||
)
|
||||
|
||||
COUCHBASE_USER: Optional[str] = Field(
|
||||
description="COUCHBASE user",
|
||||
default=None,
|
||||
)
|
||||
|
||||
COUCHBASE_PASSWORD: Optional[str] = Field(
|
||||
description="COUCHBASE password",
|
||||
default=None,
|
||||
)
|
||||
|
||||
COUCHBASE_BUCKET_NAME: Optional[str] = Field(
|
||||
description="COUCHBASE bucket name",
|
||||
default=None,
|
||||
)
|
||||
|
||||
COUCHBASE_SCOPE_NAME: Optional[str] = Field(
|
||||
description="COUCHBASE scope name",
|
||||
default=None,
|
||||
)
|
||||
@@ -640,6 +640,7 @@ class DatasetRetrievalSettingApi(Resource):
|
||||
| VectorType.ELASTICSEARCH
|
||||
| VectorType.PGVECTOR
|
||||
| VectorType.TIDB_ON_QDRANT
|
||||
| VectorType.COUCHBASE
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
@@ -678,6 +679,7 @@ class DatasetRetrievalSettingMockApi(Resource):
|
||||
| VectorType.MYSCALE
|
||||
| VectorType.ORACLE
|
||||
| VectorType.ELASTICSEARCH
|
||||
| VectorType.COUCHBASE
|
||||
| VectorType.PGVECTOR
|
||||
):
|
||||
return {
|
||||
|
||||
0
api/core/rag/datasource/vdb/couchbase/__init__.py
Normal file
0
api/core/rag/datasource/vdb/couchbase/__init__.py
Normal file
378
api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
Normal file
378
api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
Normal file
@@ -0,0 +1,378 @@
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from typing import Any
|
||||
|
||||
from couchbase import search
|
||||
from couchbase.auth import PasswordAuthenticator
|
||||
from couchbase.cluster import Cluster
|
||||
from couchbase.management.search import SearchIndex
|
||||
|
||||
# needed for options -- cluster, timeout, SQL++ (N1QL) query, etc.
|
||||
from couchbase.options import ClusterOptions, SearchOptions
|
||||
from couchbase.vector_search import VectorQuery, VectorSearch
|
||||
from flask import current_app
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
|
||||
from core.rag.datasource.vdb.vector_type import VectorType
|
||||
from core.rag.embedding.embedding_base import Embeddings
|
||||
from core.rag.models.document import Document
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.dataset import Dataset
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CouchbaseConfig(BaseModel):
|
||||
connection_string: str
|
||||
user: str
|
||||
password: str
|
||||
bucket_name: str
|
||||
scope_name: str
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validate_config(cls, values: dict) -> dict:
|
||||
if not values.get("connection_string"):
|
||||
raise ValueError("config COUCHBASE_CONNECTION_STRING is required")
|
||||
if not values.get("user"):
|
||||
raise ValueError("config COUCHBASE_USER is required")
|
||||
if not values.get("password"):
|
||||
raise ValueError("config COUCHBASE_PASSWORD is required")
|
||||
if not values.get("bucket_name"):
|
||||
raise ValueError("config COUCHBASE_PASSWORD is required")
|
||||
if not values.get("scope_name"):
|
||||
raise ValueError("config COUCHBASE_SCOPE_NAME is required")
|
||||
return values
|
||||
|
||||
|
||||
class CouchbaseVector(BaseVector):
|
||||
def __init__(self, collection_name: str, config: CouchbaseConfig):
|
||||
super().__init__(collection_name)
|
||||
self._client_config = config
|
||||
|
||||
"""Connect to couchbase"""
|
||||
|
||||
auth = PasswordAuthenticator(config.user, config.password)
|
||||
options = ClusterOptions(auth)
|
||||
self._cluster = Cluster(config.connection_string, options)
|
||||
self._bucket = self._cluster.bucket(config.bucket_name)
|
||||
self._scope = self._bucket.scope(config.scope_name)
|
||||
self._bucket_name = config.bucket_name
|
||||
self._scope_name = config.scope_name
|
||||
|
||||
# Wait until the cluster is ready for use.
|
||||
self._cluster.wait_until_ready(timedelta(seconds=5))
|
||||
|
||||
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
index_id = str(uuid.uuid4()).replace("-", "")
|
||||
self._create_collection(uuid=index_id, vector_length=len(embeddings[0]))
|
||||
self.add_texts(texts, embeddings)
|
||||
|
||||
def _create_collection(self, vector_length: int, uuid: str):
|
||||
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
|
||||
with redis_client.lock(lock_name, timeout=20):
|
||||
collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
|
||||
if redis_client.get(collection_exist_cache_key):
|
||||
return
|
||||
if self._collection_exists(self._collection_name):
|
||||
return
|
||||
manager = self._bucket.collections()
|
||||
manager.create_collection(self._client_config.scope_name, self._collection_name)
|
||||
|
||||
index_manager = self._scope.search_indexes()
|
||||
|
||||
index_definition = json.loads("""
|
||||
{
|
||||
"type": "fulltext-index",
|
||||
"name": "Embeddings._default.Vector_Search",
|
||||
"uuid": "26d4db528e78b716",
|
||||
"sourceType": "gocbcore",
|
||||
"sourceName": "Embeddings",
|
||||
"sourceUUID": "2242e4a25b4decd6650c9c7b3afa1dbf",
|
||||
"planParams": {
|
||||
"maxPartitionsPerPIndex": 1024,
|
||||
"indexPartitions": 1
|
||||
},
|
||||
"params": {
|
||||
"doc_config": {
|
||||
"docid_prefix_delim": "",
|
||||
"docid_regexp": "",
|
||||
"mode": "scope.collection.type_field",
|
||||
"type_field": "type"
|
||||
},
|
||||
"mapping": {
|
||||
"analysis": { },
|
||||
"default_analyzer": "standard",
|
||||
"default_datetime_parser": "dateTimeOptional",
|
||||
"default_field": "_all",
|
||||
"default_mapping": {
|
||||
"dynamic": true,
|
||||
"enabled": true
|
||||
},
|
||||
"default_type": "_default",
|
||||
"docvalues_dynamic": false,
|
||||
"index_dynamic": true,
|
||||
"store_dynamic": true,
|
||||
"type_field": "_type",
|
||||
"types": {
|
||||
"collection_name": {
|
||||
"dynamic": true,
|
||||
"enabled": true,
|
||||
"properties": {
|
||||
"embedding": {
|
||||
"dynamic": false,
|
||||
"enabled": true,
|
||||
"fields": [
|
||||
{
|
||||
"dims": 1536,
|
||||
"index": true,
|
||||
"name": "embedding",
|
||||
"similarity": "dot_product",
|
||||
"type": "vector",
|
||||
"vector_index_optimized_for": "recall"
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"dynamic": true,
|
||||
"enabled": true
|
||||
},
|
||||
"text": {
|
||||
"dynamic": false,
|
||||
"enabled": true,
|
||||
"fields": [
|
||||
{
|
||||
"index": true,
|
||||
"name": "text",
|
||||
"store": true,
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"store": {
|
||||
"indexType": "scorch",
|
||||
"segmentVersion": 16
|
||||
}
|
||||
},
|
||||
"sourceParams": { }
|
||||
}
|
||||
""")
|
||||
index_definition["name"] = self._collection_name + "_search"
|
||||
index_definition["uuid"] = uuid
|
||||
index_definition["params"]["mapping"]["types"]["collection_name"]["properties"]["embedding"]["fields"][0][
|
||||
"dims"
|
||||
] = vector_length
|
||||
index_definition["params"]["mapping"]["types"][self._scope_name + "." + self._collection_name] = (
|
||||
index_definition["params"]["mapping"]["types"].pop("collection_name")
|
||||
)
|
||||
time.sleep(2)
|
||||
index_manager.upsert_index(
|
||||
SearchIndex(
|
||||
index_definition["name"],
|
||||
params=index_definition["params"],
|
||||
source_name=self._bucket_name,
|
||||
),
|
||||
)
|
||||
time.sleep(1)
|
||||
|
||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||
|
||||
def _collection_exists(self, name: str):
|
||||
scope_collection_map: dict[str, Any] = {}
|
||||
|
||||
# Get a list of all scopes in the bucket
|
||||
for scope in self._bucket.collections().get_all_scopes():
|
||||
scope_collection_map[scope.name] = []
|
||||
|
||||
# Get a list of all the collections in the scope
|
||||
for collection in scope.collections:
|
||||
scope_collection_map[scope.name].append(collection.name)
|
||||
|
||||
# Check if the collection exists in the scope
|
||||
return self._collection_name in scope_collection_map[self._scope_name]
|
||||
|
||||
def get_type(self) -> str:
|
||||
return VectorType.COUCHBASE
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
uuids = self._get_uuids(documents)
|
||||
texts = [d.page_content for d in documents]
|
||||
metadatas = [d.metadata for d in documents]
|
||||
|
||||
doc_ids = []
|
||||
|
||||
documents_to_insert = [
|
||||
{"text": text, "embedding": vector, "metadata": metadata}
|
||||
for id, text, vector, metadata in zip(uuids, texts, embeddings, metadatas)
|
||||
]
|
||||
for doc, id in zip(documents_to_insert, uuids):
|
||||
result = self._scope.collection(self._collection_name).upsert(id, doc)
|
||||
|
||||
doc_ids.extend(uuids)
|
||||
|
||||
return doc_ids
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
# Use a parameterized query for safety and correctness
|
||||
query = f"""
|
||||
SELECT COUNT(1) AS count FROM
|
||||
`{self._client_config.bucket_name}`.{self._client_config.scope_name}.{self._collection_name}
|
||||
WHERE META().id = $doc_id
|
||||
"""
|
||||
# Pass the id as a parameter to the query
|
||||
result = self._cluster.query(query, named_parameters={"doc_id": id}).execute()
|
||||
for row in result:
|
||||
return row["count"] > 0
|
||||
return False # Return False if no rows are returned
|
||||
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
query = f"""
|
||||
DELETE FROM `{self._bucket_name}`.{self._client_config.scope_name}.{self._collection_name}
|
||||
WHERE META().id IN $doc_ids;
|
||||
"""
|
||||
try:
|
||||
self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
def delete_by_document_id(self, document_id: str):
|
||||
query = f"""
|
||||
DELETE FROM
|
||||
`{self._client_config.bucket_name}`.{self._client_config.scope_name}.{self._collection_name}
|
||||
WHERE META().id = $doc_id;
|
||||
"""
|
||||
self._cluster.query(query, named_parameters={"doc_id": document_id}).execute()
|
||||
|
||||
# def get_ids_by_metadata_field(self, key: str, value: str):
|
||||
# query = f"""
|
||||
# SELECT id FROM
|
||||
# `{self._client_config.bucket_name}`.{self._client_config.scope_name}.{self._collection_name}
|
||||
# WHERE `metadata.{key}` = $value;
|
||||
# """
|
||||
# result = self._cluster.query(query, named_parameters={'value':value})
|
||||
# return [row['id'] for row in result.rows()]
|
||||
|
||||
def delete_by_metadata_field(self, key: str, value: str) -> None:
|
||||
query = f"""
|
||||
DELETE FROM `{self._client_config.bucket_name}`.{self._client_config.scope_name}.{self._collection_name}
|
||||
WHERE metadata.{key} = $value;
|
||||
"""
|
||||
self._cluster.query(query, named_parameters={"value": value}).execute()
|
||||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
|
||||
top_k = kwargs.get("top_k", 5)
|
||||
score_threshold = kwargs.get("score_threshold") or 0.0
|
||||
|
||||
search_req = search.SearchRequest.create(
|
||||
VectorSearch.from_vector_query(
|
||||
VectorQuery(
|
||||
"embedding",
|
||||
query_vector,
|
||||
top_k,
|
||||
)
|
||||
)
|
||||
)
|
||||
try:
|
||||
search_iter = self._scope.search(
|
||||
self._collection_name + "_search",
|
||||
search_req,
|
||||
SearchOptions(limit=top_k, collections=[self._collection_name], fields=["*"]),
|
||||
)
|
||||
|
||||
docs = []
|
||||
# Parse the results
|
||||
for row in search_iter.rows():
|
||||
text = row.fields.pop("text")
|
||||
metadata = self._format_metadata(row.fields)
|
||||
score = row.score
|
||||
metadata["score"] = score
|
||||
doc = Document(page_content=text, metadata=metadata)
|
||||
if score >= score_threshold:
|
||||
docs.append(doc)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Search failed with error: {e}")
|
||||
|
||||
return docs
|
||||
|
||||
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
|
||||
top_k = kwargs.get("top_k", 2)
|
||||
try:
|
||||
CBrequest = search.SearchRequest.create(search.QueryStringQuery("text:" + query))
|
||||
search_iter = self._scope.search(
|
||||
self._collection_name + "_search", CBrequest, SearchOptions(limit=top_k, fields=["*"])
|
||||
)
|
||||
|
||||
docs = []
|
||||
for row in search_iter.rows():
|
||||
text = row.fields.pop("text")
|
||||
metadata = self._format_metadata(row.fields)
|
||||
score = row.score
|
||||
metadata["score"] = score
|
||||
doc = Document(page_content=text, metadata=metadata)
|
||||
docs.append(doc)
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Search failed with error: {e}")
|
||||
|
||||
return docs
|
||||
|
||||
def delete(self):
|
||||
manager = self._bucket.collections()
|
||||
scopes = manager.get_all_scopes()
|
||||
|
||||
for scope in scopes:
|
||||
for collection in scope.collections:
|
||||
if collection.name == self._collection_name:
|
||||
manager.drop_collection("_default", self._collection_name)
|
||||
|
||||
def _format_metadata(self, row_fields: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Helper method to format the metadata from the Couchbase Search API.
|
||||
Args:
|
||||
row_fields (Dict[str, Any]): The fields to format.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: The formatted metadata.
|
||||
"""
|
||||
metadata = {}
|
||||
for key, value in row_fields.items():
|
||||
# Couchbase Search returns the metadata key with a prefix
|
||||
# `metadata.` We remove it to get the original metadata key
|
||||
if key.startswith("metadata"):
|
||||
new_key = key.split("metadata" + ".")[-1]
|
||||
metadata[new_key] = value
|
||||
else:
|
||||
metadata[key] = value
|
||||
|
||||
return metadata
|
||||
|
||||
|
||||
class CouchbaseVectorFactory(AbstractVectorFactory):
|
||||
def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> CouchbaseVector:
|
||||
if dataset.index_struct_dict:
|
||||
class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
|
||||
collection_name = class_prefix
|
||||
else:
|
||||
dataset_id = dataset.id
|
||||
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
|
||||
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.COUCHBASE, collection_name))
|
||||
|
||||
config = current_app.config
|
||||
return CouchbaseVector(
|
||||
collection_name=collection_name,
|
||||
config=CouchbaseConfig(
|
||||
connection_string=config.get("COUCHBASE_CONNECTION_STRING"),
|
||||
user=config.get("COUCHBASE_USER"),
|
||||
password=config.get("COUCHBASE_PASSWORD"),
|
||||
bucket_name=config.get("COUCHBASE_BUCKET_NAME"),
|
||||
scope_name=config.get("COUCHBASE_SCOPE_NAME"),
|
||||
),
|
||||
)
|
||||
@@ -114,6 +114,10 @@ class Vector:
|
||||
from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
|
||||
|
||||
return AnalyticdbVectorFactory
|
||||
case VectorType.COUCHBASE:
|
||||
from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory
|
||||
|
||||
return CouchbaseVectorFactory
|
||||
case VectorType.BAIDU:
|
||||
from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ class VectorType(str, Enum):
|
||||
TENCENT = "tencent"
|
||||
ORACLE = "oracle"
|
||||
ELASTICSEARCH = "elasticsearch"
|
||||
COUCHBASE = "couchbase"
|
||||
BAIDU = "baidu"
|
||||
VIKINGDB = "vikingdb"
|
||||
UPSTASH = "upstash"
|
||||
|
||||
55
api/poetry.lock
generated
55
api/poetry.lock
generated
@@ -1801,6 +1801,46 @@ requests = ">=2.8"
|
||||
six = "*"
|
||||
xmltodict = "*"
|
||||
|
||||
[[package]]
|
||||
name = "couchbase"
|
||||
version = "4.3.3"
|
||||
description = "Python Client for Couchbase"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "couchbase-4.3.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:d8069e4f01332859d56cca597874645c914699162b3979d1b432f0dfc186b124"},
|
||||
{file = "couchbase-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1caa6cfef49c785b35b1702102f718227f351df87bba2694b9334520c41e9eb5"},
|
||||
{file = "couchbase-4.3.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f4a9a65c44935249fa078fb90a3c28ea71da9d2d5889fcd514b12d0538010ae0"},
|
||||
{file = "couchbase-4.3.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4f144b8c482c18283d8e419b844630d41f3249b07d43d40b5e3535444e57d0fb"},
|
||||
{file = "couchbase-4.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1c534fba6fdc7cf47eed9dee8a57d1e9eb867bf008574e321fa380a77cebf32f"},
|
||||
{file = "couchbase-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b841be06e0e4370b69ebef6bca3409c378186f7d6e964cd645ba18e97216c022"},
|
||||
{file = "couchbase-4.3.3-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:eee7a73b3acbdc78ae314fddf7f975b3c9e05df07df255f4dcc878939a2abae0"},
|
||||
{file = "couchbase-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:53417cafcf90ff4e2fd81ebba2a08b7ad56f17160d1c5019ad3b09c758aeb363"},
|
||||
{file = "couchbase-4.3.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0cefd13bea8b0f150f1b9d27fd7614f971f77419b31817781d26ba315ed658bb"},
|
||||
{file = "couchbase-4.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78fa1054d7740e2fe38fce0a2aab4e9a2d30263d894e0615ee5df297f02f59a3"},
|
||||
{file = "couchbase-4.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb093899cfad5a7472258a9b6a57775dbf23a6e0180241507ba89ce3ab241e41"},
|
||||
{file = "couchbase-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f7cfbdc699af5715f49365ffbb05a6a7366a534c0d7161edf270ad3e735a6c5d"},
|
||||
{file = "couchbase-4.3.3-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:58352cae9b8affdaa2ac012e0a03c8c2632ee6297a878232888b4e0360d0d5df"},
|
||||
{file = "couchbase-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:728e7e3b5e1682706cb9d63993d289226d02a25089527b8ecb4e3889dabc38cf"},
|
||||
{file = "couchbase-4.3.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:73014bf098cf14187a39cc13453e0d859c1d54568df28f69cc308a9a5f24feb2"},
|
||||
{file = "couchbase-4.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a743375804068ae01b73c916bfca738764c8c12f381bb399ef04e784935856a1"},
|
||||
{file = "couchbase-4.3.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:394c122cfe02a76a99e7d5178e64129f6da49843225e78d8629abcab556c24af"},
|
||||
{file = "couchbase-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:bf85d7a5cda548d9801614651206068b4445fa37972e62b14d7521a958198693"},
|
||||
{file = "couchbase-4.3.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:92d23c9cedd571631070791f2afee0e3d7d8c9ce1bf2ea6e9a4f2fdbc37a0f1e"},
|
||||
{file = "couchbase-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:38c42eb29a73cce2998ae5df45bd61b16dce9765d3bff968ec5cf6a622faa291"},
|
||||
{file = "couchbase-4.3.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:afed137bf0edc642d7b201b6ab7b1e7117bb4c8eac6b2f253cc6e106f334a2a1"},
|
||||
{file = "couchbase-4.3.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:954d991377d47883aaf903934c5d0f19577680a2abf80d3ce5bb9b3c80991fc7"},
|
||||
{file = "couchbase-4.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5552b9fa684630698dc98d6f3b1082540634c1b7ad5bf53b843b5da57b0169c"},
|
||||
{file = "couchbase-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:f88f2b7e0c894f7237d9f3fb5c46abc44b8151a97b3ca8e75f57d23ebf59f9da"},
|
||||
{file = "couchbase-4.3.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:769e1e2367ea1d4de181fcd4b4e353e9abef97d15b581a6c5aea49ece3dc7d59"},
|
||||
{file = "couchbase-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:47f59a0b35ffce060583fd11f98f049f3b70701cf14aab9ac092594aca486aeb"},
|
||||
{file = "couchbase-4.3.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:440bb93d611827ba0ea2403c6f204fe931467a6cb5811f0e03bf1779204ef843"},
|
||||
{file = "couchbase-4.3.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cdb4dde62e1d41c0b8707121ab68fa78b7a1508541bd48fc850be396f91bc8d9"},
|
||||
{file = "couchbase-4.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7f8cf45f317b39cc19db5c67b565662f08d6c90305b3aa14e04bc22707258213"},
|
||||
{file = "couchbase-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:c97d48ad486c8f201b4482d5594258f949369cb44792ed148d5159a3d12ae21b"},
|
||||
{file = "couchbase-4.3.3.tar.gz", hash = "sha256:27808500551564b39b46943cf3daab572694889c1eb638425d363edb48b20da7"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coverage"
|
||||
version = "7.2.7"
|
||||
@@ -6850,6 +6890,19 @@ files = [
|
||||
{file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"},
|
||||
{file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"},
|
||||
{file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"},
|
||||
{file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -10866,4 +10919,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "1b268122d3d4771ba219f0e983322e0454b7b8644dba35da38d7d950d489e1ba"
|
||||
content-hash = "52552faf5f4823056eb48afe05349ab2f0e9a5bc42105211ccbbb54b59e27b59"
|
||||
|
||||
@@ -239,6 +239,7 @@ alibabacloud_gpdb20160503 = "~3.8.0"
|
||||
alibabacloud_tea_openapi = "~0.3.9"
|
||||
chromadb = "0.5.1"
|
||||
clickhouse-connect = "~0.7.16"
|
||||
couchbase = "~4.3.0"
|
||||
elasticsearch = "8.14.0"
|
||||
opensearch-py = "2.4.0"
|
||||
oracledb = "~2.2.1"
|
||||
|
||||
50
api/tests/integration_tests/vdb/couchbase/test_couchbase.py
Normal file
50
api/tests/integration_tests/vdb/couchbase/test_couchbase.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseConfig, CouchbaseVector
|
||||
from tests.integration_tests.vdb.test_vector_store import (
|
||||
AbstractVectorTest,
|
||||
get_example_text,
|
||||
setup_mock_redis,
|
||||
)
|
||||
|
||||
|
||||
def wait_for_healthy_container(service_name="couchbase-server", timeout=300):
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < timeout:
|
||||
result = subprocess.run(
|
||||
["docker", "inspect", "--format", "{{.State.Health.Status}}", service_name], capture_output=True, text=True
|
||||
)
|
||||
if result.stdout.strip() == "healthy":
|
||||
print(f"{service_name} is healthy!")
|
||||
return True
|
||||
else:
|
||||
print(f"Waiting for {service_name} to be healthy...")
|
||||
time.sleep(10)
|
||||
raise TimeoutError(f"{service_name} did not become healthy in time")
|
||||
|
||||
|
||||
class CouchbaseTest(AbstractVectorTest):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.vector = CouchbaseVector(
|
||||
collection_name=self.collection_name,
|
||||
config=CouchbaseConfig(
|
||||
connection_string="couchbase://127.0.0.1",
|
||||
user="Administrator",
|
||||
password="password",
|
||||
bucket_name="Embeddings",
|
||||
scope_name="_default",
|
||||
),
|
||||
)
|
||||
|
||||
def search_by_vector(self):
|
||||
# brief sleep to ensure document is indexed
|
||||
time.sleep(5)
|
||||
hits_by_vector = self.vector.search_by_vector(query_vector=self.example_embedding)
|
||||
assert len(hits_by_vector) == 1
|
||||
|
||||
|
||||
def test_couchbase(setup_mock_redis):
|
||||
wait_for_healthy_container("couchbase-server", timeout=60)
|
||||
CouchbaseTest().run_all_tests()
|
||||
Reference in New Issue
Block a user