feat: couchbase integration (#6165)

Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: Elliot Scribner <elliot.scribner@couchbase.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: Bowen Liang <bowenliang@apache.org>
This commit is contained in:
roadgoat19
2024-10-29 03:00:23 -04:00
committed by GitHub
parent fc37e654fc
commit c8ef9223e5
21 changed files with 639 additions and 7 deletions

View File

@@ -375,7 +375,7 @@ SUPABASE_URL=your-server-url
# ------------------------------
# The type of vector store to use.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `vikingdb`.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `couchbase`, `vikingdb`.
VECTOR_STORE=weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -414,6 +414,14 @@ MYSCALE_PASSWORD=
MYSCALE_DATABASE=dify
MYSCALE_FTS_PARAMS=
# Couchbase configurations, only available when VECTOR_STORE is `couchbase`
# The connection string must include hostname defined in the docker-compose file (couchbase-server in this case)
COUCHBASE_CONNECTION_STRING=couchbase://couchbase-server
COUCHBASE_USER=Administrator
COUCHBASE_PASSWORD=password
COUCHBASE_BUCKET_NAME=Embeddings
COUCHBASE_SCOPE_NAME=_default
# pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432

View File

@@ -0,0 +1,4 @@
FROM couchbase/server:latest AS stage_base
# FROM couchbase:latest AS stage_base
COPY init-cbserver.sh /opt/couchbase/init/
RUN chmod +x /opt/couchbase/init/init-cbserver.sh

View File

@@ -0,0 +1,44 @@
#!/bin/bash
# used to start couchbase server - can't get around this as docker compose only allows you to start one command - so we have to start couchbase like the standard couchbase Dockerfile would
# https://github.com/couchbase/docker/blob/master/enterprise/couchbase-server/7.2.0/Dockerfile#L88
/entrypoint.sh couchbase-server &
# track if setup is complete so we don't try to setup again
FILE=/opt/couchbase/init/setupComplete.txt
if ! [ -f "$FILE" ]; then
# used to automatically create the cluster based on environment variables
# https://docs.couchbase.com/server/current/cli/cbcli/couchbase-cli-cluster-init.html
echo $COUCHBASE_ADMINISTRATOR_USERNAME ":" $COUCHBASE_ADMINISTRATOR_PASSWORD
sleep 20s
/opt/couchbase/bin/couchbase-cli cluster-init -c 127.0.0.1 \
--cluster-username $COUCHBASE_ADMINISTRATOR_USERNAME \
--cluster-password $COUCHBASE_ADMINISTRATOR_PASSWORD \
--services data,index,query,fts \
--cluster-ramsize $COUCHBASE_RAM_SIZE \
--cluster-index-ramsize $COUCHBASE_INDEX_RAM_SIZE \
--cluster-eventing-ramsize $COUCHBASE_EVENTING_RAM_SIZE \
--cluster-fts-ramsize $COUCHBASE_FTS_RAM_SIZE \
--index-storage-setting default
sleep 2s
# used to auto create the bucket based on environment variables
# https://docs.couchbase.com/server/current/cli/cbcli/couchbase-cli-bucket-create.html
/opt/couchbase/bin/couchbase-cli bucket-create -c localhost:8091 \
--username $COUCHBASE_ADMINISTRATOR_USERNAME \
--password $COUCHBASE_ADMINISTRATOR_PASSWORD \
--bucket $COUCHBASE_BUCKET \
--bucket-ramsize $COUCHBASE_BUCKET_RAMSIZE \
--bucket-type couchbase
# create file so we know that the cluster is setup and don't run the setup again
touch $FILE
fi
# docker compose will stop the container from running unless we do this
# known issue and workaround
tail -f /dev/null

View File

@@ -110,6 +110,11 @@ x-shared-env: &shared-api-worker-env
QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20}
QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false}
QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334}
COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-'couchbase-server'}
COUCHBASE_USER: ${COUCHBASE_USER:-Administrator}
COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password}
COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings}
COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default}
MILVUS_URI: ${MILVUS_URI:-http://127.0.0.1:19530}
MILVUS_TOKEN: ${MILVUS_TOKEN:-}
MILVUS_USER: ${MILVUS_USER:-root}
@@ -475,6 +480,39 @@ services:
environment:
QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456}
# The Couchbase vector store.
couchbase-server:
build: ./couchbase-server
profiles:
- couchbase
restart: always
environment:
- CLUSTER_NAME=dify_search
- COUCHBASE_ADMINISTRATOR_USERNAME=${COUCHBASE_USER:-Administrator}
- COUCHBASE_ADMINISTRATOR_PASSWORD=${COUCHBASE_PASSWORD:-password}
- COUCHBASE_BUCKET=${COUCHBASE_BUCKET_NAME:-Embeddings}
- COUCHBASE_BUCKET_RAMSIZE=512
- COUCHBASE_RAM_SIZE=2048
- COUCHBASE_EVENTING_RAM_SIZE=512
- COUCHBASE_INDEX_RAM_SIZE=512
- COUCHBASE_FTS_RAM_SIZE=1024
hostname: couchbase-server
container_name: couchbase-server
working_dir: /opt/couchbase
stdin_open: true
tty: true
entrypoint: [""]
command: sh -c "/opt/couchbase/init/init-cbserver.sh"
volumes:
- ./volumes/couchbase/data:/opt/couchbase/var/lib/couchbase/data
healthcheck:
# ensure bucket was created before proceeding
test: [ "CMD-SHELL", "curl -s -f -u Administrator:password http://localhost:8091/pools/default/buckets | grep -q '\\[{' || exit 1" ]
interval: 10s
retries: 10
start_period: 30s
timeout: 10s
# The pgvector vector database.
pgvector:
image: pgvector/pgvector:pg16