feat: Add Clickzetta Lakehouse vector database integration (#22551)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
yunqiqiliang
2025-08-07 14:21:46 +08:00
committed by GitHub
parent 2931c891a7
commit e01510e2a6
25 changed files with 4788 additions and 9 deletions

View File

@@ -333,6 +333,25 @@ OPENDAL_SCHEME=fs
# Configurations for OpenDAL Local File System.
OPENDAL_FS_ROOT=storage
# ClickZetta Volume Configuration (for storage backend)
# To use ClickZetta Volume as storage backend, set STORAGE_TYPE=clickzetta-volume
# Note: ClickZetta Volume will reuse the existing CLICKZETTA_* connection parameters
# Volume type selection (three types available):
# - user: Personal/small team use, simple config, user-level permissions
# - table: Enterprise multi-tenant, smart routing, table-level + user-level permissions
# - external: Data lake integration, external storage connection, volume-level + storage-level permissions
CLICKZETTA_VOLUME_TYPE=user
# External Volume name (required only when TYPE=external)
CLICKZETTA_VOLUME_NAME=
# Table Volume table prefix (used only when TYPE=table)
CLICKZETTA_VOLUME_TABLE_PREFIX=dataset_
# Dify file directory prefix (isolates from other apps, recommended to keep default)
CLICKZETTA_VOLUME_DIFY_PREFIX=dify_km
# S3 Configuration
#
S3_ENDPOINT=
@@ -416,7 +435,7 @@ SUPABASE_URL=your-server-url
# ------------------------------
# The type of vector store to use.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`.
VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index
@@ -655,6 +674,20 @@ TABLESTORE_ACCESS_KEY_ID=xxx
TABLESTORE_ACCESS_KEY_SECRET=xxx
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false
# Clickzetta configuration, only available when VECTOR_STORE is `clickzetta`
CLICKZETTA_USERNAME=
CLICKZETTA_PASSWORD=
CLICKZETTA_INSTANCE=
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=quick_start
CLICKZETTA_VCLUSTER=default_ap
CLICKZETTA_SCHEMA=dify
CLICKZETTA_BATCH_SIZE=100
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese
CLICKZETTA_ANALYZER_MODE=smart
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance
# ------------------------------
# Knowledge Configuration
# ------------------------------

View File

@@ -93,6 +93,10 @@ x-shared-env: &shared-api-worker-env
STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user}
CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-}
CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_}
CLICKZETTA_VOLUME_DIFY_PREFIX: ${CLICKZETTA_VOLUME_DIFY_PREFIX:-dify_km}
S3_ENDPOINT: ${S3_ENDPOINT:-}
S3_REGION: ${S3_REGION:-us-east-1}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai}
@@ -313,6 +317,18 @@ x-shared-env: &shared-api-worker-env
TABLESTORE_ACCESS_KEY_ID: ${TABLESTORE_ACCESS_KEY_ID:-xxx}
TABLESTORE_ACCESS_KEY_SECRET: ${TABLESTORE_ACCESS_KEY_SECRET:-xxx}
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE: ${TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE:-false}
CLICKZETTA_USERNAME: ${CLICKZETTA_USERNAME:-}
CLICKZETTA_PASSWORD: ${CLICKZETTA_PASSWORD:-}
CLICKZETTA_INSTANCE: ${CLICKZETTA_INSTANCE:-}
CLICKZETTA_SERVICE: ${CLICKZETTA_SERVICE:-api.clickzetta.com}
CLICKZETTA_WORKSPACE: ${CLICKZETTA_WORKSPACE:-quick_start}
CLICKZETTA_VCLUSTER: ${CLICKZETTA_VCLUSTER:-default_ap}
CLICKZETTA_SCHEMA: ${CLICKZETTA_SCHEMA:-dify}
CLICKZETTA_BATCH_SIZE: ${CLICKZETTA_BATCH_SIZE:-100}
CLICKZETTA_ENABLE_INVERTED_INDEX: ${CLICKZETTA_ENABLE_INVERTED_INDEX:-true}
CLICKZETTA_ANALYZER_TYPE: ${CLICKZETTA_ANALYZER_TYPE:-chinese}
CLICKZETTA_ANALYZER_MODE: ${CLICKZETTA_ANALYZER_MODE:-smart}
CLICKZETTA_VECTOR_DISTANCE_FUNCTION: ${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance}
UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15}
UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5}
ETL_TYPE: ${ETL_TYPE:-dify}