feat: integrate opendal storage (#11508)

Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
-LAN-
2024-12-11 14:50:54 +08:00
committed by GitHub
parent 1765fe2a29
commit 8d4bb9b40d
17 changed files with 798 additions and 597 deletions

View File

@@ -7,9 +7,6 @@ from collections.abc import Generator
class BaseStorage(ABC):
"""Interface for file storage."""
def __init__(self):  # noqa: B027
    # Intentionally empty: there is no state to set up here. The noqa marker
    # silences the lint rule about an empty, non-abstract method on an ABC.
    pass
@abstractmethod
def save(self, filename, data):
    """Store ``data`` under ``filename``.

    Abstract: this base body only raises ``NotImplementedError``.
    """
    raise NotImplementedError

View File

@@ -1,62 +0,0 @@
import os
import shutil
from collections.abc import Generator
from pathlib import Path
from flask import current_app
from configs import dify_config
from extensions.storage.base_storage import BaseStorage
class LocalFsStorage(BaseStorage):
    """Implementation for local filesystem storage.

    Every object is a plain file located at ``self.folder`` + ``filename``.
    """

    def __init__(self):
        super().__init__()
        folder = dify_config.STORAGE_LOCAL_PATH
        # A relative storage path is anchored at the Flask application root.
        if not os.path.isabs(folder):
            folder = os.path.join(current_app.root_path, folder)
        self.folder = folder

    def _build_filepath(self, filename: str) -> str:
        """Build the full file path based on the folder and filename."""
        if not self.folder or self.folder.endswith("/"):
            return self.folder + filename
        return self.folder + "/" + filename

    def save(self, filename: str, data: bytes) -> None:
        """Write ``data`` to ``filename``, creating parent directories as needed."""
        filepath = self._build_filepath(filename)
        parent = os.path.dirname(filepath)
        # os.makedirs("") raises, so only create a parent when there is one.
        if parent:
            os.makedirs(parent, exist_ok=True)
        # Use the same path as every other method; a relative path already
        # resolves against the current working directory.
        Path(filepath).write_bytes(data)

    def load_once(self, filename: str) -> bytes:
        """Return the whole content of ``filename``.

        Raises:
            FileNotFoundError: if the file does not exist.
        """
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        return Path(filepath).read_bytes()

    def load_stream(self, filename: str) -> Generator:
        """Yield the content of ``filename`` in chunks of at most 4 KiB.

        Raises:
            FileNotFoundError: if the file does not exist (raised when the
                generator is first advanced).
        """
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        with open(filepath, "rb") as f:
            while chunk := f.read(4096):  # Read in chunks of 4KB
                yield chunk

    def download(self, filename: str, target_filepath: str) -> None:
        """Copy the stored file ``filename`` to ``target_filepath``.

        Raises:
            FileNotFoundError: if the stored file does not exist.
        """
        filepath = self._build_filepath(filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError("File not found")
        shutil.copyfile(filepath, target_filepath)

    def exists(self, filename: str) -> bool:
        """Return True when a filesystem entry exists for ``filename``."""
        return os.path.exists(self._build_filepath(filename))

    def delete(self, filename: str) -> None:
        """Remove ``filename`` if present; a missing file is not an error."""
        filepath = self._build_filepath(filename)
        # Attempt the removal directly instead of exists()-then-remove(), so a
        # file that vanishes between the two calls cannot raise.
        try:
            os.remove(filepath)
        except FileNotFoundError:
            pass

View File

@@ -0,0 +1,66 @@
from collections.abc import Generator
from pathlib import Path
from urllib.parse import urlparse
import opendal
from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.base_storage import BaseStorage
# Host name (and parent domain) that is_r2_endpoint() looks for.
S3_R2_HOSTNAME = "r2.cloudflarestorage.com"

# NOTE(review): presumably extra operator options applied when the S3 endpoint
# is an R2 one -- the consumer is not visible here, confirm against the caller.
S3_R2_COMPATIBLE_KWARGS = {
    "delete_max_size": "700",
    "disable_stat_with_override": "true",
    "region": "auto",
}

# NOTE(review): presumably the option enabling KMS server-side encryption for
# S3 -- the consumer is not visible here, confirm against the caller.
S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
    "server_side_encryption": "aws:kms",
}


def is_r2_endpoint(endpoint: str) -> bool:
    """Return True when ``endpoint`` points at ``S3_R2_HOSTNAME`` or a subdomain.

    Args:
        endpoint: Endpoint URL, with or without a scheme. Empty or ``None``
            yields ``False``.

    Returns:
        ``True`` only if the host is exactly ``S3_R2_HOSTNAME`` or ends with
        ``"." + S3_R2_HOSTNAME``.
    """
    if not endpoint:
        return False
    # urlparse() only extracts a hostname after a "//" authority marker, so a
    # scheme-less endpoint such as "acct.r2.cloudflarestorage.com" needs one.
    candidate = endpoint if "//" in endpoint else f"//{endpoint}"
    hostname = urlparse(candidate).hostname
    if not hostname:
        return False
    # Match on a label boundary: a bare endswith() would also accept hosts
    # like "evilr2.cloudflarestorage.com".
    return hostname == S3_R2_HOSTNAME or hostname.endswith(f".{S3_R2_HOSTNAME}")
class OpenDALStorage(BaseStorage):
    """Storage backend that delegates every operation to an ``opendal.Operator``."""

    def __init__(self, scheme: OpenDALScheme, **kwargs):
        """Create the operator for ``scheme``.

        Args:
            scheme: OpenDAL service scheme to use.
            **kwargs: Service options forwarded unchanged to ``opendal.Operator``.
                For the ``FS`` scheme, ``root`` is required.
        """
        if scheme == OpenDALScheme.FS:
            # Create the root directory before the operator is built.
            Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)

        self.op = opendal.Operator(scheme=scheme, **kwargs)

    def save(self, filename: str, data: bytes) -> None:
        """Write ``data`` to ``filename``."""
        self.op.write(path=filename, bs=data)

    def load_once(self, filename: str) -> bytes:
        """Return the whole content of ``filename``.

        Raises:
            FileNotFoundError: if ``filename`` is not an existing file.
        """
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        return self.op.read(path=filename)

    def load_stream(self, filename: str) -> Generator:
        """Yield the content of ``filename`` in chunks of at most 4096 bytes.

        Raises:
            FileNotFoundError: if ``filename`` is not an existing file (raised
                when the generator is first advanced).
        """
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        batch_size = 4096
        # The context manager closes the handle when the stream is exhausted,
        # or when the consumer abandons the generator early.
        with self.op.open(path=filename, mode="rb") as file:
            while chunk := file.read(batch_size):
                yield chunk

    def download(self, filename: str, target_filepath: str) -> None:
        """Copy the stored object ``filename`` to the local ``target_filepath``.

        Raises:
            FileNotFoundError: if ``filename`` is not an existing file.
        """
        if not self.exists(filename):
            raise FileNotFoundError("File not found")

        with Path(target_filepath).open("wb") as f:
            f.write(self.op.read(path=filename))

    def exists(self, filename: str) -> bool:
        """Return True when ``filename`` exists and is a regular file."""
        # Imported locally so the module's import block stays untouched.
        from opendal.exceptions import NotFound

        # stat() raises for a missing path rather than returning a falsy
        # value; translate that into the boolean callers expect.
        try:
            return self.op.stat(path=filename).mode.is_file()
        except NotFound:
            return False

    def delete(self, filename: str) -> None:
        """Delete ``filename`` if it exists; a missing file is not an error."""
        if self.exists(filename):
            self.op.delete(path=filename)

View File

@@ -9,6 +9,7 @@ class StorageType(StrEnum):
HUAWEI_OBS = "huawei-obs"
LOCAL = "local"
OCI_STORAGE = "oci-storage"
OPENDAL = "opendal"
S3 = "s3"
TENCENT_COS = "tencent-cos"
VOLCENGINE_TOS = "volcengine-tos"