feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -3,6 +3,8 @@ import os
|
||||
import uuid
|
||||
from typing import Literal, Union
|
||||
|
||||
from sqlalchemy import Engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from configs import dify_config
|
||||
@@ -14,11 +16,9 @@ from constants import (
|
||||
)
|
||||
from core.file import helpers as file_helpers
|
||||
from core.rag.extractor.extract_processor import ExtractProcessor
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from libs.helper import extract_tenant_id
|
||||
from libs.login import current_user
|
||||
from models.account import Account
|
||||
from models.enums import CreatorUserRole
|
||||
from models.model import EndUser, UploadFile
|
||||
@@ -29,8 +29,18 @@ PREVIEW_WORDS_LIMIT = 3000
|
||||
|
||||
|
||||
class FileService:
|
||||
@staticmethod
|
||||
_session_maker: sessionmaker
|
||||
|
||||
def __init__(self, session_factory: sessionmaker | Engine | None = None):
    """Bind the service to a SQLAlchemy session factory.

    Args:
        session_factory: Either a ready-made ``sessionmaker``, which is
            stored as is, or an ``Engine``, which is wrapped in a new
            ``sessionmaker`` bound to it.

    Raises:
        AssertionError: If ``session_factory`` is neither a
            ``sessionmaker`` nor an ``Engine``. This includes the default
            value ``None``, so callers must always pass one explicitly.
    """
    if isinstance(session_factory, sessionmaker):
        self._session_maker = session_factory
        return
    if isinstance(session_factory, Engine):
        self._session_maker = sessionmaker(bind=session_factory)
        return
    # Anything else -- including the default ``None`` -- is rejected.
    raise AssertionError("must be a sessionmaker or an Engine.")
|
||||
|
||||
def upload_file(
|
||||
self,
|
||||
*,
|
||||
filename: str,
|
||||
content: bytes,
|
||||
@@ -85,14 +95,14 @@ class FileService:
|
||||
hash=hashlib.sha3_256(content).hexdigest(),
|
||||
source_url=source_url,
|
||||
)
|
||||
|
||||
db.session.add(upload_file)
|
||||
db.session.commit()
|
||||
|
||||
# The `UploadFile` ID is generated within its constructor, so flushing to retrieve the ID is unnecessary.
|
||||
# We can directly generate the `source_url` here before committing.
|
||||
if not upload_file.source_url:
|
||||
upload_file.source_url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
|
||||
db.session.add(upload_file)
|
||||
db.session.commit()
|
||||
|
||||
with self._session_maker(expire_on_commit=False) as session:
|
||||
session.add(upload_file)
|
||||
session.commit()
|
||||
|
||||
return upload_file
|
||||
|
||||
@@ -109,45 +119,42 @@ class FileService:
|
||||
|
||||
return file_size <= file_size_limit
|
||||
|
||||
@staticmethod
|
||||
def upload_text(text: str, text_name: str) -> UploadFile:
|
||||
assert isinstance(current_user, Account)
|
||||
assert current_user.current_tenant_id is not None
|
||||
|
||||
def upload_text(self, text: str, text_name: str, user_id: str, tenant_id: str) -> UploadFile:
    """Save ``text`` as a UTF-8 ``.txt`` object and record it as an ``UploadFile``.

    Args:
        text: The text to store; it is encoded as UTF-8 before saving.
        text_name: Display name for the file; only the first 200 characters
            are kept.
        user_id: Recorded as both ``created_by`` and ``used_by``.
        tenant_id: Recorded on the row and used as part of the storage key.

    Returns:
        The ``UploadFile`` instance that was added and committed.
    """
    # Keep at most 200 characters of the display name.
    text_name = text_name[:200]
    # use a uuid as the file name
    file_uuid = str(uuid.uuid4())
    file_key = "upload_files/" + tenant_id + "/" + file_uuid + ".txt"

    # Encode once so the stored payload and the recorded size agree:
    # ``len(text)`` counts characters, which differs from the number of
    # bytes written to storage as soon as the text contains non-ASCII.
    payload = text.encode("utf-8")

    # save file to storage
    storage.save(file_key, payload)

    # save file to db
    upload_file = UploadFile(
        tenant_id=tenant_id,
        storage_type=dify_config.STORAGE_TYPE,
        key=file_key,
        name=text_name,
        size=len(payload),
        extension="txt",
        mime_type="text/plain",
        created_by=user_id,
        created_by_role=CreatorUserRole.ACCOUNT,
        created_at=naive_utc_now(),
        used=True,
        used_by=user_id,
        used_at=naive_utc_now(),
    )

    # expire_on_commit=False keeps the returned instance's attributes
    # loaded after the session has been committed and closed.
    with self._session_maker(expire_on_commit=False) as session:
        session.add(upload_file)
        session.commit()

    return upload_file
|
||||
|
||||
@staticmethod
|
||||
def get_file_preview(file_id: str):
|
||||
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
def get_file_preview(self, file_id: str):
|
||||
with self._session_maker(expire_on_commit=False) as session:
|
||||
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
|
||||
if not upload_file:
|
||||
raise NotFound("File not found")
|
||||
@@ -162,15 +169,14 @@ class FileService:
|
||||
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def get_image_preview(file_id: str, timestamp: str, nonce: str, sign: str):
|
||||
def get_image_preview(self, file_id: str, timestamp: str, nonce: str, sign: str):
|
||||
result = file_helpers.verify_image_signature(
|
||||
upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign
|
||||
)
|
||||
if not result:
|
||||
raise NotFound("File not found or signature is invalid")
|
||||
|
||||
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
with self._session_maker(expire_on_commit=False) as session:
|
||||
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
|
||||
if not upload_file:
|
||||
raise NotFound("File not found or signature is invalid")
|
||||
@@ -184,13 +190,13 @@ class FileService:
|
||||
|
||||
return generator, upload_file.mime_type
|
||||
|
||||
@staticmethod
|
||||
def get_file_generator_by_file_id(file_id: str, timestamp: str, nonce: str, sign: str):
|
||||
def get_file_generator_by_file_id(self, file_id: str, timestamp: str, nonce: str, sign: str):
|
||||
result = file_helpers.verify_file_signature(upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign)
|
||||
if not result:
|
||||
raise NotFound("File not found or signature is invalid")
|
||||
|
||||
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
with self._session_maker(expire_on_commit=False) as session:
|
||||
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
|
||||
if not upload_file:
|
||||
raise NotFound("File not found or signature is invalid")
|
||||
@@ -199,9 +205,9 @@ class FileService:
|
||||
|
||||
return generator, upload_file
|
||||
|
||||
@staticmethod
|
||||
def get_public_image_preview(file_id: str):
|
||||
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
def get_public_image_preview(self, file_id: str):
|
||||
with self._session_maker(expire_on_commit=False) as session:
|
||||
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
|
||||
if not upload_file:
|
||||
raise NotFound("File not found or signature is invalid")
|
||||
@@ -214,3 +220,23 @@ class FileService:
|
||||
generator = storage.load(upload_file.key)
|
||||
|
||||
return generator, upload_file.mime_type
|
||||
|
||||
def get_file_content(self, file_id: str) -> str:
    """Return the stored content of an uploaded file, decoded as UTF-8.

    Args:
        file_id: Value matched against ``UploadFile.id``.

    Raises:
        NotFound: If no ``UploadFile`` row matches ``file_id``.
    """
    with self._session_maker(expire_on_commit=False) as session:
        record: UploadFile | None = session.query(UploadFile).where(UploadFile.id == file_id).first()

    if record:
        raw = storage.load(record.key)
        return raw.decode("utf-8")
    raise NotFound("File not found")
|
||||
|
||||
def delete_file(self, file_id: str):
    """Delete an uploaded file's stored object and its ``UploadFile`` row.

    Does nothing when no row matches ``file_id``.

    Args:
        file_id: Value matched against ``UploadFile.id``.
    """
    with self._session_maker(expire_on_commit=False) as session:
        record: UploadFile | None = session.query(UploadFile).where(UploadFile.id == file_id).first()
        if record:
            # The storage object is removed first; the row is then deleted
            # and the deletion committed.
            storage.delete(record.key)
            session.delete(record)
            session.commit()
|
||||
|
||||
Reference in New Issue
Block a user