feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -3,6 +3,8 @@ import os
import uuid
from typing import Literal, Union
from sqlalchemy import Engine
from sqlalchemy.orm import sessionmaker
from werkzeug.exceptions import NotFound
from configs import dify_config
@@ -14,11 +16,9 @@ from constants import (
)
from core.file import helpers as file_helpers
from core.rag.extractor.extract_processor import ExtractProcessor
from extensions.ext_database import db
from extensions.ext_storage import storage
from libs.datetime_utils import naive_utc_now
from libs.helper import extract_tenant_id
from libs.login import current_user
from models.account import Account
from models.enums import CreatorUserRole
from models.model import EndUser, UploadFile
@@ -29,8 +29,18 @@ PREVIEW_WORDS_LIMIT = 3000
class FileService:
@staticmethod
_session_maker: sessionmaker
def __init__(self, session_factory: sessionmaker | Engine | None = None):
if isinstance(session_factory, Engine):
self._session_maker = sessionmaker(bind=session_factory)
elif isinstance(session_factory, sessionmaker):
self._session_maker = session_factory
else:
raise AssertionError("must be a sessionmaker or an Engine.")
def upload_file(
self,
*,
filename: str,
content: bytes,
@@ -85,14 +95,14 @@ class FileService:
hash=hashlib.sha3_256(content).hexdigest(),
source_url=source_url,
)
db.session.add(upload_file)
db.session.commit()
# The `UploadFile` ID is generated within its constructor, so flushing to retrieve the ID is unnecessary.
# We can directly generate the `source_url` here before committing.
if not upload_file.source_url:
upload_file.source_url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
db.session.add(upload_file)
db.session.commit()
with self._session_maker(expire_on_commit=False) as session:
session.add(upload_file)
session.commit()
return upload_file
@@ -109,45 +119,42 @@ class FileService:
return file_size <= file_size_limit
@staticmethod
def upload_text(text: str, text_name: str) -> UploadFile:
assert isinstance(current_user, Account)
assert current_user.current_tenant_id is not None
def upload_text(self, text: str, text_name: str, user_id: str, tenant_id: str) -> UploadFile:
if len(text_name) > 200:
text_name = text_name[:200]
# user uuid as file name
file_uuid = str(uuid.uuid4())
file_key = "upload_files/" + current_user.current_tenant_id + "/" + file_uuid + ".txt"
file_key = "upload_files/" + tenant_id + "/" + file_uuid + ".txt"
# save file to storage
storage.save(file_key, text.encode("utf-8"))
# save file to db
upload_file = UploadFile(
tenant_id=current_user.current_tenant_id,
tenant_id=tenant_id,
storage_type=dify_config.STORAGE_TYPE,
key=file_key,
name=text_name,
size=len(text),
extension="txt",
mime_type="text/plain",
created_by=current_user.id,
created_by=user_id,
created_by_role=CreatorUserRole.ACCOUNT,
created_at=naive_utc_now(),
used=True,
used_by=current_user.id,
used_by=user_id,
used_at=naive_utc_now(),
)
db.session.add(upload_file)
db.session.commit()
with self._session_maker(expire_on_commit=False) as session:
session.add(upload_file)
session.commit()
return upload_file
@staticmethod
def get_file_preview(file_id: str):
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
def get_file_preview(self, file_id: str):
with self._session_maker(expire_on_commit=False) as session:
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("File not found")
@@ -162,15 +169,14 @@ class FileService:
return text
@staticmethod
def get_image_preview(file_id: str, timestamp: str, nonce: str, sign: str):
def get_image_preview(self, file_id: str, timestamp: str, nonce: str, sign: str):
result = file_helpers.verify_image_signature(
upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign
)
if not result:
raise NotFound("File not found or signature is invalid")
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
with self._session_maker(expire_on_commit=False) as session:
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("File not found or signature is invalid")
@@ -184,13 +190,13 @@ class FileService:
return generator, upload_file.mime_type
@staticmethod
def get_file_generator_by_file_id(file_id: str, timestamp: str, nonce: str, sign: str):
def get_file_generator_by_file_id(self, file_id: str, timestamp: str, nonce: str, sign: str):
result = file_helpers.verify_file_signature(upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign)
if not result:
raise NotFound("File not found or signature is invalid")
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
with self._session_maker(expire_on_commit=False) as session:
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("File not found or signature is invalid")
@@ -199,9 +205,9 @@ class FileService:
return generator, upload_file
@staticmethod
def get_public_image_preview(file_id: str):
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
def get_public_image_preview(self, file_id: str):
with self._session_maker(expire_on_commit=False) as session:
upload_file = session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("File not found or signature is invalid")
@@ -214,3 +220,23 @@ class FileService:
generator = storage.load(upload_file.key)
return generator, upload_file.mime_type
def get_file_content(self, file_id: str) -> str:
with self._session_maker(expire_on_commit=False) as session:
upload_file: UploadFile | None = session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("File not found")
content = storage.load(upload_file.key)
return content.decode("utf-8")
def delete_file(self, file_id: str):
with self._session_maker(expire_on_commit=False) as session:
upload_file: UploadFile | None = session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
return
storage.delete(upload_file.key)
session.delete(upload_file)
session.commit()