fix: replace all dataset.Model.query with db.session.query(Model) (#19509)

This commit is contained in:
非法操作
2025-05-12 13:52:33 +08:00
committed by GitHub
parent 49af07f444
commit b00f94df64
21 changed files with 430 additions and 265 deletions

View File

@@ -2,7 +2,7 @@ import datetime
import time
import click
from sqlalchemy import func
from sqlalchemy import func, select
from werkzeug.exceptions import NotFound
import app
@@ -51,8 +51,9 @@ def clean_unused_datasets_task():
)
# Main query with join and filter
datasets = (
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
stmt = (
select(Dataset)
.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
.filter(
Dataset.created_at < plan_sandbox_clean_day,
@@ -60,9 +61,10 @@ def clean_unused_datasets_task():
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
)
.order_by(Dataset.created_at.desc())
.paginate(page=1, per_page=50)
)
datasets = db.paginate(stmt, page=1, per_page=50)
except NotFound:
break
if datasets.items is None or len(datasets.items) == 0:
@@ -99,7 +101,7 @@ def clean_unused_datasets_task():
# update document
update_params = {Document.enabled: False}
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
db.session.query(Document).filter_by(dataset_id=dataset.id).update(update_params)
db.session.commit()
click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green"))
except Exception as e:
@@ -135,8 +137,9 @@ def clean_unused_datasets_task():
)
# Main query with join and filter
datasets = (
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
stmt = (
select(Dataset)
.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
.filter(
Dataset.created_at < plan_pro_clean_day,
@@ -144,8 +147,8 @@ def clean_unused_datasets_task():
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
)
.order_by(Dataset.created_at.desc())
.paginate(page=1, per_page=50)
)
datasets = db.paginate(stmt, page=1, per_page=50)
except NotFound:
break
@@ -175,7 +178,7 @@ def clean_unused_datasets_task():
# update document
update_params = {Document.enabled: False}
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
db.session.query(Document).filter_by(dataset_id=dataset.id).update(update_params)
db.session.commit()
click.echo(
click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green")

View File

@@ -19,7 +19,9 @@ def create_tidb_serverless_task():
while True:
try:
# check the number of idle tidb serverless
idle_tidb_serverless_number = TidbAuthBinding.query.filter(TidbAuthBinding.active == False).count()
idle_tidb_serverless_number = (
db.session.query(TidbAuthBinding).filter(TidbAuthBinding.active == False).count()
)
if idle_tidb_serverless_number >= tidb_serverless_number:
break
# create tidb serverless

View File

@@ -29,7 +29,9 @@ def mail_clean_document_notify_task():
# send document clean notify mail
try:
dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter(DatasetAutoDisableLog.notified == False).all()
dataset_auto_disable_logs = (
db.session.query(DatasetAutoDisableLog).filter(DatasetAutoDisableLog.notified == False).all()
)
# group by tenant_id
dataset_auto_disable_logs_map: dict[str, list[DatasetAutoDisableLog]] = defaultdict(list)
for dataset_auto_disable_log in dataset_auto_disable_logs:
@@ -65,7 +67,7 @@ def mail_clean_document_notify_task():
)
for dataset_id, document_ids in dataset_auto_dataset_map.items():
dataset = Dataset.query.filter(Dataset.id == dataset_id).first()
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if dataset:
document_count = len(document_ids)
knowledge_details.append(rf"Knowledge base {dataset.name}: {document_count} documents")

View File

@@ -5,6 +5,7 @@ import click
import app
from configs import dify_config
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService
from extensions.ext_database import db
from models.dataset import TidbAuthBinding
@@ -14,9 +15,11 @@ def update_tidb_serverless_status_task():
start_at = time.perf_counter()
try:
# fetch the inactive tidb serverless bindings that are still in CREATING status
tidb_serverless_list = TidbAuthBinding.query.filter(
TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING"
).all()
tidb_serverless_list = (
db.session.query(TidbAuthBinding)
.filter(TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING")
.all()
)
if len(tidb_serverless_list) == 0:
return
# update tidb serverless status