feat:mysql adaptation for metadb (#28188)

This commit is contained in:
longbingljw
2025-11-20 09:44:39 +08:00
committed by GitHub
parent 012877d8d4
commit c0b7ffd5d0
131 changed files with 6312 additions and 2602 deletions

View File

@@ -7,8 +7,7 @@ from collections.abc import Generator, Mapping
from typing import Any, Union, cast
from flask import Flask, current_app
from sqlalchemy import Float, and_, or_, select, text
from sqlalchemy import cast as sqlalchemy_cast
from sqlalchemy import and_, or_, select
from core.app.app_config.entities import (
DatasetEntity,
@@ -1023,60 +1022,55 @@ class DatasetRetrieval:
self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list
):
if value is None and condition not in ("empty", "not empty"):
return
return filters
json_field = DatasetDocument.doc_metadata[metadata_name].as_string()
key = f"{metadata_name}_{sequence}"
key_value = f"{metadata_name}_{sequence}_value"
match condition:
case "contains":
filters.append(
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
)
)
filters.append(json_field.like(f"%{value}%"))
case "not contains":
filters.append(
(text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
)
)
filters.append(json_field.notlike(f"%{value}%"))
case "start with":
filters.append(
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"{value}%"}
)
)
filters.append(json_field.like(f"{value}%"))
case "end with":
filters.append(
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}"}
)
)
filters.append(json_field.like(f"%{value}"))
case "is" | "=":
if isinstance(value, str):
filters.append(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"')
else:
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) == value)
filters.append(json_field == value)
elif isinstance(value, (int, float)):
filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() == value)
case "is not" | "":
if isinstance(value, str):
filters.append(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"')
else:
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value)
filters.append(json_field != value)
elif isinstance(value, (int, float)):
filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() != value)
case "empty":
filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None))
case "not empty":
filters.append(DatasetDocument.doc_metadata[metadata_name].isnot(None))
case "before" | "<":
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) < value)
filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() < value)
case "after" | ">":
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) > value)
filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() > value)
case "" | "<=":
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) <= value)
filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() <= value)
case "" | ">=":
filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) >= value)
filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() >= value)
case _:
pass
return filters
def _fetch_model_config(

View File

@@ -13,6 +13,7 @@ from sqlalchemy.orm import Session
from yarl import URL
import contexts
from configs import dify_config
from core.helper.provider_cache import ToolProviderCredentialsCache
from core.plugin.impl.tool import PluginToolManager
from core.tools.__base.tool_provider import ToolProviderController
@@ -32,7 +33,6 @@ from services.tools.mcp_tools_manage_service import MCPToolManageService
if TYPE_CHECKING:
from core.workflow.nodes.tool.entities import ToolEntity
from configs import dify_config
from core.agent.entities import AgentToolEntity
from core.app.entities.app_invoke_entities import InvokeFrom
from core.helper.module_import_helper import load_single_subclass_from_source
@@ -618,12 +618,28 @@ class ToolManager:
"""
# according to multi credentials, select the one with is_default=True first, then created_at oldest
# for compatibility with old version
sql = """
if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql":
# PostgreSQL: Use DISTINCT ON
sql = """
SELECT DISTINCT ON (tenant_id, provider) id
FROM tool_builtin_providers
WHERE tenant_id = :tenant_id
ORDER BY tenant_id, provider, is_default DESC, created_at DESC
"""
else:
# MySQL: Use window function to achieve same result
sql = """
SELECT id FROM (
SELECT id,
ROW_NUMBER() OVER (
PARTITION BY tenant_id, provider
ORDER BY is_default DESC, created_at DESC
) as rn
FROM tool_builtin_providers
WHERE tenant_id = :tenant_id
) ranked WHERE rn = 1
"""
with Session(db.engine, autoflush=False) as session:
ids = [row.id for row in session.execute(sa.text(sql), {"tenant_id": tenant_id}).all()]
return session.query(BuiltinToolProvider).where(BuiltinToolProvider.id.in_(ids)).all()

View File

@@ -6,8 +6,7 @@ from collections import defaultdict
from collections.abc import Mapping, Sequence
from typing import TYPE_CHECKING, Any, cast
from sqlalchemy import Float, and_, func, or_, select, text
from sqlalchemy import cast as sqlalchemy_cast
from sqlalchemy import and_, func, literal, or_, select
from sqlalchemy.orm import sessionmaker
from core.app.app_config.entities import DatasetRetrieveConfigEntity
@@ -597,79 +596,79 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node):
if value is None and condition not in ("empty", "not empty"):
return filters
key = f"{metadata_name}_{sequence}"
key_value = f"{metadata_name}_{sequence}_value"
json_field = Document.doc_metadata[metadata_name].as_string()
match condition:
case "contains":
filters.append(
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
)
)
filters.append(json_field.like(f"%{value}%"))
case "not contains":
filters.append(
(text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
)
)
filters.append(json_field.notlike(f"%{value}%"))
case "start with":
filters.append(
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"{value}%"}
)
)
filters.append(json_field.like(f"{value}%"))
case "end with":
filters.append(
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}"}
)
)
filters.append(json_field.like(f"%{value}"))
case "in":
if isinstance(value, str):
escaped_values = [v.strip().replace("'", "''") for v in str(value).split(",")]
escaped_value_str = ",".join(escaped_values)
value_list = [v.strip() for v in value.split(",") if v.strip()]
elif isinstance(value, (list, tuple)):
value_list = [str(v) for v in value if v is not None]
else:
escaped_value_str = str(value)
filters.append(
(text(f"documents.doc_metadata ->> :{key} = any(string_to_array(:{key_value},','))")).params(
**{key: metadata_name, key_value: escaped_value_str}
)
)
value_list = [str(value)] if value is not None else []
if not value_list:
filters.append(literal(False))
else:
filters.append(json_field.in_(value_list))
case "not in":
if isinstance(value, str):
escaped_values = [v.strip().replace("'", "''") for v in str(value).split(",")]
escaped_value_str = ",".join(escaped_values)
value_list = [v.strip() for v in value.split(",") if v.strip()]
elif isinstance(value, (list, tuple)):
value_list = [str(v) for v in value if v is not None]
else:
escaped_value_str = str(value)
filters.append(
(text(f"documents.doc_metadata ->> :{key} != all(string_to_array(:{key_value},','))")).params(
**{key: metadata_name, key_value: escaped_value_str}
)
)
case "=" | "is":
value_list = [str(value)] if value is not None else []
if not value_list:
filters.append(literal(True))
else:
filters.append(json_field.notin_(value_list))
case "is" | "=":
if isinstance(value, str):
filters.append(Document.doc_metadata[metadata_name] == f'"{value}"')
else:
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == value)
filters.append(json_field == value)
elif isinstance(value, (int, float)):
filters.append(Document.doc_metadata[metadata_name].as_float() == value)
case "is not" | "":
if isinstance(value, str):
filters.append(Document.doc_metadata[metadata_name] != f'"{value}"')
else:
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != value)
filters.append(json_field != value)
elif isinstance(value, (int, float)):
filters.append(Document.doc_metadata[metadata_name].as_float() != value)
case "empty":
filters.append(Document.doc_metadata[metadata_name].is_(None))
case "not empty":
filters.append(Document.doc_metadata[metadata_name].isnot(None))
case "before" | "<":
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) < value)
filters.append(Document.doc_metadata[metadata_name].as_float() < value)
case "after" | ">":
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) > value)
filters.append(Document.doc_metadata[metadata_name].as_float() > value)
case "" | "<=":
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) <= value)
filters.append(Document.doc_metadata[metadata_name].as_float() <= value)
case "" | ">=":
filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) >= value)
filters.append(Document.doc_metadata[metadata_name].as_float() >= value)
case _:
pass
return filters
@classmethod