feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -4,8 +4,8 @@ import logging
import click
import sqlalchemy as sa
from core.plugin.entities.plugin import GenericProviderID, ModelProviderID, ToolProviderID
from models.engine import db
from extensions.ext_database import db
from models.provider_ids import GenericProviderID, ModelProviderID, ToolProviderID
logger = logging.getLogger(__name__)

View File

@@ -1,7 +1,13 @@
import re
from configs import dify_config
from core.helper import marketplace
from core.plugin.entities.plugin import ModelProviderID, PluginDependency, PluginInstallationSource, ToolProviderID
from core.plugin.entities.plugin import PluginDependency, PluginInstallationSource
from core.plugin.impl.plugin import PluginInstaller
from models.provider_ids import ModelProviderID, ToolProviderID
# Compile regex pattern for version extraction at module level for better performance
_VERSION_REGEX = re.compile(r":(?P<version>[0-9]+(?:\.[0-9]+){2}(?:[+-][0-9A-Za-z.-]+)?)(?:@|$)")
class DependenciesAnalysisService:
@@ -48,6 +54,13 @@ class DependenciesAnalysisService:
for dependency in dependencies:
unique_identifier = dependency.value.plugin_unique_identifier
if unique_identifier in missing_plugin_unique_identifiers:
# Extract version for Marketplace dependencies
if dependency.type == PluginDependency.Type.Marketplace:
version_match = _VERSION_REGEX.search(unique_identifier)
if version_match:
dependency.value.version = version_match.group("version")
# Create and append the dependency (same for all types)
leaked_dependencies.append(
PluginDependency(
type=dependency.type,

View File

@@ -11,7 +11,13 @@ class OAuthProxyService(BasePluginClient):
__KEY_PREFIX__ = "oauth_proxy_context:"
@staticmethod
def create_proxy_context(user_id: str, tenant_id: str, plugin_id: str, provider: str):
def create_proxy_context(
user_id: str,
tenant_id: str,
plugin_id: str,
provider: str,
credential_id: str | None = None,
):
"""
Create a proxy context for an OAuth 2.0 authorization request.
@@ -31,6 +37,8 @@ class OAuthProxyService(BasePluginClient):
"tenant_id": tenant_id,
"provider": provider,
}
if credential_id:
data["credential_id"] = credential_id
redis_client.setex(
f"{OAuthProxyService.__KEY_PREFIX__}{context_id}",
OAuthProxyService.__MAX_AGE__,

View File

@@ -16,15 +16,17 @@ from sqlalchemy.orm import Session
from core.agent.entities import AgentToolEntity
from core.helper import marketplace
from core.plugin.entities.plugin import ModelProviderID, PluginInstallationSource, ToolProviderID
from core.plugin.entities.plugin import PluginInstallationSource
from core.plugin.entities.plugin_daemon import PluginInstallTaskStatus
from core.plugin.impl.plugin import PluginInstaller
from core.tools.entities.tool_entities import ToolProviderType
from extensions.ext_database import db
from models.account import Tenant
from models.engine import db
from models.model import App, AppMode, AppModelConfig
from models.provider_ids import ModelProviderID, ToolProviderID
from models.tools import BuiltinToolProvider
from models.workflow import Workflow
from services.plugin.plugin_service import PluginService
logger = logging.getLogger(__name__)
@@ -421,6 +423,94 @@ class PluginMigration:
)
)
@classmethod
def install_rag_pipeline_plugins(cls, extracted_plugins: str, output_file: str, workers: int = 100) -> None:
"""
Install rag pipeline plugins.
"""
manager = PluginInstaller()
plugins = cls.extract_unique_plugins(extracted_plugins)
plugin_install_failed = []
# use a fake tenant id to install all the plugins
fake_tenant_id = uuid4().hex
logger.info("Installing %s plugin instances for fake tenant %s", len(plugins["plugins"]), fake_tenant_id)
thread_pool = ThreadPoolExecutor(max_workers=workers)
response = cls.handle_plugin_instance_install(fake_tenant_id, plugins["plugins"])
if response.get("failed"):
plugin_install_failed.extend(response.get("failed", []))
def install(
tenant_id: str, plugin_ids: dict[str, str], total_success_tenant: int, total_failed_tenant: int
) -> None:
logger.info("Installing %s plugins for tenant %s", len(plugin_ids), tenant_id)
try:
# fetch plugin already installed
installed_plugins = manager.list_plugins(tenant_id)
installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins]
# at most 64 plugins one batch
for i in range(0, len(plugin_ids), 64):
batch_plugin_ids = list(plugin_ids.keys())[i : i + 64]
batch_plugin_identifiers = [
plugin_ids[plugin_id]
for plugin_id in batch_plugin_ids
if plugin_id not in installed_plugins_ids and plugin_id in plugin_ids
]
PluginService.install_from_marketplace_pkg(tenant_id, batch_plugin_identifiers)
total_success_tenant += 1
except Exception:
logger.exception("Failed to install plugins for tenant %s", tenant_id)
total_failed_tenant += 1
page = 1
total_success_tenant = 0
total_failed_tenant = 0
while True:
# paginate
tenants = db.paginate(db.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100)
if tenants.items is None or len(tenants.items) == 0:
break
for tenant in tenants:
tenant_id = tenant.id
# get plugin unique identifier
thread_pool.submit(
install,
tenant_id,
plugins.get("plugins", {}),
total_success_tenant,
total_failed_tenant,
)
page += 1
thread_pool.shutdown(wait=True)
# uninstall all the plugins for fake tenant
try:
installation = manager.list_plugins(fake_tenant_id)
while installation:
for plugin in installation:
manager.uninstall(fake_tenant_id, plugin.installation_id)
installation = manager.list_plugins(fake_tenant_id)
except Exception:
logger.exception("Failed to get installation for tenant %s", fake_tenant_id)
Path(output_file).write_text(
json.dumps(
{
"total_success_tenant": total_success_tenant,
"total_failed_tenant": total_failed_tenant,
"plugin_install_failed": plugin_install_failed,
}
)
)
@classmethod
def handle_plugin_instance_install(
cls, tenant_id: str, plugin_identifiers_map: Mapping[str, str]

View File

@@ -10,7 +10,6 @@ from core.helper.download import download_with_size_limit
from core.helper.marketplace import download_plugin_pkg
from core.plugin.entities.bundle import PluginBundleDependency
from core.plugin.entities.plugin import (
GenericProviderID,
PluginDeclaration,
PluginEntity,
PluginInstallation,
@@ -26,6 +25,7 @@ from core.plugin.impl.asset import PluginAssetManager
from core.plugin.impl.debugging import PluginDebuggingClient
from core.plugin.impl.plugin import PluginInstaller
from extensions.ext_redis import redis_client
from models.provider_ids import GenericProviderID
from services.errors.plugin import PluginInstallationForbiddenError
from services.feature_service import FeatureService, PluginInstallationScope