feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
2025-09-18 12:49:10 +08:00
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions
--- a/api/configs/init.py
+++ b/api/configs/init.py
@@ -1,3 +1,3 @@
 from .app_config import DifyConfig

-dify_config = DifyConfig()
+dify_config = DifyConfig()  # type: ignore
--- a/api/configs/feature/init.py
+++ b/api/configs/feature/init.py
@@ -505,6 +505,22 @@ class UpdateConfig(BaseSettings):
    )


+class WorkflowVariableTruncationConfig(BaseSettings):
+    WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE: PositiveInt = Field(
+        # 100KB
+        1024_000,
+        description="Maximum size for variable to trigger final truncation.",
+    )
+    WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH: PositiveInt = Field(
+        100000,
+        description="maximum length for string to trigger tuncation, measure in number of characters",
+    )
+    WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH: PositiveInt = Field(
+        1000,
+        description="maximum length for array to trigger truncation.",
+    )
+
+
 class WorkflowConfig(BaseSettings):
    """
    Configuration for workflow execution
@@ -535,6 +551,28 @@ class WorkflowConfig(BaseSettings):
        default=200 * 1024,
    )

+    # GraphEngine Worker Pool Configuration
+    GRAPH_ENGINE_MIN_WORKERS: PositiveInt = Field(
+        description="Minimum number of workers per GraphEngine instance",
+        default=1,
+    )
+
+    GRAPH_ENGINE_MAX_WORKERS: PositiveInt = Field(
+        description="Maximum number of workers per GraphEngine instance",
+        default=10,
+    )
+
+    GRAPH_ENGINE_SCALE_UP_THRESHOLD: PositiveInt = Field(
+        description="Queue depth threshold that triggers worker scale up",
+        default=3,
+    )
+
+    GRAPH_ENGINE_SCALE_DOWN_IDLE_TIME: float = Field(
+        description="Seconds of idle time before scaling down workers",
+        default=5.0,
+        ge=0.1,
+    )
+

 class WorkflowNodeExecutionConfig(BaseSettings):
    """
@@ -1041,5 +1079,6 @@ class FeatureConfig(
    CeleryBeatConfig,
    CeleryScheduleTasksConfig,
    WorkflowLogConfig,
+    WorkflowVariableTruncationConfig,
 ):
    pass
--- a/api/configs/feature/hosted_service/init.py
+++ b/api/configs/feature/hosted_service/init.py
@@ -220,11 +220,28 @@ class HostedFetchAppTemplateConfig(BaseSettings):
    )


+class HostedFetchPipelineTemplateConfig(BaseSettings):
+    """
+    Configuration for fetching pipeline templates
+    """
+
+    HOSTED_FETCH_PIPELINE_TEMPLATES_MODE: str = Field(
+        description="Mode for fetching pipeline templates: remote, db, or builtin default to remote,",
+        default="remote",
+    )
+
+    HOSTED_FETCH_PIPELINE_TEMPLATES_REMOTE_DOMAIN: str = Field(
+        description="Domain for fetching remote pipeline templates",
+        default="https://tmpl.dify.ai",
+    )
+
+
 class HostedServiceConfig(
    # place the configs in alphabet order
    HostedAnthropicConfig,
    HostedAzureOpenAiConfig,
    HostedFetchAppTemplateConfig,
+    HostedFetchPipelineTemplateConfig,
    HostedMinmaxConfig,
    HostedOpenAiConfig,
    HostedSparkConfig,
--- a/api/configs/remote_settings_sources/apollo/utils.py
+++ b/api/configs/remote_settings_sources/apollo/utils.py
@@ -29,7 +29,7 @@ def no_key_cache_key(namespace: str, key: str) -> str:


 # Returns whether the obtained value is obtained, and None if it does not
-def get_value_from_dict(namespace_cache: dict[str, Any] | None, key: str) -> Any | None:
+def get_value_from_dict(namespace_cache: dict[str, Any] | None, key: str) -> Any:
    if namespace_cache:
        kv_data = namespace_cache.get(CONFIGURATIONS)
        if kv_data is None: