feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,54 +1,85 @@
import { InputVarType } from '@/app/components/workflow/types'
import { AgentStrategy } from '@/types/app'
import { PromptRole } from '@/models/debug'
import { PipelineInputVarType } from '@/models/pipeline'
import { DatasetAttr } from '@/types/feature'
import pkg from '../package.json'
/**
 * Resolves a boolean setting from, in order of precedence:
 * 1. `envVar`, when it is a non-empty string;
 * 2. the `dataAttrKey` attribute on `document.body`, when it is present and non-empty;
 * 3. `defaultValue`.
 *
 * A configured value is `true` only when it is exactly the string `'true'`.
 *
 * @param envVar - Raw environment value; `undefined` or `''` means "not set".
 * @param dataAttrKey - Name of the `document.body` attribute used as the second source.
 * @param defaultValue - Returned when neither source provides a value.
 * @returns The resolved boolean.
 */
const getBooleanConfig = (
  envVar: string | undefined,
  dataAttrKey: DatasetAttr,
  defaultValue: boolean = true,
) => {
  if (envVar !== undefined && envVar !== '') return envVar === 'true'

  // The optional chain yields `undefined` when there is no document/body,
  // while `getAttribute` itself yields `null` for a missing attribute.
  // Both must fall through to the default instead of being read as `false`.
  const attrValue = globalThis.document?.body?.getAttribute(dataAttrKey)
  if (attrValue !== undefined && attrValue !== null && attrValue !== '')
    return attrValue === 'true'

  return defaultValue
}
/**
 * Resolves a positive-integer setting from, in order of precedence:
 * 1. `envVar`;
 * 2. the `dataAttrKey` attribute on `document.body`;
 * 3. `defaultValue`.
 *
 * A source is accepted only when its leading characters parse as a base-10
 * integer greater than zero; otherwise the next source is tried.
 *
 * @param envVar - Raw environment value; `undefined` or `''` means "not set".
 * @param dataAttrKey - Name of the `document.body` attribute used as the second source.
 * @param defaultValue - Returned when neither source yields a positive integer.
 * @returns The resolved number.
 */
const getNumberConfig = (
  envVar: string | undefined,
  dataAttrKey: DatasetAttr,
  defaultValue: number,
) => {
  // Shared by both sources so they are validated identically.
  const toPositiveInt = (raw: string | null | undefined) => {
    if (!raw) return undefined
    // Explicit radix: values are always read as decimal, never as `0x…` hex.
    const parsed = Number.parseInt(raw, 10)
    return !Number.isNaN(parsed) && parsed > 0 ? parsed : undefined
  }

  // `??` short-circuits, so the DOM is only consulted when the env value is unusable.
  return (
    toPositiveInt(envVar)
    ?? toPositiveInt(globalThis.document?.body?.getAttribute(dataAttrKey))
    ?? defaultValue
  )
}
/**
 * Resolves a string setting from, in order of precedence:
 * 1. `envVar`;
 * 2. the `dataAttrKey` attribute on `document.body`;
 * 3. `defaultValue`.
 *
 * Empty strings count as "not set" for both sources.
 *
 * @param envVar - Raw environment value.
 * @param dataAttrKey - Name of the `document.body` attribute used as the second source.
 * @param defaultValue - Returned when neither source provides a non-empty value.
 * @returns The resolved string.
 */
const getStringConfig = (
  envVar: string | undefined,
  dataAttrKey: DatasetAttr,
  defaultValue: string,
) => {
  // `||` (not `??`) on purpose: '' must fall through to the next source.
  return (
    envVar
    || globalThis.document?.body?.getAttribute(dataAttrKey)
    || defaultValue
  )
}
/** Console API prefix; defaults to the local address `http://localhost:5001/console/api`. */
export const API_PREFIX = getStringConfig(
  process.env.NEXT_PUBLIC_API_PREFIX,
  DatasetAttr.DATA_API_PREFIX,
  'http://localhost:5001/console/api',
)

/** Public API prefix; defaults to the local address `http://localhost:5001/api`. */
export const PUBLIC_API_PREFIX = getStringConfig(
  process.env.NEXT_PUBLIC_PUBLIC_API_PREFIX,
  DatasetAttr.DATA_PUBLIC_API_PREFIX,
  'http://localhost:5001/api',
)

/** Marketplace API prefix; defaults to the local address `http://localhost:5002/api`. */
export const MARKETPLACE_API_PREFIX = getStringConfig(
  process.env.NEXT_PUBLIC_MARKETPLACE_API_PREFIX,
  DatasetAttr.DATA_MARKETPLACE_API_PREFIX,
  'http://localhost:5002/api',
)

/** Marketplace URL prefix; empty string when not configured. */
export const MARKETPLACE_URL_PREFIX = getStringConfig(
  process.env.NEXT_PUBLIC_MARKETPLACE_URL_PREFIX,
  DatasetAttr.DATA_MARKETPLACE_URL_PREFIX,
  '',
)
// Edition identifier; treated as 'SELF_HOSTED' when nothing is configured.
const EDITION = getStringConfig(
  process.env.NEXT_PUBLIC_EDITION,
  DatasetAttr.DATA_PUBLIC_EDITION,
  'SELF_HOSTED',
)

/** `true` when the resolved edition is `'SELF_HOSTED'` (also the default). */
export const IS_CE_EDITION = EDITION === 'SELF_HOSTED'

/** `true` when the resolved edition is `'CLOUD'`. */
export const IS_CLOUD_EDITION = EDITION === 'CLOUD'

/**
 * `true` when either the env variable or the
 * `data-public-support-mail-login` body attribute is a non-empty string.
 *
 * NOTE(review): this is a plain truthiness check, so any non-empty value —
 * including the string `'false'` — enables the flag. Confirm that is intended.
 */
export const SUPPORT_MAIL_LOGIN = !!(
  process.env.NEXT_PUBLIC_SUPPORT_MAIL_LOGIN
  || globalThis.document?.body?.getAttribute('data-public-support-mail-login')
)
export const TONE_LIST = [
{
@@ -107,7 +138,7 @@ export const DEFAULT_COMPLETION_PROMPT_CONFIG = {
}
/**
 * Returns the token limit for a model id.
 *
 * @param modelId - Model identifier to look up.
 * @returns `8000` for `'gpt-4'` and `'gpt-3.5-turbo-16k'`, `4000` for every other id.
 */
export const getMaxToken = (modelId: string) => {
  const hasLargerLimit = modelId === 'gpt-4' || modelId === 'gpt-3.5-turbo-16k'
  if (hasLargerLimit) return 8000
  return 4000
}
/** Cookie name `'locale'` — presumably the cookie that stores the selected UI locale; confirm against callers. */
export const LOCALE_COOKIE_NAME = 'locale'
@@ -121,8 +152,7 @@ export const emailRegex = /^[\w.!#$%&'*+\-/=?^{|}~]+@([\w-]+\.)+[\w-]{2,}$/m
// Limit returned when the value matches `zhRegex`.
const MAX_ZN_VAR_NAME_LENGTH = 8
// Limit returned for every other value.
const MAX_EN_VAR_VALUE_LENGTH = 30

/**
 * Picks the maximum allowed variable-name length for `value`.
 *
 * @param value - Text to classify with `zhRegex`.
 * @returns `MAX_ZN_VAR_NAME_LENGTH` (8) when `zhRegex` matches `value`,
 *   otherwise `MAX_EN_VAR_VALUE_LENGTH` (30).
 */
export const getMaxVarNameLength = (value: string) => {
  // Test exactly once; the regex is defined elsewhere and may carry state.
  return zhRegex.test(value) ? MAX_ZN_VAR_NAME_LENGTH : MAX_EN_VAR_VALUE_LENGTH
}
@@ -148,6 +178,15 @@ export const VAR_ITEM_TEMPLATE_IN_WORKFLOW = {
options: [],
}
/**
 * Template for a pipeline variable item: empty `variable` and `label`,
 * text-input type, `max_length` of `DEFAULT_VALUE_MAX_LEN`, required, and no
 * options.
 *
 * Presumably used as the starting value for a newly added pipeline input
 * variable — confirm against callers.
 */
export const VAR_ITEM_TEMPLATE_IN_PIPELINE = {
variable: '',
label: '',
type: PipelineInputVarType.textInput,
max_length: DEFAULT_VALUE_MAX_LEN,
required: true,
options: [],
}
/** Hex color `#D5F5F6`; by its name, the default background for an app icon. */
export const appDefaultIconBackground = '#D5F5F6'
/** Key string `'needRefreshAppList'`; where it is stored and read is not visible here — confirm against callers. */
export const NEED_REFRESH_APP_LIST_KEY = 'needRefreshAppList'
@@ -257,40 +296,110 @@ Thought: {{agent_scratchpad}}
`,
}
/**
 * Matches variable references of the form `{{#<id>.<prop>#}}`, where:
 * - `<id>` is 1–50 characters from `[a-zA-Z0-9_-]`;
 * - an optional numeric segment (`.<digits>`) may follow the id;
 * - one to ten `.<prop>` segments follow, each starting with a letter or `_`
 *   and at most 30 word characters long.
 *
 * The regex carries the `g` flag, so `test`/`exec` advance `lastIndex`
 * between calls; call {@link resetReg} before reusing it on a new string.
 */
export const VAR_REGEX
  = /\{\{(#[a-zA-Z0-9_-]{1,50}(\.\d+)?(\.[a-zA-Z_]\w{0,29}){1,10}#)\}\}/gi

/**
 * Rewinds `VAR_REGEX` to the start of input.
 *
 * @returns `0`, the value assigned to `lastIndex`.
 */
export const resetReg = () => (VAR_REGEX.lastIndex = 0)
/** `true` only when `NEXT_PUBLIC_DISABLE_UPLOAD_IMAGE_AS_ICON` is exactly `'true'`. */
export const DISABLE_UPLOAD_IMAGE_AS_ICON
  = process.env.NEXT_PUBLIC_DISABLE_UPLOAD_IMAGE_AS_ICON === 'true'

/**
 * Value of `NEXT_PUBLIC_GITHUB_ACCESS_TOKEN`, or `''` when unset or empty.
 *
 * NOTE(review): the `NEXT_PUBLIC_` prefix suggests this value is exposed to
 * client-side code — confirm the token is meant to be public.
 */
export const GITHUB_ACCESS_TOKEN
  = process.env.NEXT_PUBLIC_GITHUB_ACCESS_TOKEN || ''
/** Comma-separated file extensions (`.difypkg`, `.difybndl`); by its name, those accepted for local installs. */
export const SUPPORT_INSTALL_LOCAL_FILE_EXTENSIONS = '.difypkg,.difybndl'
/** Numeric limit of 50; by its name, a length used for full-document previews — unit not visible here, confirm against callers. */
export const FULL_DOC_PREVIEW_LENGTH = 50
/** Numeric limit of 10; by its name, the maximum nesting depth allowed for a JSON schema. */
export const JSON_SCHEMA_MAX_DEPTH = 10
// Numeric limits below are resolved through `getNumberConfig`: env variable
// first, then the matching `DatasetAttr` attribute, then the literal default.

/** Defaults to 10. */
export const MAX_TOOLS_NUM = getNumberConfig(
  process.env.NEXT_PUBLIC_MAX_TOOLS_NUM,
  DatasetAttr.DATA_PUBLIC_MAX_TOOLS_NUM,
  10,
)

/** Defaults to 10. */
export const MAX_PARALLEL_LIMIT = getNumberConfig(
  process.env.NEXT_PUBLIC_MAX_PARALLEL_LIMIT,
  DatasetAttr.DATA_PUBLIC_MAX_PARALLEL_LIMIT,
  10,
)

/** Timeout in milliseconds (per the name); defaults to 60000. */
export const TEXT_GENERATION_TIMEOUT_MS = getNumberConfig(
  process.env.NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS,
  DatasetAttr.DATA_PUBLIC_TEXT_GENERATION_TIMEOUT_MS,
  60000,
)

/** Defaults to 100. */
export const LOOP_NODE_MAX_COUNT = getNumberConfig(
  process.env.NEXT_PUBLIC_LOOP_NODE_MAX_COUNT,
  DatasetAttr.DATA_PUBLIC_LOOP_NODE_MAX_COUNT,
  100,
)

/** Defaults to 99. */
export const MAX_ITERATIONS_NUM = getNumberConfig(
  process.env.NEXT_PUBLIC_MAX_ITERATIONS_NUM,
  DatasetAttr.DATA_PUBLIC_MAX_ITERATIONS_NUM,
  99,
)

/** Defaults to 50. */
export const MAX_TREE_DEPTH = getNumberConfig(
  process.env.NEXT_PUBLIC_MAX_TREE_DEPTH,
  DatasetAttr.DATA_PUBLIC_MAX_TREE_DEPTH,
  50,
)
// Feature flags below are resolved through `getBooleanConfig`: env variable
// first, then the matching `DatasetAttr` attribute, then the literal default.

/** Off by default. */
export const ALLOW_UNSAFE_DATA_SCHEME = getBooleanConfig(
  process.env.NEXT_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME,
  DatasetAttr.DATA_PUBLIC_ALLOW_UNSAFE_DATA_SCHEME,
  false,
)

/** On by default. */
export const ENABLE_WEBSITE_JINAREADER = getBooleanConfig(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER,
  DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_JINAREADER,
  true,
)

/** On by default. */
export const ENABLE_WEBSITE_FIRECRAWL = getBooleanConfig(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL,
  DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_FIRECRAWL,
  true,
)

/** Off by default. */
export const ENABLE_WEBSITE_WATERCRAWL = getBooleanConfig(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL,
  DatasetAttr.DATA_PUBLIC_ENABLE_WEBSITE_WATERCRAWL,
  false,
)
/** Delimiter string `'@@@'`; by its name, used to join/split value-selector parts — confirm against callers. */
export const VALUE_SELECTOR_DELIMITER = '@@@'
/** Password pattern: at least 8 non-whitespace characters, including at least one ASCII letter and one digit. */
export const validPassword = /^(?=.*[a-zA-Z])(?=.*\d)\S{8,}$/
/** Zendesk widget key; empty string when not configured. */
export const ZENDESK_WIDGET_KEY = getStringConfig(
  process.env.NEXT_PUBLIC_ZENDESK_WIDGET_KEY,
  DatasetAttr.NEXT_PUBLIC_ZENDESK_WIDGET_KEY,
  '',
)

/**
 * Zendesk field ids, one per entry. Each is resolved through
 * `getStringConfig` and is an empty string when not configured.
 */
export const ZENDESK_FIELD_IDS = {
  ENVIRONMENT: getStringConfig(
    process.env.NEXT_PUBLIC_ZENDESK_FIELD_ID_ENVIRONMENT,
    DatasetAttr.NEXT_PUBLIC_ZENDESK_FIELD_ID_ENVIRONMENT,
    '',
  ),
  VERSION: getStringConfig(
    process.env.NEXT_PUBLIC_ZENDESK_FIELD_ID_VERSION,
    DatasetAttr.NEXT_PUBLIC_ZENDESK_FIELD_ID_VERSION,
    '',
  ),
  EMAIL: getStringConfig(
    process.env.NEXT_PUBLIC_ZENDESK_FIELD_ID_EMAIL,
    DatasetAttr.NEXT_PUBLIC_ZENDESK_FIELD_ID_EMAIL,
    '',
  ),
  WORKSPACE_ID: getStringConfig(
    process.env.NEXT_PUBLIC_ZENDESK_FIELD_ID_WORKSPACE_ID,
    DatasetAttr.NEXT_PUBLIC_ZENDESK_FIELD_ID_WORKSPACE_ID,
    '',
  ),
  PLAN: getStringConfig(
    process.env.NEXT_PUBLIC_ZENDESK_FIELD_ID_PLAN,
    DatasetAttr.NEXT_PUBLIC_ZENDESK_FIELD_ID_PLAN,
    '',
  ),
}
/** Version string read from the `version` field of `../package.json`. */
export const APP_VERSION = pkg.version
/** Numeric constant 20; by its name, the number of chunks shown in a RAG pipeline preview — confirm against callers. */
export const RAG_PIPELINE_PREVIEW_CHUNK_NUM = 20