feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,6 +1,6 @@
'use client'
import type { FC, PropsWithChildren } from 'react'
import React, { useCallback, useEffect, useRef, useState } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import {
@@ -10,14 +10,13 @@ import {
} from '@remixicon/react'
import Link from 'next/link'
import Image from 'next/image'
import { useHover } from 'ahooks'
import SettingCog from '../assets/setting-gear-mod.svg'
import OrangeEffect from '../assets/option-card-effect-orange.svg'
import FamilyMod from '../assets/family-mod.svg'
import Note from '../assets/note-mod.svg'
import FileList from '../assets/file-list-3-fill.svg'
import { indexMethodIcon } from '../icons'
import { PreviewContainer } from '../../preview/container'
import PreviewContainer from '../../preview/container'
import { ChunkContainer, QAPreview } from '../../chunk'
import { PreviewHeader } from '../../preview/header'
import { FormattedText } from '../../formatted-text/formatted'
@@ -42,7 +41,7 @@ import { isReRankModelSelected } from '@/app/components/datasets/common/check-re
import Toast from '@/app/components/base/toast'
import type { NotionPage } from '@/models/common'
import { DataSourceProvider } from '@/models/common'
import { useDatasetDetailContext } from '@/context/dataset-detail'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import I18n from '@/context/i18n'
import { RETRIEVE_METHOD } from '@/types/app'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
@@ -60,10 +59,10 @@ import Badge from '@/app/components/base/badge'
import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
import Tooltip from '@/app/components/base/tooltip'
import CustomDialog from '@/app/components/base/dialog'
import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem'
import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
import { noop } from 'lodash-es'
import { useDocLink } from '@/context/i18n'
import { useInvalidDatasetList } from '@/service/knowledge/use-dataset'
const TextLabel: FC<PropsWithChildren> = (props) => {
return <label className='system-sm-semibold text-text-secondary'>{props.children}</label>
@@ -80,6 +79,7 @@ type StepTwoProps = {
dataSourceType: DataSourceType
files: CustomFile[]
notionPages?: NotionPage[]
notionCredentialId: string
websitePages?: CrawlResultItem[]
crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider
@@ -135,9 +135,10 @@ const StepTwo = ({
dataSourceType: inCreatePageDataSourceType,
files,
notionPages = [],
notionCredentialId,
websitePages = [],
crawlOptions,
websiteCrawlProvider = DataSourceProvider.fireCrawl,
websiteCrawlProvider = DataSourceProvider.jinaReader,
websiteCrawlJobId = '',
onStepChange,
updateIndexingTypeCache,
@@ -152,7 +153,8 @@ const StepTwo = ({
const media = useBreakpoints()
const isMobile = media === MediaType.mobile
const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext()
const currentDataset = useDatasetDetailContextWithSelector(state => state.dataset)
const mutateDatasetRes = useDatasetDetailContextWithSelector(state => state.mutateDatasetRes)
const isInUpload = Boolean(currentDataset)
const isUploadInEmptyDataset = isInUpload && !currentDataset?.doc_form
@@ -282,6 +284,7 @@ const StepTwo = ({
indexingTechnique: getIndexing_technique() as any,
processRule: getProcessRule(),
dataset_id: datasetId || '',
credential_id: notionCredentialId,
})
const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({
@@ -469,7 +472,7 @@ const StepTwo = ({
}
}
if (dataSourceType === DataSourceType.NOTION)
params.data_source.info_list.notion_info_list = getNotionInfo(notionPages)
params.data_source.info_list.notion_info_list = getNotionInfo(notionPages, notionCredentialId)
if (dataSourceType === DataSourceType.WEB) {
params.data_source.info_list.website_info_list = getWebsiteInfo({
@@ -507,7 +510,7 @@ const StepTwo = ({
const max = rules.segmentation.max_tokens
const overlap = rules.segmentation.chunk_overlap
const isHierarchicalDocument = documentDetail.doc_form === ChunkingMode.parentChild
|| (rules.parent_mode && rules.subchunk_segmentation)
|| (rules.parent_mode && rules.subchunk_segmentation)
setSegmentIdentifier(separator)
setMaxChunkLength(max)
setOverlap(overlap!)
@@ -553,6 +556,7 @@ const StepTwo = ({
})
const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending
const invalidDatasetList = useInvalidDatasetList()
const createHandle = async () => {
const params = getCreationParams()
@@ -582,6 +586,7 @@ const StepTwo = ({
}
if (mutateDatasetRes)
mutateDatasetRes()
invalidDatasetList()
onStepChange && onStepChange(+1)
isSetting && onSave && onSave()
}
@@ -605,9 +610,6 @@ const StepTwo = ({
setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
}, [isAPIKeySet, indexingType, datasetId])
const economyDomRef = useRef<HTMLDivElement>(null)
const isHoveringEconomy = useHover(economyDomRef)
const isModelAndRetrievalConfigDisabled = !!datasetId && !!currentDataset?.data_source_type
return (
@@ -867,7 +869,8 @@ const StepTwo = ({
<div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.indexMode')}</div>
<div className='flex items-center gap-2'>
{(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && (
<OptionCard className='flex-1 self-stretch'
<OptionCard
className='flex-1 self-stretch'
title={<div className='flex items-center'>
{t('datasetCreation.stepTwo.qualified')}
<Badge className={cn('ml-1 h-[18px]', (!hasSetIndexType && indexType === IndexingType.QUALIFIED) ? 'border-text-accent-secondary text-text-accent-secondary' : '')} uppercase>
@@ -913,26 +916,8 @@ const StepTwo = ({
</Button>
</div>
</CustomDialog>
<PortalToFollowElem
open={
isHoveringEconomy && docForm !== ChunkingMode.text
}
placement={'top'}
>
<PortalToFollowElemTrigger asChild>
<OptionCard className='flex-1 self-stretch'
title={t('datasetCreation.stepTwo.economical')}
description={t('datasetCreation.stepTwo.economicalTip')}
icon={<Image src={indexMethodIcon.economical} alt='' />}
isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
disabled={hasSetIndexType || docForm !== ChunkingMode.text}
ref={economyDomRef}
onSwitched={() => {
setIndexType(IndexingType.ECONOMICAL)
}}
/>
</PortalToFollowElemTrigger>
<PortalToFollowElemContent>
<Tooltip
popupContent={
<div className='rounded-lg border-components-panel-border bg-components-tooltip-bg p-3 text-xs font-medium text-text-secondary shadow-lg'>
{
docForm === ChunkingMode.qa
@@ -940,8 +925,24 @@ const StepTwo = ({
: t('datasetCreation.stepTwo.notAvailableForParentChild')
}
</div>
</PortalToFollowElemContent>
</PortalToFollowElem>
}
noDecoration
position='top'
asChild={false}
triggerClassName='flex-1 self-stretch'
>
<OptionCard
className='h-full'
title={t('datasetCreation.stepTwo.economical')}
description={t('datasetCreation.stepTwo.economicalTip')}
icon={<Image src={indexMethodIcon.economical} alt='' />}
isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
disabled={hasSetIndexType || docForm !== ChunkingMode.text}
onSwitched={() => {
setIndexType(IndexingType.ECONOMICAL)
}}
/>
</Tooltip>
</>)}
</div>
{!hasSetIndexType && indexType === IndexingType.QUALIFIED && (

View File

@@ -64,43 +64,45 @@ export const OptionCard: FC<OptionCardProps> = (
},
) => {
const { icon, className, title, description, isActive, children, actions, activeHeaderClassName, style, effectImg, onSwitched, noHighlight, disabled, ...rest } = props
return <div
className={classNames(
'rounded-xl bg-components-option-card-option-bg shadow-xs',
(isActive && !noHighlight)
? 'border-[1.5px] border-components-option-card-option-selected-border'
: 'border border-components-option-card-option-border',
disabled && 'pointer-events-none opacity-50',
className,
)}
style={{
...style,
}}
onClick={() => {
if (!isActive && !disabled)
onSwitched?.()
}}
{...rest}
ref={ref}
>
<OptionCardHeader
icon={icon}
title={title}
description={description}
isActive={isActive && !noHighlight}
activeClassName={activeHeaderClassName}
effectImg={effectImg}
disabled={disabled}
/>
{/** Body */}
{isActive && (children || actions) && <div className='rounded-b-xl bg-components-panel-bg px-4 py-3'>
{children}
{actions && <div className='mt-4 flex gap-2'>
{actions}
</div>
}
</div>}
</div>
return (
<div
className={classNames(
'rounded-xl bg-components-option-card-option-bg shadow-xs',
(isActive && !noHighlight)
? 'border-[1.5px] border-components-option-card-option-selected-border'
: 'border border-components-option-card-option-border',
disabled && 'pointer-events-none opacity-50',
className,
)}
style={{
...style,
}}
onClick={() => {
if (!isActive && !disabled)
onSwitched?.()
}}
{...rest}
ref={ref}
>
<OptionCardHeader
icon={icon}
title={title}
description={description}
isActive={isActive && !noHighlight}
activeClassName={activeHeaderClassName}
effectImg={effectImg}
disabled={disabled}
/>
{/** Body */}
{isActive && (children || actions) && <div className='rounded-b-xl bg-components-panel-bg px-4 py-3'>
{children}
{actions && <div className='mt-4 flex gap-2'>
{actions}
</div>
}
</div>}
</div>
)
}
// Assign an explicit display name to the component.
// NOTE(review): the component receives a `ref`, so it is presumably wrapped
// (e.g. in forwardRef) and would otherwise appear anonymous in React DevTools —
// confirm against the full definition, which is outside this view.
OptionCard.displayName = 'OptionCard'