feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -3,7 +3,7 @@ import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge'
import { GeneralChunk, ParentChildChunk } from '@/app/components/base/icons/src/vender/knowledge'
type Props = {
isGeneralMode: boolean
@@ -15,13 +15,14 @@ const ChunkingModeLabel: FC<Props> = ({
isQAMode,
}) => {
const { t } = useTranslation()
const TypeIcon = isGeneralMode ? GeneralType : ParentChildType
const TypeIcon = isGeneralMode ? GeneralChunk : ParentChildChunk
const generalSuffix = isQAMode ? ' · QA' : ''
return (
<Badge>
<div className='flex h-full items-center space-x-0.5 text-text-tertiary'>
<TypeIcon className='h-3 w-3' />
<span className='system-2xs-medium-uppercase'>{isGeneralMode ? `${t('dataset.chunkingMode.general')}${isQAMode ? ' · QA' : ''}` : t('dataset.chunkingMode.parentChild')}</span>
<span className='system-2xs-medium-uppercase'>{isGeneralMode ? `${t('dataset.chunkingMode.general')}${generalSuffix}` : t('dataset.chunkingMode.parentChild')}</span>
</div>
</Badge>
)

View File

@@ -0,0 +1,63 @@
import cn from '@/utils/classnames'
import React, { useCallback, useMemo, useState } from 'react'
/** Props accepted by the `CredentialIcon` component. */
type CredentialIconProps = {
/** Avatar image URL. When omitted or equal to `'default'`, the letter fallback is rendered instead. */
avatar_url?: string
/** Credential name; its first character (upper-cased) is shown in the letter fallback. */
name: string
/** Width and height of the icon in pixels (the component defaults this to 20). */
size?: number
/** Extra class names: applied to the `<img>` when the avatar is shown, otherwise to the fallback container. */
className?: string
}
// Background classes for the letter fallback. CredentialIcon picks one entry
// using the char code of the name's first letter modulo the list length, so the
// same name always gets the same color.
const ICON_BG_COLORS = [
'bg-components-icon-bg-orange-dark-solid',
'bg-components-icon-bg-pink-solid',
'bg-components-icon-bg-indigo-solid',
'bg-components-icon-bg-teal-solid',
]
/**
 * Square icon representing a credential.
 *
 * Renders the avatar image when `avatar_url` is set to something other than
 * `'default'` and that URL has not failed to load; otherwise renders the
 * upper-cased first character of `name` on a background color derived from
 * that character.
 *
 * @param avatar_url - Optional avatar image URL; `'default'` means "no avatar".
 * @param name - Credential name used for the letter fallback and its color.
 * @param size - Icon width/height in pixels. Defaults to 20.
 * @param className - Extra classes; applied to the `<img>` in avatar mode and
 *   to the container in fallback mode.
 */
export const CredentialIcon: React.FC<CredentialIconProps> = ({
  avatar_url,
  name,
  size = 20,
  className = '',
}) => {
  // Track WHICH url failed rather than a one-shot boolean seeded from the
  // initial props. A boolean would go stale: an avatar_url that arrives (or
  // changes) after mount, or after a previous load error, would never be shown.
  const [failedUrl, setFailedUrl] = useState<string | undefined>(undefined)
  const hasAvatar = !!avatar_url && avatar_url !== 'default'
  const showAvatar = hasAvatar && failedUrl !== avatar_url

  // Array.from iterates by code point, so a leading emoji / astral character
  // is not cut in half the way charAt(0) would cut it.
  const firstLetter = useMemo(() => (Array.from(name)[0] ?? '').toUpperCase(), [name])

  const bgColor = useMemo(() => {
    const code = firstLetter.charCodeAt(0)
    // An empty name yields NaN here; fall back to the first color instead of
    // indexing with NaN (which would leave the icon without a background).
    const index = Number.isNaN(code) ? 0 : code % ICON_BG_COLORS.length
    return ICON_BG_COLORS[index]
  }, [firstLetter])

  const onImgLoadError = useCallback(() => {
    setFailedUrl(avatar_url)
  }, [avatar_url])

  const boxStyle = { width: `${size}px`, height: `${size}px` }

  if (showAvatar) {
    return (
      <div
        className='flex shrink-0 items-center justify-center overflow-hidden rounded-md border border-divider-regular'
        style={boxStyle}
      >
        <img
          src={avatar_url}
          alt=''
          width={size}
          height={size}
          className={cn('shrink-0 object-contain', className)}
          onError={onImgLoadError}
        />
      </div>
    )
  }

  return (
    <div
      className={cn(
        'flex shrink-0 items-center justify-center rounded-md border border-divider-regular',
        bgColor,
        className,
      )}
      style={boxStyle}
    >
      <span className='bg-gradient-to-b from-components-avatar-shape-fill-stop-0 to-components-avatar-shape-fill-stop-100 bg-clip-text text-[13px] font-semibold leading-[1.2] text-transparent opacity-90'>
        {firstLetter}
      </span>
    </div>
  )
}

View File

@@ -22,7 +22,7 @@ const extendToFileTypeMap: { [key: string]: FileAppearanceType } = {
type Props = {
extension?: string
name?: string
size?: 'sm' | 'lg' | 'md'
size?: 'sm' | 'md' | 'lg' | 'xl'
className?: string
}

View File

@@ -30,7 +30,7 @@ const DocumentList: FC<Props> = ({
className='flex h-8 cursor-pointer items-center space-x-2 rounded-lg px-2 hover:bg-state-base-hover'
onClick={handleChange(item)}
>
<FileIcon name={item.name} extension={extension} size='md' />
<FileIcon name={item.name} extension={extension} size='lg' />
<div className='truncate text-sm text-text-secondary'>{name}</div>
</div>
)

View File

@@ -1,13 +1,13 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useState } from 'react'
import React, { useCallback, useMemo, useState } from 'react'
import { useBoolean } from 'ahooks'
import { RiArrowDownSLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import FileIcon from '../document-file-icon'
import DocumentList from './document-list'
import type { DocumentItem, ParentMode, SimpleDocumentDetail } from '@/models/datasets'
import { ProcessMode } from '@/models/datasets'
import { ChunkingMode } from '@/models/datasets'
import {
PortalToFollowElem,
PortalToFollowElemContent,
@@ -15,7 +15,7 @@ import {
} from '@/app/components/base/portal-to-follow-elem'
import cn from '@/utils/classnames'
import SearchInput from '@/app/components/base/search-input'
import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge'
import { GeneralChunk, ParentChildChunk } from '@/app/components/base/icons/src/vender/knowledge'
import { useDocumentList } from '@/service/knowledge/use-document'
import Loading from '@/app/components/base/loading'
@@ -24,7 +24,7 @@ type Props = {
value: {
name?: string
extension?: string
processMode?: ProcessMode
chunkingMode?: ChunkingMode
parentMode?: ParentMode
}
onChange: (value: SimpleDocumentDetail) => void
@@ -39,7 +39,7 @@ const DocumentPicker: FC<Props> = ({
const {
name,
extension,
processMode,
chunkingMode,
parentMode,
} = value
const [query, setQuery] = useState('')
@@ -53,8 +53,10 @@ const DocumentPicker: FC<Props> = ({
},
})
const documentsList = data?.data
const isParentChild = processMode === ProcessMode.parentChild
const TypeIcon = isParentChild ? ParentChildType : GeneralType
const isGeneralMode = chunkingMode === ChunkingMode.text
const isParentChild = chunkingMode === ChunkingMode.parentChild
const isQAMode = chunkingMode === ChunkingMode.qa
const TypeIcon = isParentChild ? ParentChildChunk : GeneralChunk
const [open, {
set: setOpen,
@@ -67,6 +69,12 @@ const DocumentPicker: FC<Props> = ({
setOpen(false)
}, [documentsList, onChange, setOpen])
const parentModeLabel = useMemo(() => {
if (!parentMode)
return '--'
return parentMode === 'paragraph' ? t('dataset.parentMode.paragraph') : t('dataset.parentMode.fullDoc')
}, [parentMode, t])
return (
<PortalToFollowElem
open={open}
@@ -75,7 +83,7 @@ const DocumentPicker: FC<Props> = ({
>
<PortalToFollowElemTrigger onClick={togglePopup}>
<div className={cn('ml-1 flex cursor-pointer select-none items-center rounded-lg px-2 py-0.5 hover:bg-state-base-hover', open && 'bg-state-base-hover')}>
<FileIcon name={name} extension={extension} size='lg' />
<FileIcon name={name} extension={extension} size='xl' />
<div className='ml-1 mr-0.5 flex flex-col items-start'>
<div className='flex items-center space-x-0.5'>
<span className={cn('system-md-semibold text-text-primary')}> {name || '--'}</span>
@@ -84,8 +92,9 @@ const DocumentPicker: FC<Props> = ({
<div className='flex h-3 items-center space-x-0.5 text-text-tertiary'>
<TypeIcon className='h-3 w-3' />
<span className={cn('system-2xs-medium-uppercase', isParentChild && 'mt-0.5' /* to icon problem cause not ver align */)}>
{isParentChild ? t('dataset.chunkingMode.parentChild') : t('dataset.chunkingMode.general')}
{isParentChild && ` · ${!parentMode ? '--' : parentMode === 'paragraph' ? t('dataset.parentMode.paragraph') : t('dataset.parentMode.fullDoc')}`}
{isGeneralMode && t('dataset.chunkingMode.general')}
{isQAMode && t('dataset.chunkingMode.qa')}
{isParentChild && `${t('dataset.chunkingMode.parentChild')} · ${parentModeLabel}`}
</span>
</div>
</div>

View File

@@ -51,7 +51,7 @@ const PreviewDocumentPicker: FC<Props> = ({
>
<PortalToFollowElemTrigger onClick={togglePopup}>
<div className={cn('flex h-6 select-none items-center rounded-md px-1 hover:bg-state-base-hover', open && 'bg-state-base-hover', className)}>
<FileIcon name={name} extension={extension} size='md' />
<FileIcon name={name} extension={extension} size='lg' />
<div className='ml-1 flex flex-col items-start'>
<div className='flex items-center space-x-0.5'>
<span className={cn('system-md-semibold max-w-[200px] truncate text-text-primary')}> {name || '--'}</span>

View File

@@ -2,12 +2,12 @@
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import Image from 'next/image'
import RetrievalParamConfig from '../retrieval-param-config'
import { OptionCard } from '../../create/step-two/option-card'
import { retrievalIcon } from '../../create/icons'
import { RETRIEVE_METHOD } from '@/types/app'
import type { RetrievalConfig } from '@/types/app'
import OptionCard from '../../settings/option-card'
import { VectorSearch } from '@/app/components/base/icons/src/vender/knowledge'
import { EffectColor } from '../../settings/chunk-structure/types'
type Props = {
disabled?: boolean
@@ -23,20 +23,25 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
const { t } = useTranslation()
return (
<div className='space-y-2'>
<OptionCard
disabled={disabled} icon={<Image className='h-4 w-4' src={retrievalIcon.vector} alt='' />}
title={t('dataset.retrieval.invertedIndex.title')}
description={t('dataset.retrieval.invertedIndex.description')} isActive
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
>
<RetrievalParamConfig
type={RETRIEVE_METHOD.invertedIndex}
value={value}
onChange={onChange}
/>
</OptionCard>
</div>
<OptionCard
id={RETRIEVE_METHOD.keywordSearch}
disabled={disabled}
icon={<VectorSearch className='size-4' />}
iconActiveColor='text-util-colors-purple-purple-600'
title={t('dataset.retrieval.keyword_search.title')}
description={t('dataset.retrieval.keyword_search.description')}
isActive
effectColor={EffectColor.purple}
showEffectColor
showChildren
className='gap-x-2'
>
<RetrievalParamConfig
type={RETRIEVE_METHOD.keywordSearch}
value={value}
onChange={onChange}
/>
</OptionCard>
)
}
export default React.memo(EconomicalRetrievalMethodConfig)

View File

@@ -2,11 +2,7 @@
import type { FC } from 'react'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import Image from 'next/image'
import RetrievalParamConfig from '../retrieval-param-config'
import { OptionCard } from '../../create/step-two/option-card'
import Effect from '../../create/assets/option-card-effect-purple.svg'
import { retrievalIcon } from '../../create/icons'
import type { RetrievalConfig } from '@/types/app'
import { RETRIEVE_METHOD } from '@/types/app'
import { useProviderContext } from '@/context/provider-context'
@@ -17,7 +13,9 @@ import {
RerankingModeEnum,
WeightedScoreEnum,
} from '@/models/datasets'
import Badge from '@/app/components/base/badge'
import OptionCard from '../../settings/option-card'
import { FullTextSearch, HybridSearch, VectorSearch } from '@/app/components/base/icons/src/vender/knowledge'
import { EffectColor } from '../../settings/chunk-structure/types'
type Props = {
disabled?: boolean
@@ -92,17 +90,21 @@ const RetrievalMethodConfig: FC<Props> = ({
}, [value, rerankDefaultModel, isRerankDefaultModelValid, onChange])
return (
<div className='space-y-2'>
<div className='flex flex-col gap-y-2'>
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard disabled={disabled} icon={<Image className='h-4 w-4' src={retrievalIcon.vector} alt='' />}
<OptionCard
id={RETRIEVE_METHOD.semantic}
disabled={disabled}
icon={<VectorSearch className='size-4' />}
iconActiveColor='text-util-colors-purple-purple-600'
title={t('dataset.retrieval.semantic_search.title')}
description={t('dataset.retrieval.semantic_search.description')}
isActive={
value.search_method === RETRIEVE_METHOD.semantic
}
onSwitched={() => onSwitch(RETRIEVE_METHOD.semantic)}
effectImg={Effect.src}
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
isActive={value.search_method === RETRIEVE_METHOD.semantic}
onClick={onSwitch}
effectColor={EffectColor.purple}
showEffectColor
showChildren={value.search_method === RETRIEVE_METHOD.semantic}
className='gap-x-2'
>
<RetrievalParamConfig
type={RETRIEVE_METHOD.semantic}
@@ -112,15 +114,19 @@ const RetrievalMethodConfig: FC<Props> = ({
</OptionCard>
)}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.fullText) && (
<OptionCard disabled={disabled} icon={<Image className='h-4 w-4' src={retrievalIcon.fullText} alt='' />}
<OptionCard
id={RETRIEVE_METHOD.fullText}
disabled={disabled}
icon={<FullTextSearch className='size-4' />}
iconActiveColor='text-util-colors-purple-purple-600'
title={t('dataset.retrieval.full_text_search.title')}
description={t('dataset.retrieval.full_text_search.description')}
isActive={
value.search_method === RETRIEVE_METHOD.fullText
}
onSwitched={() => onSwitch(RETRIEVE_METHOD.fullText)}
effectImg={Effect.src}
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
isActive={value.search_method === RETRIEVE_METHOD.fullText}
onClick={onSwitch}
effectColor={EffectColor.purple}
showEffectColor
showChildren={value.search_method === RETRIEVE_METHOD.fullText}
className='gap-x-2'
>
<RetrievalParamConfig
type={RETRIEVE_METHOD.fullText}
@@ -130,19 +136,20 @@ const RetrievalMethodConfig: FC<Props> = ({
</OptionCard>
)}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.hybrid) && (
<OptionCard disabled={disabled} icon={<Image className='h-4 w-4' src={retrievalIcon.hybrid} alt='' />}
title={
<div className='flex items-center space-x-1'>
<div>{t('dataset.retrieval.hybrid_search.title')}</div>
<Badge text={t('dataset.retrieval.hybrid_search.recommend')!} className='ml-1 h-[18px] border-text-accent-secondary text-text-accent-secondary' uppercase />
</div>
}
description={t('dataset.retrieval.hybrid_search.description')} isActive={
value.search_method === RETRIEVE_METHOD.hybrid
}
onSwitched={() => onSwitch(RETRIEVE_METHOD.hybrid)}
effectImg={Effect.src}
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
<OptionCard
id={RETRIEVE_METHOD.hybrid}
disabled={disabled}
icon={<HybridSearch className='size-4' />}
iconActiveColor='text-util-colors-purple-purple-600'
title={t('dataset.retrieval.hybrid_search.title')}
description={t('dataset.retrieval.hybrid_search.description')}
isActive={value.search_method === RETRIEVE_METHOD.hybrid}
onClick={onSwitch}
effectColor={EffectColor.purple}
showEffectColor
isRecommended
showChildren={value.search_method === RETRIEVE_METHOD.hybrid}
className='gap-x-2'
>
<RetrievalParamConfig
type={RETRIEVE_METHOD.hybrid}

View File

@@ -38,7 +38,7 @@ const RetrievalParamConfig: FC<Props> = ({
}) => {
const { t } = useTranslation()
const canToggleRerankModalEnable = type !== RETRIEVE_METHOD.hybrid
const isEconomical = type === RETRIEVE_METHOD.invertedIndex
const isEconomical = type === RETRIEVE_METHOD.keywordSearch
const isHybridSearch = type === RETRIEVE_METHOD.hybrid
const {
modelList: rerankModelList,
@@ -201,7 +201,7 @@ const RetrievalParamConfig: FC<Props> = ({
option.value === RerankingModeEnum.WeightedScore
? ProgressIndicator
: Reranking
} alt=''/>}
} alt='' />}
title={option.label}
description={option.tips}
className='flex-1'