feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -0,0 +1,39 @@
'use client'
import React from 'react'
import cn from '@/utils/classnames'
import Checkbox from '@/app/components/base/checkbox'
import Tooltip from '@/app/components/base/tooltip'
/** Props accepted by `CheckboxWithLabel`. */
type CheckboxWithLabelProps = {
/** Extra classes merged into the wrapping `<label>` element. */
className?: string
/** Current checked state, forwarded to the checkbox. */
isChecked: boolean
/** Called with the negated `isChecked` value when the checkbox fires `onCheck`. */
onChange: (isChecked: boolean) => void
/** Text rendered next to the checkbox. */
label: string
/** Extra classes merged into the element that renders `label`. */
labelClassName?: string
/** When truthy, a tooltip with this text as popup content is rendered after the label text. */
tooltip?: string
}
/**
 * A checkbox followed by a text label and, when `tooltip` is provided, a
 * tooltip trigger. Everything is wrapped in a single `<label>` element.
 *
 * The component is controlled: it never stores the checked state itself and
 * only reports the inverse of `isChecked` through `onChange`.
 */
function CheckboxWithLabel(props: CheckboxWithLabelProps) {
  const {
    className = '',
    isChecked,
    onChange,
    label,
    labelClassName,
    tooltip,
  } = props

  // The parent owns the value, so a check event simply asks for the opposite state.
  const requestToggle = () => onChange(!isChecked)

  const wrapperClassName = cn('flex items-center space-x-2', className)
  const textClassName = cn('system-sm-medium text-text-secondary', labelClassName)

  return (
    <label className={wrapperClassName}>
      <Checkbox checked={isChecked} onCheck={requestToggle} />
      <div className={textClassName}>{label}</div>
      {tooltip && (
        <Tooltip
          popupContent={<div className='w-[200px]'>{tooltip}</div>}
          triggerClassName='ml-0.5 w-4 h-4'
        />
      )}
    </label>
  )
}
export default React.memo(CheckboxWithLabel)

View File

@@ -0,0 +1,80 @@
'use client'
import React, { useCallback } from 'react'
import cn from '@/utils/classnames'
import type { CrawlResultItem as CrawlResultItemType } from '@/models/datasets'
import Checkbox from '@/app/components/base/checkbox'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
import Radio from '@/app/components/base/radio/ui'
/** Props accepted by `CrawledResultItem`. */
type CrawledResultItemProps = {
/** Crawl result to display; its `title` and `source_url` are rendered. */
payload: CrawlResultItemType
/** Whether this row is currently selected; forwarded to the checkbox / radio. */
isChecked: boolean
/** Called with the negated `isChecked` value when the checkbox / radio is checked. */
onCheckChange: (checked: boolean) => void
/** When true the row uses the active background instead of the hover style. */
isPreview: boolean
/** When true a preview button is rendered for the row. */
showPreview: boolean
/** Click handler of the preview button. */
onPreview: () => void
/** Renders a checkbox when true and a radio when false; the component defaults it to true. */
isMultipleChoice?: boolean
}
/**
 * One row of the crawl result list: a selection control (checkbox or radio),
 * the page title with its source URL, and an optional preview button.
 */
function CrawledResultItem(props: CrawledResultItemProps) {
  const {
    payload,
    isChecked,
    onCheckChange,
    isPreview,
    onPreview,
    showPreview,
    isMultipleChoice = true,
  } = props
  const { t } = useTranslation()

  // Controlled selection: always report the opposite of the current state.
  const handleCheckChange = useCallback(() => {
    onCheckChange(!isChecked)
  }, [isChecked, onCheckChange])

  // `group` is only attached while the row is not being previewed, so the
  // hover-revealed preview button stays hidden on the previewed row.
  const rowClassName = cn(
    'relative flex cursor-pointer gap-x-2 rounded-lg p-2',
    isPreview ? 'bg-state-base-active' : 'group hover:bg-state-base-hover',
  )

  const selectionControl = isMultipleChoice
    ? (
      <Checkbox
        className='shrink-0'
        checked={isChecked}
        onCheck={handleCheckChange}
      />
    )
    : (
      <Radio
        isChecked={isChecked}
        onCheck={handleCheckChange}
      />
    )

  return (
    <div className={rowClassName}>
      {selectionControl}
      <div className='flex min-w-0 grow flex-col gap-y-0.5'>
        <div
          className='system-sm-medium truncate text-text-secondary'
          title={payload.title}
        >
          {payload.title}
        </div>
        <div
          className='system-xs-regular truncate text-text-tertiary'
          title={payload.source_url}
        >
          {payload.source_url}
        </div>
      </div>
      {showPreview && (
        <Button
          size='small'
          onClick={onPreview}
          className='system-xs-medium-uppercase right-2 top-2 hidden px-1.5 group-hover:absolute group-hover:block'
        >
          {t('datasetCreation.stepOne.website.preview')}
        </Button>
      )}
    </div>
  )
}
export default React.memo(CrawledResultItem)

View File

@@ -0,0 +1,95 @@
'use client'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import cn from '@/utils/classnames'
import type { CrawlResultItem } from '@/models/datasets'
import CheckboxWithLabel from './checkbox-with-label'
import CrawledResultItem from './crawled-result-item'
// Translation key prefix for the website-crawl strings used by this component.
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by `CrawledResult`. */
type CrawledResultProps = {
/** Extra classes merged into the root container. */
className?: string
/** Index (into `list`) of the row that is rendered in its preview state. */
previewIndex?: number
/** Every crawl result to render, one row each. */
list: CrawlResultItem[]
/** Currently selected results; rows are matched against it by `source_url`. */
checkedList: CrawlResultItem[]
/** Called with the complete new selection whenever it changes. */
onSelectedChange: (selected: CrawlResultItem[]) => void
/** Called with the item and its index when a row's preview is requested; ignored when omitted. */
onPreview?: (payload: CrawlResultItem, index: number) => void
/** Forwarded to every row; the component defaults it to false. */
showPreview?: boolean
/** Value shown in the summary line, formatted with one decimal place. */
usedTime: number
/** The component defaults it to true; when false a newly checked row replaces the selection and the select-all header is hidden. */
isMultipleChoice?: boolean
}
/**
 * Summary line plus selectable list of crawl results.
 *
 * Selection is controlled by the parent through `checkedList` /
 * `onSelectedChange`; items are identified by their `source_url`.
 *
 * - In multiple-choice mode a "select all / reset all" header is shown and
 *   checking a row appends it to the selection.
 * - In single-choice mode the header is hidden and checking a row replaces
 *   the selection with that row.
 * - Unchecking a row removes every selected entry with the same `source_url`.
 */
const CrawledResult = ({
  className = '',
  previewIndex,
  list,
  checkedList,
  onSelectedChange,
  usedTime,
  onPreview,
  showPreview = false,
  isMultipleChoice = true,
}: CrawledResultProps) => {
  const { t } = useTranslation()

  // Index the selection by URL once per change so each row's membership check
  // is a constant-time lookup instead of a scan of `checkedList`.
  const checkedUrls = React.useMemo(
    () => new Set(checkedList.map(checkedItem => checkedItem.source_url)),
    [checkedList],
  )

  // "All checked" means every listed row is selected. An empty list is never
  // "all checked", and the result always agrees with the per-row state below
  // because both are derived from the same URL set.
  const isCheckAll = list.length > 0 && list.every(item => checkedUrls.has(item.source_url))

  const handleCheckedAll = useCallback(() => {
    onSelectedChange(isCheckAll ? [] : list)
  }, [isCheckAll, list, onSelectedChange])

  // Returns the per-row change handler for `item`.
  const handleItemCheckChange = useCallback((item: CrawlResultItem) => {
    return (checked: boolean) => {
      if (!checked) {
        onSelectedChange(checkedList.filter(checkedItem => checkedItem.source_url !== item.source_url))
        return
      }
      if (isMultipleChoice)
        onSelectedChange([...checkedList, item])
      else
        onSelectedChange([item])
    }
  }, [checkedList, onSelectedChange, isMultipleChoice])

  const handlePreview = useCallback((index: number) => {
    if (!onPreview) return
    onPreview(list[index], index)
  }, [list, onPreview])

  return (
    <div className={cn('flex flex-col gap-y-2', className)}>
      <div className='system-sm-medium pt-2 text-text-primary'>
        {t(`${I18N_PREFIX}.scrapTimeInfo`, {
          total: list.length,
          time: usedTime.toFixed(1),
        })}
      </div>
      <div className='overflow-hidden rounded-xl border border-components-panel-border bg-components-panel-bg'>
        {isMultipleChoice && (
          <div className='flex items-center px-4 py-2'>
            <CheckboxWithLabel
              isChecked={isCheckAll}
              onChange={handleCheckedAll}
              label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
            />
          </div>
        )}
        <div className='flex flex-col gap-y-px border-t border-divider-subtle bg-background-default-subtle p-2'>
          {list.map((item, index) => (
            <CrawledResultItem
              key={item.source_url}
              payload={item}
              isChecked={checkedUrls.has(item.source_url)}
              onCheckChange={handleItemCheckChange(item)}
              isPreview={index === previewIndex}
              onPreview={() => handlePreview(index)}
              showPreview={showPreview}
              isMultipleChoice={isMultipleChoice}
            />
          ))}
        </div>
      </div>
    </div>
  )
}
export default React.memo(CrawledResult)

View File

@@ -0,0 +1,89 @@
'use client'
import React from 'react'
import { useTranslation } from 'react-i18next'
import cn from '@/utils/classnames'
/** Props accepted by `Crawling`. */
type CrawlingProps = {
/** Extra classes merged into the root container. */
className?: string
/** Rendered as the left side of the `crawledNum/totalNum` counter. */
crawledNum: number
/** Rendered as the right side of the `crawledNum/totalNum` counter. */
totalNum: number
}
/** Props accepted by the `Block` placeholder. */
type BlockProps = {
/** Extra classes merged with the placeholder's base colour and opacity classes. */
className?: string
}
/** Props accepted by the `Item` placeholder row. */
type ItemProps = {
/** Class name applied to the first placeholder bar (this file passes width classes such as `w-[35%]`). */
firstLineWidth: string
/** Class name applied to the second placeholder bar. */
secondLineWidth: string
}
/**
 * Skeleton placeholder: an empty `<div>` with the base placeholder colour and
 * opacity, sized and shaped entirely by the classes passed in `className`.
 *
 * Written as a named function expression so the memoized component carries a
 * name in React DevTools and warning messages.
 */
const Block = React.memo(function Block({
  className,
}: BlockProps) {
  return <div className={cn('bg-text-quaternary opacity-20', className)} />
})
/**
 * Skeleton row: a small square placeholder followed by two stacked
 * placeholder bars. `firstLineWidth` and `secondLineWidth` are class names
 * applied to the first and second bar respectively.
 *
 * Written as a named function expression so the memoized component carries a
 * name in React DevTools and warning messages.
 */
const Item = React.memo(function Item({
  firstLineWidth,
  secondLineWidth,
}: ItemProps) {
  return (
    <div className='flex gap-x-2 px-2 py-[5px]'>
      <div className='py-0.5'>
        <Block className='size-4 rounded-[4px]' />
      </div>
      <div className='flex grow flex-col'>
        <div className='flex h-5 w-full items-center'>
          <Block className={cn('h-2.5 rounded-sm', firstLineWidth)} />
        </div>
        <div className='flex h-[18px] w-full items-center'>
          <Block className={cn('h-1.5 rounded-sm', secondLineWidth)} />
        </div>
      </div>
    </div>
  )
})
// Class names for the two placeholder bars of each skeleton row, in render order.
const SKELETON_ROWS: ItemProps[] = [
  { firstLineWidth: 'w-[35%]', secondLineWidth: 'w-[50%]' },
  { firstLineWidth: 'w-[40%]', secondLineWidth: 'w-[45%]' },
  { firstLineWidth: 'w-[30%]', secondLineWidth: 'w-[36%]' },
]

/**
 * In-progress view: a translated label followed by the
 * `crawledNum/totalNum` counter, above a skeleton panel made of a placeholder
 * header and three placeholder rows.
 */
function Crawling({
  className = '',
  crawledNum,
  totalNum,
}: CrawlingProps) {
  const { t } = useTranslation()

  // The rows are static and never reordered, so the index is a sufficient key.
  const skeletonRows = SKELETON_ROWS.map(({ firstLineWidth, secondLineWidth }, index) => (
    <Item
      key={index}
      firstLineWidth={firstLineWidth}
      secondLineWidth={secondLineWidth}
    />
  ))

  return (
    <div className={cn('mt-2 flex flex-col gap-y-2 pt-2', className)}>
      <div className='system-sm-medium text-text-primary'>
        {t('datasetCreation.stepOne.website.totalPageScraped')} {crawledNum}/{totalNum}
      </div>
      <div className='overflow-hidden rounded-xl border border-components-panel-border bg-components-panel-bg'>
        <div className='flex items-center gap-x-2 px-4 py-2'>
          <Block className='size-4 rounded-[4px]' />
          <Block className='h-2.5 w-14 rounded-sm' />
        </div>
        <div className='flex flex-col gap-px border-t border-divider-subtle bg-background-default-subtle p-2'>
          {skeletonRows}
        </div>
      </div>
    </div>
  )
}
export default React.memo(Crawling)

View File

@@ -0,0 +1,34 @@
import React from 'react'
import cn from '@/utils/classnames'
import { RiErrorWarningFill } from '@remixicon/react'
/** Props accepted by `ErrorMessage`. */
type ErrorMessageProps = {
/** Extra classes merged into the root container. */
className?: string
/** Primary line of the message; always rendered. */
title: string
/** Secondary line; rendered below the title only when truthy. */
errorMsg?: string
}
/**
 * Error banner: a warning icon next to a title and, when `errorMsg` is
 * provided, a secondary detail line.
 */
function ErrorMessage(props: ErrorMessageProps) {
  const { className, title, errorMsg } = props

  // Only present when a detail message was supplied.
  const detail = errorMsg && (
    <div className='system-xs-regular text-text-secondary'>{errorMsg}</div>
  )

  return (
    // eslint-disable-next-line tailwindcss/migration-from-tailwind-2
    <div className={cn(
      'flex gap-x-0.5 rounded-xl border-[0.5px] border-components-panel-border bg-opacity-40 bg-toast-error-bg p-2 shadow-xs shadow-shadow-shadow-3',
      className,
    )}>
      <div className='flex size-6 items-center justify-center'>
        <RiErrorWarningFill className='h-4 w-4 text-text-destructive' />
      </div>
      <div className='flex flex-col gap-y-0.5 py-1'>
        <div className='system-xs-medium text-text-primary'>{title}</div>
        {detail}
      </div>
    </div>
  )
}
export default React.memo(ErrorMessage)

View File

@@ -0,0 +1,123 @@
import Button from '@/app/components/base/button'
import { useAppForm } from '@/app/components/base/form'
import BaseField from '@/app/components/base/form/form-scenarios/base/field'
import { ArrowDownRoundFill } from '@/app/components/base/icons/src/vender/solid/general'
import cn from '@/utils/classnames'
import { RiPlayLargeLine } from '@remixicon/react'
import { useBoolean } from 'ahooks'
import { useEffect, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Toast from '@/app/components/base/toast'
import type { RAGPipelineVariables } from '@/models/pipeline'
import { useConfigurations, useInitialData } from '@/app/components/rag-pipeline/hooks/use-input-fields'
import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'
import { CrawlStep } from '@/models/datasets'
// Translation key prefix for the website-crawl strings used by this component.
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by `Options`. */
type OptionsProps = {
/** Pipeline variables passed to `useInitialData` and `useConfigurations` to derive the form defaults and field configurations. */
variables: RAGPipelineVariables
/** Current crawl step: the fields are unfolded on `CrawlStep.init`, folded on any other step, and the run button shows its loading state on `CrawlStep.running`. */
step: CrawlStep
/** When true the run button is disabled. */
runDisabled?: boolean
/** Called with the form value from the form's `onSubmit` handler. */
onSubmit: (data: Record<string, any>) => void
}
/**
 * Collapsible options form with a run button.
 *
 * Field configurations and default values are derived from `variables`. On
 * submit the value is checked against a zod schema generated from those
 * configurations; the first issue, if any, is shown as an error toast and
 * returned as the validation error. The field list folds whenever `step`
 * is anything other than `CrawlStep.init` and unfolds again on
 * `CrawlStep.init`.
 */
function Options({
  variables,
  step,
  runDisabled,
  onSubmit,
}: OptionsProps) {
  const { t } = useTranslation()
  const initialData = useInitialData(variables)
  const configurations = useConfigurations(variables)
  const schema = useMemo(() => generateZodSchema(configurations), [configurations])

  const form = useAppForm({
    defaultValues: initialData,
    validators: {
      onSubmit: ({ value }) => {
        const parsed = schema.safeParse(value)
        if (parsed.success)
          return undefined

        // Report only the first issue, prefixed with the quoted path of the offending field.
        const firstIssue = parsed.error.issues[0]
        const errorMessage = `"${firstIssue.path.join('.')}" ${firstIssue.message}`
        Toast.notify({
          type: 'error',
          message: errorMessage,
        })
        return errorMessage
      },
    },
    onSubmit: ({ value }) => {
      onSubmit(value)
    },
  })

  const [isFolded, {
    toggle: toggleFold,
    setTrue: collapse,
    setFalse: expand,
  }] = useBoolean(false)

  useEffect(() => {
    // When the step changes: show the fields on the initial step, hide them otherwise.
    if (step === CrawlStep.init)
      expand()
    else
      collapse()
  }, [step])

  const isRunning = step === CrawlStep.running
  const runLabel = isRunning ? t(`${I18N_PREFIX}.running`) : t(`${I18N_PREFIX}.run`)

  // Built lazily so the field components are only created while the section is unfolded.
  const renderFields = () => configurations.map((config, index) => {
    const FieldComponent = BaseField({
      initialData,
      config,
    })
    return <FieldComponent key={index} form={form} />
  })

  return (
    <form
      className='w-full'
      onSubmit={(e) => {
        e.preventDefault()
        e.stopPropagation()
        form.handleSubmit()
      }}
    >
      <div className='flex items-center gap-x-1 px-4 py-2'>
        <div
          className='flex grow cursor-pointer select-none items-center gap-x-0.5'
          onClick={toggleFold}
        >
          <span className='system-sm-semibold-uppercase text-text-secondary'>
            {t(`${I18N_PREFIX}.options`)}
          </span>
          <ArrowDownRoundFill className={cn('h-4 w-4 shrink-0 text-text-quaternary', isFolded && '-rotate-90')} />
        </div>
        <Button
          variant='primary'
          onClick={form.handleSubmit}
          disabled={runDisabled || isRunning}
          loading={isRunning}
          className='shrink-0 gap-x-0.5'
          spinnerClassName='!ml-0'
        >
          <RiPlayLargeLine className='size-4' />
          <span className='px-0.5'>{runLabel}</span>
        </Button>
      </div>
      {!isFolded && (
        <div className='flex flex-col gap-3 border-t border-divider-subtle px-4 py-3'>
          {renderFields()}
        </div>
      )}
    </form>
  )
}
export default Options