feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,6 +1,6 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import React, { useCallback, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import s from './index.module.css'
import NoData from './no-data'
@@ -10,9 +10,9 @@ import JinaReader from './jina-reader'
import cn from '@/utils/classnames'
import { useModalContext } from '@/context/modal-context'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import { fetchDataSources } from '@/service/datasets'
import { type DataSourceItem, DataSourceProvider } from '@/models/common'
import { DataSourceProvider } from '@/models/common'
import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config'
import type { DataSourceAuth } from '@/app/components/header/account-setting/data-source-page-new/types'
type Props = {
onPreview: (payload: CrawlResultItem) => void
@@ -22,6 +22,7 @@ type Props = {
onJobIdChange: (jobId: string) => void
crawlOptions: CrawlOptions
onCrawlOptionsChange: (payload: CrawlOptions) => void
authedDataSourceList: DataSourceAuth[]
}
const Website: FC<Props> = ({
@@ -32,58 +33,34 @@ const Website: FC<Props> = ({
onJobIdChange,
crawlOptions,
onCrawlOptionsChange,
authedDataSourceList,
}) => {
const { t } = useTranslation()
const { setShowAccountSettingModal } = useModalContext()
const [isLoaded, setIsLoaded] = useState(false)
const [selectedProvider, setSelectedProvider] = useState<DataSourceProvider>(DataSourceProvider.jinaReader)
const [sources, setSources] = useState<DataSourceItem[]>([])
useEffect(() => {
onCrawlProviderChange(selectedProvider)
}, [selectedProvider, onCrawlProviderChange])
const availableProviders = useMemo(() => authedDataSourceList.filter((item) => {
return [
DataSourceProvider.jinaReader,
DataSourceProvider.fireCrawl,
DataSourceProvider.waterCrawl].includes(item.provider as DataSourceProvider) && item.credentials_list.length > 0
}), [authedDataSourceList])
const checkSetApiKey = useCallback(async () => {
const res = await fetchDataSources() as any
setSources(res.sources)
// If users have configured one of the providers, select it.
const availableProviders = res.sources.filter((item: DataSourceItem) =>
[
DataSourceProvider.jinaReader,
DataSourceProvider.fireCrawl,
DataSourceProvider.waterCrawl,
].includes(item.provider),
)
if (availableProviders.length > 0)
setSelectedProvider(availableProviders[0].provider)
}, [])
useEffect(() => {
checkSetApiKey().then(() => {
setIsLoaded(true)
})
}, [])
const handleOnConfig = useCallback(() => {
setShowAccountSettingModal({
payload: 'data-source',
onCancelCallback: checkSetApiKey,
})
}, [checkSetApiKey, setShowAccountSettingModal])
}, [setShowAccountSettingModal])
if (!isLoaded)
return null
const source = sources.find(source => source.provider === selectedProvider)
const source = availableProviders.find(source => source.provider === selectedProvider)
return (
<div>
<div className="mb-4">
<div className="system-md-medium mb-2 text-text-secondary">
<div className='mb-4'>
<div className='system-md-medium mb-2 text-text-secondary'>
{t('datasetCreation.stepOne.website.chooseProvider')}
</div>
<div className="flex space-x-2">
<div className='flex space-x-2'>
{ENABLE_WEBSITE_JINAREADER && <button
className={cn('flex items-center justify-center rounded-lg px-4 py-2',
selectedProvider === DataSourceProvider.jinaReader
@@ -91,9 +68,12 @@ const Website: FC<Props> = ({
: `system-sm-regular border border-components-option-card-option-border bg-components-option-card-option-bg text-text-secondary
hover:border-components-option-card-option-border-hover hover:bg-components-option-card-option-bg-hover hover:shadow-xs hover:shadow-shadow-shadow-3`,
)}
onClick={() => setSelectedProvider(DataSourceProvider.jinaReader)}
onClick={() => {
setSelectedProvider(DataSourceProvider.jinaReader)
onCrawlProviderChange(DataSourceProvider.jinaReader)
}}
>
<span className={cn(s.jinaLogo, 'mr-2')}/>
<span className={cn(s.jinaLogo, 'mr-2')} />
<span>Jina Reader</span>
</button>}
{ENABLE_WEBSITE_FIRECRAWL && <button
@@ -103,7 +83,10 @@ const Website: FC<Props> = ({
: `system-sm-regular border border-components-option-card-option-border bg-components-option-card-option-bg text-text-secondary
hover:border-components-option-card-option-border-hover hover:bg-components-option-card-option-bg-hover hover:shadow-xs hover:shadow-shadow-shadow-3`,
)}
onClick={() => setSelectedProvider(DataSourceProvider.fireCrawl)}
onClick={() => {
setSelectedProvider(DataSourceProvider.fireCrawl)
onCrawlProviderChange(DataSourceProvider.fireCrawl)
}}
>
🔥 Firecrawl
</button>}
@@ -114,9 +97,12 @@ const Website: FC<Props> = ({
: `system-sm-regular border border-components-option-card-option-border bg-components-option-card-option-bg text-text-secondary
hover:border-components-option-card-option-border-hover hover:bg-components-option-card-option-bg-hover hover:shadow-xs hover:shadow-shadow-shadow-3`,
)}
onClick={() => setSelectedProvider(DataSourceProvider.waterCrawl)}
onClick={() => {
setSelectedProvider(DataSourceProvider.waterCrawl)
onCrawlProviderChange(DataSourceProvider.waterCrawl)
}}
>
<span className={cn(s.watercrawlLogo, 'mr-2')}/>
<span className={cn(s.watercrawlLogo, 'mr-2')} />
<span>WaterCrawl</span>
</button>}
</div>
@@ -152,7 +138,7 @@ const Website: FC<Props> = ({
/>
)}
{!source && (
<NoData onConfig={handleOnConfig} provider={selectedProvider}/>
<NoData onConfig={handleOnConfig} provider={selectedProvider} />
)}
</div>
)