feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
'use client'
|
||||
import React, { useCallback, useEffect, useState } from 'react'
|
||||
import React, { useCallback, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import AppUnavailable from '../../base/app-unavailable'
|
||||
import { ModelTypeEnum } from '../../header/account-setting/model-provider-page/declarations'
|
||||
@@ -8,12 +8,14 @@ import StepTwo from './step-two'
|
||||
import StepThree from './step-three'
|
||||
import { TopBar } from './top-bar'
|
||||
import { DataSourceType } from '@/models/datasets'
|
||||
import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
|
||||
import { fetchDataSource } from '@/service/common'
|
||||
import { fetchDatasetDetail } from '@/service/datasets'
|
||||
import type { CrawlOptions, CrawlResultItem, FileItem, createDocumentResponse } from '@/models/datasets'
|
||||
import { DataSourceProvider, type NotionPage } from '@/models/common'
|
||||
import { useModalContext } from '@/context/modal-context'
|
||||
import { useModalContextSelector } from '@/context/modal-context'
|
||||
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
|
||||
import { useGetDefaultDataSourceListAuth } from '@/service/use-datasource'
|
||||
import produce from 'immer'
|
||||
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||
import Loading from '@/app/components/base/loading'
|
||||
|
||||
type DatasetUpdateFormProps = {
|
||||
datasetId?: string
|
||||
@@ -31,59 +33,63 @@ const DEFAULT_CRAWL_OPTIONS: CrawlOptions = {
|
||||
|
||||
const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
||||
const { t } = useTranslation()
|
||||
const { setShowAccountSettingModal } = useModalContext()
|
||||
const [hasConnection, setHasConnection] = useState(true)
|
||||
const setShowAccountSettingModal = useModalContextSelector(state => state.setShowAccountSettingModal)
|
||||
const datasetDetail = useDatasetDetailContextWithSelector(state => state.dataset)
|
||||
const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
|
||||
|
||||
const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
|
||||
const [step, setStep] = useState(1)
|
||||
const [indexingTypeCache, setIndexTypeCache] = useState('')
|
||||
const [retrievalMethodCache, setRetrievalMethodCache] = useState('')
|
||||
const [fileList, setFiles] = useState<FileItem[]>([])
|
||||
const [result, setResult] = useState<createDocumentResponse | undefined>()
|
||||
const [hasError, setHasError] = useState(false)
|
||||
const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
|
||||
|
||||
const [notionPages, setNotionPages] = useState<NotionPage[]>([])
|
||||
const updateNotionPages = (value: NotionPage[]) => {
|
||||
setNotionPages(value)
|
||||
}
|
||||
|
||||
const [notionCredentialId, setNotionCredentialId] = useState<string>('')
|
||||
const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
|
||||
const [crawlOptions, setCrawlOptions] = useState<CrawlOptions>(DEFAULT_CRAWL_OPTIONS)
|
||||
|
||||
const updateFileList = (preparedFiles: FileItem[]) => {
|
||||
setFiles(preparedFiles)
|
||||
}
|
||||
const [websiteCrawlProvider, setWebsiteCrawlProvider] = useState<DataSourceProvider>(DataSourceProvider.fireCrawl)
|
||||
const [websiteCrawlProvider, setWebsiteCrawlProvider] = useState<DataSourceProvider>(DataSourceProvider.jinaReader)
|
||||
const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
|
||||
|
||||
const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
|
||||
const {
|
||||
data: dataSourceList,
|
||||
isLoading: isLoadingAuthedDataSourceList,
|
||||
isError: fetchingAuthedDataSourceListError,
|
||||
} = useGetDefaultDataSourceListAuth()
|
||||
|
||||
const updateNotionPages = useCallback((value: NotionPage[]) => {
|
||||
setNotionPages(value)
|
||||
}, [])
|
||||
|
||||
const updateNotionCredentialId = useCallback((credentialId: string) => {
|
||||
setNotionCredentialId(credentialId)
|
||||
}, [])
|
||||
|
||||
const updateFileList = useCallback((preparedFiles: FileItem[]) => {
|
||||
setFiles(preparedFiles)
|
||||
}, [])
|
||||
|
||||
const updateFile = useCallback((fileItem: FileItem, progress: number, list: FileItem[]) => {
|
||||
const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
|
||||
list[targetIndex] = {
|
||||
...list[targetIndex],
|
||||
progress,
|
||||
}
|
||||
setFiles([...list])
|
||||
// use follow code would cause dirty list update problem
|
||||
// const newList = list.map((file) => {
|
||||
// if (file.fileID === fileItem.fileID) {
|
||||
// return {
|
||||
// ...fileItem,
|
||||
// progress,
|
||||
// }
|
||||
// }
|
||||
// return file
|
||||
// })
|
||||
// setFiles(newList)
|
||||
}
|
||||
const updateIndexingTypeCache = (type: string) => {
|
||||
const newList = produce(list, (draft) => {
|
||||
draft[targetIndex] = {
|
||||
...draft[targetIndex],
|
||||
progress,
|
||||
}
|
||||
})
|
||||
setFiles(newList)
|
||||
}, [])
|
||||
|
||||
const updateIndexingTypeCache = useCallback((type: string) => {
|
||||
setIndexTypeCache(type)
|
||||
}
|
||||
const updateResultCache = (res?: createDocumentResponse) => {
|
||||
}, [])
|
||||
|
||||
const updateResultCache = useCallback((res?: createDocumentResponse) => {
|
||||
setResult(res)
|
||||
}
|
||||
const updateRetrievalMethodCache = (method: string) => {
|
||||
}, [])
|
||||
|
||||
const updateRetrievalMethodCache = useCallback((method: string) => {
|
||||
setRetrievalMethodCache(method)
|
||||
}
|
||||
}, [])
|
||||
|
||||
const nextStep = useCallback(() => {
|
||||
setStep(step + 1)
|
||||
@@ -93,82 +99,77 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
||||
setStep(step + delta)
|
||||
}, [step, setStep])
|
||||
|
||||
const checkNotionConnection = async () => {
|
||||
const { data } = await fetchDataSource({ url: '/data-source/integrates' })
|
||||
const hasConnection = data.filter(item => item.provider === 'notion') || []
|
||||
setHasConnection(hasConnection.length > 0)
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
checkNotionConnection()
|
||||
}, [])
|
||||
|
||||
const [detail, setDetail] = useState<DataSet | null>(null)
|
||||
useEffect(() => {
|
||||
(async () => {
|
||||
if (datasetId) {
|
||||
try {
|
||||
const detail = await fetchDatasetDetail(datasetId)
|
||||
setDetail(detail)
|
||||
}
|
||||
catch {
|
||||
setHasError(true)
|
||||
}
|
||||
}
|
||||
})()
|
||||
}, [datasetId])
|
||||
|
||||
if (hasError)
|
||||
if (fetchingAuthedDataSourceListError)
|
||||
return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />
|
||||
|
||||
return (
|
||||
<div className='flex flex-col overflow-hidden bg-components-panel-bg' style={{ height: 'calc(100vh - 56px)' }}>
|
||||
<TopBar activeIndex={step - 1} datasetId={datasetId} />
|
||||
<div style={{ height: 'calc(100% - 52px)' }}>
|
||||
{step === 1 && <StepOne
|
||||
hasConnection={hasConnection}
|
||||
onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
|
||||
datasetId={datasetId}
|
||||
dataSourceType={dataSourceType}
|
||||
dataSourceTypeDisable={!!detail?.data_source_type}
|
||||
changeType={setDataSourceType}
|
||||
files={fileList}
|
||||
updateFile={updateFile}
|
||||
updateFileList={updateFileList}
|
||||
notionPages={notionPages}
|
||||
updateNotionPages={updateNotionPages}
|
||||
onStepChange={nextStep}
|
||||
websitePages={websitePages}
|
||||
updateWebsitePages={setWebsitePages}
|
||||
onWebsiteCrawlProviderChange={setWebsiteCrawlProvider}
|
||||
onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={setCrawlOptions}
|
||||
/>}
|
||||
{(step === 2 && (!datasetId || (datasetId && !!detail))) && <StepTwo
|
||||
isAPIKeySet={!!embeddingsDefaultModel}
|
||||
onSetting={() => setShowAccountSettingModal({ payload: 'provider' })}
|
||||
indexingType={detail?.indexing_technique}
|
||||
datasetId={datasetId}
|
||||
dataSourceType={dataSourceType}
|
||||
files={fileList.map(file => file.file)}
|
||||
notionPages={notionPages}
|
||||
websitePages={websitePages}
|
||||
websiteCrawlProvider={websiteCrawlProvider}
|
||||
websiteCrawlJobId={websiteCrawlJobId}
|
||||
onStepChange={changeStep}
|
||||
updateIndexingTypeCache={updateIndexingTypeCache}
|
||||
updateRetrievalMethodCache={updateRetrievalMethodCache}
|
||||
updateResultCache={updateResultCache}
|
||||
crawlOptions={crawlOptions}
|
||||
/>}
|
||||
{step === 3 && <StepThree
|
||||
datasetId={datasetId}
|
||||
datasetName={detail?.name}
|
||||
indexingType={detail?.indexing_technique || indexingTypeCache}
|
||||
retrievalMethod={detail?.retrieval_model_dict?.search_method || retrievalMethodCache}
|
||||
creationCache={result}
|
||||
/>}
|
||||
{
|
||||
isLoadingAuthedDataSourceList && (
|
||||
<Loading type='app' />
|
||||
)
|
||||
}
|
||||
{
|
||||
!isLoadingAuthedDataSourceList && (
|
||||
<>
|
||||
{step === 1 && (
|
||||
<StepOne
|
||||
authedDataSourceList={dataSourceList?.result || []}
|
||||
onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
|
||||
datasetId={datasetId}
|
||||
dataSourceType={dataSourceType}
|
||||
dataSourceTypeDisable={!!datasetDetail?.data_source_type}
|
||||
changeType={setDataSourceType}
|
||||
files={fileList}
|
||||
updateFile={updateFile}
|
||||
updateFileList={updateFileList}
|
||||
notionPages={notionPages}
|
||||
notionCredentialId={notionCredentialId}
|
||||
updateNotionPages={updateNotionPages}
|
||||
updateNotionCredentialId={updateNotionCredentialId}
|
||||
onStepChange={nextStep}
|
||||
websitePages={websitePages}
|
||||
updateWebsitePages={setWebsitePages}
|
||||
onWebsiteCrawlProviderChange={setWebsiteCrawlProvider}
|
||||
onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={setCrawlOptions}
|
||||
/>
|
||||
)}
|
||||
{(step === 2 && (!datasetId || (datasetId && !!datasetDetail))) && (
|
||||
<StepTwo
|
||||
isAPIKeySet={!!embeddingsDefaultModel}
|
||||
onSetting={() => setShowAccountSettingModal({ payload: 'provider' })}
|
||||
indexingType={datasetDetail?.indexing_technique}
|
||||
datasetId={datasetId}
|
||||
dataSourceType={dataSourceType}
|
||||
files={fileList.map(file => file.file)}
|
||||
notionPages={notionPages}
|
||||
notionCredentialId={notionCredentialId}
|
||||
websitePages={websitePages}
|
||||
websiteCrawlProvider={websiteCrawlProvider}
|
||||
websiteCrawlJobId={websiteCrawlJobId}
|
||||
onStepChange={changeStep}
|
||||
updateIndexingTypeCache={updateIndexingTypeCache}
|
||||
updateRetrievalMethodCache={updateRetrievalMethodCache}
|
||||
updateResultCache={updateResultCache}
|
||||
crawlOptions={crawlOptions}
|
||||
/>
|
||||
)}
|
||||
{step === 3 && (
|
||||
<StepThree
|
||||
datasetId={datasetId}
|
||||
datasetName={datasetDetail?.name}
|
||||
indexingType={datasetDetail?.indexing_technique || indexingTypeCache}
|
||||
retrievalMethod={datasetDetail?.retrieval_model_dict?.search_method || retrievalMethodCache}
|
||||
creationCache={result}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user