feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -4,7 +4,6 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useRouter } from 'next/navigation'
|
||||
import { useDebounce, useDebounceFn } from 'ahooks'
|
||||
import { groupBy } from 'lodash-es'
|
||||
import { PlusIcon } from '@heroicons/react/24/solid'
|
||||
import { RiDraftLine, RiExternalLinkLine } from '@remixicon/react'
|
||||
import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document'
|
||||
@@ -13,18 +12,12 @@ import s from './style.module.css'
|
||||
import Loading from '@/app/components/base/loading'
|
||||
import Button from '@/app/components/base/button'
|
||||
import Input from '@/app/components/base/input'
|
||||
import { get } from '@/service/base'
|
||||
import { createDocument } from '@/service/datasets'
|
||||
import { useDatasetDetailContext } from '@/context/dataset-detail'
|
||||
import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector'
|
||||
import type { NotionPage } from '@/models/common'
|
||||
import type { CreateDocumentReq } from '@/models/datasets'
|
||||
import { DataSourceType, ProcessMode } from '@/models/datasets'
|
||||
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||
import { DataSourceType } from '@/models/datasets'
|
||||
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
|
||||
import { useProviderContext } from '@/context/provider-context'
|
||||
import cn from '@/utils/classnames'
|
||||
import { useDocumentList, useInvalidDocumentDetailKey, useInvalidDocumentList } from '@/service/knowledge/use-document'
|
||||
import { useIndexStatus } from './list'
|
||||
import { useDocumentList, useInvalidDocumentDetail, useInvalidDocumentList } from '@/service/knowledge/use-document'
|
||||
import { useInvalid } from '@/service/use-base'
|
||||
import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'
|
||||
import useDocumentListQueryState from './hooks/use-document-list-query-state'
|
||||
@@ -32,10 +25,10 @@ import useEditDocumentMetadata from '../metadata/hooks/use-edit-dataset-metadata
|
||||
import DatasetMetadataDrawer from '../metadata/metadata-dataset/dataset-metadata-drawer'
|
||||
import StatusWithAction from '../common/document-status-with-action/status-with-action'
|
||||
import { useDocLink } from '@/context/i18n'
|
||||
import { useFetchDefaultProcessRule } from '@/service/knowledge/use-create-dataset'
|
||||
import { SimpleSelect } from '../../base/select'
|
||||
import StatusItem from './detail/completed/status-item'
|
||||
import type { Item } from '@/app/components/base/select'
|
||||
import { useIndexStatus } from './status-item/hooks'
|
||||
|
||||
const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
|
||||
return <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
|
||||
@@ -86,8 +79,6 @@ type IDocumentsProps = {
|
||||
datasetId: string
|
||||
}
|
||||
|
||||
export const fetcher = (url: string) => get(url, {}, {})
|
||||
|
||||
const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
const { t } = useTranslation()
|
||||
const docLink = useDocLink()
|
||||
@@ -104,8 +95,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
const [limit, setLimit] = useState<number>(query.limit)
|
||||
|
||||
const router = useRouter()
|
||||
const { dataset } = useDatasetDetailContext()
|
||||
const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false)
|
||||
const dataset = useDatasetDetailContextWithSelector(s => s.dataset)
|
||||
const [timerCanRun, setTimerCanRun] = useState(true)
|
||||
const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION
|
||||
const isDataSourceWeb = dataset?.data_source_type === DataSourceType.WEB
|
||||
@@ -164,14 +154,14 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
}
|
||||
}, [debouncedSearchValue, query.keyword, updateQuery])
|
||||
|
||||
const { data: documentsRes, isFetching: isListLoading } = useDocumentList({
|
||||
const { data: documentsRes, isLoading: isListLoading } = useDocumentList({
|
||||
datasetId,
|
||||
query: {
|
||||
page: currPage + 1,
|
||||
limit,
|
||||
keyword: debouncedSearchValue,
|
||||
},
|
||||
refetchInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0,
|
||||
refetchInterval: timerCanRun ? 2500 : 0,
|
||||
})
|
||||
|
||||
const invalidDocumentList = useInvalidDocumentList(datasetId)
|
||||
@@ -184,7 +174,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
}
|
||||
}, [documentsRes])
|
||||
|
||||
const invalidDocumentDetail = useInvalidDocumentDetailKey()
|
||||
const invalidDocumentDetail = useInvalidDocumentDetail()
|
||||
const invalidChunkList = useInvalid(useSegmentListKey)
|
||||
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
|
||||
|
||||
@@ -197,10 +187,10 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
}, 5000)
|
||||
}, [])
|
||||
|
||||
const documentsWithProgress = useMemo(() => {
|
||||
useEffect(() => {
|
||||
let completedNum = 0
|
||||
let percent = 0
|
||||
const documentsData = documentsRes?.data?.map((documentItem) => {
|
||||
documentsRes?.data?.forEach((documentItem) => {
|
||||
const { indexing_status, completed_segments, total_segments } = documentItem
|
||||
const isEmbedded = indexing_status === 'completed' || indexing_status === 'paused' || indexing_status === 'error'
|
||||
|
||||
@@ -221,73 +211,20 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
percent,
|
||||
}
|
||||
})
|
||||
if (completedNum === documentsRes?.data?.length)
|
||||
setTimerCanRun(false)
|
||||
return {
|
||||
...documentsRes,
|
||||
data: documentsData,
|
||||
}
|
||||
setTimerCanRun(completedNum !== documentsRes?.data?.length)
|
||||
}, [documentsRes])
|
||||
const total = documentsRes?.total || 0
|
||||
|
||||
const routeToDocCreate = () => {
|
||||
if (isDataSourceNotion) {
|
||||
setNotionPageSelectorModalVisible(true)
|
||||
// if dataset is created from pipeline, go to create from pipeline page
|
||||
if (dataset?.runtime_mode === 'rag_pipeline') {
|
||||
router.push(`/datasets/${datasetId}/documents/create-from-pipeline`)
|
||||
return
|
||||
}
|
||||
router.push(`/datasets/${datasetId}/documents/create`)
|
||||
}
|
||||
|
||||
const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule()
|
||||
|
||||
const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => {
|
||||
const workspacesMap = groupBy(selectedPages, 'workspace_id')
|
||||
const workspaces = Object.keys(workspacesMap).map((workspaceId) => {
|
||||
return {
|
||||
workspaceId,
|
||||
pages: workspacesMap[workspaceId],
|
||||
}
|
||||
})
|
||||
const { rules } = await fetchDefaultProcessRuleMutation.mutateAsync('/datasets/process-rule')
|
||||
const params = {
|
||||
data_source: {
|
||||
type: dataset?.data_source_type,
|
||||
info_list: {
|
||||
data_source_type: dataset?.data_source_type,
|
||||
notion_info_list: workspaces.map((workspace) => {
|
||||
return {
|
||||
workspace_id: workspace.workspaceId,
|
||||
pages: workspace.pages.map((page) => {
|
||||
const { page_id, page_name, page_icon, type } = page
|
||||
return {
|
||||
page_id,
|
||||
page_name,
|
||||
page_icon,
|
||||
type,
|
||||
}
|
||||
}),
|
||||
}
|
||||
}),
|
||||
},
|
||||
},
|
||||
indexing_technique: dataset?.indexing_technique,
|
||||
process_rule: {
|
||||
rules,
|
||||
mode: ProcessMode.general,
|
||||
},
|
||||
} as CreateDocumentReq
|
||||
|
||||
await createDocument({
|
||||
datasetId,
|
||||
body: params,
|
||||
})
|
||||
invalidDocumentList()
|
||||
setTimerCanRun(true)
|
||||
// mutateDatasetIndexingStatus(undefined, { revalidate: true })
|
||||
setNotionPageSelectorModalVisible(false)
|
||||
}
|
||||
|
||||
const documentsList = isDataSourceNotion ? documentsWithProgress?.data : documentsRes?.data
|
||||
const documentsList = documentsRes?.data
|
||||
const [selectedIds, setSelectedIds] = useState<string[]>([])
|
||||
|
||||
// Clear selection when search changes to avoid confusion
|
||||
@@ -396,33 +333,35 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
||||
</div>
|
||||
{isListLoading
|
||||
? <Loading type='app' />
|
||||
// eslint-disable-next-line sonarjs/no-nested-conditional
|
||||
: total > 0
|
||||
? <List
|
||||
embeddingAvailable={embeddingAvailable}
|
||||
documents={documentsList || []}
|
||||
datasetId={datasetId}
|
||||
onUpdate={handleUpdate}
|
||||
selectedIds={selectedIds}
|
||||
onSelectedIdChange={setSelectedIds}
|
||||
statusFilter={statusFilter}
|
||||
onStatusFilterChange={setStatusFilter}
|
||||
pagination={{
|
||||
total,
|
||||
limit,
|
||||
onLimitChange: handleLimitChange,
|
||||
current: currPage,
|
||||
onChange: handlePageChange,
|
||||
}}
|
||||
onManageMetadata={showEditMetadataModal}
|
||||
/>
|
||||
: <EmptyElement canAdd={embeddingAvailable} onClick={routeToDocCreate} type={isDataSourceNotion ? 'sync' : 'upload'} />
|
||||
? (
|
||||
<List
|
||||
embeddingAvailable={embeddingAvailable}
|
||||
documents={documentsList || []}
|
||||
datasetId={datasetId}
|
||||
onUpdate={handleUpdate}
|
||||
selectedIds={selectedIds}
|
||||
onSelectedIdChange={setSelectedIds}
|
||||
statusFilter={statusFilter}
|
||||
pagination={{
|
||||
total,
|
||||
limit,
|
||||
onLimitChange: handleLimitChange,
|
||||
current: currPage,
|
||||
onChange: handlePageChange,
|
||||
}}
|
||||
onManageMetadata={showEditMetadataModal}
|
||||
/>
|
||||
)
|
||||
: (
|
||||
<EmptyElement
|
||||
canAdd={embeddingAvailable}
|
||||
onClick={routeToDocCreate}
|
||||
type={isDataSourceNotion ? 'sync' : 'upload'}
|
||||
/>
|
||||
)
|
||||
}
|
||||
<NotionPageSelectorModal
|
||||
isShow={notionPageSelectorModalVisible}
|
||||
onClose={() => setNotionPageSelectorModalVisible(false)}
|
||||
onSave={handleSaveNotionPageSelected}
|
||||
datasetId={dataset?.id || ''}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user