feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -4,7 +4,6 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useRouter } from 'next/navigation'
import { useDebounce, useDebounceFn } from 'ahooks'
import { groupBy } from 'lodash-es'
import { PlusIcon } from '@heroicons/react/24/solid'
import { RiDraftLine, RiExternalLinkLine } from '@remixicon/react'
import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document'
@@ -13,18 +12,12 @@ import s from './style.module.css'
import Loading from '@/app/components/base/loading'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import { get } from '@/service/base'
import { createDocument } from '@/service/datasets'
import { useDatasetDetailContext } from '@/context/dataset-detail'
import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector'
import type { NotionPage } from '@/models/common'
import type { CreateDocumentReq } from '@/models/datasets'
import { DataSourceType, ProcessMode } from '@/models/datasets'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { DataSourceType } from '@/models/datasets'
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
import { useProviderContext } from '@/context/provider-context'
import cn from '@/utils/classnames'
import { useDocumentList, useInvalidDocumentDetailKey, useInvalidDocumentList } from '@/service/knowledge/use-document'
import { useIndexStatus } from './list'
import { useDocumentList, useInvalidDocumentDetail, useInvalidDocumentList } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base'
import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'
import useDocumentListQueryState from './hooks/use-document-list-query-state'
@@ -32,10 +25,10 @@ import useEditDocumentMetadata from '../metadata/hooks/use-edit-dataset-metadata
import DatasetMetadataDrawer from '../metadata/metadata-dataset/dataset-metadata-drawer'
import StatusWithAction from '../common/document-status-with-action/status-with-action'
import { useDocLink } from '@/context/i18n'
import { useFetchDefaultProcessRule } from '@/service/knowledge/use-create-dataset'
import { SimpleSelect } from '../../base/select'
import StatusItem from './detail/completed/status-item'
import type { Item } from '@/app/components/base/select'
import { useIndexStatus } from './status-item/hooks'
const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
return <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
@@ -86,8 +79,6 @@ type IDocumentsProps = {
datasetId: string
}
export const fetcher = (url: string) => get(url, {}, {})
const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const { t } = useTranslation()
const docLink = useDocLink()
@@ -104,8 +95,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const [limit, setLimit] = useState<number>(query.limit)
const router = useRouter()
const { dataset } = useDatasetDetailContext()
const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false)
const dataset = useDatasetDetailContextWithSelector(s => s.dataset)
const [timerCanRun, setTimerCanRun] = useState(true)
const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION
const isDataSourceWeb = dataset?.data_source_type === DataSourceType.WEB
@@ -164,14 +154,14 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
}
}, [debouncedSearchValue, query.keyword, updateQuery])
const { data: documentsRes, isFetching: isListLoading } = useDocumentList({
const { data: documentsRes, isLoading: isListLoading } = useDocumentList({
datasetId,
query: {
page: currPage + 1,
limit,
keyword: debouncedSearchValue,
},
refetchInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0,
refetchInterval: timerCanRun ? 2500 : 0,
})
const invalidDocumentList = useInvalidDocumentList(datasetId)
@@ -184,7 +174,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
}
}, [documentsRes])
const invalidDocumentDetail = useInvalidDocumentDetailKey()
const invalidDocumentDetail = useInvalidDocumentDetail()
const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
@@ -197,10 +187,10 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
}, 5000)
}, [])
const documentsWithProgress = useMemo(() => {
useEffect(() => {
let completedNum = 0
let percent = 0
const documentsData = documentsRes?.data?.map((documentItem) => {
documentsRes?.data?.forEach((documentItem) => {
const { indexing_status, completed_segments, total_segments } = documentItem
const isEmbedded = indexing_status === 'completed' || indexing_status === 'paused' || indexing_status === 'error'
@@ -221,73 +211,20 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
percent,
}
})
if (completedNum === documentsRes?.data?.length)
setTimerCanRun(false)
return {
...documentsRes,
data: documentsData,
}
setTimerCanRun(completedNum !== documentsRes?.data?.length)
}, [documentsRes])
const total = documentsRes?.total || 0
const routeToDocCreate = () => {
if (isDataSourceNotion) {
setNotionPageSelectorModalVisible(true)
// if dataset is created from pipeline, go to create from pipeline page
if (dataset?.runtime_mode === 'rag_pipeline') {
router.push(`/datasets/${datasetId}/documents/create-from-pipeline`)
return
}
router.push(`/datasets/${datasetId}/documents/create`)
}
const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule()
const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => {
const workspacesMap = groupBy(selectedPages, 'workspace_id')
const workspaces = Object.keys(workspacesMap).map((workspaceId) => {
return {
workspaceId,
pages: workspacesMap[workspaceId],
}
})
const { rules } = await fetchDefaultProcessRuleMutation.mutateAsync('/datasets/process-rule')
const params = {
data_source: {
type: dataset?.data_source_type,
info_list: {
data_source_type: dataset?.data_source_type,
notion_info_list: workspaces.map((workspace) => {
return {
workspace_id: workspace.workspaceId,
pages: workspace.pages.map((page) => {
const { page_id, page_name, page_icon, type } = page
return {
page_id,
page_name,
page_icon,
type,
}
}),
}
}),
},
},
indexing_technique: dataset?.indexing_technique,
process_rule: {
rules,
mode: ProcessMode.general,
},
} as CreateDocumentReq
await createDocument({
datasetId,
body: params,
})
invalidDocumentList()
setTimerCanRun(true)
// mutateDatasetIndexingStatus(undefined, { revalidate: true })
setNotionPageSelectorModalVisible(false)
}
const documentsList = isDataSourceNotion ? documentsWithProgress?.data : documentsRes?.data
const documentsList = documentsRes?.data
const [selectedIds, setSelectedIds] = useState<string[]>([])
// Clear selection when search changes to avoid confusion
@@ -396,33 +333,35 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
</div>
{isListLoading
? <Loading type='app' />
// eslint-disable-next-line sonarjs/no-nested-conditional
: total > 0
? <List
embeddingAvailable={embeddingAvailable}
documents={documentsList || []}
datasetId={datasetId}
onUpdate={handleUpdate}
selectedIds={selectedIds}
onSelectedIdChange={setSelectedIds}
statusFilter={statusFilter}
onStatusFilterChange={setStatusFilter}
pagination={{
total,
limit,
onLimitChange: handleLimitChange,
current: currPage,
onChange: handlePageChange,
}}
onManageMetadata={showEditMetadataModal}
/>
: <EmptyElement canAdd={embeddingAvailable} onClick={routeToDocCreate} type={isDataSourceNotion ? 'sync' : 'upload'} />
? (
<List
embeddingAvailable={embeddingAvailable}
documents={documentsList || []}
datasetId={datasetId}
onUpdate={handleUpdate}
selectedIds={selectedIds}
onSelectedIdChange={setSelectedIds}
statusFilter={statusFilter}
pagination={{
total,
limit,
onLimitChange: handleLimitChange,
current: currPage,
onChange: handlePageChange,
}}
onManageMetadata={showEditMetadataModal}
/>
)
: (
<EmptyElement
canAdd={embeddingAvailable}
onClick={routeToDocCreate}
type={isDataSourceNotion ? 'sync' : 'upload'}
/>
)
}
<NotionPageSelectorModal
isShow={notionPageSelectorModalVisible}
onClose={() => setNotionPageSelectorModalVisible(false)}
onSave={handleSaveNotionPageSelected}
datasetId={dataset?.id || ''}
/>
</div>
</div>
)