fix: Add dataset file upload restrictions (#29397)

Co-authored-by: kurokobo <kuro664@gmail.com>
Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com>
This commit is contained in:
Wu Tianwei
2025-12-10 16:41:05 +08:00
committed by GitHub
parent 88b20bc6d0
commit bafd093fa9
35 changed files with 206 additions and 151 deletions

View File

@@ -6,6 +6,7 @@ import cn from '@/utils/classnames'
import type { CrawlResultItem as CrawlResultItemType } from '@/models/datasets'
import Checkbox from '@/app/components/base/checkbox'
import Button from '@/app/components/base/button'
import Radio from '@/app/components/base/radio/ui'
type Props = {
payload: CrawlResultItemType
@@ -13,6 +14,7 @@ type Props = {
isPreview: boolean
onCheckChange: (checked: boolean) => void
onPreview: () => void
isMultipleChoice: boolean
}
const CrawledResultItem: FC<Props> = ({
@@ -21,6 +23,7 @@ const CrawledResultItem: FC<Props> = ({
isChecked,
onCheckChange,
onPreview,
isMultipleChoice,
}) => {
const { t } = useTranslation()
@@ -31,7 +34,21 @@ const CrawledResultItem: FC<Props> = ({
<div className={cn(isPreview ? 'bg-state-base-active' : 'group hover:bg-state-base-hover', 'cursor-pointer rounded-lg p-2')}>
<div className='relative flex'>
<div className='flex h-5 items-center'>
<Checkbox className='mr-2 shrink-0' checked={isChecked} onCheck={handleCheckChange} />
{
isMultipleChoice ? (
<Checkbox
className='mr-2 shrink-0'
checked={isChecked}
onCheck={handleCheckChange}
/>
) : (
<Radio
className='mr-2 shrink-0'
isChecked={isChecked}
onCheck={handleCheckChange}
/>
)
}
</div>
<div className='flex min-w-0 grow flex-col'>
<div

View File

@@ -16,6 +16,7 @@ type Props = {
onSelectedChange: (selected: CrawlResultItem[]) => void
onPreview: (payload: CrawlResultItem) => void
usedTime: number
isMultipleChoice: boolean
}
const CrawledResult: FC<Props> = ({
@@ -25,6 +26,7 @@ const CrawledResult: FC<Props> = ({
onSelectedChange,
onPreview,
usedTime,
isMultipleChoice,
}) => {
const { t } = useTranslation()
@@ -40,13 +42,17 @@ const CrawledResult: FC<Props> = ({
const handleItemCheckChange = useCallback((item: CrawlResultItem) => {
return (checked: boolean) => {
if (checked)
onSelectedChange([...checkedList, item])
else
if (checked) {
if (isMultipleChoice)
onSelectedChange([...checkedList, item])
else
onSelectedChange([item])
}
else {
onSelectedChange(checkedList.filter(checkedItem => checkedItem.source_url !== item.source_url))
}
}
}, [checkedList, onSelectedChange])
}, [checkedList, isMultipleChoice, onSelectedChange])
const [previewIndex, setPreviewIndex] = React.useState<number>(-1)
const handlePreview = useCallback((index: number) => {
@@ -59,11 +65,13 @@ const CrawledResult: FC<Props> = ({
return (
<div className={cn(className, 'border-t-[0.5px] border-divider-regular shadow-xs shadow-shadow-shadow-3')}>
<div className='flex h-[34px] items-center justify-between px-4'>
<CheckboxWithLabel
isChecked={isCheckAll}
onChange={handleCheckedAll} label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
labelClassName='system-[13px] leading-[16px] font-medium text-text-secondary'
/>
{isMultipleChoice && (
<CheckboxWithLabel
isChecked={isCheckAll}
onChange={handleCheckedAll} label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
labelClassName='system-[13px] leading-[16px] font-medium text-text-secondary'
/>
)}
<div className='text-xs text-text-tertiary'>
{t(`${I18N_PREFIX}.scrapTimeInfo`, {
total: list.length,
@@ -80,6 +88,7 @@ const CrawledResult: FC<Props> = ({
payload={item}
isChecked={checkedList.some(checkedItem => checkedItem.source_url === item.source_url)}
onCheckChange={handleItemCheckChange(item)}
isMultipleChoice={isMultipleChoice}
/>
))}
</div>

View File

@@ -26,6 +26,7 @@ type Props = {
onJobIdChange: (jobId: string) => void
crawlOptions: CrawlOptions
onCrawlOptionsChange: (payload: CrawlOptions) => void
supportBatchUpload: boolean
}
enum Step {
@@ -41,6 +42,7 @@ const FireCrawl: FC<Props> = ({
onJobIdChange,
crawlOptions,
onCrawlOptionsChange,
supportBatchUpload,
}) => {
const { t } = useTranslation()
const [step, setStep] = useState<Step>(Step.init)
@@ -171,7 +173,7 @@ const FireCrawl: FC<Props> = ({
content: item.markdown,
}))
setCrawlResult(data)
onCheckedCrawlResultChange(data.data || []) // default select the crawl result
onCheckedCrawlResultChange(supportBatchUpload ? (data.data || []) : (data.data?.slice(0, 1) || [])) // default select the crawl result
setCrawlErrorMessage('')
}
}
@@ -182,7 +184,7 @@ const FireCrawl: FC<Props> = ({
finally {
setStep(Step.finished)
}
}, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished, onCheckedCrawlResultChange])
}, [checkValid, crawlOptions, onJobIdChange, waitForCrawlFinished, t, onCheckedCrawlResultChange, supportBatchUpload])
return (
<div>
@@ -221,6 +223,7 @@ const FireCrawl: FC<Props> = ({
onSelectedChange={onCheckedCrawlResultChange}
onPreview={onPreview}
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
isMultipleChoice={supportBatchUpload}
/>
}
</div>

View File

@@ -24,6 +24,7 @@ type Props = {
crawlOptions: CrawlOptions
onCrawlOptionsChange: (payload: CrawlOptions) => void
authedDataSourceList: DataSourceAuth[]
supportBatchUpload?: boolean
}
const Website: FC<Props> = ({
@@ -35,6 +36,7 @@ const Website: FC<Props> = ({
crawlOptions,
onCrawlOptionsChange,
authedDataSourceList,
supportBatchUpload = false,
}) => {
const { t } = useTranslation()
const { setShowAccountSettingModal } = useModalContext()
@@ -116,6 +118,7 @@ const Website: FC<Props> = ({
onJobIdChange={onJobIdChange}
crawlOptions={crawlOptions}
onCrawlOptionsChange={onCrawlOptionsChange}
supportBatchUpload={supportBatchUpload}
/>
)}
{source && selectedProvider === DataSourceProvider.waterCrawl && (
@@ -126,6 +129,7 @@ const Website: FC<Props> = ({
onJobIdChange={onJobIdChange}
crawlOptions={crawlOptions}
onCrawlOptionsChange={onCrawlOptionsChange}
supportBatchUpload={supportBatchUpload}
/>
)}
{source && selectedProvider === DataSourceProvider.jinaReader && (
@@ -136,6 +140,7 @@ const Website: FC<Props> = ({
onJobIdChange={onJobIdChange}
crawlOptions={crawlOptions}
onCrawlOptionsChange={onCrawlOptionsChange}
supportBatchUpload={supportBatchUpload}
/>
)}
{!source && (

View File

@@ -26,6 +26,7 @@ type Props = {
onJobIdChange: (jobId: string) => void
crawlOptions: CrawlOptions
onCrawlOptionsChange: (payload: CrawlOptions) => void
supportBatchUpload: boolean
}
enum Step {
@@ -41,6 +42,7 @@ const JinaReader: FC<Props> = ({
onJobIdChange,
crawlOptions,
onCrawlOptionsChange,
supportBatchUpload,
}) => {
const { t } = useTranslation()
const [step, setStep] = useState<Step>(Step.init)
@@ -157,7 +159,7 @@ const JinaReader: FC<Props> = ({
total: 1,
data: [{
title,
content,
markdown: content,
description,
source_url: url,
}],
@@ -176,7 +178,7 @@ const JinaReader: FC<Props> = ({
}
else {
setCrawlResult(data)
onCheckedCrawlResultChange(data.data || []) // default select the crawl result
onCheckedCrawlResultChange(supportBatchUpload ? (data.data || []) : (data.data?.slice(0, 1) || [])) // default select the crawl result
setCrawlErrorMessage('')
}
}
@@ -188,7 +190,7 @@ const JinaReader: FC<Props> = ({
finally {
setStep(Step.finished)
}
}, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])
}, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, supportBatchUpload, t, waitForCrawlFinished])
return (
<div>
@@ -227,6 +229,7 @@ const JinaReader: FC<Props> = ({
onSelectedChange={onCheckedCrawlResultChange}
onPreview={onPreview}
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
isMultipleChoice={supportBatchUpload}
/>
}
</div>

View File

@@ -32,7 +32,7 @@ const WebsitePreview = ({
<div className='system-xs-medium truncate text-text-tertiary' title={payload.source_url}>{payload.source_url}</div>
</div>
<div className={cn(s.previewContent, 'body-md-regular')}>
<div className={cn(s.fileContent)}>{payload.content}</div>
<div className={cn(s.fileContent)}>{payload.markdown}</div>
</div>
</div>
)

View File

@@ -26,6 +26,7 @@ type Props = {
onJobIdChange: (jobId: string) => void
crawlOptions: CrawlOptions
onCrawlOptionsChange: (payload: CrawlOptions) => void
supportBatchUpload: boolean
}
enum Step {
@@ -41,6 +42,7 @@ const WaterCrawl: FC<Props> = ({
onJobIdChange,
crawlOptions,
onCrawlOptionsChange,
supportBatchUpload,
}) => {
const { t } = useTranslation()
const [step, setStep] = useState<Step>(Step.init)
@@ -132,7 +134,7 @@ const WaterCrawl: FC<Props> = ({
},
}
}
}, [crawlOptions.limit])
}, [crawlOptions.limit, onCheckedCrawlResultChange])
const handleRun = useCallback(async (url: string) => {
const { isValid, errorMsg } = checkValid(url)
@@ -163,7 +165,7 @@ const WaterCrawl: FC<Props> = ({
}
else {
setCrawlResult(data)
onCheckedCrawlResultChange(data.data || []) // default select the crawl result
onCheckedCrawlResultChange(supportBatchUpload ? (data.data || []) : (data.data?.slice(0, 1) || [])) // default select the crawl result
setCrawlErrorMessage('')
}
}
@@ -174,7 +176,7 @@ const WaterCrawl: FC<Props> = ({
finally {
setStep(Step.finished)
}
}, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished])
}, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, supportBatchUpload, t, waitForCrawlFinished])
return (
<div>
@@ -213,6 +215,7 @@ const WaterCrawl: FC<Props> = ({
onSelectedChange={onCheckedCrawlResultChange}
onPreview={onPreview}
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
isMultipleChoice={supportBatchUpload}
/>
}
</div>