feat(website-crawl): add jina reader as additional alternative for website crawling (#8761)

This commit is contained in:
Zhaofeng Miao
2024-09-30 09:57:19 +08:00
committed by GitHub
parent fb49413a41
commit 369e1e6f58
38 changed files with 927 additions and 75 deletions

View File

@@ -23,7 +23,7 @@ import type {
SegmentsResponse,
createDocumentResponse,
} from '@/models/datasets'
import type { CommonResponse, DataSourceNotionWorkspace } from '@/models/common'
import { type CommonResponse, type DataSourceNotionWorkspace, DataSourceProvider } from '@/models/common'
import type {
ApiKeysListResponse,
CreateApiKeyResponse,
@@ -253,7 +253,7 @@ export const createFirecrawlTask: Fetcher<CommonResponse, Record<string, any>> =
return post<CommonResponse>('website/crawl', {
body: {
...body,
provider: 'firecrawl',
provider: DataSourceProvider.fireCrawl,
},
})
}
@@ -261,7 +261,26 @@ export const createFirecrawlTask: Fetcher<CommonResponse, Record<string, any>> =
export const checkFirecrawlTaskStatus: Fetcher<CommonResponse, string> = (jobId: string) => {
return get<CommonResponse>(`website/crawl/status/${jobId}`, {
params: {
provider: 'firecrawl',
provider: DataSourceProvider.fireCrawl,
},
}, {
silent: true,
})
}
export const createJinaReaderTask: Fetcher<CommonResponse, Record<string, any>> = (body) => {
return post<CommonResponse>('website/crawl', {
body: {
...body,
provider: DataSourceProvider.jinaReader,
},
})
}
export const checkJinaReaderTaskStatus: Fetcher<CommonResponse, string> = (jobId: string) => {
return get<CommonResponse>(`website/crawl/status/${jobId}`, {
params: {
provider: 'jinareader',
},
}, {
silent: true,