Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker file
Author: Archer
Date: 2024-04-01 19:01:26 +08:00
Committed by: GitHub
Parent: f9d266a6af
Commit: 21288d1736
90 changed files with 2707 additions and 1678 deletions

View File

@@ -1,4 +1,4 @@
import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
import { GET, POST } from '@/web/common/api/request';
import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
import { AxiosProgressEvent } from 'axios';
@@ -8,10 +8,16 @@ export const postUploadFiles = (
data: FormData,
onUploadProgress: (progressEvent: AxiosProgressEvent) => void
) =>
POST<string[]>('/common/file/upload', data, {
POST<string>('/common/file/upload', data, {
timeout: 480000,
onUploadProgress,
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
export const getPreviewFileContent = (data: { fileId: string; csvFormat: boolean }) =>
POST<{
previewContent: string;
totalLength: number;
}>('/common/file/previewContent', data);
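With this change the upload endpoint handles one file per request and resolves to a single fileId string, and a preview endpoint is added. A minimal usage sketch, assuming a fileId from an earlier upload (the import path and variable names are assumptions):

```ts
import { getPreviewFileContent } from '@/web/common/file/api'; // path assumed from the diff

// Sketch: fetch a text preview of an uploaded file.
// `fileId` comes from a prior postUploadFiles call, which now
// resolves to a single id rather than a string[].
async function previewUploadedFile(fileId: string) {
  const { previewContent, totalLength } = await getPreviewFileContent({
    fileId,
    csvFormat: false // presumably true to preview table files (xlsx/csv) as rows
  });
  console.log(`preview (${totalLength} chars total):`, previewContent);
}
```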

View File

@@ -7,13 +7,13 @@ import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/fi
/**
* upload file to mongo gridfs
*/
export const uploadFiles = ({
files,
export const uploadFile2DB = ({
file,
bucketName,
metadata = {},
percentListen
}: {
files: File[];
file: File;
bucketName: `${BucketNameEnum}`;
metadata?: Record<string, any>;
percentListen?: (percent: number) => void;
@@ -21,9 +21,7 @@ export const uploadFiles = ({
const form = new FormData();
form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', bucketName);
files.forEach((file) => {
form.append('file', file, encodeURIComponent(file.name));
});
form.append('file', file, encodeURIComponent(file.name));
return postUploadFiles(form, (e) => {
if (!e.total) return;
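uploadFile2DB now accepts a single File rather than a files array, so callers that batch uploads loop over their selection instead. A usage sketch (the import path and metadata values are assumptions):

```ts
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { uploadFile2DB } from '@/web/common/file/controller'; // path assumed

// Sketch: upload selected files one by one with per-file progress.
async function uploadSelected(files: File[]) {
  for (const file of files) {
    const fileId = await uploadFile2DB({
      file,
      bucketName: BucketNameEnum.dataset,
      metadata: { source: 'import' }, // illustrative metadata
      percentListen: (percent) => console.log(`${file.name}: ${percent}%`)
    });
    console.log(`${file.name} -> ${fileId}`);
  }
}
```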

View File

@@ -23,14 +23,18 @@ export const useSelectFile = (props?: {
accept={fileType}
multiple={multiple}
onChange={(e) => {
if (!e.target.files || e.target.files?.length === 0) return;
if (e.target.files.length > maxCount) {
return toast({
const files = e.target.files;
if (!files || files?.length === 0) return;
let fileList = Array.from(files);
if (fileList.length > maxCount) {
toast({
status: 'warning',
title: t('common.file.Select file amount limit', { max: maxCount })
});
fileList = fileList.slice(0, maxCount);
}
onSelect(Array.from(e.target.files), openSign.current);
onSelect(fileList, openSign.current);
}}
/>
</Box>
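The hook no longer rejects an over-limit selection outright: it warns and truncates to maxCount before calling onSelect. A consumer sketch under that contract (file types and counts chosen to match this PR's theme):

```tsx
import React from 'react';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';

// Sketch: picking 15 files with maxCount 10 now shows a warning toast
// and passes the first 10 files to onSelect instead of aborting.
function ImportButton({ onFiles }: { onFiles: (files: File[]) => void }) {
  const { File, onOpen } = useSelectFile({
    fileType: '.pptx,.xlsx',
    multiple: true,
    maxCount: 10
  });

  return (
    <>
      <File onSelect={(files) => onFiles(files)} />
      <button onClick={onOpen}>Select files</button>
    </>
  );
}
```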

View File

@@ -77,15 +77,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
let options = {};
if (MediaRecorder.isTypeSupported('audio/webm')) {
options = { type: 'audio/webm' };
} else if (MediaRecorder.isTypeSupported('video/mp4')) {
options = { type: 'video/mp4' };
} else if (MediaRecorder.isTypeSupported('video/mp3')) {
options = { type: 'video/mp3' };
} else {
console.error('no suitable mimetype found for this device');
}
const blob = new Blob(chunks, options);
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
formData.append('file', blob, 'recording.mp4');
formData.append('file', blob, 'recording.mp3');
formData.append(
'data',
JSON.stringify({
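The recorder now probes an mp3 type and uploads recording.mp3. The probe-and-fallback chain above can be factored into a small helper; a sketch whose candidate list simply mirrors those branches:

```ts
// Sketch: return the first container type this browser can record,
// in the same priority order as the code above.
function pickRecorderMimeType(): string | undefined {
  const candidates = ['audio/webm', 'video/mp4', 'video/mp3'];
  const found = candidates.find((m) => MediaRecorder.isTypeSupported(m));
  if (!found) console.error('no suitable mimetype found for this device');
  return found;
}

// const mimeType = pickRecorderMimeType();
// const blob = new Blob(chunks, mimeType ? { type: mimeType } : {});
```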

View File

@@ -8,13 +8,19 @@ import type {
} from '@/global/core/api/datasetReq.d';
import type {
CreateDatasetCollectionParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody,
FileIdCreateDatasetCollectionParams,
LinkCreateDatasetCollectionParams,
PostWebsiteSyncParams
PostWebsiteSyncParams,
TextCreateDatasetCollectionParams
} from '@fastgpt/global/core/dataset/api.d';
import type {
GetTrainingQueueProps,
GetTrainingQueueResponse,
PostPreviewFilesChunksProps,
PostPreviewFilesChunksResponse,
PostPreviewTableChunksResponse,
SearchTestProps,
SearchTestResponse
} from '@/global/core/dataset/api.d';
@@ -23,10 +29,7 @@ import type {
CreateDatasetParams,
InsertOneDatasetDataProps
} from '@/global/core/dataset/api.d';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
} from '@fastgpt/global/core/dataset/api.d';
import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import {
DatasetCollectionSyncResultEnum,
@@ -75,8 +78,14 @@ export const getDatasetCollectionById = (id: string) =>
GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data);
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data);
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
POST(`/core/dataset/collection/update`, data);
@@ -95,12 +104,6 @@ export const getDatasetDataList = (data: GetDatasetDataListProps) =>
export const getDatasetDataItemById = (id: string) =>
GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id });
/**
* push data to training queue
*/
export const postChunks2Dataset = (data: PushDatasetDataProps) =>
POST<PushDatasetDataResponse>(`/core/dataset/data/pushData`, data);
/**
* insert one data to dataset (immediately insert)
*/
@@ -122,6 +125,8 @@ export const delOneDatasetDataById = (id: string) =>
/* get length of system training queue */
export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);
/* ================== file ======================== */
export const getFileViewUrl = (fileId: string) =>
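Collection creation is now split into per-source endpoints (file, link, text, csvTable), replacing the removed client-side postChunks2Dataset push. A sketch of the text variant; the exact fields of TextCreateDatasetCollectionParams beyond datasetId/name/text are not shown in this diff, so the cast below is a placeholder:

```ts
import { postCreateDatasetTextCollection } from '@/web/core/dataset/api'; // path assumed

// Sketch: create a collection from raw text and get its id back.
async function createTextCollection(datasetId: string, name: string, text: string) {
  const { collectionId } = await postCreateDatasetTextCollection({
    datasetId,
    name,
    text
  } as any); // chunking options omitted in this sketch
  return collectionId;
}
```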

View File

@@ -1,200 +0,0 @@
import MyBox from '@/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { DragEvent, useCallback, useState } from 'react';
export type SelectFileItemType = {
folderPath: string;
file: File;
};
const FileSelector = ({
fileType,
multiple,
maxCount,
maxSize,
isLoading,
onSelectFile,
...props
}: {
fileType: string;
multiple?: boolean;
maxCount?: number;
maxSize?: number;
isLoading?: boolean;
onSelectFile: (e: SelectFileItemType[]) => any;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { File, onOpen } = useSelectFile({
fileType,
multiple,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
// size check
if (!maxSize) {
return onSelectFile(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
if (filterFiles.length < files.length) {
toast({
status: 'warning',
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
});
}
return onSelectFile(filterFiles);
},
[maxSize, onSelectFile, t, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const fileList: SelectFileItemType[] = [];
if (e.dataTransfer.items.length <= 1) {
const traverseFileTree = async (item: any) => {
return new Promise<void>((resolve, reject) => {
if (item.isFile) {
item.file((file: File) => {
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
if (filterTypeReg.test(file.name)) {
fileList.push({
folderPath,
file
});
}
resolve();
});
} else if (item.isDirectory) {
const dirReader = item.createReader();
dirReader.readEntries(async (entries: any[]) => {
for (let i = 0; i < entries.length; i++) {
await traverseFileTree(entries[i]);
}
resolve();
});
}
});
};
for await (const item of items) {
await traverseFileTree(item.webkitGetAsEntry());
}
} else {
const files = Array.from(e.dataTransfer.files);
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file.upload error description'),
status: 'error'
});
}
fileList.push(
...files
.filter((item) => filterTypeReg.test(item.name))
.map((file) => ({
folderPath: '',
file
}))
);
}
selectFileCallback(fileList.slice(0, maxCount));
};
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
cursor={'pointer'}
_hover={{
bg: 'primary.50',
borderColor: 'primary.600'
}}
{...(isDragging
? {
borderColor: 'primary.600'
}
: {
borderColor: 'borderColor.high'
})}
{...props}
onDragEnter={handleDragEnter}
onDragOver={(e) => e.preventDefault()}
onDragLeave={handleDragLeave}
onDrop={handleDrop}
onClick={onOpen}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
<Box fontWeight={'bold'}>
{isDragging
? t('file.Release the mouse to upload the file')
: t('common.file.Select and drag file tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('common.file.Support file type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('common.file.Support max count', { maxCount })}
{/* max size */}
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File
onSelect={(files) =>
selectFileCallback(
files.map((file) => ({
folderPath: '',
file
}))
)
}
/>
</MyBox>
);
};
export default React.memo(FileSelector);
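This deleted selector walked dropped folders with the non-standard webkitGetAsEntry API. For reference, the same traversal written as a promise-returning helper over the standard FileSystemEntry callbacks (a sketch, not part of the commit):

```ts
// Sketch: recursively collect Files from a dropped FileSystemEntry.
// Entries must be taken from e.dataTransfer.items synchronously, before
// any await, or the browser clears the list.
async function collectFiles(entry: FileSystemEntry, out: File[] = []): Promise<File[]> {
  if (entry.isFile) {
    const file = await new Promise<File>((resolve, reject) =>
      (entry as FileSystemFileEntry).file(resolve, reject)
    );
    out.push(file);
  } else if (entry.isDirectory) {
    const reader = (entry as FileSystemDirectoryEntry).createReader();
    // Note: readEntries returns results in batches; reading only once,
    // as the original code does, can miss entries in large directories.
    const entries = await new Promise<FileSystemEntry[]>((resolve, reject) =>
      reader.readEntries(resolve, reject)
    );
    for (const child of entries) {
      await collectFiles(child, out);
    }
  }
  return out;
}
```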

View File

@@ -1,6 +1,6 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from './constants';
import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
import { UseFormReturn } from 'react-hook-form';
export type ImportDataComponentProps = {
@@ -10,19 +10,27 @@ export type ImportDataComponentProps = {
export type ImportSourceItemType = {
id: string;
rawText: string;
chunks: PushDatasetDataChunkProps[];
chunkChars: number;
sourceFolderPath?: string;
sourceName: string;
sourceSize?: string;
icon: string;
createStatus: 'waiting' | 'creating' | 'finish';
metadata?: Record<string, any>;
errorMsg?: string;
// source
sourceName: string;
sourceSize?: string;
icon: string;
// file
isUploading?: boolean;
uploadedFileRate?: number;
dbFileId?: string; // file ID stored in the database; this ID is also the relateId in the metadata of images and collections
file?: File;
// link
link?: string;
// custom text
rawText?: string;
};
export type ImportSourceParamsType = UseFormReturn<
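With chunks and chunkChars gone, an import item now carries only its source payload (file, link, or raw text) until the server parses it. Illustrative literals for the three kinds (the import path, icon keys, and values are placeholders):

```ts
import type { ImportSourceItemType } from '@/web/core/dataset/type'; // path assumed

// Sketch: one item per source kind under the new shape.
const fileItem: ImportSourceItemType = {
  id: '1',
  createStatus: 'waiting',
  sourceName: 'slides.pptx',
  icon: 'file/pptx',
  isUploading: false,
  file: new File([], 'slides.pptx')
};

const linkItem: ImportSourceItemType = {
  id: '2',
  createStatus: 'waiting',
  sourceName: 'https://example.com',
  icon: 'link',
  link: 'https://example.com'
};

const textItem: ImportSourceItemType = {
  id: '3',
  createStatus: 'waiting',
  sourceName: 'Custom text',
  icon: 'text',
  rawText: 'hello world'
};
```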

View File

@@ -1,95 +1,5 @@
import { getFileViewUrl, postChunks2Dataset } from '@/web/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { delay } from '@fastgpt/global/common/system/utils';
import { getFileViewUrl } from '@/web/core/dataset/api';
import { strIsLink } from '@fastgpt/global/common/string/tools';
import type {
FileCreateDatasetCollectionParams,
PushDatasetDataChunkProps
} from '@fastgpt/global/core/dataset/api.d';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { POST } from '@/web/common/api/request';
/* upload a file to create collection */
export const fileCollectionCreate = ({
file,
metadata = {},
data,
percentListen
}: {
file: File;
metadata?: Record<string, any>;
data: FileCreateDatasetCollectionParams;
percentListen: (percent: number) => void;
}) => {
const form = new FormData();
form.append('data', JSON.stringify(data));
form.append('metadata', JSON.stringify(metadata));
form.append('bucketName', BucketNameEnum.dataset);
form.append('file', file, encodeURIComponent(file.name));
return POST<string>(`/core/dataset/collection/create/file?datasetId=${data.datasetId}`, form, {
timeout: 480000,
onUploadProgress: (e) => {
if (!e.total) return;
const percent = Math.round((e.loaded / e.total) * 100);
percentListen && percentListen(percent);
},
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
};
export async function chunksUpload({
billId,
collectionId,
trainingMode,
chunks,
prompt,
rate = 50,
onUploading
}: {
billId: string;
collectionId: string;
trainingMode: `${TrainingModeEnum}`;
chunks: PushDatasetDataChunkProps[];
prompt?: string;
rate?: number;
onUploading?: (rate: number) => void;
}) {
async function upload(data: PushDatasetDataChunkProps[]) {
return postChunks2Dataset({
collectionId,
trainingMode,
data,
prompt,
billId
});
}
let successInsert = 0;
let retryTimes = 10;
for (let i = 0; i < chunks.length; i += rate) {
try {
const uploadChunks = chunks.slice(i, i + rate);
const { insertLen } = await upload(uploadChunks);
if (onUploading) {
onUploading(Math.round(((i + uploadChunks.length) / chunks.length) * 100));
}
successInsert += insertLen;
} catch (error) {
if (retryTimes === 0) {
return Promise.reject(error);
}
await delay(1000);
retryTimes--;
i -= rate;
}
}
return { insertLen: successInsert };
}
export async function getFileAndOpen(fileId: string) {
if (strIsLink(fileId)) {
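The removed chunksUpload did client-side batching with bounded retries: slices of rate chunks, up to ten retries per failure with a one-second delay, and progress reporting. The commit moves that work server-side, but the pattern is worth keeping as a reference; a generic sketch (all names illustrative):

```ts
// Sketch: batched upload with a shared retry budget, generic over the uploader.
async function uploadInBatches<T>(
  items: T[],
  upload: (batch: T[]) => Promise<void>,
  batchSize = 50,
  maxRetries = 10,
  onProgress?: (percent: number) => void
) {
  let retries = maxRetries;
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    try {
      await upload(batch);
      onProgress?.(Math.round(((i + batch.length) / items.length) * 100));
    } catch (err) {
      if (retries-- === 0) throw err;
      await new Promise((r) => setTimeout(r, 1000));
      i -= batchSize; // back up and retry the same batch
    }
  }
}
```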