Mirror of https://github.com/labring/FastGPT.git
Feat: pptx and xlsx loader (#1118)
* perf: plan tip
* perf: upload size controller
* feat: add image ttl index (a sketch follows below)
* feat: new upload file ux
* remove file
* feat: support read pptx
* feat: support xlsx
* fix: rerank docker file
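Of the items above, the image TTL index concerns storage rather than the client code below. As a minimal Mongoose sketch of what such an index can look like (the schema and field names here are illustrative assumptions, not FastGPT's actual schema):

import { Schema, model } from 'mongoose';

// Assumed illustrative schema: temporary images carry an expiry date.
const ImageSchema = new Schema({
  binary: Buffer,
  expiredTime: Date
});

// TTL index: MongoDB removes a document once its expiredTime has passed
// (expireAfterSeconds: 0 means "expire exactly at the stored date").
ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 0 });

export const MongoImage = model('image', ImageSchema);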
@@ -1,4 +1,4 @@
-import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
+import { GET, POST } from '@/web/common/api/request';
 import type { UploadImgProps } from '@fastgpt/global/common/file/api.d';
 import { AxiosProgressEvent } from 'axios';

@@ -8,10 +8,16 @@ export const postUploadFiles = (
   data: FormData,
   onUploadProgress: (progressEvent: AxiosProgressEvent) => void
 ) =>
-  POST<string[]>('/common/file/upload', data, {
+  POST<string>('/common/file/upload', data, {
     timeout: 480000,
     onUploadProgress,
     headers: {
       'Content-Type': 'multipart/form-data; charset=utf-8'
     }
   });
+
+export const getPreviewFileContent = (data: { fileId: string; csvFormat: boolean }) =>
+  POST<{
+    previewContent: string;
+    totalLength: number;
+  }>('/common/file/previewContent', data);
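A minimal usage sketch for the new preview endpoint (the fileId value and the surrounding async context are assumed, not part of this diff):

// Illustrative only: fileId comes from a prior upload.
const { previewContent, totalLength } = await getPreviewFileContent({
  fileId,
  csvFormat: false
});
console.log(`previewed ${previewContent.length} of ${totalLength} chars`);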
@@ -7,13 +7,13 @@ import { compressBase64Img, type CompressImgProps } from '@fastgpt/web/common/fi
 /**
  * upload file to mongo gridfs
  */
-export const uploadFiles = ({
-  files,
+export const uploadFile2DB = ({
+  file,
   bucketName,
   metadata = {},
   percentListen
 }: {
-  files: File[];
+  file: File;
   bucketName: `${BucketNameEnum}`;
   metadata?: Record<string, any>;
   percentListen?: (percent: number) => void;
@@ -21,9 +21,7 @@ export const uploadFiles = ({
   const form = new FormData();
   form.append('metadata', JSON.stringify(metadata));
   form.append('bucketName', bucketName);
-  files.forEach((file) => {
-    form.append('file', file, encodeURIComponent(file.name));
-  });
+  form.append('file', file, encodeURIComponent(file.name));
   return postUploadFiles(form, (e) => {
     if (!e.total) return;

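Sketch of calling the renamed single-file helper (the File value and the logging are illustrative; postUploadFiles now types the response as a single fileId string):

// Illustrative call with the dataset bucket.
const fileId = await uploadFile2DB({
  file,
  bucketName: BucketNameEnum.dataset,
  percentListen: (percent) => console.log(`uploaded ${percent}%`)
});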
@@ -23,14 +23,18 @@ export const useSelectFile = (props?: {
         accept={fileType}
         multiple={multiple}
         onChange={(e) => {
-          if (!e.target.files || e.target.files?.length === 0) return;
-          if (e.target.files.length > maxCount) {
-            return toast({
+          const files = e.target.files;
+          if (!files || files?.length === 0) return;
+
+          let fileList = Array.from(files);
+          if (fileList.length > maxCount) {
+            toast({
               status: 'warning',
               title: t('common.file.Select file amount limit', { max: maxCount })
             });
+            fileList = fileList.slice(0, maxCount);
           }
-          onSelect(Array.from(e.target.files), openSign.current);
+          onSelect(fileList, openSign.current);
         }}
       />
     </Box>

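Behavior change worth noting in the hunk above: an over-limit selection is no longer rejected outright; the hook now warns and truncates the list to the first maxCount files before calling onSelect.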
@@ -77,15 +77,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps) => {
       let options = {};
       if (MediaRecorder.isTypeSupported('audio/webm')) {
         options = { type: 'audio/webm' };
-      } else if (MediaRecorder.isTypeSupported('video/mp4')) {
-        options = { type: 'video/mp4' };
+      } else if (MediaRecorder.isTypeSupported('video/mp3')) {
+        options = { type: 'video/mp3' };
       } else {
         console.error('no suitable mimetype found for this device');
       }
       const blob = new Blob(chunks, options);
       const duration = Math.round((Date.now() - startTimestamp.current) / 1000);

-      formData.append('file', blob, 'recording.mp4');
+      formData.append('file', blob, 'recording.mp3');
       formData.append(
         'data',
         JSON.stringify({

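The same feature-detection pattern, extracted as a standalone sketch (the chunks array is assumed to have been filled from the recorder's dataavailable events):

// Pick the first Blob container type this browser's MediaRecorder claims to support.
const chunks: BlobPart[] = []; // assumed: pushed from recorder.ondataavailable
const candidates = ['audio/webm', 'video/mp4', 'video/mp3'];
const supported = candidates.find((t) => MediaRecorder.isTypeSupported(t));
const blob = new Blob(chunks, supported ? { type: supported } : undefined);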
@@ -8,13 +8,19 @@ import type {
 } from '@/global/core/api/datasetReq.d';
 import type {
   CreateDatasetCollectionParams,
+  CsvTableCreateDatasetCollectionParams,
   DatasetUpdateBody,
+  FileIdCreateDatasetCollectionParams,
   LinkCreateDatasetCollectionParams,
-  PostWebsiteSyncParams
+  PostWebsiteSyncParams,
+  TextCreateDatasetCollectionParams
 } from '@fastgpt/global/core/dataset/api.d';
 import type {
   GetTrainingQueueProps,
   GetTrainingQueueResponse,
+  PostPreviewFilesChunksProps,
+  PostPreviewFilesChunksResponse,
+  PostPreviewTableChunksResponse,
   SearchTestProps,
   SearchTestResponse
 } from '@/global/core/dataset/api.d';
@@ -23,10 +29,7 @@ import type {
   CreateDatasetParams,
   InsertOneDatasetDataProps
 } from '@/global/core/dataset/api.d';
-import type {
-  PushDatasetDataProps,
-  PushDatasetDataResponse
-} from '@fastgpt/global/core/dataset/api.d';
+import type { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api.d';
 import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
 import {
   DatasetCollectionSyncResultEnum,
@@ -75,8 +78,14 @@ export const getDatasetCollectionById = (id: string) =>
   GET<DatasetCollectionItemType>(`/core/dataset/collection/detail`, { id });
 export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
   POST<string>(`/core/dataset/collection/create`, data);
+export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data);
+export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
+export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
+export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data);

 export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
   POST(`/core/dataset/collection/update`, data);
@@ -95,12 +104,6 @@ export const getDatasetDataList = (data: GetDatasetDataListProps) =>
 export const getDatasetDataItemById = (id: string) =>
   GET<DatasetDataItemType>(`/core/dataset/data/detail`, { id });

-/**
- * push data to training queue
- */
-export const postChunks2Dataset = (data: PushDatasetDataProps) =>
-  POST<PushDatasetDataResponse>(`/core/dataset/data/pushData`, data);
-
 /**
  * insert one data to dataset (immediately insert)
  */
@@ -122,6 +125,8 @@ export const delOneDatasetDataById = (id: string) =>
 /* get length of system training queue */
 export const getTrainingQueueLen = (data: GetTrainingQueueProps) =>
   GET<GetTrainingQueueResponse>(`/core/dataset/training/getQueueLen`, data);
+export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
+  POST<{ q: string; a: string }[]>('/core/dataset/file/getPreviewChunks', data);

 /* ================== file ======================== */
 export const getFileViewUrl = (fileId: string) =>

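Read together, the new endpoints suggest a two-step client flow: upload the raw file to GridFS first, then create a typed collection from the returned id. A hedged sketch (any fields beyond datasetId and fileId are assumptions about the params type, which this diff does not show):

// Illustrative flow; FileIdCreateDatasetCollectionParams' full shape is not in this diff.
const fileId = await uploadFile2DB({ file, bucketName: BucketNameEnum.dataset });
const { collectionId } = await postCreateDatasetFileCollection({
  datasetId,
  fileId
} as FileIdCreateDatasetCollectionParams);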
@@ -1,200 +0,0 @@
-import MyBox from '@/components/common/MyBox';
-import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
-import { useToast } from '@fastgpt/web/hooks/useToast';
-import { Box, FlexProps } from '@chakra-ui/react';
-import { formatFileSize } from '@fastgpt/global/common/file/tools';
-import MyIcon from '@fastgpt/web/components/common/Icon';
-import { useTranslation } from 'next-i18next';
-import React, { DragEvent, useCallback, useState } from 'react';
-
-export type SelectFileItemType = {
-  folderPath: string;
-  file: File;
-};
-
-const FileSelector = ({
-  fileType,
-  multiple,
-  maxCount,
-  maxSize,
-  isLoading,
-  onSelectFile,
-  ...props
-}: {
-  fileType: string;
-  multiple?: boolean;
-  maxCount?: number;
-  maxSize?: number;
-  isLoading?: boolean;
-  onSelectFile: (e: SelectFileItemType[]) => any;
-} & FlexProps) => {
-  const { t } = useTranslation();
-  const { toast } = useToast();
-  const { File, onOpen } = useSelectFile({
-    fileType,
-    multiple,
-    maxCount
-  });
-  const [isDragging, setIsDragging] = useState(false);
-
-  const filterTypeReg = new RegExp(
-    `(${fileType
-      .split(',')
-      .map((item) => item.trim())
-      .join('|')})$`,
-    'i'
-  );
-
-  const selectFileCallback = useCallback(
-    (files: SelectFileItemType[]) => {
-      // size check
-      if (!maxSize) {
-        return onSelectFile(files);
-      }
-      const filterFiles = files.filter((item) => item.file.size <= maxSize);
-
-      if (filterFiles.length < files.length) {
-        toast({
-          status: 'warning',
-          title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
-        });
-      }
-
-      return onSelectFile(filterFiles);
-    },
-    [maxSize, onSelectFile, t, toast]
-  );
-
-  const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
-    e.preventDefault();
-    setIsDragging(true);
-  };
-
-  const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
-    e.preventDefault();
-    setIsDragging(false);
-  };
-
-  const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
-    e.preventDefault();
-    setIsDragging(false);
-
-    const items = e.dataTransfer.items;
-    const fileList: SelectFileItemType[] = [];
-
-    if (e.dataTransfer.items.length <= 1) {
-      const traverseFileTree = async (item: any) => {
-        return new Promise<void>((resolve, reject) => {
-          if (item.isFile) {
-            item.file((file: File) => {
-              const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
-
-              if (filterTypeReg.test(file.name)) {
-                fileList.push({
-                  folderPath,
-                  file
-                });
-              }
-              resolve();
-            });
-          } else if (item.isDirectory) {
-            const dirReader = item.createReader();
-            dirReader.readEntries(async (entries: any[]) => {
-              for (let i = 0; i < entries.length; i++) {
-                await traverseFileTree(entries[i]);
-              }
-              resolve();
-            });
-          }
-        });
-      };
-
-      for await (const item of items) {
-        await traverseFileTree(item.webkitGetAsEntry());
-      }
-    } else {
-      const files = Array.from(e.dataTransfer.files);
-      let isErr = files.some((item) => item.type === '');
-      if (isErr) {
-        return toast({
-          title: t('file.upload error description'),
-          status: 'error'
-        });
-      }
-
-      fileList.push(
-        ...files
-          .filter((item) => filterTypeReg.test(item.name))
-          .map((file) => ({
-            folderPath: '',
-            file
-          }))
-      );
-    }
-
-    selectFileCallback(fileList.slice(0, maxCount));
-  };
-
-  return (
-    <MyBox
-      isLoading={isLoading}
-      display={'flex'}
-      flexDirection={'column'}
-      alignItems={'center'}
-      justifyContent={'center'}
-      px={3}
-      py={[4, 7]}
-      borderWidth={'1.5px'}
-      borderStyle={'dashed'}
-      borderRadius={'md'}
-      cursor={'pointer'}
-      _hover={{
-        bg: 'primary.50',
-        borderColor: 'primary.600'
-      }}
-      {...(isDragging
-        ? {
-            borderColor: 'primary.600'
-          }
-        : {
-            borderColor: 'borderColor.high'
-          })}
-      {...props}
-      onDragEnter={handleDragEnter}
-      onDragOver={(e) => e.preventDefault()}
-      onDragLeave={handleDragLeave}
-      onDrop={handleDrop}
-      onClick={onOpen}
-    >
-      <MyIcon name={'common/uploadFileFill'} w={'32px'} />
-      <Box fontWeight={'bold'}>
-        {isDragging
-          ? t('file.Release the mouse to upload the file')
-          : t('common.file.Select and drag file tip')}
-      </Box>
-      {/* file type */}
-      <Box color={'myGray.500'} fontSize={'xs'}>
-        {t('common.file.Support file type', { fileType })}
-      </Box>
-      <Box color={'myGray.500'} fontSize={'xs'}>
-        {/* max count */}
-        {maxCount && t('common.file.Support max count', { maxCount })}
-        {/* max size */}
-        {maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
-      </Box>
-
-      <File
-        onSelect={(files) =>
-          selectFileCallback(
-            files.map((file) => ({
-              folderPath: '',
-              file
-            }))
-          )
-        }
-      />
-    </MyBox>
-  );
-};
-
-export default React.memo(FileSelector);
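The deleted component above was the drag-and-drop file selector, including the recursive webkitGetAsEntry directory walk; the "remove file" and "new upload file ux" items in the commit message presumably cover its replacement.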
projects/app/src/web/core/dataset/type.d.ts (vendored, 24 lines changed)
@@ -1,6 +1,6 @@
 import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
 import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
-import { ImportProcessWayEnum } from './constants';
+import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
 import { UseFormReturn } from 'react-hook-form';

 export type ImportDataComponentProps = {
@@ -10,19 +10,27 @@ export type ImportDataComponentProps = {

 export type ImportSourceItemType = {
   id: string;
-  rawText: string;
-  chunks: PushDatasetDataChunkProps[];
-  chunkChars: number;
-  sourceFolderPath?: string;
-  sourceName: string;
-  sourceSize?: string;
-  icon: string;
+  createStatus: 'waiting' | 'creating' | 'finish';
+  metadata?: Record<string, any>;
+  errorMsg?: string;
+
+  // source
+  sourceName: string;
+  sourceSize?: string;
+  icon: string;
+
+  // file
+  isUploading?: boolean;
+  uploadedFileRate?: number;
+  dbFileId?: string; // file id stored in the DB; this id is also the relateId in image and collection metadata
+  file?: File;
+
+  // link
+  link?: string;
+
+  // custom text
+  rawText?: string;
 };

 export type ImportSourceParamsType = UseFormReturn<

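For illustration, a file-sourced item under the reworked type might look like this (all values invented; the icon path is an assumption):

// Invented example values following the new ImportSourceItemType shape.
const item: ImportSourceItemType = {
  id: 'tmp-1',
  createStatus: 'waiting',
  sourceName: 'report.pptx',
  sourceSize: '1.2 MB',
  icon: 'file/pptx',
  isUploading: true,
  uploadedFileRate: 40,
  file: new File([], 'report.pptx')
};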
@@ -1,95 +1,5 @@
-import { getFileViewUrl, postChunks2Dataset } from '@/web/core/dataset/api';
-import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
-import { delay } from '@fastgpt/global/common/system/utils';
+import { getFileViewUrl } from '@/web/core/dataset/api';
 import { strIsLink } from '@fastgpt/global/common/string/tools';
-import type {
-  FileCreateDatasetCollectionParams,
-  PushDatasetDataChunkProps
-} from '@fastgpt/global/core/dataset/api.d';
-import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
-import { POST } from '@/web/common/api/request';
-
-/* upload a file to create collection */
-export const fileCollectionCreate = ({
-  file,
-  metadata = {},
-  data,
-  percentListen
-}: {
-  file: File;
-  metadata?: Record<string, any>;
-  data: FileCreateDatasetCollectionParams;
-  percentListen: (percent: number) => void;
-}) => {
-  const form = new FormData();
-  form.append('data', JSON.stringify(data));
-  form.append('metadata', JSON.stringify(metadata));
-  form.append('bucketName', BucketNameEnum.dataset);
-  form.append('file', file, encodeURIComponent(file.name));
-
-  return POST<string>(`/core/dataset/collection/create/file?datasetId=${data.datasetId}`, form, {
-    timeout: 480000,
-    onUploadProgress: (e) => {
-      if (!e.total) return;
-
-      const percent = Math.round((e.loaded / e.total) * 100);
-      percentListen && percentListen(percent);
-    },
-    headers: {
-      'Content-Type': 'multipart/form-data; charset=utf-8'
-    }
-  });
-};
-
-export async function chunksUpload({
-  billId,
-  collectionId,
-  trainingMode,
-  chunks,
-  prompt,
-  rate = 50,
-  onUploading
-}: {
-  billId: string;
-  collectionId: string;
-  trainingMode: `${TrainingModeEnum}`;
-  chunks: PushDatasetDataChunkProps[];
-  prompt?: string;
-  rate?: number;
-  onUploading?: (rate: number) => void;
-}) {
-  async function upload(data: PushDatasetDataChunkProps[]) {
-    return postChunks2Dataset({
-      collectionId,
-      trainingMode,
-      data,
-      prompt,
-      billId
-    });
-  }
-
-  let successInsert = 0;
-  let retryTimes = 10;
-  for (let i = 0; i < chunks.length; i += rate) {
-    try {
-      const uploadChunks = chunks.slice(i, i + rate);
-      const { insertLen } = await upload(uploadChunks);
-      if (onUploading) {
-        onUploading(Math.round(((i + uploadChunks.length) / chunks.length) * 100));
-      }
-      successInsert += insertLen;
-    } catch (error) {
-      if (retryTimes === 0) {
-        return Promise.reject(error);
-      }
-      await delay(1000);
-      retryTimes--;
-      i -= rate;
-    }
-  }
-
-  return { insertLen: successInsert };
-}

 export async function getFileAndOpen(fileId: string) {
   if (strIsLink(fileId)) {
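The deleted chunksUpload used a batch-with-retry loop worth keeping in mind: on failure it rewinds the loop index so the same batch is retried after a one-second delay, with at most 10 retries across the whole run. A generic sketch of that pattern (names are illustrative):

// Generic form of the deleted batch-with-retry loop.
async function batchedUpload<T>(
  items: T[],
  batchSize: number,
  send: (batch: T[]) => Promise<void>
) {
  let retriesLeft = 10;
  for (let i = 0; i < items.length; i += batchSize) {
    try {
      await send(items.slice(i, i + batchSize));
    } catch (err) {
      if (retriesLeft-- === 0) throw err; // give up after 10 failures total
      await new Promise((r) => setTimeout(r, 1000));
      i -= batchSize; // rewind so the same batch is retried
    }
  }
}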