Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker file
Authored by Archer on 2024-04-01 19:01:26 +08:00, committed by GitHub
parent f9d266a6af
commit 21288d1736
90 changed files with 2707 additions and 1678 deletions
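Note: the commit message says .pptx and .xlsx files are now readable as raw text, but the loader code itself is not part of the hunks shown below. As a point of reference only, here is a minimal, hypothetical sketch of that kind of extraction; the function names and the use of the `xlsx` (SheetJS) and `jszip` packages are assumptions, not the repository's actual implementation. An .xlsx workbook is flattened to CSV-style text per sheet, and a .pptx is treated as a zip of XML parts whose `<a:t>` text runs are collected per slide.

```ts
// Hypothetical sketch — not the FastGPT loader. Assumes the `xlsx` and `jszip` packages.
import * as XLSX from 'xlsx';
import JSZip from 'jszip';

// Flatten every sheet of an .xlsx workbook into CSV-style text.
export function readXlsxRawText(buffer: Buffer): string {
  const workbook = XLSX.read(buffer, { type: 'buffer' });
  return workbook.SheetNames.map((name) =>
    XLSX.utils.sheet_to_csv(workbook.Sheets[name])
  ).join('\n');
}

// A .pptx is a zip of XML parts; slide text lives in <a:t> runs under ppt/slides/.
export async function readPptxRawText(buffer: Buffer): Promise<string> {
  const zip = await JSZip.loadAsync(buffer);
  const slidePaths = Object.keys(zip.files)
    .filter((path) => /^ppt\/slides\/slide\d+\.xml$/.test(path))
    .sort();

  const slides = await Promise.all(
    slidePaths.map(async (path) => {
      const xml = await zip.file(path)!.async('string');
      // Collect the plain-text runs of one slide.
      const runs = xml.match(/<a:t>([^<]*)<\/a:t>/g) || [];
      return runs.map((run) => run.replace(/<\/?a:t>/g, '')).join(' ');
    })
  );
  return slides.join('\n');
}
```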

View File

@@ -32,7 +32,6 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import MyInput from '@/components/MyInput';
import dayjs from 'dayjs';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useLoading } from '@fastgpt/web/hooks/useLoading';
import { useRouter } from 'next/router';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyMenu from '@/components/MyMenu';
@@ -62,11 +61,11 @@ import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSyncResultEnum } from '@fastgpt/global/core/dataset/constants';
import MyBox from '@/components/common/MyBox';
import { ImportDataSourceEnum } from './Import';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {});
const FileSourceSelector = dynamic(() => import('./Import/sourceSelector/FileSourceSelector'), {});
const FileSourceSelector = dynamic(() => import('./Import/components/FileSourceSelector'), {});
const CollectionCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
@@ -76,14 +75,14 @@ const CollectionCard = () => {
const { toast } = useToast();
const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string };
const { t } = useTranslation();
const { Loading } = useLoading();
const { isPc } = useSystemStore();
const { userInfo } = useUserStore();
const [searchText, setSearchText] = useState('');
const { datasetDetail, updateDataset, startWebsiteSync, loadDatasetDetail } = useDatasetStore();
const { openConfirm: openDeleteConfirm, ConfirmModal: ConfirmDeleteModal } = useConfirm({
content: t('dataset.Confirm to delete the file')
content: t('dataset.Confirm to delete the file'),
type: 'delete'
});
const { openConfirm: openSyncConfirm, ConfirmModal: ConfirmSyncModal } = useConfirm({
content: t('core.dataset.collection.Start Sync Tip')
@@ -452,7 +451,7 @@ const CollectionCard = () => {
query: {
...router.query,
currentTab: TabEnum.import,
source: ImportDataSourceEnum.tableLocal
source: ImportDataSourceEnum.csvTable
}
})
}

View File

@@ -1,6 +1,5 @@
import React, { useContext, useCallback, createContext, useState, useMemo, useEffect } from 'react';
import React, { useContext, createContext, useState, useMemo, useEffect } from 'react';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
@@ -8,6 +7,7 @@ import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
type ChunkSizeFieldType = 'embeddingChunkSize';
export type FormType = {
@@ -29,14 +29,11 @@ type useImportStoreType = {
showPromptInput: boolean;
sources: ImportSourceItemType[];
setSources: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showRePreview: boolean;
totalChunkChars: number;
totalChunks: number;
chunkSize: number;
predictPoints: number;
chunkOverlapRatio: number;
priceTip: string;
uploadRate: number;
splitSources2Chunks: () => void;
importSource: `${ImportDataSourceEnum}`;
};
const StateContext = createContext<useImportStoreType>({
processParamsForm: {} as any,
@@ -49,23 +46,22 @@ const StateContext = createContext<useImportStoreType>({
showChunkInput: false,
showPromptInput: false,
chunkSizeField: 'embeddingChunkSize',
showRePreview: false,
totalChunkChars: 0,
totalChunks: 0,
chunkSize: 0,
predictPoints: 0,
chunkOverlapRatio: 0,
priceTip: '',
uploadRate: 50,
splitSources2Chunks: () => {}
importSource: ImportDataSourceEnum.fileLocal
});
export const useImportStore = () => useContext(StateContext);
const Provider = ({
importSource,
dataset,
parentId,
children
}: {
importSource: `${ImportDataSourceEnum}`;
dataset: DatasetItemType;
parentId?: string;
children: React.ReactNode;
@@ -86,7 +82,6 @@ const Provider = ({
const { t } = useTranslation();
const [sources, setSources] = useState<ImportSourceItemType[]>([]);
const [showRePreview, setShowRePreview] = useState(false);
// watch form
const mode = processParamsForm.watch('mode');
@@ -154,68 +149,15 @@ const Provider = ({
const chunkSize = wayStaticPrams[way].chunkSize;
useEffect(() => {
setShowRePreview(true);
}, [mode, way, chunkSize, customSplitChar]);
const totalChunkChars = useMemo(
() => sources.reduce((sum, file) => sum + file.chunkChars, 0),
[sources]
);
const predictPoints = useMemo(() => {
const totalTokensPredict = totalChunkChars / 1000;
if (mode === TrainingModeEnum.auto) {
const price = totalTokensPredict * 1.3 * agentModel.charsPointsPrice;
return +price.toFixed(2);
}
if (mode === TrainingModeEnum.qa) {
const price = totalTokensPredict * 1.2 * agentModel.charsPointsPrice;
return +price.toFixed(2);
}
return +(totalTokensPredict * vectorModel.charsPointsPrice).toFixed(2);
}, [agentModel.charsPointsPrice, mode, totalChunkChars, vectorModel.charsPointsPrice]);
const totalChunks = useMemo(
() => sources.reduce((sum, file) => sum + file.chunks.length, 0),
[sources]
);
const splitSources2Chunks = useCallback(() => {
setSources((state) =>
state.map((file) => {
const { chunks, chars } = splitText2Chunks({
text: file.rawText,
chunkLen: chunkSize,
overlapRatio: selectModelStaticParam.chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return {
...file,
chunkChars: chars,
chunks: chunks.map((chunk, i) => ({
chunkIndex: i,
q: chunk,
a: ''
}))
};
})
);
setShowRePreview(false);
}, [chunkSize, customSplitChar, selectModelStaticParam.chunkOverlapRatio]);
const value = {
const value: useImportStoreType = {
parentId,
processParamsForm,
...selectModelStaticParam,
sources,
setSources,
showRePreview,
totalChunkChars,
totalChunks,
chunkSize,
predictPoints,
splitSources2Chunks
importSource
};
return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
};

View File

@@ -1,4 +1,4 @@
import React, { useEffect, useMemo, useRef, useState } from 'react';
import React, { useMemo, useRef, useState } from 'react';
import {
Box,
Flex,
@@ -21,11 +21,11 @@ import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider';
import Tag from '@/components/Tag';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
import Preview from '../components/Preview';
import Tag from '@/components/Tag';
function DataProcess({
showPreviewChunks = true,
@@ -38,17 +38,11 @@ function DataProcess({
const { feConfigs } = useSystemStore();
const {
processParamsForm,
sources,
chunkSizeField,
minChunkSize,
showChunkInput,
showPromptInput,
maxChunkSize,
totalChunkChars,
totalChunks,
predictPoints,
showRePreview,
splitSources2Chunks,
priceTip
} = useImportStore();
const { getValues, setValue, register } = processParamsForm;
@@ -69,16 +63,10 @@ function DataProcess({
});
}, [feConfigs?.isPlus]);
useEffect(() => {
if (showPreviewChunks) {
splitSources2Chunks();
}
}, []);
return (
<Box h={'100%'} display={['block', 'flex']} gap={5}>
<Box flex={'1 0 0'} maxW={'600px'}>
<Flex fontWeight={'bold'} alignItems={'center'}>
<Box flex={'1 0 0'} minW={['auto', '540px']} maxW={'600px'}>
<Flex alignItems={'center'}>
<MyIcon name={'common/settingLight'} w={'20px'} />
<Box fontSize={'lg'}>{t('core.dataset.import.Data process params')}</Box>
</Flex>
@@ -273,34 +261,18 @@ function DataProcess({
}}
></LeftRadio>
</Flex>
{showPreviewChunks && (
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.dataset.Total chunks', { total: totalChunks })}
</Tag>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.Total chars', { total: totalChunkChars })}
</Tag>
{feConfigs?.show_pay && (
<MyTooltip label={priceTip}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{t('core.dataset.import.Estimated points', { points: predictPoints })}
</Tag>
</MyTooltip>
)}
</Flex>
)}
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
{showPreviewChunks && showRePreview && (
<Button variant={'primaryOutline'} onClick={splitSources2Chunks}>
{t('core.dataset.import.Re Preview')}
</Button>
<Flex mt={5} alignItems={'center'} pl={'100px'} gap={3}>
{feConfigs?.show_pay && (
<MyTooltip label={priceTip}>
<Tag colorSchema={'gray'} py={'6px'} borderRadius={'md'} px={3}>
{priceTip}
</Tag>
</MyTooltip>
)}
</Flex>
<Flex mt={5} gap={3} justifyContent={'flex-end'}>
<Button
onClick={() => {
if (showRePreview) {
splitSources2Chunks();
}
goToNext();
}}
>
@@ -308,7 +280,9 @@ function DataProcess({
</Button>
</Flex>
</Box>
<Preview sources={sources} showPreviewChunks={showPreviewChunks} />
<Box flex={'1 0 0'} w={'0'}>
<Preview showPreviewChunks={showPreviewChunks} />
</Box>
{isOpenCustomPrompt && (
<PromptTextarea

View File

@@ -1,5 +1,4 @@
import React from 'react';
import { useImportStore } from '../Provider';
import Preview from '../components/Preview';
import { Box, Button, Flex } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
@@ -12,12 +11,11 @@ const PreviewData = ({
goToNext: () => void;
}) => {
const { t } = useTranslation();
const { sources, setSources } = useImportStore();
return (
<Flex flexDirection={'column'} h={'100%'}>
<Box flex={'1 0 0 '}>
<Preview showPreviewChunks={showPreviewChunks} sources={sources} />
<Preview showPreviewChunks={showPreviewChunks} />
</Box>
<Flex mt={2} justifyContent={'flex-end'}>
<Button onClick={goToNext}>{t('common.Next Step')}</Button>

View File

@@ -1,4 +1,4 @@
import React, { useEffect, useState } from 'react';
import React from 'react';
import {
Box,
TableContainer,
@@ -8,164 +8,109 @@ import {
Th,
Td,
Tbody,
Progress,
Flex,
Button
} from '@chakra-ui/react';
import { useImportStore, type FormType } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { chunksUpload, fileCollectionCreate } from '@/web/core/dataset/utils';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import { postCreateDatasetLinkCollection, postDatasetCollection } from '@/web/core/dataset/api';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { checkTeamDatasetSizeLimit } from '@/web/support/user/team/api';
import {
postCreateDatasetCsvTableCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
} from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import Tag from '@/components/Tag';
const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const Upload = () => {
const { t } = useTranslation();
const { toast } = useToast();
const router = useRouter();
const { datasetDetail } = useDatasetStore();
const { parentId, sources, processParamsForm, chunkSize, totalChunks, uploadRate } =
const { importSource, parentId, sources, setSources, processParamsForm, chunkSize } =
useImportStore();
const [uploadList, setUploadList] = useState<
(ImportSourceItemType & {
uploadedFileRate: number;
uploadedChunksRate: number;
})[]
>([]);
const { handleSubmit } = processParamsForm;
const { mutate: startUpload, isLoading } = useRequest({
mutationFn: async ({ mode, customSplitChar, qaPrompt, webSelector }: FormType) => {
if (uploadList.length === 0) return;
await checkTeamDatasetSizeLimit(totalChunks);
let totalInsertion = 0;
if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
// Batch create collection and upload chunks
for await (const item of uploadList) {
// create collection
const collectionId = await (async () => {
const commonParams = {
parentId,
trainingType: mode,
datasetId: datasetDetail._id,
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
name: item.sourceName,
rawTextLength: item.rawText.length,
hashRawText: hashStr(item.rawText)
};
if (item.file) {
return fileCollectionCreate({
file: item.file,
data: {
...commonParams,
collectionMetadata: {
relatedImgId: item.id
for await (const item of filterWaitingSources) {
setSources((state) =>
state.map((source) =>
source.id === item.id
? {
...source,
createStatus: 'creating'
}
},
percentListen: (e) => {
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedFileRate: e
}
: uploadItem
)
);
}
});
} else if (item.link) {
const { collectionId } = await postCreateDatasetLinkCollection({
...commonParams,
link: item.link,
metadata: {
webPageSelector: webSelector
}
});
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedFileRate: 100
}
: uploadItem
)
);
return collectionId;
} else if (item.rawText) {
// manual collection
return postDatasetCollection({
...commonParams,
type: DatasetCollectionTypeEnum.virtual
});
}
return '';
})();
: source
)
);
if (!collectionId) continue;
if (item.link) continue;
// create collection
const commonParams = {
parentId,
trainingType: mode,
datasetId: datasetDetail._id,
chunkSize,
chunkSplitter: customSplitChar,
qaPrompt,
const billId = await postCreateTrainingUsage({
name: item.sourceName,
datasetId: datasetDetail._id
});
name: item.sourceName
};
if (importSource === ImportDataSourceEnum.fileLocal && item.dbFileId) {
await postCreateDatasetFileCollection({
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.fileLink && item.link) {
await postCreateDatasetLinkCollection({
...commonParams,
link: item.link,
metadata: {
webPageSelector: webSelector
}
});
} else if (importSource === ImportDataSourceEnum.fileCustom && item.rawText) {
// manual collection
await postCreateDatasetTextCollection({
...commonParams,
text: item.rawText
});
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
await postCreateDatasetCsvTableCollection({
...commonParams,
fileId: item.dbFileId
});
}
// upload chunks
const chunks = item.chunks;
const { insertLen } = await chunksUpload({
collectionId,
billId,
trainingMode: mode,
chunks,
rate: uploadRate,
onUploading: (e) => {
setUploadList((state) =>
state.map((uploadItem) =>
uploadItem.id === item.id
? {
...uploadItem,
uploadedChunksRate: e
}
: uploadItem
)
);
},
prompt: qaPrompt
});
totalInsertion += insertLen;
setSources((state) =>
state.map((source) =>
source.id === item.id
? {
...source,
createStatus: 'finish'
}
: source
)
);
}
return totalInsertion;
},
onSuccess(num) {
if (showPreviewChunks) {
toast({
title: t('core.dataset.import.Import Success Tip', { num }),
status: 'success'
});
} else {
toast({
title: t('core.dataset.import.Upload success'),
status: 'success'
});
}
onSuccess() {
toast({
title: t('core.dataset.import.Import success'),
status: 'success'
});
// close import page
router.replace({
@@ -175,21 +120,21 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
}
});
},
onError() {
setSources((state) =>
state.map((source) =>
source.createStatus === 'creating'
? {
...source,
createStatus: 'waiting'
}
: source
)
);
},
errorToast: t('common.file.Upload failed')
});
useEffect(() => {
setUploadList(
sources.map((item) => {
return {
...item,
uploadedFileRate: item.file ? 0 : -1,
uploadedChunksRate: 0
};
})
);
}, []);
return (
<Box>
<TableContainer>
@@ -199,85 +144,35 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
<Th borderLeftRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
{t('core.dataset.import.Source name')}
</Th>
{showPreviewChunks ? (
<>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.Chunk amount')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload file progress')}
</Th>
<Th borderRightRadius={'md'} overflow={'hidden'} borderBottom={'none'} py={4}>
{t('core.dataset.import.Data file progress')}
</Th>
</>
) : (
<>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload status')}
</Th>
</>
)}
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload status')}
</Th>
</Tr>
</Thead>
<Tbody>
{uploadList.map((item) => (
{sources.map((item) => (
<Tr key={item.id}>
<Td display={'flex'} alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
{item.sourceName}
<Td>
<Flex alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
<Box whiteSpace={'wrap'} maxW={'30vw'}>
{item.sourceName}
</Box>
</Flex>
</Td>
<Td>
<Box display={'inline-block'}>
{item.createStatus === 'waiting' && (
<Tag colorSchema={'gray'}>{t('common.Waiting')}</Tag>
)}
{item.createStatus === 'creating' && (
<Tag colorSchema={'blue'}>{t('common.Creating')}</Tag>
)}
{item.createStatus === 'finish' && (
<Tag colorSchema={'green'}>{t('common.Finish')}</Tag>
)}
</Box>
</Td>
{showPreviewChunks ? (
<>
<Td>{item.chunks.length}</Td>
<Td>
{item.uploadedFileRate === -1 ? (
'-'
) : (
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedFileRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={'blue'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedFileRate}%`}
</Flex>
)}
</Td>
<Td>
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedChunksRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={'purple'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedChunksRate}%`}
</Flex>
</Td>
</>
) : (
<>
<Td color={item.uploadedFileRate === 100 ? 'green.600' : 'myGray.600'}>
{item.uploadedFileRate === 100 ? t('common.Finish') : t('common.Waiting')}
</Td>
</>
)}
</Tr>
))}
</Tbody>
@@ -286,8 +181,8 @@ const Upload = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
<Flex justifyContent={'flex-end'} mt={4}>
<Button isLoading={isLoading} onClick={handleSubmit((data) => startUpload(data))}>
{uploadList.length > 0
? `${t('core.dataset.import.Total files', { total: uploadList.length })} | `
{sources.length > 0
? `${t('core.dataset.import.Total files', { total: sources.length })} | `
: ''}
{t('core.dataset.import.Start upload')}
</Button>

View File

@@ -0,0 +1,296 @@
import MyBox from '@/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { DragEvent, useCallback, useMemo, useState } from 'react';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { uploadFile2DB } from '@/web/common/file/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { ImportSourceItemType } from '@/web/core/dataset/type';
export type SelectFileItemType = {
fileId: string;
folderPath: string;
file: File;
};
const FileSelector = ({
fileType,
selectFiles,
setSelectFiles,
onStartSelect,
onFinishSelect,
...props
}: {
fileType: string;
selectFiles: ImportSourceItemType[];
setSelectFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
onStartSelect: () => void;
onFinishSelect: () => void;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { feConfigs } = useSystemStore();
const maxCount = feConfigs?.uploadFileMaxAmount || 1000;
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
const { File, onOpen } = useSelectFile({
fileType,
multiple: true,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const isMaxSelected = useMemo(
() => selectFiles.length >= maxCount,
[maxCount, selectFiles.length]
);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
onStartSelect();
setSelectFiles((state) => {
const formatFiles = files.map<ImportSourceItemType>((selectFile) => {
const { fileId, file } = selectFile;
return {
id: fileId,
createStatus: 'waiting',
file,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
isUploading: true,
uploadedFileRate: 0
};
});
const results = formatFiles.concat(state).slice(0, maxCount);
return results;
});
try {
// upload file
await Promise.all(
files.map(async ({ fileId, file }) => {
const uploadFileId = await uploadFile2DB({
file,
bucketName: BucketNameEnum.dataset,
percentListen: (e) => {
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
uploadedFileRate: e
}
: item
)
);
}
});
setSelectFiles((state) =>
state.map((item) =>
item.id === fileId
? {
...item,
dbFileId: uploadFileId,
isUploading: false
}
: item
)
);
})
);
} catch (error) {
console.log(error);
}
onFinishSelect();
}
}
});
const selectFileCallback = useCallback(
(files: SelectFileItemType[]) => {
if (selectFiles.length + files.length > maxCount) {
files = files.slice(0, maxCount - selectFiles.length);
toast({
status: 'warning',
title: t('common.file.Some file count exceeds limit', { maxCount })
});
}
// size check
if (!maxSize) {
return onSelectFile(files);
}
const filterFiles = files.filter((item) => item.file.size <= maxSize);
if (filterFiles.length < files.length) {
toast({
status: 'warning',
title: t('common.file.Some file size exceeds limit', { maxSize: formatFileSize(maxSize) })
});
}
return onSelectFile(filterFiles);
},
[maxCount, maxSize, onSelectFile, selectFiles.length, t, toast]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const fileList: SelectFileItemType[] = [];
if (e.dataTransfer.items.length <= 1) {
const traverseFileTree = async (item: any) => {
return new Promise<void>((resolve, reject) => {
if (item.isFile) {
item.file((file: File) => {
const folderPath = (item.fullPath || '').split('/').slice(2, -1).join('/');
if (filterTypeReg.test(file.name)) {
fileList.push({
fileId: getNanoid(),
folderPath,
file
});
}
resolve();
});
} else if (item.isDirectory) {
const dirReader = item.createReader();
dirReader.readEntries(async (entries: any[]) => {
for (let i = 0; i < entries.length; i++) {
await traverseFileTree(entries[i]);
}
resolve();
});
}
});
};
for await (const item of items) {
await traverseFileTree(item.webkitGetAsEntry());
}
} else {
const files = Array.from(e.dataTransfer.files);
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file.upload error description'),
status: 'error'
});
}
fileList.push(
...files
.filter((item) => filterTypeReg.test(item.name))
.map((file) => ({
fileId: getNanoid(),
folderPath: '',
file
}))
);
}
selectFileCallback(fileList.slice(0, maxCount));
};
return (
<MyBox
isLoading={isLoading}
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
{...(isMaxSelected
? {}
: {
cursor: 'pointer',
_hover: {
bg: 'primary.50',
borderColor: 'primary.600'
},
borderColor: isDragging ? 'primary.600' : 'borderColor.high',
onDragEnter: handleDragEnter,
onDragOver: (e) => e.preventDefault(),
onDragLeave: handleDragLeave,
onDrop: handleDrop,
onClick: onOpen
})}
{...props}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
{isMaxSelected ? (
<>
<Box color={'myGray.500'} fontSize={'xs'}>
</Box>
</>
) : (
<>
<Box fontWeight={'bold'}>
{isDragging
? t('file.Release the mouse to upload the file')
: t('common.file.Select and drag file tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('common.file.Support file type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('common.file.Support max count', { maxCount })}
{/* max size */}
{maxSize && t('common.file.Support max size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File
onSelect={(files) =>
selectFileCallback(
files.map((file) => ({
fileId: getNanoid(),
folderPath: '',
file
}))
)
}
/>
</>
)}
</MyBox>
);
};
export default React.memo(FileSelector);

View File

@@ -3,9 +3,9 @@ import MyModal from '@fastgpt/web/components/common/MyModal';
import { ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
import { ImportDataSourceEnum } from '..';
import { useRouter } from 'next/router';
import { TabEnum } from '../../..';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
const FileModeSelector = ({ onClose }: { onClose: () => void }) => {
const { t } = useTranslation();

View File

@@ -1,132 +1,94 @@
import React, { useMemo, useState } from 'react';
import { Box, Flex } from '@chakra-ui/react';
import React, { useState } from 'react';
import { Box, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import RowTabs from '@fastgpt/web/components/common/Tabs/RowTabs';
import { useImportStore } from '../Provider';
import MyMenu from '@/components/MyMenu';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import dynamic from 'next/dynamic';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
const PreviewChunks = dynamic(() => import('./PreviewChunks'));
enum PreviewListEnum {
chunks = 'chunks',
sources = 'sources'
}
const Preview = ({
sources,
showPreviewChunks
}: {
sources: ImportSourceItemType[];
showPreviewChunks: boolean;
}) => {
const Preview = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
const { t } = useTranslation();
const [previewListType, setPreviewListType] = useState(
showPreviewChunks ? PreviewListEnum.chunks : PreviewListEnum.sources
);
const chunks = useMemo(() => {
const oneSourceChunkLength = Math.max(4, Math.floor(50 / sources.length));
return sources
.map((source) =>
source.chunks.slice(0, oneSourceChunkLength).map((chunk, i) => ({
...chunk,
index: i + 1,
sourceName: source.sourceName,
sourceIcon: source.icon
}))
)
.flat();
}, [sources]);
const { sources } = useImportStore();
const [previewRawTextSource, setPreviewRawTextSource] = useState<ImportSourceItemType>();
const [previewChunkSource, setPreviewChunkSource] = useState<ImportSourceItemType>();
return (
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'} flex={'1 0 0'}>
<Box>
<RowTabs
list={[
...(showPreviewChunks
? [
{
icon: 'common/viewLight',
label: t('core.dataset.import.Preview chunks'),
value: PreviewListEnum.chunks
<Box h={'100%'} display={['block', 'flex']} flexDirection={'column'}>
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'20px'} />
<Box fontSize={'lg'}>{t('core.dataset.import.Sources list')}</Box>
</Flex>
<Box mt={3} flex={'1 0 0'} width={'100%'} overflow={'auto'}>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
mb={3}
alignItems={'center'}
>
<MyIcon name={source.icon as any} w={'16px'} />
<Box mx={1} flex={'1 0 0'} w={0} className="textEllipsis">
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box fontSize={'xs'} color={'myGray.600'}>
<MyMenu
Button={
<IconButton
icon={<MyIcon name={'common/viewLight'} w={'14px'} p={2} />}
aria-label={''}
size={'sm'}
variant={'whitePrimary'}
/>
}
]
: []),
{
icon: 'core/dataset/fileCollection',
label: t('core.dataset.import.Sources list'),
value: PreviewListEnum.sources
}
]}
value={previewListType}
onChange={(e) => setPreviewListType(e as PreviewListEnum)}
/>
</Box>
<Box mt={3} flex={'1 0 0'} overflow={'auto'}>
{previewListType === PreviewListEnum.chunks ? (
<>
{chunks.map((chunk, i) => (
<Box
key={i}
p={4}
bg={'white'}
mb={3}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
whiteSpace={'pre-wrap'}
>
<Flex mb={1} alignItems={'center'} fontSize={'sm'}>
<Box
flexShrink={0}
px={1}
color={'primary.600'}
borderWidth={'1px'}
borderColor={'primary.200'}
bg={'primary.50'}
borderRadius={'sm'}
>
# {chunk.index}
</Box>
<Flex ml={2} fontWeight={'bold'} alignItems={'center'} gap={1}>
<MyIcon name={chunk.sourceIcon as any} w={'14px'} />
{chunk.sourceName}
</Flex>
</Flex>
<Box fontSize={'xs'} whiteSpace={'pre-wrap'} wordBreak={'break-all'}>
<Box color={'myGray.900'}>{chunk.q}</Box>
<Box color={'myGray.500'}>{chunk.a}</Box>
</Box>
menuList={[
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/fileCollection'} w={'14px'} mr={2} />
{t('core.dataset.import.Preview raw text')}
</Flex>
),
onClick: () => setPreviewRawTextSource(source)
},
{
label: (
<Flex alignItems={'center'}>
<MyIcon name={'core/dataset/splitLight'} w={'14px'} mr={2} />
{t('core.dataset.import.Preview chunks')}
</Flex>
),
onClick: () => setPreviewChunkSource(source)
}
]}
/>
</Box>
))}
</>
) : (
<>
{sources.map((source) => (
<Flex
key={source.id}
bg={'white'}
p={4}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
mb={3}
>
<MyIcon name={source.icon as any} w={'16px'} />
<Box mx={1} flex={'1 0 0'} className="textEllipsis">
{source.sourceName}
</Box>
{showPreviewChunks && (
<Box>
{t('core.dataset.import.File chunk amount', { amount: source.chunks.length })}
</Box>
)}
</Flex>
))}
</>
)}
)}
</Flex>
))}
</Box>
{!!previewRawTextSource && (
<PreviewRawText
previewSource={previewRawTextSource}
onClose={() => setPreviewRawTextSource(undefined)}
/>
)}
{!!previewChunkSource && (
<PreviewChunks
previewSource={previewChunkSource}
onClose={() => setPreviewChunkSource(undefined)}
/>
)}
</Box>
);
};

View File

@@ -0,0 +1,95 @@
import React, { useMemo } from 'react';
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { getPreviewChunks } from '@/web/core/dataset/api';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
const PreviewChunks = ({
previewSource,
onClose
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource, chunkSize, chunkOverlapRatio, processParamsForm } = useImportStore();
const { data = [], isLoading } = useQuery(
['previewSource'],
() => {
if (
importSource === ImportDataSourceEnum.fileLocal ||
importSource === ImportDataSourceEnum.csvTable ||
importSource === ImportDataSourceEnum.fileLink
) {
return getPreviewChunks({
type: importSource,
sourceId: previewSource.dbFileId || previewSource.link || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar')
});
} else if (importSource === ImportDataSourceEnum.fileCustom) {
const customSplitChar = processParamsForm.getValues('customSplitChar');
const { chunks } = splitText2Chunks({
text: previewSource.rawText || '',
chunkLen: chunkSize,
overlapRatio: chunkOverlapRatio,
customReg: customSplitChar ? [customSplitChar] : []
});
return chunks.map((chunk) => ({
q: chunk,
a: ''
}));
}
return [];
},
{
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
return (
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
maxW={['90vw', '40vw']}
>
{data.map((item, index) => (
<Box
key={index}
whiteSpace={'pre-wrap'}
fontSize={'sm'}
p={4}
bg={index % 2 === 0 ? 'white' : 'myWhite.600'}
mb={3}
borderRadius={'md'}
borderWidth={'1px'}
borderColor={'borderColor.low'}
boxShadow={'2'}
_notLast={{
mb: 2
}}
>
<Box color={'myGray.900'}>{item.q}</Box>
<Box color={'myGray.500'}>{item.a}</Box>
</Box>
))}
</MyRightDrawer>
);
};
export default React.memo(PreviewChunks);

View File

@@ -1,28 +1,73 @@
import React from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { ModalBody } from '@chakra-ui/react';
export type PreviewRawTextProps = {
icon: string;
title: string;
rawText: string;
};
import { Box } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { useQuery } from '@tanstack/react-query';
import { getPreviewFileContent } from '@/web/common/file/api';
import MyRightDrawer from '@fastgpt/web/components/common/MyDrawer/MyRightDrawer';
import { useImportStore } from '../Provider';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
const PreviewRawText = ({
icon,
title,
rawText,
previewSource,
onClose
}: PreviewRawTextProps & {
}: {
previewSource: ImportSourceItemType;
onClose: () => void;
}) => {
const { toast } = useToast();
const { importSource } = useImportStore();
const { data, isLoading } = useQuery(
['previewSource', previewSource?.dbFileId],
() => {
if (importSource === ImportDataSourceEnum.fileLocal && previewSource.dbFileId) {
return getPreviewFileContent({
fileId: previewSource.dbFileId,
csvFormat: true
});
}
if (importSource === ImportDataSourceEnum.csvTable && previewSource.dbFileId) {
return getPreviewFileContent({
fileId: previewSource.dbFileId,
csvFormat: false
});
}
if (importSource === ImportDataSourceEnum.fileCustom) {
return {
previewContent: (previewSource.rawText || '').slice(0, 3000)
};
}
return {
previewContent: ''
};
},
{
onError(err) {
toast({
status: 'warning',
title: getErrText(err)
});
}
}
);
const rawText = data?.previewContent || '';
return (
<MyModal isOpen onClose={onClose} iconSrc={icon} title={title}>
<ModalBody whiteSpace={'pre-wrap'} overflowY={'auto'}>
<MyRightDrawer
onClose={onClose}
iconSrc={previewSource.icon}
title={previewSource.sourceName}
isLoading={isLoading}
>
<Box whiteSpace={'pre-wrap'} overflowY={'auto'} fontSize={'sm'}>
{rawText}
</ModalBody>
</MyModal>
</Box>
</MyRightDrawer>
);
};
export default PreviewRawText;
export default React.memo(PreviewRawText);

View File

@@ -0,0 +1,119 @@
import React, { useState } from 'react';
import {
Flex,
TableContainer,
Table,
Thead,
Tr,
Th,
Td,
Tbody,
Progress,
IconButton
} from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import MyTooltip from '@/components/MyTooltip';
import dynamic from 'next/dynamic';
const PreviewRawText = dynamic(() => import('./PreviewRawText'));
export const RenderUploadFiles = ({
files,
setFiles,
showPreviewContent
}: {
files: ImportSourceItemType[];
setFiles: React.Dispatch<React.SetStateAction<ImportSourceItemType[]>>;
showPreviewContent?: boolean;
}) => {
const { t } = useTranslation();
const [previewFile, setPreviewFile] = useState<ImportSourceItemType>();
return files.length > 0 ? (
<>
<TableContainer mt={5}>
<Table variant={'simple'} fontSize={'sm'} draggable={false}>
<Thead draggable={false}>
<Tr bg={'myGray.100'} mb={2}>
<Th borderLeftRadius={'md'} borderBottom={'none'} py={4}>
{t('common.file.File Name')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('core.dataset.import.Upload file progress')}
</Th>
<Th borderBottom={'none'} py={4}>
{t('common.file.File Size')}
</Th>
<Th borderRightRadius={'md'} borderBottom={'none'} py={4}>
{t('common.Action')}
</Th>
</Tr>
</Thead>
<Tbody>
{files.map((item) => (
<Tr key={item.id}>
<Td>
<Flex alignItems={'center'}>
<MyIcon name={item.icon as any} w={'16px'} mr={1} />
{item.sourceName}
</Flex>
</Td>
<Td>
<Flex alignItems={'center'} fontSize={'xs'}>
<Progress
value={item.uploadedFileRate}
h={'6px'}
w={'100%'}
maxW={'210px'}
size="sm"
borderRadius={'20px'}
colorScheme={(item.uploadedFileRate || 0) >= 100 ? 'green' : 'blue'}
bg="myGray.200"
hasStripe
isAnimated
mr={2}
/>
{`${item.uploadedFileRate}%`}
</Flex>
</Td>
<Td>{item.sourceSize}</Td>
<Td>
{!item.isUploading && (
<Flex alignItems={'center'} gap={4}>
{showPreviewContent && (
<MyTooltip label={t('core.dataset.import.Preview raw text')}>
<IconButton
variant={'whitePrimary'}
size={'sm'}
icon={<MyIcon name={'common/viewLight'} w={'18px'} />}
aria-label={''}
onClick={() => setPreviewFile(item)}
/>
</MyTooltip>
)}
<IconButton
variant={'grayDanger'}
size={'sm'}
icon={<MyIcon name={'delete'} w={'14px'} />}
aria-label={''}
onClick={() => {
setFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
)}
</Td>
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
{!!previewFile && (
<PreviewRawText previewSource={previewFile} onClose={() => setPreviewFile(undefined)} />
)}
</>
) : null;
};

View File

@@ -1,4 +1,4 @@
import React, { useEffect } from 'react';
import React, { useCallback, useEffect } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import dynamic from 'next/dynamic';
@@ -19,7 +19,7 @@ const CustomTet = ({ activeStep, goToNext }: ImportDataComponentProps) => {
<>
{activeStep === 0 && <CustomTextInput goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
};
@@ -36,6 +36,24 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
}
});
const onSubmit = useCallback(
(data: { name: string; value: string }) => {
const fileId = getNanoid(32);
setSources([
{
id: fileId,
createStatus: 'waiting',
rawText: data.value,
sourceName: data.name,
icon: 'file/fill/manual'
}
]);
goToNext();
},
[goToNext, setSources]
);
useEffect(() => {
const source = sources[0];
if (source) {
@@ -78,25 +96,7 @@ const CustomTextInput = ({ goToNext }: { goToNext: () => void }) => {
/>
</Box>
<Flex mt={5} justifyContent={'flex-end'}>
<Button
onClick={handleSubmit((data) => {
const fileId = getNanoid(32);
setSources([
{
id: fileId,
rawText: data.value,
chunks: [],
chunkChars: 0,
sourceName: data.name,
icon: 'file/fill/manual'
}
]);
goToNext();
})}
>
{t('common.Next Step')}
</Button>
<Button onClick={handleSubmit((data) => onSubmit(data))}>{t('common.Next Step')}</Button>
</Flex>
</Box>
);

View File

@@ -23,7 +23,7 @@ const LinkCollection = ({ activeStep, goToNext }: ImportDataComponentProps) => {
<>
{activeStep === 0 && <CustomLinkImport goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks={false} goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks={false} />}
{activeStep === 2 && <Upload />}
</>
);
};
@@ -128,10 +128,8 @@ const CustomLinkImport = ({ goToNext }: { goToNext: () => void }) => {
setSources(
newLinkList.map((link) => ({
id: getNanoid(32),
createStatus: 'waiting',
link,
rawText: '',
chunks: [],
chunkChars: 0,
sourceName: link,
icon: LinkCollectionIcon
}))

View File

@@ -1,41 +1,27 @@
import React, { useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import { Box, Button, Flex } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import React, { useCallback, useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { readFileRawContent } from '@fastgpt/web/common/file/read';
import { getUploadBase64ImgController } from '@/web/common/file/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import MyTooltip from '@/components/MyTooltip';
import type { PreviewRawTextProps } from '../components/PreviewRawText';
import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic';
import Loading from '@fastgpt/web/components/common/MyLoading';
import { RenderUploadFiles } from '../components/RenderFiles';
const DataProcess = dynamic(() => import('../commonProgress/DataProcess'), {
loading: () => <Loading fixed={false} />
});
const Upload = dynamic(() => import('../commonProgress/Upload'));
const PreviewRawText = dynamic(() => import('../components/PreviewRawText'));
type FileItemType = ImportSourceItemType & { file: File };
const fileType = '.txt, .docx, .csv, .pdf, .md, .html';
const maxSelectFileCount = 1000;
const fileType = '.txt, .docx, .csv, .xlsx, .pdf, .md, .html, .pptx';
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
return (
<>
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <DataProcess showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
};
@@ -44,135 +30,47 @@ export default React.memo(FileLocal);
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources } = useImportStore();
// @ts-ignore
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
const [previewRaw, setPreviewRaw] = useState<PreviewRawTextProps>();
useEffect(() => {
setSources(successFiles);
}, [successFiles]);
}, [setSources, successFiles]);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
for await (const selectFile of files) {
const { file, folderPath } = selectFile;
const relatedId = getNanoid(32);
const { rawText } = await (() => {
try {
return readFileRawContent({
file,
uploadBase64Controller: (base64Img) =>
getUploadBase64ImgController({
base64Img,
type: MongoImageTypeEnum.collectionImage,
metadata: {
relatedId
}
})
});
} catch (error) {
return { rawText: '' };
}
})();
const item: FileItemType = {
id: relatedId,
file,
rawText,
chunks: [],
chunkChars: 0,
sourceFolderPath: folderPath,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
errorMsg: rawText.length === 0 ? t('common.file.Empty file tip') : ''
};
setSelectFiles((state) => {
const results = [item].concat(state).slice(0, maxSelectFileCount);
return results;
});
}
}
}
});
const onclickNext = useCallback(() => {
// filter uploaded files
setSelectFiles((state) => state.filter((item) => (item.uploadedFileRate || 0) >= 100));
goToNext();
}, [goToNext]);
return (
<Box>
<FileSelector
isLoading={isLoading}
fileType={fileType}
multiple
maxCount={maxSelectFileCount}
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
onSelectFile={onSelectFile}
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/>
{/* render files */}
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
{selectFiles.map((item) => (
<MyTooltip key={item.id} label={t('core.dataset.import.Preview raw text')}>
<Flex
alignItems={'center'}
px={4}
py={3}
borderRadius={'md'}
bg={'myGray.100'}
cursor={'pointer'}
onClick={() =>
setPreviewRaw({
icon: item.icon,
title: item.sourceName,
rawText: item.rawText.slice(0, 10000)
})
}
>
<MyIcon name={item.icon as any} w={'16px'} />
<Box ml={1} mr={3}>
{item.sourceName}
</Box>
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
{item.sourceSize}
{item.rawText.length > 0 && (
<>,{t('common.Number of words', { amount: item.rawText.length })}</>
)}
</Box>
{item.errorMsg && (
<MyTooltip label={item.errorMsg}>
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
</MyTooltip>
)}
<MyIcon
name={'common/closeLight'}
w={'14px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={(e) => {
e.stopPropagation();
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
</MyTooltip>
))}
</Flex>
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} showPreviewContent />
<Box textAlign={'right'}>
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
<Box textAlign={'right'} mt={5}>
<Button isDisabled={successFiles.length === 0 || uploading} onClick={onclickNext}>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
: ''}
{t('common.Next Step')}
</Button>
</Box>
{previewRaw && <PreviewRawText {...previewRaw} onClose={() => setPreviewRaw(undefined)} />}
</Box>
);
});

View File

@@ -1,108 +1,62 @@
import React, { useEffect, useMemo, useState } from 'react';
import { ImportDataComponentProps } from '@/web/core/dataset/type.d';
import { Box, Button, Flex } from '@chakra-ui/react';
import { ImportSourceItemType } from '@/web/core/dataset/type.d';
import FileSelector, { type SelectFileItemType } from '@/web/core/dataset/components/FileSelector';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { ImportDataComponentProps, ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import { getNanoid } from '@fastgpt/global/common/string/tools';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import MyTooltip from '@/components/MyTooltip';
import { useImportStore } from '../Provider';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import dynamic from 'next/dynamic';
import { fileDownload } from '@/web/common/file/utils';
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
import { RenderUploadFiles } from '../components/RenderFiles';
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));
type FileItemType = ImportSourceItemType & { file: File };
const fileType = '.csv';
const maxSelectFileCount = 1000;
const FileLocal = ({ activeStep, goToNext }: ImportDataComponentProps) => {
return (
<>
{activeStep === 0 && <SelectFile goToNext={goToNext} />}
{activeStep === 1 && <PreviewData showPreviewChunks goToNext={goToNext} />}
{activeStep === 2 && <Upload showPreviewChunks />}
{activeStep === 2 && <Upload />}
</>
);
};
export default React.memo(FileLocal);
const csvTemplate = `index,content
"必填内容","可选内容。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
const csvTemplate = `"第一列内容","第二列内容"
"必填","可选。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
"只会讲第一和第二列内容导入,其余列会被忽略",""
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () => void }) {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { sources, setSources } = useImportStore();
// @ts-ignore
const [selectFiles, setSelectFiles] = useState<FileItemType[]>(sources);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
useEffect(() => {
setSources(successFiles);
}, [successFiles]);
const { mutate: onSelectFile, isLoading } = useRequest({
mutationFn: async (files: SelectFileItemType[]) => {
{
for await (const selectFile of files) {
const { file, folderPath } = selectFile;
const { header, data } = await readCsvContent({ file });
const filterData: FileItemType['chunks'] = data
.filter((item) => item[0])
.map((item) => ({
q: item[0] || '',
a: item[1] || '',
chunkIndex: 0
}));
const item: FileItemType = {
id: getNanoid(32),
file,
rawText: '',
chunks: filterData,
chunkChars: 0,
sourceFolderPath: folderPath,
sourceName: file.name,
sourceSize: formatFileSize(file.size),
icon: getFileIcon(file.name),
errorMsg:
header[0] !== 'index' || header[1] !== 'content' || filterData.length === 0
? t('core.dataset.import.Csv format error')
: ''
};
setSelectFiles((state) => {
const results = [item].concat(state).slice(0, 10);
return results;
});
}
}
},
errorToast: t('common.file.Select failed')
});
return (
<Box>
<FileSelector
multiple
maxCount={maxSelectFileCount}
maxSize={(feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024}
isLoading={isLoading}
fileType={fileType}
onSelectFile={onSelectFile}
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/>
<Box
@@ -122,43 +76,16 @@ const SelectFile = React.memo(function SelectFile({ goToNext }: { goToNext: () =
</Box>
{/* render files */}
<Flex my={4} flexWrap={'wrap'} gap={5} alignItems={'center'}>
{selectFiles.map((item) => (
<Flex
key={item.id}
alignItems={'center'}
px={4}
py={2}
borderRadius={'md'}
bg={'myGray.100'}
>
<MyIcon name={item.icon as any} w={'16px'} />
<Box ml={1} mr={3}>
{item.sourceName}
</Box>
<Box mr={1} fontSize={'xs'} color={'myGray.500'}>
{item.sourceSize}
</Box>
{item.errorMsg && (
<MyTooltip label={item.errorMsg}>
<MyIcon name={'common/errorFill'} w={'14px'} mr={3} />
</MyTooltip>
)}
<MyIcon
name={'common/closeLight'}
w={'14px'}
color={'myGray.500'}
cursor={'pointer'}
onClick={() => {
setSelectFiles((state) => state.filter((file) => file.id !== item.id));
}}
/>
</Flex>
))}
</Flex>
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
<Box textAlign={'right'}>
<Button isDisabled={successFiles.length === 0 || isLoading} onClick={goToNext}>
<Box textAlign={'right'} mt={5}>
<Button
isDisabled={successFiles.length === 0 || uploading}
onClick={() => {
setSelectFiles((state) => state.filter((item) => !item.errorMsg));
goToNext();
}}
>
{selectFiles.length > 0
? `${t('core.dataset.import.Total files', { total: selectFiles.length })} | `
: ''}

View File

@@ -6,22 +6,15 @@ import { useRouter } from 'next/router';
import { TabEnum } from '../../index';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import dynamic from 'next/dynamic';
import Provider from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import Provider from './Provider';
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
const FileLink = dynamic(() => import('./diffSource/FileLink'));
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
tableLocal = 'tableLocal'
}
const ImportDataset = () => {
const { t } = useTranslation();
const router = useRouter();
@@ -65,7 +58,7 @@ const ImportDataset = () => {
title: t('core.dataset.import.Upload data')
}
],
[ImportDataSourceEnum.tableLocal]: [
[ImportDataSourceEnum.csvTable]: [
{
title: t('core.dataset.import.Select file')
},
@@ -88,7 +81,7 @@ const ImportDataset = () => {
if (source === ImportDataSourceEnum.fileLocal) return FileLocal;
if (source === ImportDataSourceEnum.fileLink) return FileLink;
if (source === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (source === ImportDataSourceEnum.tableLocal) return TableLocal;
if (source === ImportDataSourceEnum.csvTable) return TableLocal;
}, [source]);
return ImportComponent ? (
@@ -142,7 +135,7 @@ const ImportDataset = () => {
<MyStep />
</Box>
</Box>
<Provider dataset={datasetDetail} parentId={parentId}>
<Provider dataset={datasetDetail} parentId={parentId} importSource={source}>
<Box flex={'1 0 0'} overflow={'auto'} position={'relative'}>
<ImportComponent activeStep={activeStep} goToNext={goToNext} />
</Box>

View File

@@ -0,0 +1,7 @@
import { ImportSourceItemType } from '@/web/core/dataset/type';
export type UploadFileItemType = ImportSourceItemType & {
file?: File;
isUploading: boolean;
uploadedFileRate: number;
};

View File

@@ -1,19 +1,5 @@
import React, { useEffect, useMemo, useState } from 'react';
import {
Box,
Textarea,
Button,
Flex,
useTheme,
useDisclosure,
Table,
Thead,
Tbody,
Tr,
Th,
Td,
TableContainer
} from '@chakra-ui/react';
import { Box, Textarea, Button, Flex, useTheme, useDisclosure } from '@chakra-ui/react';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest';
import { postSearchText } from '@/web/core/dataset/api';
@@ -36,10 +22,7 @@ import { useForm } from 'react-hook-form';
import MySelect from '@fastgpt/web/components/common/MySelect';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { fileDownload } from '@/web/common/file/utils';
import { readCsvContent } from '@fastgpt/web/common/file/read/csv';
import { delay } from '@fastgpt/global/common/system/utils';
import QuoteItem from '@/components/core/dataset/QuoteItem';
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import SearchParamsTip from '@/components/core/dataset/SearchParamsTip';
@@ -134,34 +117,6 @@ const Test = ({ datasetId }: { datasetId: string }) => {
});
}
});
// const { mutate: onFileTest, isLoading: fileTestIsLoading } = useRequest({
// mutationFn: async ({ searchParams }: FormType) => {
// if (!selectFile) return Promise.reject('File is not selected');
// const { data } = await readCsvContent({ file: selectFile });
// const testList = data.slice(0, 100);
// const results: SearchTestResponse[] = [];
// for await (const item of testList) {
// try {
// const result = await postSearchText({ datasetId, text: item[0].trim(), ...searchParams });
// results.push(result);
// } catch (error) {
// await delay(500);
// }
// }
// return results;
// },
// onSuccess(res: SearchTestResponse[]) {
// console.log(res);
// },
// onError(err) {
// toast({
// title: getErrText(err),
// status: 'error'
// });
// }
// });
const onSelectFile = async (files: File[]) => {
const file = files[0];