perf: backup import (#4866)

* i18n

* remove invalid code

* perf: backup import

* backup tip

* fix: indexSize invalid
Archer
2025-05-22 15:53:51 +08:00
committed by GitHub
parent dd3c251603
commit 88bd3aaa9e
67 changed files with 751 additions and 388 deletions

View File

@@ -0,0 +1,94 @@
import React, { useState } from 'react';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { useTranslation } from 'next-i18next';
import { Box, Button, HStack, ModalBody, ModalFooter, VStack } from '@chakra-ui/react';
import FileSelector, { type SelectFileItemType } from '../components/FileSelector';
import MyIcon from '@fastgpt/web/components/common/Icon';
import MyIconButton from '@fastgpt/web/components/common/Icon/button';
import { postBackupDatasetCollection } from '@/web/core/dataset/api';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useContextSelector } from 'use-context-selector';
import LightTip from '@fastgpt/web/components/common/LightTip';
const BackupImportModal = ({
onFinish,
onClose
}: {
onFinish: () => void;
onClose: () => void;
}) => {
const { t } = useTranslation();
const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
const [selectFiles, setSelectFiles] = useState<SelectFileItemType[]>([]);
const [percent, setPercent] = useState(0);
const { runAsync: onBackupImport, loading: isBackupLoading } = useRequest2(
async () => {
await postBackupDatasetCollection({
datasetId,
file: selectFiles[0].file,
percentListen: setPercent
});
},
{
onSuccess() {
onFinish();
onClose();
},
successToast: t('dataset:backup_dataset_success')
}
);
return (
<MyModal iconSrc="backup" iconColor={'primary.600'} isOpen title={t('dataset:backup_dataset')}>
<ModalBody>
<LightTip mb={3} icon="common/info" text={t('dataset:backup_dataset_tip')} />
<FileSelector
maxCount={1}
fileType="csv"
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
/>
{/* File render */}
{selectFiles.length > 0 && (
<VStack mt={4} gap={2}>
{selectFiles.map((item, index) => (
<HStack key={index} w={'100%'}>
<MyIcon name={item.icon as any} w={'1rem'} />
<Box color={'myGray.900'}>{item.name}</Box>
<Box fontSize={'xs'} color={'myGray.500'} flex={1}>
{item.size}
</Box>
<MyIconButton
icon="delete"
hoverColor="red.500"
hoverBg="red.50"
onClick={() => {
setSelectFiles(selectFiles.filter((_, i) => i !== index));
}}
/>
</HStack>
))}
</VStack>
)}
</ModalBody>
<ModalFooter>
<Button isLoading={isBackupLoading} variant="whiteBase" mr={2} onClick={onClose}>
{t('common:Close')}
</Button>
<Button onClick={onBackupImport} isDisabled={selectFiles.length === 0 || isBackupLoading}>
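{/* percent reaches 100 once the upload itself finishes; the server then parses the CSV */}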
{isBackupLoading
? percent === 100
? t('dataset:backup_data_parse')
: t('dataset:backup_data_uploading', { num: percent })
: t('common:Import')}
</Button>
</ModalFooter>
</MyModal>
);
};
export default BackupImportModal;

View File

@@ -36,6 +36,7 @@ import MyTag from '@fastgpt/web/components/common/Tag/index';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
const FileSourceSelector = dynamic(() => import('../Import/components/FileSourceSelector'));
const BackupImportModal = dynamic(() => import('./BackupImportModal'));
const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
const { t } = useTranslation();
@@ -76,6 +77,12 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
onOpen: onOpenFileSourceSelector,
onClose: onCloseFileSourceSelector
} = useDisclosure();
// Backup import modal
const {
isOpen: isOpenBackupImportModal,
onOpen: onOpenBackupImportModal,
onClose: onCloseBackupImportModal
} = useDisclosure();
const { runAsync: onCreateCollection } = useRequest2(
async ({ name, type }: { name: string; type: DatasetCollectionTypeEnum }) => {
@@ -220,11 +227,11 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
{
label: (
<Flex>
<MyIcon name={'common/folderFill'} w={'20px'} mr={2} />
{t('common:Folder')}
<MyIcon name={'core/dataset/fileCollection'} mr={2} w={'20px'} />
{t('common:core.dataset.Text collection')}
</Flex>
),
onClick: () => setEditFolderData({})
onClick: onOpenFileSourceSelector
},
{
label: (
@@ -244,27 +251,24 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
{
label: (
<Flex>
<MyIcon name={'core/dataset/fileCollection'} mr={2} w={'20px'} />
{t('common:core.dataset.Text collection')}
<MyIcon name={'backup'} mr={2} w={'20px'} />
{t('dataset:backup_dataset')}
</Flex>
),
onClick: onOpenFileSourceSelector
},
onClick: onOpenBackupImportModal
}
]
},
{
children: [
{
label: (
<Flex>
<MyIcon name={'core/dataset/tableCollection'} mr={2} w={'20px'} />
{t('common:core.dataset.Table collection')}
<MyIcon name={'common/folderFill'} w={'20px'} mr={2} />
{t('common:Folder')}
</Flex>
),
onClick: () =>
router.replace({
query: {
...router.query,
currentTab: TabEnum.import,
source: ImportDataSourceEnum.csvTable
}
})
onClick: () => setEditFolderData({})
}
]
}
@@ -471,6 +475,14 @@ const Header = ({ hasTrainingData }: { hasTrainingData: boolean }) => {
)}
<EditCreateVirtualFileModal iconSrc={'modal/manualDataset'} closeBtnText={''} />
{isOpenFileSourceSelector && <FileSourceSelector onClose={onCloseFileSourceSelector} />}
{isOpenBackupImportModal && (
<BackupImportModal
onFinish={() => {
getData(1);
}}
onClose={onCloseBackupImportModal}
/>
)}
</MyBox>
);
};

View File

@@ -257,18 +257,12 @@ const CollectionCard = () => {
)}
</Td>
<Td py={2}>
{!checkCollectionIsFolder(collection.type) ? (
<>
{collection.trainingType
? t(
(DatasetCollectionDataProcessModeMap[collection.trainingType]
?.label || '-') as any
)
: '-'}
</>
) : (
'-'
)}
{collection.trainingType
? t(
(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label ||
'-') as any
)
: '-'}
</Td>
<Td py={2}>{collection.dataAmount || '-'}</Td>
<Td fontSize={'xs'} py={2} color={'myGray.500'}>

View File

@@ -27,7 +27,10 @@ import Markdown from '@/components/Markdown';
import { useMemoizedFn } from 'ahooks';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionDataProcessModeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import TrainingStates from './CollectionCard/TrainingStates';
import { getTextValidLength } from '@fastgpt/global/common/string/utils';

View File

@@ -118,14 +118,18 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
const imageIndex = watch('imageIndex');
const trainingModeList = useMemo(() => {
const list = Object.entries(DatasetCollectionDataProcessModeMap);
return list
.filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto)
.map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
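// Only the chunk and QA processing modes are offered as selectable training modes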
const list = {
[DatasetCollectionDataProcessModeEnum.chunk]:
DatasetCollectionDataProcessModeMap[DatasetCollectionDataProcessModeEnum.chunk],
[DatasetCollectionDataProcessModeEnum.qa]:
DatasetCollectionDataProcessModeMap[DatasetCollectionDataProcessModeEnum.qa]
};
return Object.entries(list).map(([key, value]) => ({
title: t(value.label as any),
value: key as DatasetCollectionDataProcessModeEnum,
tooltip: t(value.tooltip as any)
}));
}, [t]);
const {
chunkSizeField,

View File

@@ -144,20 +144,6 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.csvTable]: [
{
title: t('dataset:import_select_file')
},
{
title: t('dataset:import_param_setting')
},
{
title: t('dataset:import_data_preview')
},
{
title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.externalFile]: [
{
title: t('dataset:import_select_file')
@@ -206,7 +192,7 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
chunkSettingMode: ChunkSettingModeEnum.auto,
chunkSplitMode: DataChunkSplitModeEnum.size,
embeddingChunkSize: 2000,
embeddingChunkSize: chunkAutoChunkSize,
indexSize: vectorModel?.defaultToken || 512,
qaChunkSize: getLLMDefaultChunkSize(agentModel),
chunkSplitter: '',

View File

@@ -75,7 +75,6 @@ const PreviewData = () => {
overlapRatio: chunkOverlapRatio,
selector: processParamsForm.getValues('webSelector'),
isQAImport: importSource === ImportDataSourceEnum.csvTable,
externalFileId: previewFile.externalFileId
});
},

View File

@@ -26,7 +26,6 @@ import { useRouter } from 'next/router';
import { TabEnum } from '../../../../../pages/dataset/detail/index';
import {
postCreateDatasetApiDatasetCollection,
postCreateDatasetCsvTableCollection,
postCreateDatasetExternalFileCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
@@ -146,11 +145,6 @@ const Upload = () => {
...commonParams,
text: item.rawText
});
} else if (importSource === ImportDataSourceEnum.csvTable && item.dbFileId) {
await postCreateDatasetCsvTableCollection({
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
await postCreateDatasetExternalFileCollection({
...commonParams,

View File

@@ -1,101 +0,0 @@
import React, { useEffect, useMemo, useState } from 'react';
import { type ImportSourceItemType } from '@/web/core/dataset/type.d';
import { Box, Button } from '@chakra-ui/react';
import FileSelector from '../components/FileSelector';
import { useTranslation } from 'next-i18next';
import dynamic from 'next/dynamic';
import { fileDownload } from '@/web/common/file/utils';
import { RenderUploadFiles } from '../components/RenderFiles';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
const PreviewData = dynamic(() => import('../commonProgress/PreviewData'));
const Upload = dynamic(() => import('../commonProgress/Upload'));
const fileType = '.csv';
const FileLocal = () => {
const activeStep = useContextSelector(DatasetImportContext, (v) => v.activeStep);
return (
<>
{activeStep === 0 && <SelectFile />}
{activeStep === 1 && <PreviewData />}
{activeStep === 2 && <Upload />}
</>
);
};
export default React.memo(FileLocal);
const csvTemplate = `index,content
"第一列内容","第二列内容"
"必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
"只会将第一和第二列内容导入,其余列会被忽略",""
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;
const SelectFile = React.memo(function SelectFile() {
const { t } = useTranslation();
const { goToNext, sources, setSources } = useContextSelector(DatasetImportContext, (v) => v);
const [selectFiles, setSelectFiles] = useState<ImportSourceItemType[]>(
sources.map((source) => ({
isUploading: false,
...source
}))
);
const [uploading, setUploading] = useState(false);
const successFiles = useMemo(() => selectFiles.filter((item) => !item.errorMsg), [selectFiles]);
useEffect(() => {
setSources(successFiles);
}, [successFiles]);
return (
<Box>
<FileSelector
fileType={fileType}
selectFiles={selectFiles}
setSelectFiles={setSelectFiles}
onStartSelect={() => setUploading(true)}
onFinishSelect={() => setUploading(false)}
/>
<Box
mt={4}
color={'primary.600'}
textDecoration={'underline'}
cursor={'pointer'}
onClick={() =>
fileDownload({
text: csvTemplate,
type: 'text/csv;charset=utf-8',
filename: 'template.csv'
})
}
>
{t('common:core.dataset.import.Down load csv template')}
</Box>
{/* render files */}
<RenderUploadFiles files={selectFiles} setFiles={setSelectFiles} />
<Box textAlign={'right'} mt={5}>
<Button
isDisabled={successFiles.length === 0 || uploading}
onClick={() => {
setSelectFiles((state) => state.filter((item) => !item.errorMsg));
goToNext();
}}
>
{selectFiles.length > 0
? `${t('dataset:total_num_files', { total: selectFiles.length })} | `
: ''}
{t('common:next_step')}
</Button>
</Box>
</Box>
);
});

View File

@@ -8,7 +8,6 @@ import DatasetImportContextProvider, { DatasetImportContext } from './Context';
const FileLocal = dynamic(() => import('./diffSource/FileLocal'));
const FileLink = dynamic(() => import('./diffSource/FileLink'));
const FileCustomText = dynamic(() => import('./diffSource/FileCustomText'));
const TableLocal = dynamic(() => import('./diffSource/TableLocal'));
const ExternalFileCollection = dynamic(() => import('./diffSource/ExternalFile'));
const APIDatasetCollection = dynamic(() => import('./diffSource/APIDataset'));
const ReTraining = dynamic(() => import('./diffSource/ReTraining'));
@@ -21,7 +20,6 @@ const ImportDataset = () => {
if (importSource === ImportDataSourceEnum.fileLocal) return FileLocal;
if (importSource === ImportDataSourceEnum.fileLink) return FileLink;
if (importSource === ImportDataSourceEnum.fileCustom) return FileCustomText;
if (importSource === ImportDataSourceEnum.csvTable) return TableLocal;
if (importSource === ImportDataSourceEnum.externalFile) return ExternalFileCollection;
if (importSource === ImportDataSourceEnum.apiDataset) return APIDatasetCollection;
}, [importSource]);

View File

@@ -84,14 +84,22 @@ const MetaDataCard = ({ datasetId }: { datasetId: string }) => {
label: t('dataset:collection.training_type'),
value: t(DatasetCollectionDataProcessModeMap[collection.trainingType]?.label as any)
},
{
label: t('dataset:chunk_size'),
value: collection.chunkSize || '-'
},
{
label: t('dataset:index_size'),
value: collection.indexSize || '-'
},
...(collection.chunkSize
? [
{
label: t('dataset:chunk_size'),
value: collection.chunkSize
}
]
: []),
...(collection.indexSize
? [
{
label: t('dataset:index_size'),
value: collection.indexSize
}
]
: []),
...(webSelector
? [
{

View File

@@ -0,0 +1,218 @@
import MyBox from '@fastgpt/web/components/common/MyBox';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { Box, type FlexProps } from '@chakra-ui/react';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import React, { type DragEvent, useCallback, useMemo, useState } from 'react';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { useSystemStore } from '@/web/common/system/useSystemStore';
export type SelectFileItemType = {
file: File;
icon: string;
name: string;
size: string;
};
const FileSelector = ({
fileType,
selectFiles,
setSelectFiles,
maxCount = 1000,
...props
}: {
fileType: string;
selectFiles: SelectFileItemType[];
setSelectFiles: React.Dispatch<React.SetStateAction<SelectFileItemType[]>>;
maxCount?: number;
} & FlexProps) => {
const { t } = useTranslation();
const { toast } = useToast();
const { feConfigs } = useSystemStore();
const maxSize = (feConfigs?.uploadFileMaxSize || 1024) * 1024 * 1024;
const { File, onOpen } = useSelectFile({
fileType,
multiple: maxCount > 1,
maxCount
});
const [isDragging, setIsDragging] = useState(false);
const isMaxSelected = useMemo(
() => selectFiles.length >= maxCount,
[maxCount, selectFiles.length]
);
const filterTypeReg = new RegExp(
`(${fileType
.split(',')
.map((item) => item.trim())
.join('|')})$`,
'i'
);
const onSelectFile = useCallback(
async (files: File[]) => {
const fileList = files.map((file) => ({
file,
icon: getFileIcon(file.name),
name: file.name,
size: formatFileSize(file.size)
}));
setSelectFiles((state) => {
return [...fileList, ...state].slice(0, maxCount);
});
},
[maxCount, setSelectFiles]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
};
const handleDrop = async (e: DragEvent<HTMLDivElement>) => {
e.preventDefault();
setIsDragging(false);
const items = e.dataTransfer.items;
const firstEntry = items[0].webkitGetAsEntry();
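// A single dropped folder is walked recursively; otherwise the drop is treated as a plain list of files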
if (firstEntry?.isDirectory && items.length === 1) {
{
const readFile = (entry: any) => {
return new Promise((resolve) => {
entry.file((file: File) => {
if (filterTypeReg.test(file.name)) {
onSelectFile([file]);
}
resolve(file);
});
});
};
const traverseFileTree = (dirReader: any) => {
return new Promise((resolve) => {
let fileNum = 0;
dirReader.readEntries(async (entries: any[]) => {
for await (const entry of entries) {
if (entry.isFile) {
await readFile(entry);
fileNum++;
} else if (entry.isDirectory) {
await traverseFileTree(entry.createReader());
}
}
// chrome: readEntries will return 100 entries at most
if (fileNum === 100) {
await traverseFileTree(dirReader);
}
resolve('');
});
});
};
for await (const item of items) {
const entry = item.webkitGetAsEntry();
if (entry) {
if (entry.isFile) {
await readFile(entry);
} else if (entry.isDirectory) {
//@ts-ignore
await traverseFileTree(entry.createReader());
}
}
}
}
} else if (firstEntry?.isFile) {
const files = Array.from(e.dataTransfer.files);
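// Entries without a MIME type (typically folders mixed into a file drop) are rejected with an error toast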
let isErr = files.some((item) => item.type === '');
if (isErr) {
return toast({
title: t('file:upload_error_description'),
status: 'error'
});
}
onSelectFile(files.filter((item) => filterTypeReg.test(item.name)));
} else {
return toast({
title: t('file:upload_error_description'),
status: 'error'
});
}
};
return (
<MyBox
display={'flex'}
flexDirection={'column'}
alignItems={'center'}
justifyContent={'center'}
px={3}
py={[4, 7]}
borderWidth={'1.5px'}
borderStyle={'dashed'}
borderRadius={'md'}
userSelect={'none'}
{...(isMaxSelected
? {
cursor: 'not-allowed'
}
: {
cursor: 'pointer',
_hover: {
bg: 'primary.50',
borderColor: 'primary.600'
},
borderColor: isDragging ? 'primary.600' : 'borderColor.high',
onDragEnter: handleDragEnter,
onDragOver: (e) => e.preventDefault(),
onDragLeave: handleDragLeave,
onDrop: handleDrop,
onClick: onOpen
})}
{...props}
>
<MyIcon name={'common/uploadFileFill'} w={'32px'} />
{isMaxSelected ? (
<>
<Box color={'myGray.500'} fontSize={'xs'}>
{t('file:reached_max_file_count')}
</Box>
</>
) : (
<>
<Box fontWeight={'bold'}>
{isDragging
? t('file:release_the_mouse_to_upload_the_file')
: t('file:select_and_drag_file_tip')}
</Box>
{/* file type */}
<Box color={'myGray.500'} fontSize={'xs'}>
{t('file:support_file_type', { fileType })}
</Box>
<Box color={'myGray.500'} fontSize={'xs'}>
{/* max count */}
{maxCount && t('file:support_max_count', { maxCount })}
{/* max size */}
{maxSize && t('file:support_max_size', { maxSize: formatFileSize(maxSize) })}
</Box>
<File onSelect={(files) => onSelectFile(files)} />
</>
)}
</MyBox>
);
};
export default React.memo(FileSelector);

View File

@@ -0,0 +1,86 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { addLog } from '@fastgpt/service/common/system/log';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
export type backupQuery = {};
export type backupBody = {};
export type backupResponse = {};
async function handler(req: ApiRequestProps<backupBody, backupQuery>, res: ApiResponseType<any>) {
const filePaths: string[] = [];
try {
const upload = getUploadModel({
maxSize: global.feConfigs?.uploadFileMaxSize
});
const { file, data } = await upload.doUpload<{ datasetId: string }>(req, res);
filePaths.push(file.path);
if (file.mimetype !== 'text/csv') {
throw new Error('File must be a CSV file');
}
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: WritePermissionVal,
datasetId: data.datasetId
});
// 1. Read
const { rawText } = await readRawTextByLocalFile({
teamId,
tmbId,
path: file.path,
encoding: file.encoding,
getFormatText: false
});
if (!rawText.startsWith('q,a,indexes')) {
return Promise.reject('Backup file must start with "q,a,indexes"');
}
// 2. delete tmp file
removeFilesByPaths(filePaths);
// 3. Create collection
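// backupParse: treat the raw CSV text as q,a,indexes backup rows when inserting data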
await createCollectionAndInsertData({
dataset,
rawText,
backupParse: true,
createCollectionParams: {
teamId,
tmbId,
datasetId: dataset._id,
name: file.originalname,
type: DatasetCollectionTypeEnum.virtual,
trainingType: DatasetCollectionDataProcessModeEnum.backup
}
});
return {};
} catch (error) {
addLog.error(`Backup dataset collection create error: ${error}`);
removeFilesByPaths(filePaths);
return Promise.reject(error);
}
}
export default NextAPI(handler);
export const config = {
api: {
bodyParser: false
}
};
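
For context, a minimal sketch of a CSV that passes this handler's header check, inferred from the "q,a,indexes" requirement above and the export format later in this commit. The constant name and row contents are illustrative only; double quotes inside a field are escaped by doubling them, mirroring the export code.

// Illustrative example of an accepted backup CSV (not part of this commit)
const exampleBackupCsv = `q,a,indexes
"What is FastGPT?","An open source LLM application platform","What is FastGPT","FastGPT overview"
"A row may leave the answer empty","",`;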

View File

@@ -1,61 +0,0 @@
import type { NextApiRequest } from 'next';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { type FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionDataProcessModeEnum,
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { NextAPI } from '@/service/middleware/entry';
import { type CreateCollectionResponse } from '@/global/core/dataset/api';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
async function handler(req: NextApiRequest): CreateCollectionResponse {
const { datasetId, parentId, fileId, ...body } = req.body as FileIdCreateDatasetCollectionParams;
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: WritePermissionVal,
datasetId: datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
tmbId,
bucketName: BucketNameEnum.dataset,
fileId,
isQAImport: true
});
const { collectionId, insertResults } = await createCollectionAndInsertData({
dataset,
rawText,
isQAImport: true,
createCollectionParams: {
...body,
teamId,
tmbId,
name: filename,
parentId,
datasetId,
type: DatasetCollectionTypeEnum.file,
fileId,
// special metadata
trainingType: DatasetCollectionDataProcessModeEnum.chunk,
chunkSize: 0
}
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
return { collectionId, results: insertResults };
}
export default NextAPI(handler);

View File

@@ -2,15 +2,11 @@ import type { NextApiRequest } from 'next';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { createCollectionAndInsertData } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { type CreateCollectionResponse } from '@/global/core/dataset/api';
import { urlsFetch } from '@fastgpt/service/common/string/cheerio';
import { hashStr } from '@fastgpt/global/common/string/tools';
async function handler(req: NextApiRequest): CreateCollectionResponse {
const { link, ...body } = req.body as LinkCreateDatasetCollectionParams;

View File

@@ -1,5 +1,5 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import type { NextApiResponse } from 'next';
import type { PushDatasetDataProps } from '@fastgpt/global/core/dataset/api.d';
import { authDatasetCollection } from '@fastgpt/service/support/permission/dataset/auth';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
@@ -8,9 +8,10 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { getTrainingModeByCollection } from '@fastgpt/service/core/dataset/collection/utils';
import type { ApiRequestProps } from '@fastgpt/service/type/next';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const body = req.body as PushDatasetDataProps;
async function handler(req: ApiRequestProps<PushDatasetDataProps>, res: NextApiResponse<any>) {
const body = req.body;
// Adapter 4.9.0
body.trainingType = body.trainingType || body.trainingMode;

View File

@@ -12,6 +12,14 @@ import { NextAPI } from '@/service/middleware/entry';
import { WritePermissionVal } from '@fastgpt/global/support/permission/constant';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import type { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
type DataItemType = {
_id: string;
q: string;
a: string;
indexes: DatasetDataSchemaType['indexes'];
};
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { datasetId } = req.query as {
@@ -23,7 +31,7 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
}
// Credential check
const { teamId } = await authDataset({
const { teamId, dataset } = await authDataset({
req,
authToken: true,
datasetId,
@@ -42,19 +50,14 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
});
res.setHeader('Content-Type', 'text/csv; charset=utf-8;');
res.setHeader('Content-Disposition', 'attachment; filename=dataset.csv; ');
res.setHeader('Content-Disposition', `attachment; filename=${dataset.name}-backup.csv;`);
const cursor = MongoDatasetData.find<{
_id: string;
collectionId: { name: string };
q: string;
a: string;
}>(
const cursor = MongoDatasetData.find<DataItemType>(
{
teamId,
datasetId: { $in: datasets.map((d) => d._id) }
},
'q a',
'q a indexes',
{
...readFromSecondary
}
@@ -67,13 +70,14 @@ async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
readStream: cursor
});
write(`\uFEFFindex,content`);
write(`\uFEFFq,a,indexes`);
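// Each row: quoted q, quoted a, then one quoted column per custom index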
cursor.on('data', (doc) => {
cursor.on('data', (doc: DataItemType) => {
const q = doc.q.replace(/"/g, '""') || '';
const a = doc.a.replace(/"/g, '""') || '';
const indexes = doc.indexes.map((i) => `"${i.text.replace(/"/g, '""')}"`).join(',');
write(`\n"${q}","${a}"`);
write(`\n"${q}","${a}",${indexes}`);
});
cursor.on('end', () => {

View File

@@ -1,7 +1,7 @@
import type {
ChunkSettingModeEnum,
DataChunkSplitModeEnum,
DatasetCollectionDataProcessModeEnum
import {
type ChunkSettingModeEnum,
type DataChunkSplitModeEnum,
type DatasetCollectionDataProcessModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
@@ -39,7 +39,6 @@ export type PostPreviewFilesChunksProps = {
// Read params
selector?: string;
isQAImport?: boolean;
externalFileId?: string;
};
export type PreviewChunksResponse = {
@@ -66,7 +65,6 @@ async function handler(
overlapRatio,
selector,
isQAImport,
datasetId,
externalFileId
} = req.body;
@@ -118,7 +116,6 @@ async function handler(
type,
sourceId,
selector,
isQAImport,
apiServer: dataset.apiServer,
feishuServer: dataset.feishuServer,
yuqueServer: dataset.yuqueServer,
@@ -131,9 +128,9 @@ async function handler(
chunkSize,
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
overlapRatio,
customReg: chunkSplitter ? [chunkSplitter] : [],
isQAImport: isQAImport
customReg: chunkSplitter ? [chunkSplitter] : []
});
return {
chunks: chunks.slice(0, 10),
total: chunks.length

View File

@@ -29,7 +29,6 @@ import { GET } from '@/web/common/api/request';
import { getDocPath } from '@/web/common/system/doc';
import { getWebReqUrl } from '@fastgpt/web/common/system/utils';
import LoginForm from '@/pageComponents/login/LoginForm/LoginForm';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getBdVId } from '@/web/support/marketing/utils';
const RegisterForm = dynamic(() => import('@/pageComponents/login/RegisterForm'));
@@ -49,7 +48,6 @@ const Login = ({ ChineseRedirectUrl }: { ChineseRedirectUrl: string }) => {
const { setLastChatAppId } = useChatStore();
const { isOpen, onOpen, onClose } = useDisclosure();
const { isPc } = useSystem();
const { toast } = useToast();
const {
isOpen: isOpenCookiesDrawer,

View File

@@ -23,9 +23,11 @@ const reloadConfigWatch = () => {
changeStream.on('change', async (change) => {
try {
if (
change.operationType === 'update' ||
(change.operationType === 'insert' &&
change.fullDocument.type === SystemConfigsTypeEnum.fastgptPro) ||
change.operationType === 'update'
[SystemConfigsTypeEnum.fastgptPro, SystemConfigsTypeEnum.license].includes(
change.fullDocument.type
))
) {
await initSystemConfig();
console.log('refresh system config');

View File

@@ -11,7 +11,7 @@ import {
type DatasetDataIndexItemType,
type DatasetDataItemType
} from '@fastgpt/global/core/dataset/type';
import { getEmbeddingModel, getLLMModel } from '@fastgpt/service/core/ai/model';
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { type ClientSession } from '@fastgpt/service/common/mongo';
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
@@ -93,13 +93,15 @@ const formatIndexes = async ({
return item;
}
});
indexes = indexes.filter((item) => item.type !== DatasetDataIndexTypeEnum.default);
indexes.push(...concatDefaultIndexes);
// Remove same text
// Custom indexes must not duplicate the default indexes, nor each other
indexes = indexes.filter(
(item, index, self) => index === self.findIndex((t) => t.text === item.text)
(item, index, self) =>
item.type !== DatasetDataIndexTypeEnum.default &&
!concatDefaultIndexes.find((t) => t.text === item.text) &&
index === self.findIndex((t) => t.text === item.text)
);
indexes.push(...concatDefaultIndexes);
const chekcIndexes = (
await Promise.all(

View File

@@ -262,6 +262,7 @@ const insertData = async ({
q: trainingData.q,
a: trainingData.a,
chunkIndex: trainingData.chunkIndex,
indexSize: trainingData.indexSize,
indexes: trainingData.indexes,
embeddingModel: trainingData.model,
session

View File

@@ -1,7 +1,6 @@
import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
import type {
GetPathProps,
ParentIdType,
ParentTreePathItemType
} from '@fastgpt/global/common/parentFolder/type.d';
import type {
@@ -120,6 +119,33 @@ export const resumeInheritPer = (datasetId: string) =>
export const postChangeOwner = (data: { ownerId: string; datasetId: string }) =>
POST(`/proApi/core/dataset/changeOwner`, data);
export const postBackupDatasetCollection = ({
file,
percentListen,
datasetId
}: {
file: File;
percentListen: (percent: number) => void;
datasetId: string;
}) => {
const formData = new FormData();
formData.append('file', file, encodeURIComponent(file.name));
formData.append('data', JSON.stringify({ datasetId }));
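// 10 minute timeout: the upload and server-side CSV parsing happen within this single request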
return POST(`/core/dataset/collection/create/backup`, formData, {
timeout: 600000,
onUploadProgress: (e) => {
if (!e.total) return;
const percent = Math.round((e.loaded / e.total) * 100);
percentListen?.(percent);
},
headers: {
'Content-Type': 'multipart/form-data; charset=utf-8'
}
});
};
/* =========== search test ============ */
export const postSearchText = (data: SearchTestProps) =>
POST<SearchTestResponse>(`/core/dataset/searchTest`, data);
@@ -149,10 +175,7 @@ export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectio
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data, {
timeout: 360000
});
export const postCreateDatasetExternalFileCollection = (
data: ExternalFileCreateDatasetCollectionParams
) =>