This commit is contained in:
Archer
2023-10-22 23:54:04 +08:00
committed by GitHub
parent 3091a90df6
commit a3534407bf
365 changed files with 7266 additions and 6055 deletions

View File

@@ -0,0 +1,125 @@
import React, { useState } from 'react';
import {
Box,
Flex,
Button,
NumberInput,
NumberInputField,
NumberInputStepper,
NumberIncrementStepper,
NumberDecrementStepper
} from '@chakra-ui/react';
import { useConfirm } from '@/web/common/hooks/useConfirm';
import { formatPrice } from '@fastgpt/global/common/bill/tools';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
/** File extensions accepted by the direct-chunk ("index") import mode. */
const fileExtension = '.txt, .doc, .docx, .pdf, .md';

/**
 * Import panel for the direct-chunk mode.
 *
 * Renders the file selector, a chunk-length input bounded by the dataset's vector
 * model, the estimated price, and the re-split / confirm-import buttons. The
 * preview pane is shown next to it once at least one file is selected.
 */
const ChunkImport = () => {
  const { datasetDetail } = useDatasetStore();
  const vectorModel = datasetDetail.vectorModel;
  const unitPrice = vectorModel?.price || 0.2;
  const minChunkLen = 100;
  // Same optional access as `price` above: the model may not be loaded yet.
  const maxChunkLen = vectorModel?.maxToken;

  const {
    chunkLen,
    setChunkLen,
    successChunks,
    totalChunks,
    isUnselectedFile,
    price,
    onclickUpload,
    onReSplitChunks,
    uploading,
    showRePreview,
    setReShowRePreview
  } = useImportStore();

  const { openConfirm, ConfirmModal } = useConfirm({
    content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。`
  });

  // Guard the division: with zero chunks the ratio would render as "NaN%".
  const uploadPercent =
    totalChunks > 0 ? Math.min(100, Math.round((successChunks / totalChunks) * 100)) : 0;

  return (
    <Box display={['block', 'flex']} h={['auto', '100%']}>
      <SelectorContainer fileExtension={fileExtension}>
        {/* chunk size */}
        <Flex py={4} alignItems={'center'}>
          <Box>
            <MyTooltip
              label={
                '按结束标点符号进行分段。前后段落会有 20% 的内容重叠。\n中文文档建议不要超过1000英文不要超过1500'
              }
              forceShow
            >
              <QuestionOutlineIcon ml={1} />
            </MyTooltip>
          </Box>
          <Box
            flex={1}
            css={{
              '& > span': {
                display: 'block'
              }
            }}
          >
            <MyTooltip label={`范围: ${minChunkLen}~${maxChunkLen}`}>
              <NumberInput
                ml={4}
                defaultValue={chunkLen}
                min={minChunkLen}
                max={maxChunkLen}
                step={10}
                onChange={(valueString) => {
                  const nextLen = Number(valueString);
                  // While the user is typing the field can be empty, a lone sign, or
                  // temporarily out of range. Storing those (0 / NaN / tiny values)
                  // would be used as the split length, so wait for a usable value.
                  const isUsable =
                    valueString.trim() !== '' &&
                    Number.isFinite(nextLen) &&
                    nextLen >= minChunkLen &&
                    !(typeof maxChunkLen === 'number' && nextLen > maxChunkLen);
                  if (!isUsable) return;

                  setChunkLen(nextLen);
                  setReShowRePreview(true);
                }}
              >
                <NumberInputField />
                <NumberInputStepper>
                  <NumberIncrementStepper />
                  <NumberDecrementStepper />
                </NumberInputStepper>
              </NumberInput>
            </MyTooltip>
          </Box>
        </Flex>
        {/* price */}
        <Flex py={4} alignItems={'center'}>
          <Box>
            <MyTooltip
              label={`索引生成计费为: ${formatPrice(unitPrice, 1000)}/1k tokens`}
              forceShow
            >
              <QuestionOutlineIcon ml={1} />
            </MyTooltip>
          </Box>
          <Box ml={4}>{price}</Box>
        </Flex>
        <Flex mt={3}>
          {showRePreview && (
            <Button variant={'base'} mr={4} onClick={onReSplitChunks}>
            </Button>
          )}
          <Button isDisabled={uploading} onClick={openConfirm(onclickUpload)}>
            {uploading ? <Box>{uploadPercent}%</Box> : '确认导入'}
          </Button>
        </Flex>
      </SelectorContainer>
      {!isUnselectedFile && (
        <Box flex={['auto', '1 0 0']} h={'100%'} overflowY={'auto'}>
          <PreviewFileOrChunk />
        </Box>
      )}
      <ConfirmModal />
    </Box>
  );
};

export default ChunkImport;

View File

@@ -0,0 +1,66 @@
import React from 'react';
import { useTranslation } from 'next-i18next';
import MyModal from '@/components/MyModal';
import { Box, Input, Textarea, ModalBody, ModalFooter, Button } from '@chakra-ui/react';
import { useForm } from 'react-hook-form';
import { useRequest } from '@/web/common/hooks/useRequest';
/**
 * Modal for creating a text file by hand (file name + pasted content).
 *
 * @param onClose   Called when the user cancels, and after a successful creation.
 * @param onSuccess Receives the validated form values; the modal closes only
 *                  after this promise resolves.
 */
const CreateFileModal = ({
  onClose,
  onSuccess
}: {
  onClose: () => void;
  onSuccess: (e: { filename: string; content: string }) => Promise<void>;
}) => {
  const { t } = useTranslation();
  const { register, handleSubmit } = useForm({
    defaultValues: {
      filename: '',
      content: ''
    }
  });

  const { mutate, isLoading } = useRequest({
    mutationFn: async () => {
      // handleSubmit resolves normally when validation fails (it just skips the
      // valid-callback). Record the failure and throw, otherwise the mutation
      // counts as a success and the modal closes on an empty form.
      const invalidMessages: string[] = [];
      await handleSubmit(onSuccess, (errors) => {
        invalidMessages.push(errors.filename?.message || errors.content?.message || '');
      })();
      if (invalidMessages.length > 0) {
        throw new Error(invalidMessages[0]);
      }
    },
    onSuccess: () => {
      onClose();
    },
    // Same option the other import requests pass; presumably the fallback text of
    // the error toast shown by useRequest — confirm against the hook.
    errorToast: '创建文件失败'
  });

  return (
    <MyModal title={t('file.Create File')} isOpen w={'600px'} top={'15vh'}>
      <ModalBody>
        <Box mb={1} fontSize={'sm'}>
        </Box>
        <Input
          mb={5}
          {...register('filename', {
            required: '文件名不能为空'
          })}
        />
        <Box mb={1} fontSize={'sm'}>
        </Box>
        <Textarea
          {...register('content', {
            required: '文件内容不能为空'
          })}
          rows={12}
          whiteSpace={'nowrap'}
          resize={'both'}
        />
      </ModalBody>
      <ModalFooter>
        <Button variant={'base'} mr={4} onClick={onClose}>
        </Button>
        <Button isLoading={isLoading} onClick={mutate}>
        </Button>
      </ModalFooter>
    </MyModal>
  );
};

export default CreateFileModal;

View File

@@ -0,0 +1,36 @@
import React from 'react';
import { Box, Flex, Button } from '@chakra-ui/react';
import { useConfirm } from '@/web/common/hooks/useConfirm';
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
/** File extensions accepted by the CSV import mode. */
const fileExtension = '.csv';

/**
 * Import panel for the CSV mode.
 *
 * Shows the file selector (URL fetching disabled) with a confirm-import button,
 * plus the preview pane once at least one file is selected.
 */
const CsvImport = () => {
  const { successChunks, totalChunks, isUnselectedFile, onclickUpload, uploading } =
    useImportStore();

  const { openConfirm, ConfirmModal } = useConfirm({
    content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。`
  });

  // Guard the division: with zero chunks the ratio would render as "NaN%".
  const uploadPercent =
    totalChunks > 0 ? Math.min(100, Math.round((successChunks / totalChunks) * 100)) : 0;

  return (
    <Box display={['block', 'flex']} h={['auto', '100%']}>
      <SelectorContainer fileExtension={fileExtension} showUrlFetch={false}>
        <Flex mt={3}>
          <Button isDisabled={uploading} onClick={openConfirm(onclickUpload)}>
            {uploading ? <Box>{uploadPercent}%</Box> : '确认导入'}
          </Button>
        </Flex>
      </SelectorContainer>
      {!isUnselectedFile && (
        <Box flex={['auto', '1 0 0']} h={'100%'} overflowY={'auto'}>
          <PreviewFileOrChunk />
        </Box>
      )}
      <ConfirmModal />
    </Box>
  );
};

export default CsvImport;

View File

@@ -0,0 +1,424 @@
import MyIcon from '@/components/Icon';
import { useLoading } from '@/web/common/hooks/useLoading';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@/web/common/hooks/useToast';
import { splitText2Chunks } from '@/global/common/string/tools';
import { simpleText } from '@fastgpt/global/common/string/tools';
import {
uploadFiles,
fileDownload,
readCsvContent,
readTxtContent,
readPdfContent,
readDocContent
} from '@/web/common/file/utils';
import { Box, Flex, useDisclosure, type BoxProps } from '@chakra-ui/react';
import { DragEvent, useCallback, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { customAlphabet } from 'nanoid';
import dynamic from 'next/dynamic';
import MyTooltip from '@/components/MyTooltip';
import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d';
import type {
DatasetChunkItemType,
DatasetCollectionSchemaType
} from '@fastgpt/global/core/dataset/type';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { getFileIcon } from '@fastgpt/global/common/file/icon';
import { countPromptTokens } from '@/global/common/tiktoken';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
// The two modals are loaded through next/dynamic; they are only rendered after
// the user opens them (see the isOpen* flags in FileSelect).
const UrlFetchModal = dynamic(() => import('./UrlFetchModal'));
const CreateFileModal = dynamic(() => import('./CreateFileModal'));
// Generator for local item ids: 12 characters drawn from lowercase letters and digits.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
/**
 * Content of the downloadable CSV template: an `index,content` header followed by
 * three example rows. Every row has exactly two quoted columns.
 */
const csvTemplate = `index,content\n"被索引的内容","对应的答案。CSV 中请注意内容不能包含双引号,双引号是列分割符号"\n"什么是 laf","laf 是一个云函数开发平台……"\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"`;
/**
 * One imported source (uploaded file, fetched link or hand-written file) together
 * with the chunks produced from it, as kept in the import store.
 */
export type FileItemType = {
id: string; // locally generated nanoid; used as list key and to match items in state updates
filename: string;
chunks: DatasetChunkItemType[];
text: string; // raw text ('' for CSV items, which carry ready-made q/a chunks)
icon: string;
tokens: number; // total tokens
type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
metadata: DatasetCollectionSchemaType['metadata']; // FileSelect sets { fileId } for files and { rawLink } for links
};
/** Props of FileSelect; any extra BoxProps are spread onto the drop-zone container. */
interface Props extends BoxProps {
// accepted extensions, handed to the file picker and shown in the support hint
fileExtension: string;
// receives every parsed item (file, link or created file)
onPushFiles: (files: FileItemType[]) => void;
// optional hint; passed through t() before rendering
tipText?: string;
// maximum chunk length passed to splitText2Chunks (defaults to 500)
chunkLen?: number;
// when true, shows the "download CSV template" link
isCsv?: boolean;
// toggles the "fetch url" entry (defaults to true)
showUrlFetch?: boolean;
// toggles the "create file" entry (defaults to true)
showCreateFile?: boolean;
}
/**
 * Reads every entry of a directory reader.
 *
 * `readEntries` delivers results in batches, so it has to be called repeatedly
 * until it yields an empty batch; a single call can silently drop entries.
 */
const readAllDirectoryEntries = (dirReader: any): Promise<any[]> =>
  new Promise((resolve, reject) => {
    const entries: any[] = [];
    const readBatch = () => {
      dirReader.readEntries((batch: any[]) => {
        if (batch.length === 0) {
          resolve(entries);
          return;
        }
        entries.push(...batch);
        readBatch();
      }, reject);
    };
    readBatch();
  });

/**
 * Recursively collects the files below a dropped file-system entry into `fileList`.
 * Rejects when the browser fails to read a file or directory; entries that are
 * neither files nor directories are skipped.
 */
const collectEntryFiles = async (entry: any, fileList: File[]): Promise<void> => {
  if (entry.isFile) {
    const file = await new Promise<File>((resolve, reject) => entry.file(resolve, reject));
    fileList.push(file);
    return;
  }
  if (entry.isDirectory) {
    const children = await readAllDirectoryEntries(entry.createReader());
    for (const child of children) {
      await collectEntryFiles(child, fileList);
    }
  }
};

/**
 * Drop zone / picker used by the import panels.
 *
 * Accepts files through drag-and-drop or the system picker, and optionally through
 * URL fetching or a hand-written text file. Each source is uploaded and/or parsed,
 * split into chunks of at most `chunkLen`, and handed to `onPushFiles`.
 */
const FileSelect = ({
  fileExtension,
  onPushFiles,
  tipText,
  chunkLen = 500,
  isCsv = false,
  showUrlFetch = true,
  showCreateFile = true,
  ...props
}: Props) => {
  const { datasetDetail } = useDatasetStore();
  const { Loading: FileSelectLoading } = useLoading();
  const { t } = useTranslation();
  const { toast } = useToast();

  const { File: FileSelector, onOpen } = useSelectFile({
    fileType: fileExtension,
    multiple: true
  });

  const [isDragging, setIsDragging] = useState(false);
  // Text of the loading overlay; undefined means no overlay.
  const [selectingText, setSelectingText] = useState<string>();

  const {
    isOpen: isOpenUrlFetch,
    onOpen: onOpenUrlFetch,
    onClose: onCloseUrlFetch
  } = useDisclosure();
  const {
    isOpen: isOpenCreateFile,
    onOpen: onOpenCreateFile,
    onClose: onCloseCreateFile
  } = useDisclosure();

  // select file: upload each file, parse it and push the resulting item
  const onSelectFile = useCallback(
    async (files: File[]) => {
      try {
        for (const file of files) {
          const extension = file?.name?.split('.')?.pop()?.toLowerCase();

          /* text file */
          const icon = getFileIcon(file?.name);

          // files without a known icon are skipped
          if (!icon) continue;

          // upload file
          const filesId = await uploadFiles([file], { datasetId: datasetDetail._id }, (percent) => {
            if (percent < 100) {
              setSelectingText(
                t('file.Uploading', { name: file.name.slice(0, 30), percent }) || ''
              );
            } else {
              setSelectingText(t('file.Parse', { name: file.name.slice(0, 30) }) || '');
            }
          });
          const fileId = filesId[0];

          /* csv file: rows are used as ready-made q/a chunks, no splitting */
          if (extension === 'csv') {
            const { header, data } = await readCsvContent(file);
            if (header[0] !== 'index' || header[1] !== 'content') {
              throw new Error('csv 文件格式有误,请确保 index 和 content 两列');
            }
            const filterData = data
              .filter((item) => item[0])
              .map((item) => ({
                q: item[0] || '',
                a: item[1] || ''
              }));

            const fileItem: FileItemType = {
              id: nanoid(),
              filename: file.name,
              icon,
              tokens: filterData.reduce((sum, item) => sum + countPromptTokens(item.q), 0),
              text: '',
              chunks: filterData,
              type: DatasetCollectionTypeEnum.file,
              metadata: {
                fileId
              }
            };

            onPushFiles([fileItem]);
            continue;
          }

          // parse and upload files
          let text = await (async () => {
            switch (extension) {
              case 'txt':
              case 'md':
                return readTxtContent(file);
              case 'pdf':
                return readPdfContent(file);
              case 'doc':
              case 'docx':
                return readDocContent(file);
            }
            return '';
          })();

          if (text) {
            text = simpleText(text);
            const splitRes = splitText2Chunks({
              text,
              maxLen: chunkLen
            });

            const fileItem: FileItemType = {
              id: nanoid(),
              filename: file.name,
              icon,
              text,
              tokens: splitRes.tokens,
              type: DatasetCollectionTypeEnum.file,
              metadata: {
                fileId
              },
              chunks: splitRes.chunks.map((chunk) => ({
                q: chunk,
                a: ''
              }))
            };
            onPushFiles([fileItem]);
          }
        }
      } catch (error: any) {
        console.log(error);
        toast({
          title: getErrText(error, '解析文件失败'),
          status: 'error'
        });
      }
      setSelectingText(undefined);
    },
    [chunkLen, datasetDetail._id, onPushFiles, t, toast]
  );

  // link fetch: every fetched page becomes one link item
  const onUrlFetch = useCallback(
    (e: FetchResultItem[]) => {
      const result: FileItemType[] = e.map(({ url, content }) => {
        const splitRes = splitText2Chunks({
          text: content,
          maxLen: chunkLen
        });
        return {
          id: nanoid(),
          filename: url,
          icon: '/imgs/files/link.svg',
          text: content,
          tokens: splitRes.tokens,
          type: DatasetCollectionTypeEnum.link,
          metadata: {
            rawLink: url
          },
          chunks: splitRes.chunks.map((chunk) => ({
            q: chunk,
            a: ''
          }))
        };
      });
      onPushFiles(result);
    },
    [chunkLen, onPushFiles]
  );

  // manual create file and copy data
  const onCreateFile = useCallback(
    async ({ filename, content }: { filename: string; content: string }) => {
      content = simpleText(content);

      // create virtual txt file
      const txtBlob = new Blob([content], { type: 'text/plain' });
      const txtFile = new File([txtBlob], `${filename}.txt`, {
        type: txtBlob.type,
        lastModified: new Date().getTime()
      });
      const fileIds = await uploadFiles([txtFile], { datasetId: datasetDetail._id });

      const splitRes = splitText2Chunks({
        text: content,
        maxLen: chunkLen
      });

      onPushFiles([
        {
          id: nanoid(),
          filename,
          icon: '/imgs/files/txt.svg',
          text: content,
          tokens: splitRes.tokens,
          type: DatasetCollectionTypeEnum.file,
          metadata: {
            fileId: fileIds[0]
          },
          chunks: splitRes.chunks.map((chunk) => ({
            q: chunk,
            a: ''
          }))
        }
      ]);
    },
    [chunkLen, datasetDetail._id, onPushFiles]
  );

  const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(true);
  };

  const handleDragLeave = (e: DragEvent<HTMLDivElement>) => {
    e.preventDefault();
    setIsDragging(false);
  };

  const handleDrop = useCallback(
    async (e: DragEvent<HTMLDivElement>) => {
      e.preventDefault();
      setIsDragging(false);

      const items = e.dataTransfer.items;
      const fileList: File[] = [];

      if (items.length <= 1) {
        // Single item: it may be a directory, so walk it as a file-system entry.
        // Entries are resolved before the first await, while the drop data is
        // still accessible.
        const entries: any[] = [];
        for (let i = 0; i < items.length; i++) {
          const entry = items[i].webkitGetAsEntry();
          if (entry) {
            entries.push(entry);
          }
        }

        try {
          for (const entry of entries) {
            await collectEntryFiles(entry, fileList);
          }
        } catch (error) {
          return toast({
            title: getErrText(error, '解析文件失败'),
            status: 'error'
          });
        }
      } else {
        const files = Array.from(e.dataTransfer.files);
        // an empty MIME type is treated as an unsupported drop and rejected
        const isErr = files.some((item) => item.type === '');
        if (isErr) {
          return toast({
            title: t('file.upload error description'),
            status: 'error'
          });
        }

        for (let i = 0; i < files.length; i++) {
          fileList.push(files[i]);
        }
      }

      onSelectFile(fileList);
    },
    [onSelectFile, t, toast]
  );

  const SelectTextStyles: BoxProps = {
    ml: 1,
    as: 'span',
    cursor: 'pointer',
    color: 'myBlue.700',
    _hover: {
      textDecoration: 'underline'
    }
  };

  return (
    <Box
      display={'inline-block'}
      textAlign={'center'}
      bg={'myWhite.400'}
      p={5}
      borderRadius={'lg'}
      border={'1px dashed'}
      borderColor={'myGray.300'}
      w={'100%'}
      position={'relative'}
      {...props}
      onDragEnter={handleDragEnter}
      onDragOver={handleDragEnter}
      onDragLeave={handleDragLeave}
      onDrop={handleDrop}
    >
      <Flex justifyContent={'center'} alignItems={'center'}>
        <MyIcon mr={1} name={'uploadFile'} w={'16px'} />
        {isDragging ? (
          t('file.Release the mouse to upload the file')
        ) : (
          <Box>
            {t('file.Drag and drop')},
            <MyTooltip label={t('file.max 10')}>
              <Box {...SelectTextStyles} onClick={onOpen}>
                {t('file.select a document')}
              </Box>
            </MyTooltip>
            {showUrlFetch && (
              <>
                ,
                <Box {...SelectTextStyles} onClick={onOpenUrlFetch}>
                  {t('file.Fetch Url')}
                </Box>
              </>
            )}
            {showCreateFile && (
              <>
                ,
                <Box {...SelectTextStyles} onClick={onOpenCreateFile}>
                  {t('file.Create file')}
                </Box>
              </>
            )}
          </Box>
        )}
      </Flex>
      <Box mt={1}>{t('file.support', { fileExtension: fileExtension })}</Box>
      {tipText && (
        <Box mt={1} fontSize={'sm'} color={'myGray.600'}>
          {t(tipText)}
        </Box>
      )}
      {isCsv && (
        <Box
          mt={1}
          cursor={'pointer'}
          textDecoration={'underline'}
          color={'myBlue.600'}
          fontSize={'12px'}
          onClick={() =>
            fileDownload({
              text: csvTemplate,
              type: 'text/csv',
              filename: 'template.csv'
            })
          }
        >
          {t('file.Click to download CSV template')}
        </Box>
      )}
      {selectingText !== undefined && (
        <FileSelectLoading loading text={selectingText} fixed={false} />
      )}
      <FileSelector onSelect={onSelectFile} />
      {isOpenUrlFetch && <UrlFetchModal onClose={onCloseUrlFetch} onSuccess={onUrlFetch} />}
      {isOpenCreateFile && <CreateFileModal onClose={onCloseCreateFile} onSuccess={onCreateFile} />}
    </Box>
  );
};

export default FileSelect;

View File

@@ -0,0 +1,123 @@
import React, { useMemo, useState } from 'react';
import { Box, type BoxProps, Flex, useTheme, ModalCloseButton } from '@chakra-ui/react';
import MyRadio from '@/components/Radio/index';
import dynamic from 'next/dynamic';
import ChunkImport from './Chunk';
import { useTranslation } from 'react-i18next';
const QAImport = dynamic(() => import('./QA'), {});
const CsvImport = dynamic(() => import('./Csv'), {});
import MyModal from '@/components/MyModal';
import Provider from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { qaModelList } from '@/web/common/system/staticData';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
/**
 * Import modes offered by the import modal. ImportData renders one panel per
 * value: index -> ChunkImport, qa -> QAImport, csv -> CsvImport.
 */
export enum ImportTypeEnum {
index = 'index',
qa = 'qa',
csv = 'csv'
}
/**
 * Full-screen modal for importing data into a dataset.
 *
 * A radio group at the top picks the import mode; the matching panel is rendered
 * inside a Provider configured with that mode's default chunk length, unit price
 * and training mode.
 *
 * @param datasetId     Target dataset, forwarded to the Provider.
 * @param parentId      Parent collection/folder id, forwarded to the Provider.
 * @param onClose       Invoked by the modal's close button.
 * @param uploadSuccess Forwarded to the Provider as its upload-success callback.
 */
const ImportData = ({
  datasetId,
  parentId,
  onClose,
  uploadSuccess
}: {
  datasetId: string;
  parentId: string;
  onClose: () => void;
  uploadSuccess: () => void;
}) => {
  const { t } = useTranslation();
  const theme = useTheme();
  const { datasetDetail } = useDatasetStore();
  const [importType, setImportType] = useState<`${ImportTypeEnum}`>(ImportTypeEnum.index);

  // Settings handed to the Provider for the currently selected mode.
  const typeMap = useMemo(() => {
    if (importType === ImportTypeEnum.qa) {
      const qaModel = qaModelList[0];
      return {
        defaultChunkLen: qaModel?.maxToken * 0.5 || 8000,
        unitPrice: qaModel?.price || 3,
        mode: TrainingModeEnum.qa
      };
    }

    // "index" and "csv" share the vector-model settings.
    const vectorModel = datasetDetail.vectorModel;
    return {
      defaultChunkLen: vectorModel?.defaultToken || 500,
      unitPrice: vectorModel?.price || 0.2,
      mode: TrainingModeEnum.index
    };
  }, [datasetDetail.vectorModel, importType]);

  const TitleStyle: BoxProps = {
    fontWeight: 'bold',
    fontSize: ['md', 'xl']
  };

  // Entries of the mode selector, in display order.
  const importModes = [
    {
      icon: 'indexImport',
      title: '直接分段',
      desc: '选择文本文件,直接将其按分段进行处理',
      value: ImportTypeEnum.index
    },
    {
      icon: 'qaImport',
      title: 'QA拆分',
      desc: '选择文本文件,让大模型自动生成问答对',
      value: ImportTypeEnum.qa
    },
    {
      icon: 'csvImport',
      title: 'CSV 导入',
      desc: '批量导入问答对,是最精准的数据',
      value: ImportTypeEnum.csv
    }
  ];

  const modalTitle = <Box {...TitleStyle}>{t('dataset.data.File import')}</Box>;

  return (
    <MyModal
      title={modalTitle}
      isOpen
      isCentered
      maxW={['90vw', '85vw']}
      w={['90vw', '85vw']}
      h={'90vh'}
    >
      <ModalCloseButton onClick={onClose} />
      <Flex flexDirection={'column'} flex={'1 0 0'}>
        <Box pb={[5, 7]} px={[4, 8]} borderBottom={theme.borders.base}>
          <MyRadio
            gridTemplateColumns={['repeat(1,1fr)', 'repeat(3, 350px)']}
            list={importModes}
            value={importType}
            onChange={(selected) => setImportType(selected as `${ImportTypeEnum}`)}
          />
        </Box>

        <Provider
          {...typeMap}
          importType={importType}
          datasetId={datasetId}
          parentId={parentId}
          onUploadSuccess={uploadSuccess}
        >
          <Box flex={'1 0 0'} h={0}>
            {importType === ImportTypeEnum.index && <ChunkImport />}
            {importType === ImportTypeEnum.qa && <QAImport />}
            {importType === ImportTypeEnum.csv && <CsvImport />}
          </Box>
        </Provider>
      </Flex>
    </MyModal>
  );
};

export default ImportData;

View File

@@ -0,0 +1,473 @@
import React, {
type SetStateAction,
type Dispatch,
useContext,
useCallback,
createContext,
useState,
useMemo,
useEffect
} from 'react';
import FileSelect, { FileItemType } from './FileSelect';
import { useRequest } from '@/web/common/hooks/useRequest';
import { postDatasetCollection } from '@/web/core/dataset/api';
import { formatPrice } from '@fastgpt/global/common/bill/tools';
import { splitText2Chunks } from '@/global/common/string/tools';
import { useToast } from '@/web/common/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { Box, Flex, Image, useTheme } from '@chakra-ui/react';
import { CloseIcon } from '@chakra-ui/icons';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
import MyIcon from '@/components/Icon';
import { chunksUpload } from '@/web/core/dataset/utils';
import { postCreateTrainingBill } from '@/web/common/bill/api';
import { useTranslation } from 'react-i18next';
import { ImportTypeEnum } from './ImportModal';
// Shared props for every place a file name is rendered: applies the project's
// 'textEllipsis' class and caps the width at 400px.
const filenameStyles = {
className: 'textEllipsis',
maxW: '400px'
};
/** Shape of the import store shared by the import panels through StateContext. */
type useImportStoreType = {
// items selected for import, newest first (SelectorContainer prepends new ones)
files: FileItemType[];
setFiles: Dispatch<SetStateAction<FileItemType[]>>;
// item whose raw text is shown in the preview pane; undefined shows the chunk list
previewFile: FileItemType | undefined;
setPreviewFile: Dispatch<SetStateAction<FileItemType | undefined>>;
// number of chunks reported as inserted during the running upload
successChunks: number;
setSuccessChunks: Dispatch<SetStateAction<number>>;
// true while `files` is empty
isUnselectedFile: boolean;
// sum of chunks over all files
totalChunks: number;
// starts the upload; the Provider's implementation ignores the argument
onclickUpload: (e: { files: FileItemType[] }) => void;
// re-splits every file's text with the current chunkLen
onReSplitChunks: () => void;
// formatted estimate: total tokens * unit price
price: number;
uploading: boolean;
chunkLen: number;
setChunkLen: Dispatch<number>;
// whether the "re-split" button should be offered
showRePreview: boolean;
setReShowRePreview: Dispatch<SetStateAction<boolean>>;
};
/** Placeholder for every store callback outside a Provider: always throws. */
const notImplemented = (): never => {
  throw new Error('Function not implemented.');
};

/**
 * Context carrying the import store. The default value holds empty state and
 * throwing callbacks; the real value is supplied by Provider.
 */
const StateContext = createContext<useImportStoreType>({
  // state
  files: [],
  previewFile: undefined,
  successChunks: 0,
  totalChunks: 0,
  isUnselectedFile: false,
  price: 0,
  chunkLen: 0,
  uploading: false,
  showRePreview: false,
  // callbacks
  setFiles: notImplemented,
  setPreviewFile: notImplemented,
  setSuccessChunks: notImplemented,
  setChunkLen: notImplemented,
  setReShowRePreview: notImplemented,
  onclickUpload: notImplemented,
  onReSplitChunks: notImplemented
});
/** Hook returning the import store from the nearest StateContext provider. */
export const useImportStore = () => {
  const store = useContext(StateContext);
  return store;
};
/**
 * Owns the state of one import session and exposes it through StateContext.
 *
 * @param datasetId       Dataset the collections are created in.
 * @param parentId        Parent id passed to collection creation.
 * @param unitPrice       Price factor applied to the total token count.
 * @param mode            Training mode passed to the chunk upload.
 * @param defaultChunkLen Initial chunk length; also restored on reset.
 * @param importType      Current import mode; changing it resets the session.
 * @param onUploadSuccess Called after all files were uploaded.
 */
const Provider = ({
  datasetId,
  parentId,
  unitPrice,
  mode,
  defaultChunkLen = 500,
  importType,
  onUploadSuccess,
  children
}: {
  datasetId: string;
  parentId: string;
  unitPrice: number;
  mode: `${TrainingModeEnum}`;
  defaultChunkLen: number;
  importType: `${ImportTypeEnum}`;
  onUploadSuccess: () => void;
  children: React.ReactNode;
}) => {
  const { t } = useTranslation();
  const { toast } = useToast();
  const [files, setFiles] = useState<FileItemType[]>([]);
  const [successChunks, setSuccessChunks] = useState(0);
  const [chunkLen, setChunkLen] = useState(defaultChunkLen);
  const [previewFile, setPreviewFile] = useState<FileItemType>();
  const [showRePreview, setReShowRePreview] = useState(false);

  const isUnselectedFile = useMemo(() => files.length === 0, [files]);

  const totalChunks = useMemo(
    () => files.reduce((sum, file) => sum + file.chunks.length, 0),
    [files]
  );

  const price = useMemo(() => {
    return formatPrice(files.reduce((sum, file) => sum + file.tokens, 0) * unitPrice);
  }, [files, unitPrice]);

  /* start upload data */
  const { mutate: onclickUpload, isLoading: uploading } = useRequest({
    mutationFn: async () => {
      // Restart the progress counter; otherwise a retry after a failed upload
      // would add to the chunks already counted and exceed 100%.
      setSuccessChunks(0);

      let totalInsertion = 0;
      // files are processed one after another
      for (const file of files) {
        const chunks = file.chunks;
        // create a file collection and training bill
        const [collectionId, billId] = await Promise.all([
          postDatasetCollection({
            datasetId,
            parentId,
            name: file.filename,
            type: file.type,
            metadata: file.metadata
          }),
          postCreateTrainingBill({
            name: t('dataset.collections.Create Training Data', { filename: file.filename })
          })
        ]);
        // upload data
        const { insertLen } = await chunksUpload({
          collectionId,
          billId,
          chunks,
          mode,
          onUploading: (uploadedLen) => {
            setSuccessChunks((state) => state + uploadedLen);
          }
        });
        totalInsertion += insertLen;
      }
      return totalInsertion;
    },
    onSuccess(num) {
      toast({
        title: `共成功导入 ${num} 组数据,请耐心等待训练.`,
        status: 'success'
      });
      onUploadSuccess();
    },
    errorToast: '导入文件失败'
  });

  // Re-split every file's raw text with the current chunk length.
  const onReSplitChunks = useCallback(async () => {
    try {
      setFiles((state) =>
        state.map((file) => {
          const splitRes = splitText2Chunks({
            text: file.text,
            maxLen: chunkLen
          });

          return {
            ...file,
            tokens: splitRes.tokens,
            chunks: splitRes.chunks.map((chunk) => ({
              q: chunk,
              a: ''
            }))
          };
        })
      );
      setReShowRePreview(false);
    } catch (error) {
      toast({
        status: 'warning',
        title: getErrText(error, '文本分段异常')
      });
    }
  }, [chunkLen, toast]);

  // Back to a clean session.
  const reset = useCallback(() => {
    setFiles([]);
    setSuccessChunks(0);
    setChunkLen(defaultChunkLen);
    setPreviewFile(undefined);
    setReShowRePreview(false);
  }, [defaultChunkLen]);

  // Switching the import mode (or its default chunk length) starts over.
  useEffect(() => {
    reset();
  }, [importType, reset]);

  const value = {
    files,
    setFiles,
    previewFile,
    setPreviewFile,
    successChunks,
    setSuccessChunks,
    isUnselectedFile,
    totalChunks,
    price,
    onReSplitChunks,
    onclickUpload,
    uploading,
    chunkLen,
    setChunkLen,
    showRePreview,
    setReShowRePreview
  };

  return <StateContext.Provider value={value}>{children}</StateContext.Provider>;
};

export default React.memo(Provider);
/**
 * Escapes text for use as element content in `dangerouslySetInnerHTML`.
 * File and fetched-page text is untrusted and must be shown literally,
 * never interpreted as markup.
 */
const escapeHtml = (text: string) =>
  text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

/**
 * Preview pane of the import panels.
 *
 * With a `previewFile` selected it shows that file's raw text in an editable
 * box; on blur the edited text is stored and a re-split is offered. Otherwise it
 * lists up to 50 chunks per file, each deletable and (when it has no answer)
 * editable in place.
 *
 * NOTE(review): token counts and price are not recomputed after a chunk is
 * edited or deleted here.
 * NOTE(review): the colour token 'myhGray.500' differs from the 'myGray.*'
 * tokens used elsewhere in these files — confirm it exists in the theme.
 */
export const PreviewFileOrChunk = () => {
  const theme = useTheme();
  const { setFiles, previewFile, setPreviewFile, setReShowRePreview, totalChunks, files } =
    useImportStore();

  // Both updaters work on the latest state, not on the `file` captured at
  // render time, so concurrent updates to the same file are not overwritten.
  const removeChunk = (fileId: string, chunkIndex: number) => {
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.filter((_, index) => index !== chunkIndex)
            }
          : stateFile
      )
    );
  };

  const updateChunkQuestion = (fileId: string, chunkIndex: number, q: string) => {
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.map((chunk, index) =>
                index === chunkIndex ? { ...chunk, q } : chunk
              )
            }
          : stateFile
      )
    );
  };

  return (
    <Box h={'100%'} w={'100%'}>
      {previewFile ? (
        <Box
          position={'relative'}
          display={['block', 'flex']}
          h={'100%'}
          flexDirection={'column'}
          pt={[3, 6]}
          bg={'myWhite.400'}
        >
          <Box px={[4, 8]} fontSize={['lg', 'xl']} fontWeight={'bold'} {...filenameStyles}>
            {previewFile.filename}
          </Box>
          <CloseIcon
            position={'absolute'}
            right={[4, 8]}
            top={4}
            cursor={'pointer'}
            onClick={() => setPreviewFile(undefined)}
          />
          <Box
            flex={'1 0 0'}
            h={['auto', 0]}
            overflow={'overlay'}
            px={[4, 8]}
            my={4}
            contentEditable
            dangerouslySetInnerHTML={{ __html: escapeHtml(previewFile.text) }}
            fontSize={'sm'}
            whiteSpace={'pre-wrap'}
            wordBreak={'break-all'}
            onBlur={(e) => {
              const val = e.currentTarget.innerText;
              // the raw text changed, so the existing chunks may be stale
              setReShowRePreview(true);

              setFiles((state) =>
                state.map((file) =>
                  file.id === previewFile.id
                    ? {
                        ...file,
                        text: val
                      }
                    : file
                )
              );
            }}
          />
        </Box>
      ) : (
        <Box pt={[3, 6]}>
          <Flex px={[4, 8]} alignItems={'center'}>
            <Box fontSize={['lg', 'xl']} fontWeight={'bold'}>
              ({totalChunks})
            </Box>
            {totalChunks > 50 && (
              <Box ml={2} fontSize={'sm'} color={'myhGray.500'}>
              </Box>
            )}
          </Flex>
          <Box px={[4, 8]} overflow={'overlay'}>
            {files.map((file) =>
              file.chunks.slice(0, 50).map((chunk, i) => (
                <Box
                  key={i}
                  py={4}
                  bg={'myWhite.500'}
                  my={2}
                  borderRadius={'md'}
                  fontSize={'sm'}
                  _hover={{ ...hoverDeleteStyles }}
                >
                  <Flex mb={1} px={4} userSelect={'none'}>
                    <Box
                      flexShrink={0}
                      px={3}
                      py={'1px'}
                      border={theme.borders.base}
                      borderRadius={'md'}
                    >
                      # {i + 1}
                    </Box>
                    <Box ml={2} fontSize={'sm'} color={'myhGray.500'} {...filenameStyles}>
                      {file.filename}
                    </Box>
                    <Box flex={1} />
                    <DeleteIcon onClick={() => removeChunk(file.id, i)} />
                  </Flex>
                  <Box
                    px={4}
                    fontSize={'sm'}
                    whiteSpace={'pre-wrap'}
                    wordBreak={'break-all'}
                    contentEditable={!chunk.a}
                    dangerouslySetInnerHTML={{
                      __html: escapeHtml(chunk.a ? `q:${chunk.q}\na:${chunk.a}` : chunk.q)
                    }}
                    onBlur={(e) => {
                      // q/a chunks are read-only; never write their combined text back
                      if (chunk.a) return;

                      const val = e.currentTarget.innerText;

                      if (val === '') {
                        /* an emptied chunk is removed */
                        removeChunk(file.id, i);
                      } else {
                        // update chunk
                        updateChunkQuestion(file.id, i, val);
                      }
                    }}
                  />
                </Box>
              ))
            )}
          </Box>
        </Box>
      )}
    </Box>
  );
};
/**
 * Left column of an import panel: the file selector, the list of selected files
 * and, once at least one file is selected, the panel-specific controls passed
 * as children.
 *
 * @param fileExtension  Accepted extensions, forwarded to FileSelect.
 * @param showUrlFetch   Forwarded to FileSelect.
 * @param showCreateFile Forwarded to FileSelect.
 * @param children       Controls rendered below the file list.
 */
export const SelectorContainer = ({
  fileExtension,
  showUrlFetch,
  showCreateFile,
  children
}: {
  fileExtension: string;
  showUrlFetch?: boolean;
  showCreateFile?: boolean;
  children: React.ReactNode;
}) => {
  const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen } = useImportStore();

  const hasFiles = !isUnselectedFile;
  // The column width is only capped once files are listed.
  const widthLimit = hasFiles ? { maxW: ['auto', '500px'] } : {};

  // New items go to the front of the list.
  const prependFiles = (newFiles: FileItemType[]) => {
    setFiles((state) => newFiles.concat(state));
  };

  const removeFile = (fileId: string) => {
    setFiles((state) => state.filter((file) => file.id !== fileId));
  };

  return (
    <Box h={'100%'} overflowY={'auto'} flex={['auto', '1 0 400px']} {...widthLimit} p={[4, 8]}>
      <FileSelect
        fileExtension={fileExtension}
        onPushFiles={prependFiles}
        chunkLen={chunkLen}
        showUrlFetch={showUrlFetch}
        showCreateFile={showCreateFile}
        py={isUnselectedFile ? '100px' : 5}
      />
      {hasFiles && (
        <Box py={4} px={2} maxH={'400px'} overflowY={'auto'}>
          {files.map((item) => (
            <Flex
              key={item.id}
              w={'100%'}
              _notLast={{ mb: 5 }}
              px={5}
              py={2}
              boxShadow={'1px 1px 5px rgba(0,0,0,0.15)'}
              borderRadius={'md'}
              cursor={'pointer'}
              position={'relative'}
              alignItems={'center'}
              _hover={{
                bg: 'myBlue.100',
                '& .delete': {
                  display: 'block'
                }
              }}
              onClick={() => setPreviewFile(item)}
            >
              <Image src={item.icon} w={'16px'} alt={''} />
              <Box ml={2} flex={'1 0 0'} pr={3} {...filenameStyles}>
                {item.filename}
              </Box>
              <MyIcon
                position={'absolute'}
                right={3}
                className="delete"
                name={'delete'}
                w={'16px'}
                _hover={{ color: 'red.600' }}
                display={['block', 'none']}
                onClick={(e) => {
                  // keep the click from also opening the preview
                  e.stopPropagation();
                  removeFile(item.id);
                }}
              />
            </Flex>
          ))}
        </Box>
      )}
      {hasFiles && <>{children}</>}
    </Box>
  );
};

View File

@@ -0,0 +1,100 @@
import React, { useState, useMemo } from 'react';
import { Box, Flex, Button, Input } from '@chakra-ui/react';
import { useConfirm } from '@/web/common/hooks/useConfirm';
import { formatPrice } from '@fastgpt/global/common/bill/tools';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon, InfoOutlineIcon } from '@chakra-ui/icons';
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
import { replaceVariable } from '@/global/common/string/tools';
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
/** File extensions accepted by the QA-split import mode. */
const fileExtension = '.txt, .doc, .docx, .pdf, .md';

/**
 * Import panel for the QA-split mode.
 *
 * Renders the file selector, an optional theme for the QA prompt (with a tooltip
 * previewing the resulting prompt), the estimated price, and the re-split /
 * confirm-import buttons. The preview pane appears once a file is selected.
 *
 * NOTE(review): the theme typed here is only used for the tooltip preview in this
 * component; it is not passed to `onclickUpload`.
 * NOTE(review): the price tooltip uses the vector model's unit price, whereas the
 * import modal configures the QA mode with the QA model's price — confirm which
 * one should be shown.
 */
const QAImport = () => {
  const { datasetDetail } = useDatasetStore();
  const vectorModel = datasetDetail.vectorModel;
  const unitPrice = vectorModel?.price || 0.2;

  const {
    successChunks,
    totalChunks,
    isUnselectedFile,
    price,
    onclickUpload,
    onReSplitChunks,
    uploading,
    showRePreview
  } = useImportStore();

  const { openConfirm, ConfirmModal } = useConfirm({
    content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。`
  });

  const [prompt, setPrompt] = useState('');

  // Prompt shown in the tooltip, falling back to the default theme when empty.
  const previewQAPrompt = useMemo(() => {
    return replaceVariable(Prompt_AgentQA.prompt, {
      theme: prompt || Prompt_AgentQA.defaultTheme
    });
  }, [prompt]);

  // Guard the division: with zero chunks the ratio would render as "NaN%".
  const uploadPercent =
    totalChunks > 0 ? Math.min(100, Math.round((successChunks / totalChunks) * 100)) : 0;

  return (
    <Box display={['block', 'flex']} h={['auto', '100%']}>
      <SelectorContainer fileExtension={fileExtension}>
        {/* prompt */}
        <Box py={5}>
          <Box mb={2}>
            QA {' '}
            <MyTooltip label={previewQAPrompt} forceShow>
              <InfoOutlineIcon ml={1} />
            </MyTooltip>
          </Box>
          <Flex alignItems={'center'} fontSize={'sm'}>
            <Box mr={2}></Box>
            <Input
              fontSize={'sm'}
              flex={1}
              placeholder={Prompt_AgentQA.defaultTheme}
              bg={'myWhite.500'}
              defaultValue={prompt}
              onChange={(e) => setPrompt(e.target.value || '')}
            />
          </Flex>
        </Box>
        {/* price */}
        <Flex py={5} alignItems={'center'}>
          <Box>
            <MyTooltip
              label={`索引生成计费为: ${formatPrice(unitPrice, 1000)}/1k tokens`}
              forceShow
            >
              <QuestionOutlineIcon ml={1} />
            </MyTooltip>
          </Box>
          <Box ml={4}>{price}</Box>
        </Flex>
        <Flex mt={3}>
          {showRePreview && (
            <Button variant={'base'} mr={4} onClick={onReSplitChunks}>
            </Button>
          )}
          <Button isDisabled={uploading} onClick={openConfirm(onclickUpload)}>
            {uploading ? <Box>{uploadPercent}%</Box> : '确认导入'}
          </Button>
        </Flex>
      </SelectorContainer>
      {!isUnselectedFile && (
        <Box flex={['auto', '1 0 0']} h={'100%'} overflowY={'auto'}>
          <PreviewFileOrChunk />
        </Box>
      )}
      <ConfirmModal />
    </Box>
  );
};

export default QAImport;

View File

@@ -0,0 +1,67 @@
import React, { useRef } from 'react';
import { useTranslation } from 'next-i18next';
import MyModal from '@/components/MyModal';
import { Box, Button, ModalBody, ModalFooter, Textarea } from '@chakra-ui/react';
import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d';
import { useRequest } from '@/web/common/hooks/useRequest';
import { postFetchUrls } from '@/web/common/plugin/api';
/**
 * Modal for fetching web pages by URL (one URL per line).
 *
 * @param onClose   Called when the user cancels, and after a successful fetch.
 * @param onSuccess Receives the fetch results before the modal closes.
 */
const UrlFetchModal = ({
  onClose,
  onSuccess
}: {
  onClose: () => void;
  onSuccess: (e: FetchResultItem[]) => void;
}) => {
  const { t } = useTranslation();
  const Dom = useRef<HTMLTextAreaElement>(null);

  const { mutate, isLoading } = useRequest({
    mutationFn: async () => {
      const val = Dom.current?.value || '';
      // Trim each line so whitespace-only lines and "\r" left over from pasted
      // Windows line endings are not sent as URLs; drop duplicates.
      const urls = Array.from(
        new Set(
          val
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line !== '')
        )
      );

      // Nothing to fetch: keep the modal open instead of sending an empty request.
      if (urls.length === 0) return;

      const res = await postFetchUrls(urls);

      onSuccess(res);
      onClose();
    },
    errorToast: '获取链接失败'
  });

  return (
    <MyModal
      title={
        <>
          <Box>{t('file.Fetch Url')}</Box>
          <Box fontWeight={'normal'} fontSize={'sm'} color={'myGray.500'} mt={1}>
          </Box>
        </>
      }
      top={'15vh'}
      isOpen
      onClose={onClose}
      w={'600px'}
    >
      <ModalBody>
        <Textarea
          ref={Dom}
          rows={12}
          whiteSpace={'nowrap'}
          resize={'both'}
          placeholder={'最多10个链接每行一个。'}
        />
      </ModalBody>
      <ModalFooter>
        <Button variant={'base'} mr={4} onClick={onClose}>
        </Button>
        <Button isLoading={isLoading} onClick={mutate}>
        </Button>
      </ModalFooter>
    </MyModal>
  );
};

export default UrlFetchModal;