4.6.2-production (#518)

This commit is contained in:
Archer
2023-11-26 16:13:45 +08:00
committed by GitHub
parent 3acbf1ab17
commit f818260711
30 changed files with 477 additions and 283 deletions

View File

@@ -16,10 +16,12 @@ import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
import { useTranslation } from 'next-i18next';
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
const fileExtension = '.txt, .docx, .pdf, .md';
const ChunkImport = () => {
const { t } = useTranslation();
const { datasetDetail } = useDatasetStore();
const vectorModel = datasetDetail.vectorModel;
const unitPrice = vectorModel?.price || 0.2;
@@ -48,13 +50,8 @@ const ChunkImport = () => {
{/* chunk size */}
<Flex py={4} alignItems={'center'}>
<Box>
<MyTooltip
label={
'按结束标点符号进行分段。前后段落会有 20% 的内容重叠。\n中文文档建议不要超过1000英文不要超过1500'
}
forceShow
>
{t('core.dataset.import.Ideal chunk length')}
<MyTooltip label={t('core.dataset.import.Ideal chunk length Tips')} forceShow>
<QuestionOutlineIcon ml={1} />
</MyTooltip>
</Box>

View File

@@ -48,6 +48,7 @@ export interface Props extends BoxProps {
onPushFiles: (files: FileItemType[]) => void;
tipText?: string;
chunkLen?: number;
overlapRatio?: number;
fileTemplate?: {
type: string;
filename: string;
@@ -63,6 +64,7 @@ const FileSelect = ({
onPushFiles,
tipText,
chunkLen = 500,
overlapRatio,
fileTemplate,
showUrlFetch = true,
showCreateFile = true,
@@ -97,6 +99,13 @@ const FileSelect = ({
// select file
const onSelectFile = useCallback(
async (files: File[]) => {
if (files.length >= 100) {
return toast({
status: 'warning',
title: t('common.file.Select file amount limit 100')
});
}
try {
for await (let file of files) {
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
@@ -165,7 +174,6 @@ const FileSelect = ({
return readTxtContent(file);
case 'pdf':
return readPdfContent(file);
case 'doc':
case 'docx':
return readDocContent(file);
}
@@ -176,7 +184,8 @@ const FileSelect = ({
text = simpleText(text);
const splitRes = splitText2Chunks({
text,
maxLen: chunkLen
chunkLen,
overlapRatio
});
const fileItem: FileItemType = {
@@ -206,7 +215,7 @@ const FileSelect = ({
}
setSelectingText(undefined);
},
[chunkLen, datasetDetail._id, onPushFiles, t, toast]
[chunkLen, datasetDetail._id, onPushFiles, overlapRatio, t, toast]
);
// link fetch
const onUrlFetch = useCallback(
@@ -214,7 +223,8 @@ const FileSelect = ({
const result: FileItemType[] = e.map(({ url, content }) => {
const splitRes = splitText2Chunks({
text: content,
maxLen: chunkLen
chunkLen,
overlapRatio
});
return {
id: nanoid(),
@@ -234,7 +244,7 @@ const FileSelect = ({
});
onPushFiles(result);
},
[chunkLen, onPushFiles]
[chunkLen, onPushFiles, overlapRatio]
);
// manual create file and copy data
const onCreateFile = useCallback(
@@ -255,7 +265,8 @@ const FileSelect = ({
const splitRes = splitText2Chunks({
text: content,
maxLen: chunkLen
chunkLen,
overlapRatio
});
onPushFiles([
@@ -276,7 +287,7 @@ const FileSelect = ({
}
]);
},
[chunkLen, datasetDetail._id, onPushFiles]
[chunkLen, datasetDetail._id, onPushFiles, overlapRatio]
);
const handleDragEnter = (e: DragEvent<HTMLDivElement>) => {

View File

@@ -41,16 +41,19 @@ const ImportData = ({
const map = {
[ImportTypeEnum.chunk]: {
defaultChunkLen: vectorModel?.defaultToken || 500,
chunkOverlapRatio: 0.2,
unitPrice: vectorModel?.price || 0.2,
mode: TrainingModeEnum.chunk
},
[ImportTypeEnum.qa]: {
defaultChunkLen: agentModel?.maxContext * 0.6 || 9000,
defaultChunkLen: agentModel?.maxContext * 0.6 || 8000,
chunkOverlapRatio: 0,
unitPrice: agentModel?.price || 3,
mode: TrainingModeEnum.qa
},
[ImportTypeEnum.csv]: {
defaultChunkLen: vectorModel?.defaultToken || 500,
chunkOverlapRatio: 0,
unitPrice: vectorModel?.price || 0.2,
mode: TrainingModeEnum.chunk
}

View File

@@ -44,6 +44,7 @@ type useImportStoreType = {
price: number;
uploading: boolean;
chunkLen: number;
chunkOverlapRatio: number;
setChunkLen: Dispatch<number>;
showRePreview: boolean;
setReShowRePreview: Dispatch<SetStateAction<boolean>>;
@@ -66,6 +67,7 @@ const StateContext = createContext<useImportStoreType>({
},
price: 0,
chunkLen: 0,
chunkOverlapRatio: 0,
setChunkLen: function (value: number): void {
throw new Error('Function not implemented.');
},
@@ -93,6 +95,7 @@ const Provider = ({
vectorModel,
agentModel,
defaultChunkLen = 500,
chunkOverlapRatio = 0.2,
importType,
onUploadSuccess,
children
@@ -104,6 +107,7 @@ const Provider = ({
vectorModel: string;
agentModel: string;
defaultChunkLen: number;
chunkOverlapRatio: number;
importType: `${ImportTypeEnum}`;
onUploadSuccess: () => void;
children: React.ReactNode;
@@ -180,7 +184,8 @@ const Provider = ({
state.map((file) => {
const splitRes = splitText2Chunks({
text: file.text,
maxLen: chunkLen
chunkLen,
overlapRatio: chunkOverlapRatio
});
return {
@@ -228,6 +233,7 @@ const Provider = ({
onclickUpload,
uploading,
chunkLen,
chunkOverlapRatio,
setChunkLen,
showRePreview,
setReShowRePreview
@@ -413,7 +419,8 @@ export const SelectorContainer = ({
tip?: string;
children: React.ReactNode;
}) => {
const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen } = useImportStore();
const { files, setPreviewFile, isUnselectedFile, setFiles, chunkLen, chunkOverlapRatio } =
useImportStore();
return (
<Box
h={'100%'}
@@ -432,6 +439,7 @@ export const SelectorContainer = ({
setFiles((state) => files.concat(state));
}}
chunkLen={chunkLen}
overlapRatio={chunkOverlapRatio}
showUrlFetch={showUrlFetch}
showCreateFile={showCreateFile}
fileTemplate={fileTemplate}

View File

@@ -1,15 +1,14 @@
import React, { useState, useMemo } from 'react';
import { Box, Flex, Button, Input } from '@chakra-ui/react';
import React, { useState } from 'react';
import { Box, Flex, Button, Textarea } from '@chakra-ui/react';
import { useConfirm } from '@/web/common/hooks/useConfirm';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon, InfoOutlineIcon } from '@chakra-ui/icons';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { useImportStore, SelectorContainer, PreviewFileOrChunk } from './Provider';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
const fileExtension = '.txt, .docx, .pdf, .md';
const QAImport = () => {
const { datasetDetail } = useDatasetStore();
@@ -31,36 +30,27 @@ const QAImport = () => {
content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。`
});
const [prompt, setPrompt] = useState('');
const previewQAPrompt = useMemo(() => {
return replaceVariable(Prompt_AgentQA.prompt, {
theme: prompt || Prompt_AgentQA.defaultTheme
});
}, [prompt]);
const [prompt, setPrompt] = useState(Prompt_AgentQA.description);
return (
<Box display={['block', 'flex']} h={['auto', '100%']}>
<SelectorContainer fileExtension={fileExtension}>
{/* prompt */}
<Box py={5}>
<Box mb={2}>
QA {' '}
<MyTooltip label={previewQAPrompt} forceShow>
<InfoOutlineIcon ml={1} />
</MyTooltip>
<Box p={3} bg={'myWhite.600'} borderRadius={'md'}>
<Box mb={1} fontWeight={'bold'}>
QA
</Box>
<Flex alignItems={'center'} fontSize={'sm'}>
<Box mr={2}></Box>
<Input
fontSize={'sm'}
flex={1}
placeholder={Prompt_AgentQA.defaultTheme}
bg={'myWhite.500'}
<Box whiteSpace={'pre-wrap'} fontSize={'sm'}>
<Textarea
defaultValue={prompt}
onChange={(e) => setPrompt(e.target.value || '')}
rows={8}
fontSize={'sm'}
onChange={(e) => {
setPrompt(e.target.value);
}}
/>
</Flex>
<Box>{Prompt_AgentQA.fixedText}</Box>
</Box>
</Box>
{/* price */}
<Flex py={5} alignItems={'center'}>
@@ -81,10 +71,7 @@ const QAImport = () => {
</Button>
)}
<Button
isDisabled={uploading}
onClick={openConfirm(() => onclickUpload({ prompt: previewQAPrompt }))}
>
<Button isDisabled={uploading} onClick={openConfirm(() => onclickUpload({ prompt }))}>
{uploading ? <Box>{Math.round((successChunks / totalChunks) * 100)}%</Box> : '确认导入'}
</Button>
</Flex>