This commit is contained in:
archer
2023-07-17 10:20:30 +08:00
parent 98a5796592
commit 246283ee1c
46 changed files with 1747 additions and 780 deletions

View File

@@ -1,18 +1,5 @@
import React, { useCallback, useState, useRef, useEffect } from 'react';
import {
Box,
Card,
IconButton,
Flex,
Button,
useDisclosure,
Menu,
MenuButton,
MenuList,
MenuItem,
Input,
Grid
} from '@chakra-ui/react';
import { Box, Card, IconButton, Flex, Button, Input, Grid } from '@chakra-ui/react';
import type { KbDataItemType } from '@/types/plugin';
import { usePagination } from '@/hooks/usePagination';
import {
@@ -26,15 +13,14 @@ import { fileDownload } from '@/utils/file';
import { useMutation, useQuery } from '@tanstack/react-query';
import { useToast } from '@/hooks/useToast';
import Papa from 'papaparse';
import dynamic from 'next/dynamic';
import InputModal, { FormData as InputDataType } from './InputDataModal';
import { debounce } from 'lodash';
import { getErrText } from '@/utils/tools';
const SelectFileModal = dynamic(() => import('./SelectFileModal'), { ssr: true });
const SelectCsvModal = dynamic(() => import('./SelectCsvModal'), { ssr: true });
import MyIcon from '@/components/Icon';
import MyTooltip from '@/components/MyTooltip';
const DataCard = ({ kbId }: { kbId: string }) => {
const BoxRef = useRef<HTMLDivElement>(null);
const lastSearch = useRef('');
const [searchText, setSearchText] = useState('');
const { toast } = useToast();
@@ -46,74 +32,61 @@ const DataCard = ({ kbId }: { kbId: string }) => {
Pagination,
total,
getData,
pageNum
pageNum,
pageSize
} = usePagination<KbDataItemType>({
api: getKbDataList,
pageSize: 24,
defaultRequest: false,
params: {
kbId,
searchText
},
onChange() {
if (BoxRef.current) {
BoxRef.current.scrollTop = 0;
}
}
});
const [editInputData, setEditInputData] = useState<InputDataType>();
const {
isOpen: isOpenSelectFileModal,
onOpen: onOpenSelectFileModal,
onClose: onCloseSelectFileModal
} = useDisclosure();
const {
isOpen: isOpenSelectCsvModal,
onOpen: onOpenSelectCsvModal,
onClose: onCloseSelectCsvModal
} = useDisclosure();
const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch } = useQuery(
['getModelSplitDataList', kbId],
() => getTrainingData({ kbId, init: false }),
{
const { data: { qaListLen = 0, vectorListLen = 0 } = {}, refetch: refetchTrainingData } =
useQuery(['getModelSplitDataList', kbId], () => getTrainingData({ kbId, init: false }), {
onError(err) {
console.log(err);
}
}
);
});
const refetchData = useCallback(
(num = pageNum) => {
getData(num);
refetch();
refetchTrainingData();
return null;
},
[getData, pageNum, refetch]
[getData, pageNum, refetchTrainingData]
);
// get al data and export csv
const { mutate: onclickExport, isLoading: isLoadingExport = false } = useMutation({
mutationFn: () => getExportDataList(kbId),
onSuccess(res) {
try {
const text = Papa.unparse({
fields: ['question', 'answer', 'source'],
data: res
});
fileDownload({
text,
type: 'text/csv',
filename: 'data.csv'
});
toast({
title: '导出成功,下次导出需要半小时后',
status: 'success'
});
} catch (error) {
error;
}
const text = Papa.unparse({
fields: ['question', 'answer', 'source'],
data: res
});
fileDownload({
text,
type: 'text/csv',
filename: 'data.csv'
});
toast({
title: '导出成功,下次导出需要半小时后',
status: 'success'
});
},
onError(err: any) {
toast({
title: typeof err === 'string' ? err : err?.message || '导出异常',
title: getErrText(err, '导出异常'),
status: 'error'
});
console.log(err);
@@ -134,59 +107,39 @@ const DataCard = ({ kbId }: { kbId: string }) => {
enabled: qaListLen > 0 || vectorListLen > 0
});
useEffect(() => {
setSearchText('');
getData(1);
}, [kbId]);
return (
<Box position={'relative'} px={5} py={[1, 5]}>
<Box ref={BoxRef} position={'relative'} px={5} py={[1, 5]} h={'100%'} overflow={'overlay'}>
<Flex justifyContent={'space-between'}>
<Box fontWeight={'bold'} fontSize={'lg'} mr={2}>
: {total}
</Box>
<Box>
<IconButton
icon={<RepeatIcon />}
aria-label={'refresh'}
variant={'base'}
isLoading={isLoading}
mr={[2, 4]}
size={'sm'}
onClick={() => {
getData(pageNum);
getTrainingData({ kbId, init: true });
}}
/>
<MyTooltip label={'刷新'}>
<IconButton
icon={<RepeatIcon />}
aria-label={'refresh'}
variant={'base'}
isLoading={isLoading}
mr={[2, 4]}
size={'sm'}
onClick={() => {
getData(pageNum);
getTrainingData({ kbId, init: true });
}}
/>
</MyTooltip>
<Button
variant={'base'}
mr={2}
size={'sm'}
variant={'base'}
borderColor={'myBlue.600'}
color={'myBlue.600'}
isLoading={isLoadingExport || isLoading}
title={'半小时仅能导出1次'}
onClick={() => onclickExport()}
>
csv
</Button>
<Menu autoSelect={false}>
<MenuButton as={Button} size={'sm'} isLoading={isLoading}>
</MenuButton>
<MenuList>
<MenuItem
onClick={() =>
setEditInputData({
a: '',
q: ''
})
}
>
</MenuItem>
<MenuItem onClick={onOpenSelectFileModal}>/</MenuItem>
<MenuItem onClick={onOpenSelectCsvModal}>csv </MenuItem>
</MenuList>
</Menu>
</Box>
</Flex>
<Flex my={4}>
@@ -204,7 +157,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
maxW={['60%', '300px']}
size={'sm'}
value={searchText}
placeholder="根据匹配知识,补充知识和来源搜索"
placeholder="根据匹配知识,补充知识和来源进行搜索"
onChange={(e) => {
setSearchText(e.target.value);
getFirstData();
@@ -245,7 +198,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
}
>
<Box
h={'100px'}
h={'95px'}
overflow={'hidden'}
wordBreak={'break-all'}
px={3}
@@ -255,7 +208,9 @@ const DataCard = ({ kbId }: { kbId: string }) => {
<Box color={'myGray.1000'} mb={2}>
{item.q}
</Box>
<Box color={'myGray.600'}>{item.a}</Box>
<Box color={'myGray.600'} className={'textEllipsis3'}>
{item.a}
</Box>
</Box>
<Flex py={2} px={4} h={'36px'} alignItems={'flex-end'} fontSize={'sm'}>
<Box className={'textEllipsis'} flex={1}>
@@ -292,9 +247,19 @@ const DataCard = ({ kbId }: { kbId: string }) => {
))}
</Grid>
<Flex mt={2} justifyContent={'center'}>
<Pagination />
</Flex>
{total > pageSize && (
<Flex mt={2} justifyContent={'center'}>
<Pagination />
</Flex>
)}
{total === 0 && (
<Flex h={'100%'} flexDirection={'column'} alignItems={'center'} pt={'10vh'}>
<MyIcon name="empty" w={'48px'} h={'48px'} color={'transparent'} />
<Box mt={2} color={'myGray.500'}>
</Box>
</Flex>
)}
{editInputData !== undefined && (
<InputModal
@@ -304,12 +269,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
onSuccess={() => refetchData()}
/>
)}
{isOpenSelectFileModal && (
<SelectFileModal kbId={kbId} onClose={onCloseSelectFileModal} onSuccess={refetchData} />
)}
{isOpenSelectCsvModal && (
<SelectCsvModal kbId={kbId} onClose={onCloseSelectCsvModal} onSuccess={refetchData} />
)}
</Box>
);
};

View File

@@ -0,0 +1,83 @@
import React, { useState } from 'react';
import { Box, type BoxProps, Flex, Textarea, useTheme } from '@chakra-ui/react';
import MyRadio from '@/components/Radio/index';
import dynamic from 'next/dynamic';
import ManualImport from './Import/Manual';
// The three file-based import panels are wrapped with next/dynamic, so each one is pulled in
// through its own dynamic import(); ManualImport (imported above) is a regular static import.
const ChunkImport = dynamic(() => import('./Import/Chunk'), {
ssr: true
});
const QAImport = dynamic(() => import('./Import/QA'), {
ssr: true
});
const CsvImport = dynamic(() => import('./Import/Csv'), {
ssr: true
});
/**
 * Identifiers of the available import modes. The values double as the option values of the
 * MyRadio selector and decide which import panel ImportData renders.
 */
enum ImportTypeEnum {
// renders <ManualImport />
manual = 'manual',
// renders <ChunkImport />
index = 'index',
// renders <QAImport />
qa = 'qa',
// renders <CsvImport />
csv = 'csv'
}
/**
 * Import page of a knowledge base: a radio group selects the import mode and the panel that
 * belongs to the selected mode is rendered underneath it.
 *
 * @param kbId id of the knowledge base, forwarded unchanged to the active import panel
 */
const ImportData = ({ kbId }: { kbId: string }) => {
  const { borders } = useTheme();
  const [importType, setImportType] = useState<`${ImportTypeEnum}`>(ImportTypeEnum.manual);

  // Exactly one panel is mounted at a time, chosen by the current import mode.
  const renderPanel = () => {
    switch (importType) {
      case ImportTypeEnum.manual:
        return <ManualImport kbId={kbId} />;
      case ImportTypeEnum.index:
        return <ChunkImport kbId={kbId} />;
      case ImportTypeEnum.qa:
        return <QAImport kbId={kbId} />;
      case ImportTypeEnum.csv:
        return <CsvImport kbId={kbId} />;
      default:
        return null;
    }
  };

  return (
    <Flex flexDirection={'column'} h={'100%'} pt={[1, 5]}>
      {/* page title */}
      <Box fontWeight={'bold'} fontSize={['md', 'xl']} mb={[3, 5]} px={[4, 8]} />
      {/* import mode selector */}
      <Box pb={[5, 7]} px={[4, 8]} borderBottom={borders.base}>
        <MyRadio
          gridTemplateColumns={['repeat(1,1fr)', 'repeat(2, 350px)']}
          list={[
            {
              icon: 'manualImport',
              title: '手动输入',
              desc: '手动输入问答对,是最精准的数据',
              value: ImportTypeEnum.manual
            },
            {
              icon: 'indexImport',
              title: '直接分段',
              desc: '选择文本文件,直接将其按分段进行处理',
              value: ImportTypeEnum.index
            },
            {
              icon: 'qaImport',
              title: 'QA拆分',
              desc: '选择文本文件,让大模型自动生成问答对',
              value: ImportTypeEnum.qa
            },
            {
              icon: 'csvImport',
              title: 'CSV 导入',
              desc: '批量导入问答对,是最精准的数据',
              value: ImportTypeEnum.csv
            }
          ]}
          value={importType}
          onChange={(next) => setImportType(next as `${ImportTypeEnum}`)}
        />
      </Box>
      {/* active import panel */}
      <Box flex={'1 0 0'} h={0}>
        {renderPanel()}
      </Box>
    </Flex>
  );
};
export default ImportData;

View File

@@ -0,0 +1,459 @@
import React, { useState, useCallback, useMemo } from 'react';
import {
Box,
Flex,
Button,
useTheme,
NumberInput,
NumberInputField,
NumberInputStepper,
NumberIncrementStepper,
NumberDecrementStepper,
Image,
Textarea
} from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { useConfirm } from '@/hooks/useConfirm';
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import { splitText_token } from '@/utils/file';
import { getErrText } from '@/utils/tools';
import { formatPrice } from '@/utils/user';
import { vectorModelList } from '@/store/static';
import MyIcon from '@/components/Icon';
import CloseIcon from '@/components/Icon/close';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { fileImgs } from '@/constants/common';
import { customAlphabet } from 'nanoid';
import { TrainingModeEnum } from '@/constants/plugin';
import FileSelect from './FileSelect';
import { useRouter } from 'next/router';
// Id generator restricted to lowercase letters and digits (size argument 12); its output is
// stored in FileItemType.id.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
// Extension list handed to <FileSelect fileExtension=...>.
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
/** One selected file together with its extracted text and the chunks derived from that text. */
type FileItemType = {
// client-side id from nanoid(); used as React key and to locate the file inside state
id: string;
// File.name of the selected file; also sent as `source` of every imported chunk
filename: string;
// full text read from the file; can be edited in the file preview
text: string;
// `src` of the fileImgs entry whose `reg` matches the file name
icon: string;
// presumably the segments returned by splitText_token (its result is spread into the item)
chunks: string[];
// presumably the token count returned by splitText_token; summed up for the price estimate
tokens: number;
};
/**
 * Escapes `&`, `<` and `>` so arbitrary document text can be handed to
 * `dangerouslySetInnerHTML` and is still rendered (and read back) as literal text.
 */
const escapeHtml = (raw: string) =>
  raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

/** Bounds of the chunk-length input; also used as `min`/`max` of the NumberInput. */
const MIN_CHUNK_LEN = 300;
const MAX_CHUNK_LEN = 1000;

/**
 * "Direct chunking" import panel.
 *
 * Lets the user pick text documents, splits their text with `splitText_token` using the
 * configured chunk length, shows the resulting chunks (editable / deletable) and finally
 * uploads them in batches through `postKbDataFromList` with `TrainingModeEnum.index`.
 *
 * @param kbId id of the knowledge base the chunks are imported into
 */
const ChunkImport = ({ kbId }: { kbId: string }) => {
  const model = vectorModelList[0]?.model;
  const unitPrice = vectorModelList[0]?.price || 0.2;
  const theme = useTheme();
  const router = useRouter();
  const { toast } = useToast();
  const [chunkLen, setChunkLen] = useState(500);
  const [showRePreview, setShowRePreview] = useState(false);
  const [selecting, setSelecting] = useState(false);
  const [files, setFiles] = useState<FileItemType[]>([]);
  const [previewFile, setPreviewFile] = useState<FileItemType>();
  const [successChunks, setSuccessChunks] = useState(0);

  const totalChunk = useMemo(
    () => files.reduce((sum, file) => sum + file.chunks.length, 0),
    [files]
  );
  const emptyFiles = useMemo(() => files.length === 0, [files]);
  // Guarded so that an empty chunk list renders 0% instead of NaN%.
  const uploadPercent = totalChunk > 0 ? Math.round((successChunks / totalChunk) * 100) : 0;

  // price count
  const price = useMemo(() => {
    return formatPrice(files.reduce((sum, file) => sum + file.tokens, 0) * unitPrice);
  }, [files, unitPrice]);

  const { openConfirm, ConfirmChild } = useConfirm({
    content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。`
  });

  /** Reads every selected file, splits its text and prepends the result to the file list. */
  const onSelectFile = useCallback(
    async (files: File[]) => {
      setSelecting(true);
      try {
        // Files are handled one after another; the first failure aborts the remaining ones.
        for (const file of files) {
          const extension = file?.name?.split('.')?.pop()?.toLowerCase();
          const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src;
          const readText = async () => {
            switch (extension) {
              case 'txt':
              case 'md':
                return readTxtContent(file);
              case 'pdf':
                return readPdfContent(file);
              case 'doc':
              case 'docx':
                return readDocContent(file);
            }
            return '';
          };
          const text = await readText();

          // Files without a known icon or without any text are silently skipped.
          if (icon && text) {
            const splitRes = splitText_token({
              text: text,
              maxLen: chunkLen
            });
            setFiles((state) => [
              {
                id: nanoid(),
                filename: file.name,
                text,
                icon,
                ...splitRes
              },
              ...state
            ]);
          }
        }
      } catch (error) {
        console.log(error);
        toast({
          // getErrText surfaces both string errors and Error messages.
          title: getErrText(error, '解析文件失败'),
          status: 'error'
        });
      }
      setSelecting(false);
    },
    [chunkLen, toast]
  );

  const { mutate: onclickUpload, isLoading: uploading } = useMutation({
    mutationFn: async () => {
      const chunks: { a: string; q: string; source: string }[] = [];
      for (const file of files) {
        for (const chunk of file.chunks) {
          chunks.push({
            q: chunk,
            a: '',
            source: file.filename
          });
        }
      }

      // Start every upload from 0 so a retry does not show the previous run's progress.
      setSuccessChunks(0);

      // subsection import
      let success = 0;
      const step = 100;
      for (let i = 0; i < chunks.length; i += step) {
        const { insertLen } = await postKbDataFromList({
          kbId,
          model,
          data: chunks.slice(i, i + step),
          mode: TrainingModeEnum.index
        });
        success += insertLen;
        setSuccessChunks(success);
      }

      toast({
        title: `去重后共导入 ${success} 条数据,请耐心等待训练.`,
        status: 'success'
      });

      router.replace({
        query: {
          kbId,
          currentTab: 'data'
        }
      });
    },
    onError(err) {
      toast({
        title: getErrText(err, '导入文件失败'),
        status: 'error'
      });
    }
  });

  /** Re-splits the text of every file with the current chunk length. */
  const onRePreview = useCallback(async () => {
    try {
      const splitRes = files.map((item) =>
        splitText_token({
          text: item.text,
          maxLen: chunkLen
        })
      );

      setFiles((state) =>
        state.map((file, index) => ({
          ...file,
          ...splitRes[index]
        }))
      );
      setPreviewFile(undefined);
      setShowRePreview(false);
    } catch (error) {
      toast({
        status: 'warning',
        title: getErrText(error, '文本分段异常')
      });
    }
  }, [chunkLen, files, toast]);

  /** Removes one chunk of a file; always derives the new value from the latest state. */
  const removeChunk = (fileId: string, chunkIndex: number) =>
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.filter((_, index) => index !== chunkIndex)
            }
          : stateFile
      )
    );

  /** Replaces the text of one chunk of a file; always derives the new value from the latest state. */
  const updateChunk = (fileId: string, chunkIndex: number, text: string) =>
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.map((chunk, index) => (index === chunkIndex ? text : chunk))
            }
          : stateFile
      )
    );

  return (
    <Box display={['block', 'flex']} h={['auto', '100%']}>
      <Box flex={1} minW={['auto', '400px']} w={['100%', 0]} p={[4, 8]}>
        <FileSelect
          fileExtension={fileExtension}
          onSelectFile={onSelectFile}
          isLoading={selecting}
          py={emptyFiles ? '100px' : 5}
        />

        {!emptyFiles && (
          <>
            <Box py={4} maxH={'400px'}>
              {files.map((item) => (
                <Flex
                  key={item.id}
                  w={'100%'}
                  _notLast={{ mb: 5 }}
                  px={5}
                  py={2}
                  boxShadow={'1px 1px 5px rgba(0,0,0,0.15)'}
                  borderRadius={'md'}
                  cursor={'pointer'}
                  position={'relative'}
                  alignItems={'center'}
                  _hover={{
                    bg: 'myBlue.100',
                    '& .delete': {
                      display: 'block'
                    }
                  }}
                  onClick={() => setPreviewFile(item)}
                >
                  <Image src={item.icon} w={'16px'} alt={''} />
                  <Box ml={2} flex={'1 0 0'} pr={3} className="textEllipsis">
                    {item.filename}
                  </Box>
                  <MyIcon
                    position={'absolute'}
                    right={3}
                    className="delete"
                    name={'delete'}
                    w={'16px'}
                    _hover={{ color: 'red.600' }}
                    display={['block', 'none']}
                    onClick={(e) => {
                      // Do not open the preview of the file that is being removed.
                      e.stopPropagation();
                      setFiles((state) => state.filter((file) => file.id !== item.id));
                    }}
                  />
                </Flex>
              ))}
            </Box>
            {/* chunk size */}
            <Flex py={5} alignItems={'center'}>
              <Box>
                <MyTooltip
                  label={'基于 Gpt3.5 的 Token 计算方法进行分段。前后段落会有 30% 的内容重叠。'}
                >
                  <QuestionOutlineIcon ml={1} />
                </MyTooltip>
              </Box>
              <NumberInput
                ml={4}
                flex={1}
                defaultValue={chunkLen}
                min={MIN_CHUNK_LEN}
                max={MAX_CHUNK_LEN}
                step={10}
                onChange={(_, valueAsNumber) => {
                  // An empty or partially typed field yields NaN; keep the previous length then.
                  if (!Number.isFinite(valueAsNumber)) return;
                  setChunkLen(Math.min(MAX_CHUNK_LEN, Math.max(MIN_CHUNK_LEN, valueAsNumber)));
                  setShowRePreview(true);
                }}
              >
                <NumberInputField />
                <NumberInputStepper>
                  <NumberIncrementStepper />
                  <NumberDecrementStepper />
                </NumberInputStepper>
              </NumberInput>
            </Flex>
            {/* price */}
            <Flex py={5} alignItems={'center'}>
              <Box>
                <MyTooltip label={`索引生成计费为: ${formatPrice(unitPrice, 1000)}/1k tokens`}>
                  <QuestionOutlineIcon ml={1} />
                </MyTooltip>
              </Box>
              <Box ml={4}>{price}</Box>
            </Flex>
            <Flex mt={3}>
              {showRePreview && (
                <Button variant={'base'} mr={4} onClick={onRePreview}></Button>
              )}
              <Button isDisabled={uploading} onClick={openConfirm(onclickUpload)}>
                {uploading ? <Box>{uploadPercent}%</Box> : '确认导入'}
              </Button>
            </Flex>
          </>
        )}
      </Box>
      {!emptyFiles && (
        <Box flex={'2 0 0'} w={['100%', 0]} h={'100%'}>
          {previewFile ? (
            <Box
              position={'relative'}
              display={['block', 'flex']}
              h={'100%'}
              flexDirection={'column'}
              pt={[4, 8]}
              bg={'myWhite.400'}
            >
              <Box px={[4, 8]} fontSize={['lg', 'xl']} fontWeight={'bold'}>
                {previewFile.filename}
              </Box>
              <CloseIcon
                position={'absolute'}
                right={[4, 8]}
                top={4}
                onClick={() => setPreviewFile(undefined)}
              />
              <Box
                flex={'1 0 0'}
                h={['auto', 0]}
                overflow={'overlay'}
                px={[4, 8]}
                my={4}
                contentEditable
                // File content is untrusted text: escape it so it cannot inject markup.
                dangerouslySetInnerHTML={{ __html: escapeHtml(previewFile.text) }}
                fontSize={'sm'}
                whiteSpace={'pre-wrap'}
                wordBreak={'break-all'}
                onBlur={(e) => {
                  const val = e.currentTarget.innerText;
                  // Edited text only affects the chunks after a re-preview.
                  setShowRePreview(true);

                  setFiles((state) =>
                    state.map((file) =>
                      file.id === previewFile.id
                        ? {
                            ...file,
                            text: val
                          }
                        : file
                    )
                  );
                }}
              />
            </Box>
          ) : (
            <Box h={'100%'} pt={[4, 8]} overflow={'overlay'}>
              <Box px={[4, 8]} fontSize={['lg', 'xl']} fontWeight={'bold'}>
                ({totalChunk})
              </Box>
              <Box px={[4, 8]} overflow={'overlay'}>
                {files.map((file) =>
                  file.chunks.map((item, i) => (
                    <Box
                      // The index keeps keys unique when two chunks have identical text; the
                      // text keeps the element remounting whenever its content changes.
                      key={`${file.id}-${i}-${item}`}
                      py={4}
                      bg={'myWhite.500'}
                      my={2}
                      borderRadius={'md'}
                      fontSize={'sm'}
                      _hover={{ ...hoverDeleteStyles }}
                    >
                      <Flex mb={1} px={4} userSelect={'none'}>
                        <Box px={3} py={'1px'} border={theme.borders.base} borderRadius={'md'}>
                          # {i + 1}
                        </Box>
                        <Box flex={1} />
                        <DeleteIcon onClick={() => removeChunk(file.id, i)} />
                      </Flex>
                      <Box
                        px={4}
                        fontSize={'sm'}
                        whiteSpace={'pre-wrap'}
                        wordBreak={'break-all'}
                        contentEditable
                        // Chunk text comes from the uploaded file: escape before injecting.
                        dangerouslySetInnerHTML={{ __html: escapeHtml(item) }}
                        onBlur={(e) => {
                          const val = e.currentTarget.innerText;

                          // Clearing a chunk removes it; anything else replaces its text.
                          if (val === '') {
                            removeChunk(file.id, i);
                          } else {
                            updateChunk(file.id, i, val);
                          }
                        }}
                      />
                    </Box>
                  ))
                )}
              </Box>
            </Box>
          )}
        </Box>
      )}
      <ConfirmChild />
    </Box>
  );
};
export default ChunkImport;

View File

@@ -0,0 +1,241 @@
import React, { useState, useCallback, useMemo } from 'react';
import { Box, Flex, Button, useTheme, Image } from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { useConfirm } from '@/hooks/useConfirm';
import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import { getErrText } from '@/utils/tools';
import { vectorModelList } from '@/store/static';
import MyIcon from '@/components/Icon';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
import { customAlphabet } from 'nanoid';
import { TrainingModeEnum } from '@/constants/plugin';
import FileSelect from './FileSelect';
import { useRouter } from 'next/router';
// Id generator restricted to lowercase letters and digits (size argument 12); its output is
// stored in FileItemType.id.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
import { readCsvContent } from '@/utils/file';
// Extension list handed to <FileSelect fileExtension=...>.
const fileExtension = '.csv';
/** One selected csv file together with the question/answer rows read from it. */
type FileItemType = {
// client-side id from nanoid(); used as React key and to locate the file inside state
id: string;
// File.name of the selected file; also sent as `source` of every imported row
filename: string;
// one entry per csv data row: q is taken from column 0, a from column 1
chunks: { q: string; a: string }[];
};
/**
 * CSV import panel.
 *
 * Reads csv files whose first two header cells are `question` and `answer`, previews the first
 * 100 rows of every file (rows can be deleted) and uploads all rows in batches through
 * `postKbDataFromList` with `TrainingModeEnum.index`.
 *
 * @param kbId id of the knowledge base the rows are imported into
 */
const CsvImport = ({ kbId }: { kbId: string }) => {
  const model = vectorModelList[0]?.model;
  const theme = useTheme();
  const router = useRouter();
  const { toast } = useToast();
  const [selecting, setSelecting] = useState(false);
  const [files, setFiles] = useState<FileItemType[]>([]);
  const [successChunks, setSuccessChunks] = useState(0);

  const totalChunk = useMemo(
    () => files.reduce((sum, file) => sum + file.chunks.length, 0),
    [files]
  );
  const emptyFiles = useMemo(() => files.length === 0, [files]);
  // Guarded so that an empty row list renders 0% instead of NaN%.
  const uploadPercent = totalChunk > 0 ? Math.round((successChunks / totalChunk) * 100) : 0;

  const { openConfirm, ConfirmChild } = useConfirm({
    content: `该任务无法终止,需要一定时间生成索引,请确认导入。如果余额不足,未完成的任务会被暂停,充值后可继续进行。`
  });

  /** Parses every selected csv file and prepends its rows to the file list. */
  const onSelectFile = useCallback(
    async (files: File[]) => {
      setSelecting(true);
      try {
        // Files are handled one after another; the first failure aborts the remaining ones.
        for (const file of files) {
          const { header, data } = await readCsvContent(file);
          if (header[0] !== 'question' || header[1] !== 'answer') {
            throw new Error('csv 文件格式有误');
          }

          setFiles((state) => [
            {
              id: nanoid(),
              filename: file.name,
              // A row may have fewer than two cells; never store `undefined` as q/a.
              chunks: data.map((item) => ({
                q: item[0] ?? '',
                a: item[1] ?? ''
              }))
            },
            ...state
          ]);
        }
      } catch (error) {
        console.log(error);
        toast({
          // getErrText also surfaces Error messages, e.g. the header-format error thrown above.
          title: getErrText(error, '解析文件失败'),
          status: 'error'
        });
      }
      setSelecting(false);
    },
    [toast]
  );

  const { mutate: onclickUpload, isLoading: uploading } = useMutation({
    mutationFn: async () => {
      const chunks: { a: string; q: string; source: string }[] = [];
      for (const file of files) {
        for (const chunk of file.chunks) {
          chunks.push({
            ...chunk,
            source: file.filename
          });
        }
      }

      // Start every upload from 0 so a retry does not show the previous run's progress.
      setSuccessChunks(0);

      // subsection import
      let success = 0;
      const step = 100;
      for (let i = 0; i < chunks.length; i += step) {
        const { insertLen } = await postKbDataFromList({
          kbId,
          model,
          data: chunks.slice(i, i + step),
          mode: TrainingModeEnum.index
        });
        success += insertLen;
        setSuccessChunks(success);
      }

      toast({
        title: `去重后共导入 ${success} 条数据,请耐心等待训练.`,
        status: 'success'
      });

      router.replace({
        query: {
          kbId,
          currentTab: 'data'
        }
      });
    },
    onError(err) {
      toast({
        title: getErrText(err, '导入文件失败'),
        status: 'error'
      });
    }
  });

  /** Removes one row of a file; always derives the new value from the latest state. */
  const removeChunk = (fileId: string, chunkIndex: number) =>
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.filter((_, index) => index !== chunkIndex)
            }
          : stateFile
      )
    );

  return (
    <Box display={['block', 'flex']} h={['auto', '100%']}>
      <Box flex={1} minW={['auto', '400px']} w={['100%', 0]} p={[4, 8]}>
        <FileSelect
          fileExtension={fileExtension}
          onSelectFile={onSelectFile}
          isLoading={selecting}
          py={emptyFiles ? '100px' : 5}
        />

        {!emptyFiles && (
          <>
            <Box py={4} maxH={'400px'}>
              {files.map((item) => (
                <Flex
                  key={item.id}
                  w={'100%'}
                  _notLast={{ mb: 5 }}
                  px={5}
                  py={2}
                  boxShadow={'1px 1px 5px rgba(0,0,0,0.15)'}
                  borderRadius={'md'}
                  position={'relative'}
                  alignItems={'center'}
                  _hover={{ ...hoverDeleteStyles }}
                >
                  <Image src={'/imgs/files/csv.svg'} w={'16px'} alt={''} />
                  <Box ml={2} flex={'1 0 0'} pr={3} className="textEllipsis">
                    {item.filename}
                  </Box>
                  <MyIcon
                    position={'absolute'}
                    right={3}
                    className="delete"
                    name={'delete'}
                    w={'16px'}
                    _hover={{ color: 'red.600' }}
                    display={['block', 'none']}
                    onClick={(e) => {
                      e.stopPropagation();
                      setFiles((state) => state.filter((file) => file.id !== item.id));
                    }}
                  />
                </Flex>
              ))}
            </Box>

            <Flex mt={3}>
              <Button isDisabled={uploading} onClick={openConfirm(onclickUpload)}>
                {uploading ? <Box>{uploadPercent}%</Box> : '确认导入'}
              </Button>
            </Flex>
          </>
        )}
      </Box>
      {!emptyFiles && (
        <Box flex={'2 0 0'} w={['100%', 0]} h={'100%'} pt={[4, 8]} overflow={'overlay'}>
          <Box px={[4, 8]} fontSize={['lg', 'xl']} fontWeight={'bold'}>
            ({totalChunk})
          </Box>
          <Box px={[4, 8]} overflow={'overlay'}>
            {files.map((file) =>
              // Only the first 100 rows of each file are previewed; all rows are uploaded.
              file.chunks.slice(0, 100).map((item, i) => (
                <Box
                  key={i}
                  py={4}
                  bg={'myWhite.500'}
                  my={2}
                  borderRadius={'md'}
                  fontSize={'sm'}
                  _hover={{ ...hoverDeleteStyles }}
                >
                  <Flex mb={1} px={4} userSelect={'none'}>
                    <Box px={3} py={'1px'} border={theme.borders.base} borderRadius={'md'}>
                      # {i + 1}
                    </Box>
                    <Box flex={1} />
                    <DeleteIcon onClick={() => removeChunk(file.id, i)} />
                  </Flex>
                  <Box px={4} fontSize={'sm'} whiteSpace={'pre-wrap'} wordBreak={'break-all'}>
                    {`q: ${item.q}\na: ${item.a}`}
                  </Box>
                </Box>
              ))
            )}
          </Box>
        </Box>
      )}
      <ConfirmChild />
    </Box>
  );
};
export default CsvImport;

View File

@@ -0,0 +1,49 @@
import React from 'react';
import { Box, Flex, type BoxProps } from '@chakra-ui/react';
import { useLoading } from '@/hooks/useLoading';
import { useSelectFile } from '@/hooks/useSelectFile';
import MyIcon from '@/components/Icon';
/** Props of FileSelect; any additional BoxProps are spread onto the outer container. */
interface Props extends BoxProps {
// passed to useSelectFile as `fileType` and also printed inside the box
fileExtension: string;
// invoked by the file input with the files the user picked
onSelectFile: (files: File[]) => Promise<void>;
// forwarded to the loading overlay's `loading` prop
isLoading?: boolean;
}
/**
 * Dashed-border box with a clickable label that opens the file picker. It prints the accepted
 * extensions and overlays a loading indicator while `isLoading` is set.
 *
 * Every prop that is not consumed here is spread onto the outer Box after the defaults, so
 * callers can override the default styling.
 */
const FileSelect = (allProps: Props) => {
  const { fileExtension, onSelectFile, isLoading, ...boxProps } = allProps;

  const { Loading: LoadingOverlay } = useLoading();
  const { File: FilePicker, onOpen: openPicker } = useSelectFile({
    multiple: true,
    fileType: fileExtension
  });

  // Default look of the container; caller-supplied props are applied on top of it.
  const containerDefaults: BoxProps = {
    display: 'inline-block',
    textAlign: 'center',
    bg: 'myWhite.400',
    p: 5,
    borderRadius: 'lg',
    border: '1px dashed',
    borderColor: 'myGray.300',
    w: '100%',
    position: 'relative'
  };

  return (
    <Box {...containerDefaults} {...boxProps}>
      <Flex justifyContent={'center'} alignItems={'center'}>
        <MyIcon mr={1} name={'uploadFile'} w={'16px'} />
        {' '}
        <Box
          as={'span'}
          ml={1}
          color={'myBlue.700'}
          cursor={'pointer'}
          onClick={openPicker}
        ></Box>
      </Flex>
      <Box mt={1}> {fileExtension} </Box>
      <LoadingOverlay loading={isLoading} fixed={false} />
      <FilePicker onSelect={onSelectFile} />
    </Box>
  );
};
export default FileSelect;

View File

@@ -0,0 +1,100 @@
import React, { useCallback, useState } from 'react';
import { Box, type BoxProps, Flex, Textarea, useTheme, Button } from '@chakra-ui/react';
import MyRadio from '@/components/Radio/index';
import { useForm } from 'react-hook-form';
import { useToast } from '@/hooks/useToast';
import { useRequest } from '@/hooks/useRequest';
import { getErrText } from '@/utils/tools';
import { vectorModelList } from '@/store/static';
import { postKbDataFromList } from '@/api/plugins/kb';
import { TrainingModeEnum } from '@/constants/plugin';
/** Values of the manual import form: `q` is the matched knowledge, `a` the supplementary text. */
type ManualFormType = { q: string; a: string };

/** Maximum combined length of `q` and `a`, as announced in the textarea placeholders. */
const MANUAL_MAX_TOTAL_LENGTH = 3000;

/**
 * Manual import panel: a two-field form whose content is sent as a single item through
 * `postKbDataFromList` with `TrainingModeEnum.index` and source `手动录入`.
 *
 * @param kbId id of the knowledge base the item is imported into
 */
const ManualImport = ({ kbId }: { kbId: string }) => {
  const { register, handleSubmit, reset } = useForm<ManualFormType>({
    defaultValues: { q: '', a: '' }
  });
  const { toast } = useToast();

  const { mutate: onImportData, isLoading } = useRequest({
    mutationFn: async (e: ManualFormType) => {
      // "At most 3000 characters in total": exactly 3000 is still allowed.
      if (e.a.length + e.q.length > MANUAL_MAX_TOTAL_LENGTH) {
        toast({
          title: '总长度超长了',
          status: 'warning'
        });
        return;
      }

      try {
        const data = {
          a: e.a,
          q: e.q,
          source: '手动录入'
        };
        const { insertLen } = await postKbDataFromList({
          kbId,
          // Optional chaining matches how the other import panels read the model.
          model: vectorModelList[0]?.model,
          mode: TrainingModeEnum.index,
          data: [data]
        });

        // insertLen === 0 means nothing was inserted; the form content is kept in that case.
        if (insertLen === 0) {
          toast({
            title: '已存在完全一致的数据',
            status: 'warning'
          });
        } else {
          toast({
            title: '导入数据成功,需要一段时间训练',
            status: 'success'
          });
          reset({
            a: '',
            q: ''
          });
        }
      } catch (err) {
        toast({
          title: getErrText(err, '出现了点意外~'),
          status: 'error'
        });
      }
    }
  });

  return (
    <Box p={[4, 8]}>
      <Box display={'flex'} flexDirection={['column', 'row']}>
        <Box flex={1} mr={[0, 4]} mb={[4, 0]} h={['50%', '100%']}>
          <Box h={'30px'}>{'匹配的知识点'}</Box>
          <Textarea
            placeholder={'匹配的知识点。这部分内容会被搜索,请把控内容的质量。总和最多 3000 字。'}
            maxLength={MANUAL_MAX_TOTAL_LENGTH}
            h={['250px', '500px']}
            {...register(`q`, {
              required: true
            })}
          />
        </Box>
        <Box flex={1} h={['50%', '100%']}>
          <Box h={'30px'}></Box>
          <Textarea
            placeholder={
              '补充知识。这部分内容不会被搜索,但会作为"匹配的知识点"的内容补充,你可以讲一些细节的内容填写在这里。总和最多 3000 字。'
            }
            h={['250px', '500px']}
            maxLength={MANUAL_MAX_TOTAL_LENGTH}
            {...register('a')}
          />
        </Box>
      </Box>
      <Button
        mt={5}
        isLoading={isLoading}
        onClick={handleSubmit((data) => onImportData(data))}
      ></Button>
    </Box>
  );
};
};
export default React.memo(ManualImport);

View File

@@ -0,0 +1,451 @@
import React, { useState, useCallback, useMemo } from 'react';
import {
Box,
Flex,
Button,
useTheme,
NumberInput,
NumberInputField,
NumberInputStepper,
NumberIncrementStepper,
NumberDecrementStepper,
Image,
Textarea,
Input
} from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { useConfirm } from '@/hooks/useConfirm';
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import { splitText_token } from '@/utils/file';
import { getErrText } from '@/utils/tools';
import { formatPrice } from '@/utils/user';
import { qaModelList } from '@/store/static';
import MyIcon from '@/components/Icon';
import CloseIcon from '@/components/Icon/close';
import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete';
import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { fileImgs } from '@/constants/common';
import { customAlphabet } from 'nanoid';
import { TrainingModeEnum } from '@/constants/plugin';
import FileSelect from './FileSelect';
import { useRouter } from 'next/router';
// Id generator restricted to lowercase letters and digits (size argument 12); its output is
// stored in FileItemType.id.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
// Extension list handed to <FileSelect fileExtension=...>.
const fileExtension = '.txt, .doc, .docx, .pdf, .md';
/** One selected file together with its extracted text and the chunks derived from that text. */
type FileItemType = {
// client-side id from nanoid(); used as React key and to locate the file inside state
id: string;
// File.name of the selected file; also sent as `source` of every imported chunk
filename: string;
// full text read from the file; can be edited in the file preview
text: string;
// `src` of the fileImgs entry whose `reg` matches the file name
icon: string;
// presumably the segments returned by splitText_token (its result is spread into the item)
chunks: string[];
// presumably the token count returned by splitText_token; summed up for the price estimate
tokens: number;
};
/**
 * Escapes `&`, `<` and `>` so arbitrary document text can be handed to
 * `dangerouslySetInnerHTML` and is still rendered (and read back) as literal text.
 */
const escapeHtml = (raw: string) =>
  raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

/**
 * "QA split" import panel.
 *
 * Lets the user pick text documents, splits their text with `splitText_token` into chunks of
 * half the QA model's `maxToken`, shows the resulting chunks (editable / deletable) and uploads
 * them in batches through `postKbDataFromList` with `TrainingModeEnum.qa` and a guiding prompt.
 *
 * @param kbId id of the knowledge base the chunks are imported into
 */
const QAImport = ({ kbId }: { kbId: string }) => {
  const model = qaModelList[0]?.model;
  const unitPrice = qaModelList[0]?.price || 3;
  // NOTE(review): unlike the two lines above this access is not optional, so an empty
  // qaModelList throws here — confirm the list is always populated before this panel renders.
  const chunkLen = qaModelList[0].maxToken / 2;
  const theme = useTheme();
  const router = useRouter();
  const { toast } = useToast();
  const [selecting, setSelecting] = useState(false);
  const [files, setFiles] = useState<FileItemType[]>([]);
  const [showRePreview, setShowRePreview] = useState(false);
  const [previewFile, setPreviewFile] = useState<FileItemType>();
  const [successChunks, setSuccessChunks] = useState(0);
  // Raw topic typed by the user; it is wrapped into the final prompt only when uploading.
  const [prompt, setPrompt] = useState('');

  const totalChunk = useMemo(
    () => files.reduce((sum, file) => sum + file.chunks.length, 0),
    [files]
  );
  const emptyFiles = useMemo(() => files.length === 0, [files]);
  // Guarded so that an empty chunk list renders 0% instead of NaN%.
  const uploadPercent = totalChunk > 0 ? Math.round((successChunks / totalChunk) * 100) : 0;

  // price count
  const price = useMemo(() => {
    return formatPrice(files.reduce((sum, file) => sum + file.tokens, 0) * unitPrice * 1.3);
  }, [files, unitPrice]);

  const { openConfirm, ConfirmChild } = useConfirm({
    content: `该任务无法终止!导入后会自动调用大模型生成问答对,会有一些细节丢失,请确认!如果余额不足,未完成的任务会被暂停。`
  });

  /** Reads every selected file, splits its text and prepends the result to the file list. */
  const onSelectFile = useCallback(
    async (files: File[]) => {
      setSelecting(true);
      try {
        // Files are handled one after another; the first failure aborts the remaining ones.
        for (const file of files) {
          const extension = file?.name?.split('.')?.pop()?.toLowerCase();
          const icon = fileImgs.find((item) => new RegExp(item.reg).test(file.name))?.src;
          const readText = async () => {
            switch (extension) {
              case 'txt':
              case 'md':
                return readTxtContent(file);
              case 'pdf':
                return readPdfContent(file);
              case 'doc':
              case 'docx':
                return readDocContent(file);
            }
            return '';
          };
          const text = await readText();

          // Files without a known icon or without any text are silently skipped.
          if (icon && text) {
            const splitRes = splitText_token({
              text: text,
              maxLen: chunkLen
            });
            setFiles((state) => [
              {
                id: nanoid(),
                filename: file.name,
                text,
                icon,
                ...splitRes
              },
              ...state
            ]);
          }
        }
      } catch (error) {
        console.log(error);
        toast({
          // getErrText surfaces both string errors and Error messages.
          title: getErrText(error, '解析文件失败'),
          status: 'error'
        });
      }
      setSelecting(false);
    },
    [chunkLen, toast]
  );

  const { mutate: onclickUpload, isLoading: uploading } = useMutation({
    mutationFn: async () => {
      const chunks: { a: string; q: string; source: string }[] = [];
      for (const file of files) {
        for (const chunk of file.chunks) {
          chunks.push({
            q: chunk,
            a: '',
            source: file.filename
          });
        }
      }

      // Start every upload from 0 so a retry does not show the previous run's progress.
      setSuccessChunks(0);

      // subsection import
      let success = 0;
      const step = 100;
      for (let i = 0; i < chunks.length; i += step) {
        const { insertLen } = await postKbDataFromList({
          kbId,
          model,
          data: chunks.slice(i, i + step),
          mode: TrainingModeEnum.qa,
          // Without a topic the generic fallback prompt is sent.
          prompt: prompt ? `下面是"${prompt}"` : '下面是一段长文本'
        });
        success += insertLen;
        setSuccessChunks(success);
      }

      toast({
        title: `共导入 ${success} 条数据,请耐心等待训练.`,
        status: 'success'
      });

      router.replace({
        query: {
          kbId,
          currentTab: 'data'
        }
      });
    },
    onError(err) {
      toast({
        title: getErrText(err, '导入文件失败'),
        status: 'error'
      });
    }
  });

  /** Re-splits the text of every file with the current chunk length. */
  const onRePreview = useCallback(async () => {
    try {
      const splitRes = files.map((item) =>
        splitText_token({
          text: item.text,
          maxLen: chunkLen
        })
      );

      setFiles((state) =>
        state.map((file, index) => ({
          ...file,
          ...splitRes[index]
        }))
      );
      setPreviewFile(undefined);
      setShowRePreview(false);
    } catch (error) {
      toast({
        status: 'warning',
        title: getErrText(error, '文本分段异常')
      });
    }
  }, [chunkLen, files, toast]);

  /** Removes one chunk of a file; always derives the new value from the latest state. */
  const removeChunk = (fileId: string, chunkIndex: number) =>
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.filter((_, index) => index !== chunkIndex)
            }
          : stateFile
      )
    );

  /** Replaces the text of one chunk of a file; always derives the new value from the latest state. */
  const updateChunk = (fileId: string, chunkIndex: number, text: string) =>
    setFiles((state) =>
      state.map((stateFile) =>
        stateFile.id === fileId
          ? {
              ...stateFile,
              chunks: stateFile.chunks.map((chunk, index) => (index === chunkIndex ? text : chunk))
            }
          : stateFile
      )
    );

  return (
    <Box display={['block', 'flex']} h={['auto', '100%']}>
      <Box flex={1} minW={['auto', '400px']} w={['100%', 0]} p={[4, 8]}>
        <FileSelect
          fileExtension={fileExtension}
          onSelectFile={onSelectFile}
          isLoading={selecting}
          py={emptyFiles ? '100px' : 5}
        />

        {!emptyFiles && (
          <>
            <Box py={4} maxH={'400px'}>
              {files.map((item) => (
                <Flex
                  key={item.id}
                  w={'100%'}
                  _notLast={{ mb: 5 }}
                  px={5}
                  py={2}
                  boxShadow={'1px 1px 5px rgba(0,0,0,0.15)'}
                  borderRadius={'md'}
                  cursor={'pointer'}
                  position={'relative'}
                  alignItems={'center'}
                  _hover={{
                    bg: 'myBlue.100',
                    '& .delete': {
                      display: 'block'
                    }
                  }}
                  onClick={() => setPreviewFile(item)}
                >
                  <Image src={item.icon} w={'16px'} alt={''} />
                  <Box ml={2} flex={'1 0 0'} pr={3} className="textEllipsis">
                    {item.filename}
                  </Box>
                  <MyIcon
                    position={'absolute'}
                    right={3}
                    className="delete"
                    name={'delete'}
                    w={'16px'}
                    _hover={{ color: 'red.600' }}
                    display={['block', 'none']}
                    onClick={(e) => {
                      // Do not open the preview of the file that is being removed.
                      e.stopPropagation();
                      setFiles((state) => state.filter((file) => file.id !== item.id));
                    }}
                  />
                </Flex>
              ))}
            </Box>
            {/* prompt */}
            <Box py={5}>
              <Box mb={2}>
                QA {' '}
                <MyTooltip
                  label={`可输入关于文件内容的范围介绍,例如:\n1. 关于 Laf 的介绍\n2. xxx的简历`}
                >
                  <QuestionOutlineIcon ml={1} />
                </MyTooltip>
              </Box>
              <Flex alignItems={'center'} fontSize={'sm'}>
                <Box mr={2}></Box>
                <Input
                  flex={1}
                  placeholder={'Laf 云函数的介绍'}
                  bg={'myWhite.500'}
                  defaultValue={prompt}
                  // Store the raw value (also when cleared) so a remount does not re-wrap it.
                  onBlur={(e) => setPrompt(e.target.value)}
                />
              </Flex>
            </Box>
            {/* price */}
            <Flex py={5} alignItems={'center'}>
              <Box>
                <MyTooltip label={`索引生成计费为: ${formatPrice(unitPrice, 1000)}/1k tokens`}>
                  <QuestionOutlineIcon ml={1} />
                </MyTooltip>
              </Box>
              <Box ml={4}>{price}</Box>
            </Flex>
            <Flex mt={3}>
              {showRePreview && (
                <Button variant={'base'} mr={4} onClick={onRePreview}></Button>
              )}
              <Button isDisabled={uploading} onClick={openConfirm(onclickUpload)}>
                {uploading ? <Box>{uploadPercent}%</Box> : '确认导入'}
              </Button>
            </Flex>
          </>
        )}
      </Box>
      {!emptyFiles && (
        <Box flex={'2 0 0'} w={['100%', 0]} h={'100%'}>
          {previewFile ? (
            <Box
              position={'relative'}
              display={['block', 'flex']}
              h={'100%'}
              flexDirection={'column'}
              pt={[4, 8]}
              bg={'myWhite.400'}
            >
              <Box px={[4, 8]} fontSize={['lg', 'xl']} fontWeight={'bold'}>
                {previewFile.filename}
              </Box>
              <CloseIcon
                position={'absolute'}
                right={[4, 8]}
                top={4}
                onClick={() => setPreviewFile(undefined)}
              />
              <Box
                flex={'1 0 0'}
                h={['auto', 0]}
                overflow={'overlay'}
                px={[4, 8]}
                my={4}
                contentEditable
                // File content is untrusted text: escape it so it cannot inject markup.
                dangerouslySetInnerHTML={{ __html: escapeHtml(previewFile.text) }}
                fontSize={'sm'}
                whiteSpace={'pre-wrap'}
                wordBreak={'break-all'}
                onBlur={(e) => {
                  const val = e.currentTarget.innerText;
                  // Edited text only affects the chunks after a re-preview.
                  setShowRePreview(true);

                  setFiles((state) =>
                    state.map((file) =>
                      file.id === previewFile.id
                        ? {
                            ...file,
                            text: val
                          }
                        : file
                    )
                  );
                }}
              />
            </Box>
          ) : (
            <Box h={'100%'} pt={[4, 8]} overflow={'overlay'}>
              <Box px={[4, 8]} fontSize={['lg', 'xl']} fontWeight={'bold'}>
                ({totalChunk})
              </Box>
              <Box px={[4, 8]} overflow={'overlay'}>
                {files.map((file) =>
                  file.chunks.map((item, i) => (
                    <Box
                      // The index keeps keys unique when two chunks have identical text; the
                      // text keeps the element remounting whenever its content changes.
                      key={`${file.id}-${i}-${item}`}
                      py={4}
                      bg={'myWhite.500'}
                      my={2}
                      borderRadius={'md'}
                      fontSize={'sm'}
                      _hover={{ ...hoverDeleteStyles }}
                    >
                      <Flex mb={1} px={4} userSelect={'none'}>
                        <Box px={3} py={'1px'} border={theme.borders.base} borderRadius={'md'}>
                          # {i + 1}
                        </Box>
                        <Box flex={1} />
                        <DeleteIcon onClick={() => removeChunk(file.id, i)} />
                      </Flex>
                      <Box
                        px={4}
                        fontSize={'sm'}
                        whiteSpace={'pre-wrap'}
                        wordBreak={'break-all'}
                        contentEditable
                        // Chunk text comes from the uploaded file: escape before injecting.
                        dangerouslySetInnerHTML={{ __html: escapeHtml(item) }}
                        onBlur={(e) => {
                          const val = e.currentTarget.innerText;

                          // Clearing a chunk removes it; anything else replaces its text.
                          if (val === '') {
                            removeChunk(file.id, i);
                          } else {
                            updateChunk(file.id, i, val);
                          }
                        }}
                      />
                    </Box>
                  ))
                )}
              </Box>
            </Box>
          )}
        </Box>
      )}
      <ConfirmChild />
    </Box>
  );
};
export default QAImport;

View File

@@ -17,6 +17,8 @@ import { useConfirm } from '@/hooks/useConfirm';
import { UseFormReturn } from 'react-hook-form';
import { compressImg } from '@/utils/file';
import type { KbItemType } from '@/types/plugin';
import { vectorModelList } from '@/store/static';
import MySelect from '@/components/Select';
import Avatar from '@/components/Avatar';
import Tag from '@/components/Tag';
import MyTooltip from '@/components/MyTooltip';
@@ -144,18 +146,18 @@ const Info = (
}));
return (
<Flex p={5} flexDirection={'column'} alignItems={'center'}>
<Flex mt={5} w={'100%'} maxW={'350px'} alignItems={'center'}>
<Box flex={'0 0 90px'} w={0}>
<Box py={5} px={[5, 10]}>
<Flex mt={5} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
ID
</Box>
<Box flex={1}>{kbDetail._id}</Box>
</Flex>
<Flex mt={5} w={'100%'} maxW={'350px'} alignItems={'center'}>
<Box flex={'0 0 90px'} w={0}>
<Flex mt={5} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
</Box>
<Box flex={1}>
<Box flex={[1, '0 0 300px']}>
<Avatar
m={'auto'}
src={getValues('avatar')}
@@ -167,27 +169,44 @@ const Info = (
/>
</Box>
</Flex>
<FormControl mt={8} w={'100%'} maxW={'350px'} display={'flex'} alignItems={'center'}>
<Box flex={'0 0 90px'} w={0}>
<FormControl mt={8} w={'100%'} display={'flex'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
</Box>
<Input
flex={1}
flex={[1, '0 0 300px']}
{...register('name', {
required: '知识库名称不能为空'
})}
/>
</FormControl>
<Flex mt={8} alignItems={'center'} w={'100%'} maxW={'350px'} flexWrap={'wrap'}>
<Box flex={'0 0 90px'} w={0}>
<Flex mt={8} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
</Box>
<Box flex={[1, '0 0 300px']}>
<MySelect
w={'100%'}
value={getValues('model')}
list={vectorModelList.map((item) => ({
label: item.name,
value: item.model
}))}
onchange={(res) => {
setValue('model', res);
}}
/>
</Box>
</Flex>
<Flex mt={8} alignItems={'center'} w={'100%'} flexWrap={'wrap'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
<MyTooltip label={'用空格隔开多个标签,便于搜索'}>
<QuestionOutlineIcon ml={1} />
</MyTooltip>
</Box>
<Input
flex={1}
maxW={'300px'}
flex={[1, '0 0 300px']}
ref={InputRef}
placeholder={'标签,使用空格分割。'}
maxLength={30}
@@ -196,7 +215,7 @@ const Info = (
setRefresh(!refresh);
}}
/>
<Box pl={'90px'} mt={2} w="100%">
<Box w={'100%'} pl={['90px', '160px']} mt={2}>
{getValues('tags')
.split(' ')
.filter((item) => item)
@@ -207,8 +226,9 @@ const Info = (
))}
</Box>
</Flex>
<Flex mt={5} w={'100%'} maxW={'350px'} alignItems={'flex-end'}>
<Box flex={'0 0 90px'} w={0}></Box>
<Flex mt={5} w={'100%'} alignItems={'flex-end'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}></Box>
<Button
isLoading={btnLoading}
mr={4}
@@ -232,7 +252,7 @@ const Info = (
</Flex>
<File onSelect={onSelectFile} />
<ConfirmChild />
</Flex>
</Box>
);
};

View File

@@ -108,12 +108,18 @@ const InputDataModal = ({
try {
const data = {
dataId: e.dataId,
kbId,
a: e.a,
q: e.q === defaultValues.q ? '' : e.q
};
await putKbDataById(data);
onSuccess(data);
} catch (error) {}
} catch (err) {
toast({
status: 'error',
title: getErrText(err, '更新数据失败')
});
}
setLoading(false);
}
@@ -123,7 +129,7 @@ const InputDataModal = ({
});
onClose();
},
[defaultValues, onClose, onSuccess, toast]
[defaultValues.a, defaultValues.q, kbId, onClose, onSuccess, toast]
);
return (
@@ -194,6 +200,10 @@ const InputDataModal = ({
await delOneKbDataByDataId(defaultValues.dataId);
onDelete();
onClose();
toast({
status: 'success',
title: '记录已删除'
});
} catch (error) {
toast({
status: 'warning',

View File

@@ -1,180 +0,0 @@
import React, { useState, useCallback } from 'react';
import {
Box,
Flex,
Button,
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody
} from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { useSelectFile } from '@/hooks/useSelectFile';
import { useConfirm } from '@/hooks/useConfirm';
import { readCsvContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import Markdown from '@/components/Markdown';
import { useMarkdown } from '@/hooks/useMarkdown';
import { fileDownload } from '@/utils/file';
import { TrainingModeEnum } from '@/constants/plugin';
import { getErrText } from '@/utils/tools';
/** CSV template offered for download; parsing requires the header row to be `question,answer`. */
const csvTemplate = `question,answer\n"什么是 laf","laf 是一个云函数开发平台……"\n"什么是 sealos","Sealos 是以 kubernetes 为内核的云操作系统发行版,可以……"`;

/**
 * Modal that imports question/answer pairs from a single CSV file into a knowledge base.
 *
 * The user picks a `.csv` file, the first 100 rows are previewed, and after confirmation
 * the rows are posted in batches of 100 using the index training mode.
 *
 * @param onClose - Closes the modal; also invoked after a successful import.
 * @param onSuccess - Invoked once every batch has been posted.
 * @param kbId - Id of the knowledge base that receives the rows.
 */
const SelectJsonModal = ({
  onClose,
  onSuccess,
  kbId
}: {
  onClose: () => void;
  onSuccess: () => void;
  kbId: string;
}) => {
  const [selecting, setSelecting] = useState(false);
  const { toast } = useToast();
  const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: false });
  const [fileData, setFileData] = useState<{ q: string; a: string }[]>([]);
  const [fileName, setFileName] = useState('');
  // Number of rows already sent to the server; drives the percentage on the confirm button.
  const [successData, setSuccessData] = useState(0);
  const { openConfirm, ConfirmChild } = useConfirm({
    content: '确认导入该数据集?'
  });

  /** Parses the chosen CSV and stores its rows; the header must be `question,answer`. */
  const onSelectFile = useCallback(
    async (e: File[]) => {
      const file = e[0];
      // Guard against an empty selection instead of crashing on `file.name`.
      if (!file) return;

      setSelecting(true);
      try {
        const { header, data } = await readCsvContent(file);
        if (header[0] !== 'question' || header[1] !== 'answer') {
          throw new Error('csv 文件格式有误');
        }
        // Commit the name together with the rows so a failed parse never leaves a new
        // file name paired with the previous file's data.
        setFileName(file.name);
        setFileData(
          data.map((item) => ({
            q: item[0] || '',
            a: item[1] || ''
          }))
        );
      } catch (error) {
        toast({
          title: getErrText(error, 'csv 文件格式有误'),
          status: 'error'
        });
      }
      setSelecting(false);
    },
    [toast]
  );

  const { mutate, isLoading: uploading } = useMutation({
    mutationFn: async () => {
      if (!fileData || fileData.length === 0) return;

      // Restart progress so a retry after a failed attempt does not begin above 0%.
      setSuccessData(0);

      let success = 0;

      // import in batches
      const step = 100;
      for (let i = 0; i < fileData.length; i += step) {
        const batch = fileData.slice(i, i + step);
        const { insertLen } = await postKbDataFromList({
          kbId,
          data: batch.map((item) => ({
            ...item,
            source: fileName
          })),
          mode: TrainingModeEnum.index
        });

        success += insertLen || 0;
        // Advance by the real batch size; the last batch may be shorter than `step`,
        // and adding `step` would push the displayed progress past 100%.
        setSuccessData((state) => state + batch.length);
      }

      toast({
        title: `导入数据成功,最终导入: ${success} 条数据。需要一段时间训练`,
        status: 'success',
        duration: 4000
      });
      onClose();
      onSuccess();
    },
    onError(err) {
      toast({
        title: getErrText(err, '导入文件失败'),
        status: 'error'
      });
    }
  });

  const { data: intro } = useMarkdown({ url: '/csvSelect.md' });

  return (
    <Modal isOpen={true} onClose={onClose} isCentered>
      <ModalOverlay />
      <ModalContent maxW={'90vw'} position={'relative'} m={0} h={'90vh'}>
        <ModalHeader>csv </ModalHeader>
        <ModalCloseButton />
        <ModalBody h={'100%'} display={['block', 'flex']} fontSize={'sm'} overflowY={'auto'}>
          <Box flex={'2 0 0'} w={['100%', 0]} mr={[0, 4]} mb={[4, 0]}>
            <Markdown source={intro} />
            <Box
              my={3}
              cursor={'pointer'}
              textDecoration={'underline'}
              color={'myBlue.600'}
              onClick={() =>
                fileDownload({
                  text: csvTemplate,
                  type: 'text/csv',
                  filename: 'template.csv'
                })
              }
            >
              csv模板
            </Box>
            <Box>
              <Button isLoading={selecting} isDisabled={uploading} onClick={onOpen}>
                csv
              </Button>
              <Box mt={4}>
                {fileName} {fileData.length} 100
              </Box>
            </Box>
          </Box>
          {/* preview: only the first 100 rows are rendered */}
          <Box flex={'3 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}>
            {fileData.slice(0, 100).map((item, index) => (
              <Box key={index}>
                <Box>
                  Q{index + 1}. {item.q}
                </Box>
                <Box>
                  A{index + 1}. {item.a}
                </Box>
              </Box>
            ))}
          </Box>
        </ModalBody>
        <Flex px={6} pt={2} pb={4}>
          <Box flex={1}></Box>
          <Button variant={'base'} isLoading={uploading} mr={3} onClick={onClose}>
          </Button>
          <Button isDisabled={fileData.length === 0 || uploading} onClick={openConfirm(mutate)}>
            {uploading ? (
              <Box>{Math.round((successData / fileData.length) * 100)}%</Box>
            ) : (
              '确认导入'
            )}
          </Button>
        </Flex>
      </ModalContent>
      <ConfirmChild />
      <File onSelect={onSelectFile} />
    </Modal>
  );
};
export default SelectJsonModal;

View File

@@ -1,349 +0,0 @@
import React, { useState, useCallback } from 'react';
import {
Box,
Flex,
Button,
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalCloseButton,
ModalBody,
Input,
Textarea
} from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast';
import { useSelectFile } from '@/hooks/useSelectFile';
import { useConfirm } from '@/hooks/useConfirm';
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/file';
import { useMutation } from '@tanstack/react-query';
import { postKbDataFromList } from '@/api/plugins/kb';
import Radio from '@/components/Radio';
import { splitText_token } from '@/utils/file';
import { TrainingModeEnum } from '@/constants/plugin';
import { getErrText } from '@/utils/tools';
import { formatPrice } from '@/utils/user';
import MySlider from '@/components/Slider';
import { qaModelList, vectorModelList } from '@/store/static';
/** File extensions accepted by the file picker. */
const fileExtension = '.txt,.doc,.docx,.pdf,.md';

/**
 * Modal that imports free text (typed in or read from txt/md/pdf/doc/docx files) into a
 * knowledge base.
 *
 * Each text is split into chunks with `splitText_token` using the settings of the selected
 * training mode, a price estimate is shown in a confirm dialog, and the chunks are then
 * posted in batches of 100.
 *
 * @param onClose - Closes the modal; also invoked after a successful import.
 * @param onSuccess - Invoked once every batch has been posted.
 * @param kbId - Id of the knowledge base that receives the chunks.
 */
const SelectFileModal = ({
  onClose,
  onSuccess,
  kbId
}: {
  onClose: () => void;
  onSuccess: () => void;
  kbId: string;
}) => {
  // Per-mode settings: the whole entry is spread into `splitText_token`, `model` is sent with
  // the upload, and `price` multiplies the token count for the cost estimate.
  const [modeMap, setModeMap] = useState({
    [TrainingModeEnum.qa]: {
      model: qaModelList[0].model,
      maxLen: (qaModelList[0]?.maxToken || 16000) * 0.5,
      price: qaModelList[0]?.price || 3
    },
    [TrainingModeEnum.index]: {
      model: vectorModelList[0].model,
      maxLen: 600,
      price: vectorModelList[0]?.price || 0.2
    }
  });
  const [btnLoading, setBtnLoading] = useState(false);
  const { toast } = useToast();
  const [prompt, setPrompt] = useState('');
  const { File, onOpen } = useSelectFile({
    fileType: fileExtension,
    multiple: true
  });
  const [mode, setMode] = useState<`${TrainingModeEnum}`>(TrainingModeEnum.index);
  const [files, setFiles] = useState<{ filename: string; text: string }[]>([
    { filename: '文本1', text: '' }
  ]);
  // Result of the last split: estimated price, flattened chunks, and upload progress.
  const [splitRes, setSplitRes] = useState<{
    price: number;
    chunks: { filename: string; value: string }[];
    successChunks: number;
  }>({
    price: 0,
    successChunks: 0,
    chunks: []
  });

  const { openConfirm, ConfirmChild } = useConfirm({
    content: `确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,未完成的任务会被暂停。一共 ${
      splitRes.chunks.length
    } 组。${splitRes.price ? `大约 ${splitRes.price} 元。` : ''}`
  });

  /** Reads the selected files one at a time and prepends each non-empty text to the list. */
  const onSelectFile = useCallback(
    async (files: File[]) => {
      setBtnLoading(true);
      try {
        // Sequential on purpose: the first failing file aborts the remaining reads.
        for (const file of files) {
          const extension = file?.name?.split('.')?.pop()?.toLowerCase();
          const text = await (async () => {
            switch (extension) {
              case 'txt':
              case 'md':
                return readTxtContent(file);
              case 'pdf':
                return readPdfContent(file);
              case 'doc':
              case 'docx':
                return readDocContent(file);
            }
            // unsupported extension: treated as empty and skipped below
            return '';
          })();

          if (text) {
            setFiles((state) => [{ filename: file.name, text }].concat(state));
          }
        }
      } catch (error) {
        console.log(error);
        toast({
          title: typeof error === 'string' ? error : '解析文件失败',
          status: 'error'
        });
      }
      setBtnLoading(false);
    },
    [toast]
  );

  const { mutate, isLoading: uploading } = useMutation({
    mutationFn: async () => {
      if (splitRes.chunks.length === 0) return;

      // import in batches
      let success = 0;
      const step = 100;
      for (let i = 0; i < splitRes.chunks.length; i += step) {
        const batch = splitRes.chunks.slice(i, i + step);
        const { insertLen } = await postKbDataFromList({
          kbId,
          model: modeMap[mode].model,
          data: batch.map((item) => ({ q: item.value, a: '', source: item.filename })),
          prompt: `下面是"${prompt || '一段长文本'}"`,
          mode
        });

        success += insertLen || 0;
        // Advance by the real batch size; the last batch may be shorter than `step`,
        // and adding `step` would push the displayed progress past 100%.
        setSplitRes((state) => ({
          ...state,
          successChunks: state.successChunks + batch.length
        }));
      }

      toast({
        title: `去重后共导入 ${success} 条数据,需要一段拆解和训练.`,
        status: 'success'
      });
      onClose();
      onSuccess();
    },
    onError(err) {
      toast({
        title: getErrText(err, '导入文件失败'),
        status: 'error'
      });
    }
  });

  /** Splits every text with the current mode settings, stores the result, then asks for confirmation. */
  const onclickImport = useCallback(async () => {
    setBtnLoading(true);
    try {
      const fileSplits = files
        .map((item) => ({
          ...splitText_token({
            text: item.text,
            ...modeMap[mode]
          }),
          filename: item.filename
        }))
        // texts that produced no tokens are ignored
        .filter((item) => item.tokens > 0);

      let price = formatPrice(
        fileSplits.reduce((sum, item) => sum + item.tokens, 0) * modeMap[mode].price
      );

      // NOTE(review): the QA estimate is scaled by 1.2 — presumably headroom for generated
      // output; confirm against the billing logic.
      if (mode === 'qa') {
        price *= 1.2;
      }

      setSplitRes({
        price,
        chunks: fileSplits.flatMap((item) =>
          item.chunks.map((chunk) => ({
            filename: item.filename,
            value: chunk
          }))
        ),
        successChunks: 0
      });

      openConfirm(mutate)();
    } catch (error) {
      toast({
        status: 'warning',
        title: getErrText(error, '拆分文本异常')
      });
    }
    setBtnLoading(false);
  }, [files, mode, modeMap, mutate, openConfirm, toast]);

  return (
    <Modal isOpen={true} onClose={onClose} isCentered>
      <ModalOverlay />
      <ModalContent
        display={'flex'}
        maxW={'min(1000px, 90vw)'}
        m={0}
        position={'relative'}
        h={'90vh'}
      >
        <ModalHeader></ModalHeader>
        <ModalCloseButton />

        <ModalBody
          flex={1}
          h={0}
          display={'flex'}
          flexDirection={'column'}
          p={0}
          alignItems={'center'}
          justifyContent={'center'}
          fontSize={'sm'}
        >
          <Box mt={2} px={5} maxW={['100%', '70%']} textAlign={'justify'} color={'blackAlpha.600'}>
            {fileExtension} Gpt会自动对文本进行 QA
            tokens{files.length}
          </Box>
          {/* split mode */}
          <Flex w={'100%'} px={5} alignItems={'center'} mt={4}>
            <Box flex={'0 0 70px'}>:</Box>
            <Radio
              ml={3}
              list={[
                { label: '直接分段', value: 'index' },
                { label: 'QA拆分', value: 'qa' }
              ]}
              value={mode}
              onChange={(e) => setMode(e as 'index' | 'qa')}
            />
          </Flex>
          {/* content description */}
          <Flex w={'100%'} px={5} alignItems={'center'} mt={4}>
            {mode === TrainingModeEnum.qa && (
              <>
                <Box flex={'0 0 70px'} mr={2}>
                </Box>
                <Input
                  placeholder="提示词,例如: Laf的介绍/关于gpt4的论文/一段长文本"
                  value={prompt}
                  onChange={(e) => setPrompt(e.target.value)}
                  size={'sm'}
                />
              </>
            )}
            {/* chunk size */}
            {mode === TrainingModeEnum.index && (
              <Flex mt={5}>
                <Box w={['70px']} flexShrink={0}>
                </Box>
                <Box flex={1} ml={'10px'}>
                  <MySlider
                    markList={[
                      { label: '300', value: 300 },
                      { label: '1000', value: 1000 }
                    ]}
                    width={['100%', '260px']}
                    min={300}
                    max={1000}
                    step={50}
                    value={modeMap[TrainingModeEnum.index].maxLen}
                    onChange={(val) => {
                      // Derive the next value from `state`, not the render-time `modeMap`,
                      // so rapid slider updates never build on a stale snapshot.
                      setModeMap((state) => ({
                        ...state,
                        [TrainingModeEnum.index]: {
                          ...state[TrainingModeEnum.index],
                          maxLen: val
                        }
                      }));
                    }}
                  />
                </Box>
              </Flex>
            )}
          </Flex>
          {/* text content */}
          <Box flex={'1 0 0'} px={5} h={0} w={'100%'} overflowY={'auto'} mt={4}>
            {files.slice(0, 100).map((item, i) => (
              <Box key={i} mb={5}>
                <Box mb={1}>{item.filename}</Box>
                <Textarea
                  placeholder="文件内容,空内容会自动忽略"
                  maxLength={-1}
                  rows={10}
                  fontSize={'xs'}
                  whiteSpace={'pre-wrap'}
                  value={item.text}
                  onChange={(e) => {
                    const text = e.target.value;
                    // Functional update: files appended by an in-flight read are not lost.
                    setFiles((state) =>
                      state.map((file, index) => (index === i ? { ...file, text } : file))
                    );
                  }}
                  onBlur={(e) => {
                    // Drop an emptied entry, but always keep at least one textarea.
                    if (files.length > 1 && e.target.value === '') {
                      setFiles((state) => [...state.slice(0, i), ...state.slice(i + 1)]);
                    }
                  }}
                />
              </Box>
            ))}
          </Box>
        </ModalBody>

        <Flex px={6} pt={2} pb={4}>
          <Button isLoading={btnLoading} isDisabled={uploading} onClick={onOpen}>
          </Button>
          <Box flex={1}></Box>
          <Button variant={'base'} isLoading={uploading} mr={3} onClick={onClose}>
          </Button>
          <Button
            isDisabled={uploading || btnLoading || files[0]?.text === ''}
            onClick={onclickImport}
          >
            {uploading ? (
              <Box>{Math.round((splitRes.successChunks / splitRes.chunks.length) * 100)}%</Box>
            ) : (
              '确认导入'
            )}
          </Button>
        </Flex>
      </ModalContent>
      <ConfirmChild />
      <File onSelect={onSelectFile} />
    </Modal>
  );
};
export default SelectFileModal;

View File

@@ -5,7 +5,6 @@ import type { KbTestItemType } from '@/types/plugin';
import { searchText, getKbDataItemById } from '@/api/plugins/kb';
import MyIcon from '@/components/Icon';
import { useRequest } from '@/hooks/useRequest';
import { useRouter } from 'next/router';
import { formatTimeToChatTime } from '@/utils/tools';
import InputDataModal, { type FormData } from './InputDataModal';
import { useGlobalStore } from '@/store/global';
@@ -17,8 +16,7 @@ import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const Test = () => {
const { kbId } = useRouter().query as { kbId: string };
const Test = ({ kbId }: { kbId: string }) => {
const theme = useTheme();
const { toast } = useToast();
const { setLoading } = useGlobalStore();

View File

@@ -17,12 +17,17 @@ import SideTabs from '@/components/SideTabs';
import PageContainer from '@/components/PageContainer';
import Avatar from '@/components/Avatar';
import Info from './components/Info';
const ImportData = dynamic(() => import('./components/Import'), {
ssr: false
});
const Test = dynamic(() => import('./components/Test'), {
ssr: false
});
enum TabEnum {
data = 'data',
import = 'import',
test = 'test',
info = 'info'
}
@@ -35,14 +40,12 @@ const Detail = ({ kbId, currentTab }: { kbId: string; currentTab: `${TabEnum}` }
const { isPc } = useScreen();
const { kbDetail, getKbDetail } = useUserStore();
const tabList = useMemo(
() => [
{ label: '数据', id: TabEnum.data, icon: 'overviewLight' },
{ label: '搜索测试', id: TabEnum.test, icon: 'kbTest' },
{ label: '基本信息', id: TabEnum.info, icon: 'settingLight' }
],
[]
);
const tabList = useRef([
{ label: '数据集', id: TabEnum.data, icon: 'overviewLight' },
{ label: '导入数据', id: TabEnum.import, icon: 'importLight' },
{ label: '搜索测试', id: TabEnum.test, icon: 'kbTest' },
{ label: '配置', id: TabEnum.info, icon: 'settingLight' }
]);
const setCurrentTab = useCallback(
(tab: `${TabEnum}`) => {
@@ -77,70 +80,73 @@ const Detail = ({ kbId, currentTab }: { kbId: string; currentTab: `${TabEnum}` }
return (
<PageContainer>
<Box display={['block', 'flex']} h={'100%'} pt={[4, 0]}>
{/* pc tab */}
<Box
display={['none', 'flex']}
flexDirection={'column'}
p={4}
w={'200px'}
borderRight={theme.borders.base}
>
<Flex mb={4} alignItems={'center'}>
<Avatar src={kbDetail.avatar} w={'34px'} borderRadius={'lg'} />
<Box ml={2} fontWeight={'bold'}>
{kbDetail.name}
</Box>
</Flex>
<SideTabs
flex={1}
mx={'auto'}
mt={2}
w={'100%'}
list={tabList}
activeId={currentTab}
onChange={(e: any) => {
setCurrentTab(e);
}}
/>
{isPc ? (
<Flex
alignItems={'center'}
cursor={'pointer'}
py={2}
px={3}
borderRadius={'md'}
_hover={{ bg: 'myGray.100' }}
onClick={() => router.replace('/kb/list')}
flexDirection={'column'}
p={4}
h={'100%'}
flex={'0 0 200px'}
borderRight={theme.borders.base}
>
<IconButton
mr={3}
icon={<MyIcon name={'backFill'} w={'18px'} color={'myBlue.600'} />}
bg={'white'}
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
h={'28px'}
size={'sm'}
borderRadius={'50%'}
aria-label={''}
<Flex mb={4} alignItems={'center'}>
<Avatar src={kbDetail.avatar} w={'34px'} borderRadius={'lg'} />
<Box ml={2} fontWeight={'bold'}>
{kbDetail.name}
</Box>
</Flex>
<SideTabs
flex={1}
mx={'auto'}
mt={2}
w={'100%'}
list={tabList.current}
activeId={currentTab}
onChange={(e: any) => {
setCurrentTab(e);
}}
/>
<Flex
alignItems={'center'}
cursor={'pointer'}
py={2}
px={3}
borderRadius={'md'}
_hover={{ bg: 'myGray.100' }}
onClick={() => router.replace('/kb/list')}
>
<IconButton
mr={3}
icon={<MyIcon name={'backFill'} w={'18px'} color={'myBlue.600'} />}
bg={'white'}
boxShadow={'1px 1px 9px rgba(0,0,0,0.15)'}
h={'28px'}
size={'sm'}
borderRadius={'50%'}
aria-label={''}
/>
</Flex>
</Flex>
</Box>
<Box mb={3} display={['block', 'none']}>
<Tabs
m={'auto'}
w={'260px'}
size={isPc ? 'md' : 'sm'}
list={[
{ id: TabEnum.data, label: '数据管理' },
{ id: TabEnum.test, label: '搜索测试' },
{ id: TabEnum.info, label: '基本信息' }
]}
activeId={currentTab}
onChange={(e: any) => setCurrentTab(e)}
/>
</Box>
<Box flex={'1 0 0'} overflow={'overlay'} pb={[4, 0]}>
) : (
<Box mb={3}>
<Tabs
m={'auto'}
w={'260px'}
size={isPc ? 'md' : 'sm'}
list={tabList.current.map((item) => ({
id: item.id,
label: item.label
}))}
activeId={currentTab}
onChange={(e: any) => setCurrentTab(e)}
/>
</Box>
)}
<Box flex={'1 0 0'} h={'100%'} pb={[4, 0]}>
{currentTab === TabEnum.data && <DataCard kbId={kbId} />}
{currentTab === TabEnum.test && <Test />}
{currentTab === TabEnum.import && <ImportData kbId={kbId} />}
{currentTab === TabEnum.test && <Test kbId={kbId} />}
{currentTab === TabEnum.info && <Info ref={InfoRef} kbId={kbId} form={form} />}
</Box>
</Box>