perf: 知识库优化

This commit is contained in:
archer
2023-03-31 00:05:04 +08:00
parent 456686f3d0
commit 8239c58494
12 changed files with 201 additions and 405 deletions

View File

@@ -1,8 +1,7 @@
import { GET, POST, DELETE, PUT } from './request';
import type { ModelSchema, ModelDataSchema } from '@/types/mongoSchema';
import type { ModelSchema, ModelDataSchema, ModelSplitDataSchema } from '@/types/mongoSchema';
import { ModelUpdateParams } from '@/types/model';
import { TrainingItemType } from '../types/training';
import { PagingData } from '@/types';
import { RequestPaging } from '../types/index';
import { Obj2Query } from '@/utils/tools';
@@ -39,6 +38,9 @@ type GetModelDataListProps = RequestPaging & {
// Fetch model data via GET /model/data/getModelData; `props` is serialized into the query string by Obj2Query.
export const getModelDataList = (props: GetModelDataListProps) =>
GET(`/model/data/getModelData?${Obj2Query(props)}`);
// Fetch the split-data records of one model via GET /model/data/getSplitData; the response is typed as ModelSplitDataSchema[].
export const getModelSplitDataList = (modelId: string) =>
GET<ModelSplitDataSchema[]>(`/model/data/getSplitData?modelId=${modelId}`);
export const postModelDataInput = (data: {
modelId: string;
data: { text: ModelDataSchema['text']; q: ModelDataSchema['q'] }[];

View File

@@ -17,30 +17,24 @@ export const introPage = `
4. 进入模型页,创建一个模型,建议直接用 ChatGPT。
5. 在模型列表点击【对话】,即可使用 API 进行聊天。
### 模型配置
### 定制 prompt
1. **提示语**:会在每个对话框的第一句自动加入,用于限定该模型的对话内容。
1. 进入模型编辑页
2. 调整温度和提示词
3. 使用该模型对话。每次对话时,提示词和温度都会自动注入,方便管理个人的模型。建议把自己日常经常需要使用的 5~10 个方向预设好。
### 知识库
2. **单句最大长度**:每个聊天,单次输入内容的最大长度。
1. 创建模型时选择【知识库】
2. 进入模型编辑页
3. 导入数据,可以选择手动导入,或者选择文件导入。文件导入会自动调用 chatGPT 理解文件内容,并生成知识库。
4. 使用该模型对话。
3. **上下文最大长度**每个聊天最多的轮数除以2建议设置为偶数。可以持续聊天但是旧的聊天内容会被截断AI 就不会知道被截取的内容。
例如上下文最大长度为6。在第 4 轮对话时,第一轮对话的内容不会被计入。
4. **过期时间**:生成对话框后,这个对话框多久过期。
5. **聊天最大加载次数**:单个对话框最多被加载几次,设置为-1代表不限制正数代表只能加载 n 次,防止被盗刷。
### 对话框介绍
1. 每个对话框以 chatId 作为标识。
2. 每次点击【对话】,都会生成新的对话框,无法回到旧的对话框。对话框内刷新,会恢复对话内容。
3. 直接分享对话框(网页)的链接给朋友,会共享同一个对话内容。但是!!!千万不要两个人同时用一个链接,会串味,还没解决这个问题。
4. 如果想分享一个纯的对话框,请点击侧边栏的分享按键。例如:
注意使用知识库模型对话时tokens 消耗会加快。
### 其他问题
还有其他问题,可以加我 wx: YNyiqi拉个交流群大家一起聊聊。
`;
export const chatProblem = `
@@ -58,10 +52,9 @@ export const chatProblem = `
`;
export const versionIntro = `
## Fast GPT V2.0
## Fast GPT V2.2
* 定制知识库:创建模型时可以选择【知识库】模型, 可以手动导入知识点或者直接导入一个文件自动学习。
* 删除和复制功能:点击对话头像,可以选择复制或删除该条内容。
* 优化记账模式: 不再根据文本长度进行记账,而是根据实际消耗 tokens 数量进行记账。
* 文本 QA 拆分: 可以在[数据]模块,使用 QA 拆分功能,粘贴文字或者选择文件均可以实现自动生成 QA。可以一键导出用于微调模型。
`;
export const shareHint = `

View File

@@ -0,0 +1,85 @@
import { useState, useCallback, useMemo, useEffect } from 'react';
import type { PagingData } from '../types/index';
import { IconButton, Flex, Box } from '@chakra-ui/react';
import { ArrowBackIcon, ArrowForwardIcon } from '@chakra-ui/icons';
import { useQuery, useMutation } from '@tanstack/react-query';
import { useToast } from './useToast';
/**
 * Page-based data loading hook.
 *
 * Calls `api` with `{ pageNum, pageSize, ...params }`, expects a `PagingData<T>`
 * (`{ total, data }`) back, and exposes the current page together with a ready-made
 * `Pagination` control (previous / next buttons and a "page/maxPage" label).
 *
 * @param api      Request function that resolves to a `PagingData<T>`.
 * @param pageSize Number of items requested per page (default 10).
 * @param params   Extra fields merged into every request.
 * @returns `pageNum`, `pageSize`, `total`, the current page `data`, `isLoading`,
 *          the `Pagination` component and `getData(pageNum)` to (re)load a page.
 */
export const usePagination = <T = any,>({
  api,
  pageSize = 10,
  params = {}
}: {
  api: (data: any) => any;
  pageSize?: number;
  params?: Record<string, any>;
}) => {
  const { toast } = useToast();
  const [pageNum, setPageNum] = useState(1);
  const [total, setTotal] = useState(0);
  // Always report at least one page so an empty result renders as "1/1"
  // and the "next" button stays disabled.
  const maxPage = useMemo(() => Math.max(1, Math.ceil(total / pageSize)), [pageSize, total]);

  const {
    mutate,
    data = [],
    isLoading
  } = useMutation({
    mutationFn: async (num: number = pageNum) => {
      try {
        const res: PagingData<T> = await api({
          pageNum: num,
          pageSize,
          ...params
        });
        // Only commit the new page number once the request has succeeded.
        setPageNum(num);
        setTotal(res.total);
        return res.data;
      } catch (error: any) {
        toast({
          title: error?.message || '获取数据异常',
          status: 'error'
        });
        console.log(error);
      }
    }
  });

  // Load the first page once per mount. A mount effect is used instead of a query with a
  // shared static key: such a query is de-duplicated between hook instances and would send
  // the user back to page 1 on every query refetch.
  useEffect(() => {
    mutate(1);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  const Pagination = useCallback(() => {
    return (
      <Flex alignItems={'center'} justifyContent={'end'}>
        <IconButton
          isDisabled={pageNum <= 1}
          icon={<ArrowBackIcon />}
          aria-label={'left'}
          size={'sm'}
          onClick={() => mutate(pageNum - 1)}
        />
        <Box mx={2}>
          {pageNum}/{maxPage}
        </Box>
        <IconButton
          isDisabled={pageNum >= maxPage}
          icon={<ArrowForwardIcon />}
          aria-label={'right'}
          size={'sm'}
          onClick={() => mutate(pageNum + 1)}
        />
      </Flex>
    );
  }, [maxPage, mutate, pageNum]);

  return {
    pageNum,
    pageSize,
    total,
    data,
    isLoading,
    Pagination,
    getData: mutate
  };
};

View File

@@ -40,6 +40,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
}
await connectToDatabase();
let startTime = Date.now();
const { chat, userApiKey, systemKey, userId } = await authChat(chatId, authorization);
@@ -81,7 +82,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// 获取 chatAPI
const chatAPI = getOpenAIApi(userApiKey || systemKey);
let startTime = Date.now();
// 发出请求
const chatResponse = await chatAPI.createChatCompletion(
{

View File

@@ -48,6 +48,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
await connectToDatabase();
const redis = await connectRedis();
let startTime = Date.now();
const { chat, userApiKey, systemKey, userId } = await authChat(chatId, authorization);
@@ -83,17 +84,22 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const redisData: any[] = await redis.sendCommand([
'FT.SEARCH',
`idx:${VecModelDataIndex}`,
`@modelId:{${String(chat.modelId._id)}} @vector:[VECTOR_RANGE 0.2 $blob]`,
`@modelId:{${String(
chat.modelId._id
)}} @vector:[VECTOR_RANGE 0.15 $blob]=>{$YIELD_DISTANCE_AS: score}`,
// `@modelId:{${String(chat.modelId._id)}}=>[KNN 10 @vector $blob AS score]`,
'RETURN',
'1',
'dataId',
// 'SORTBY',
// 'score',
'SORTBY',
'score',
'PARAMS',
'2',
'blob',
binary,
'LIMIT',
'0',
'20',
'DIALECT',
'2'
]);
@@ -117,8 +123,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
[2, 4, 6, 8, 10, 12, 14, 16, 18, 20].map((i) => {
if (!redisData[i] || !redisData[i][1]) return '';
return ModelData.findById(redisData[i][1])
.select('text')
.then((res) => res?.text || '');
.select('text q')
.then((res) => {
if (!res) return '';
const questions = res.q.map((item) => item.text).join(' ');
const answer = res.text;
return `${questions} ${answer}`;
});
})
)
).filter((item) => item);
@@ -128,7 +139,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
prompts.unshift({
obj: 'SYSTEM',
value: `根据下面的知识回答问题: ${systemPrompt}`
value: `根据下面的知识回答问题: ${systemPrompt}`
});
// 控制在 tokens 数量,防止超出
@@ -150,7 +161,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// 计算温度
const temperature = modelConstantsData.maxTemperature * (model.temperature / 10);
let startTime = Date.now();
// 发出请求
const chatResponse = await chatAPI.createChatCompletion(
{

View File

@@ -0,0 +1,35 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, SplitData, Model } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
/* List the authenticated user's split-data records for a model whose textList is not empty */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const query = req.query as { modelId: string };
    const modelId = query.modelId;

    if (!modelId) {
      throw new Error('参数错误');
    }

    await connectToDatabase();

    // Resolve the user id from the request's authorization header.
    const userId = await authToken(req.headers.authorization);

    // Keep only records whose textList exists and has at least one entry.
    const filter = {
      userId,
      modelId,
      textList: { $exists: true, $not: { $size: 0 } }
    };
    const splitDataList = await SplitData.find(filter);

    jsonRes(res, { data: splitDataList });
  } catch (err) {
    jsonRes(res, { code: 500, error: err });
  }
}

View File

@@ -53,7 +53,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
textList
});
// generateQA();
generateQA();
jsonRes(res, {
data: { chunks, replaceText }

View File

@@ -25,14 +25,14 @@ const Empty = ({ intro }: { intro: string }) => {
<Box whiteSpace={'pre-line'}>{intro}</Box>
</Card>
)}
{/* version intro */}
<Card p={4} mb={10}>
<Markdown source={versionIntro} />
</Card>
<Card p={4}>
<Header></Header>
<Markdown source={chatProblem} />
</Card>
{/* version intro */}
<Card p={4}>
<Markdown source={versionIntro} />
</Card>
</Box>
);
};

View File

@@ -1,326 +0,0 @@
import React, { useEffect, useCallback, useState } from 'react';
import {
Box,
TableContainer,
Table,
Thead,
Tbody,
Tr,
Th,
Td,
IconButton,
Flex,
Button,
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
Checkbox,
CheckboxGroup,
ModalCloseButton,
useDisclosure,
Input,
Textarea,
Stack
} from '@chakra-ui/react';
import type { ModelSchema } from '@/types/mongoSchema';
import { ModelDataSchema } from '@/types/mongoSchema';
import { ModelDataStatusMap } from '@/constants/model';
import { usePaging } from '@/hooks/usePaging';
import ScrollData from '@/components/ScrollData';
import {
getModelDataList,
postModelDataInput,
postModelDataSelect,
delOneModelData,
putModelDataById
} from '@/api/model';
import { getDataList } from '@/api/data';
import { DeleteIcon } from '@chakra-ui/icons';
import { useForm, useFieldArray } from 'react-hook-form';
import { useToast } from '@/hooks/useToast';
import { useQuery } from '@tanstack/react-query';
import { customAlphabet } from 'nanoid';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
type FormData = { text: string; q: { val: string }[] };
type TabType = 'input' | 'select';
const defaultValues = {
text: '',
q: [{ val: '' }]
};
const ModelDataCard = ({ model }: { model: ModelSchema }) => {
const {
nextPage,
isLoadAll,
requesting,
data: modelDataList,
total,
setData,
getData
} = usePaging<ModelDataSchema>({
api: getModelDataList,
pageSize: 20,
params: {
modelId: model._id
}
});
const { toast } = useToast();
const {
isOpen: isOpenImportModal,
onOpen: onOpenImportModal,
onClose: onCloseImportModal
} = useDisclosure();
const { register, handleSubmit, reset, control } = useForm<FormData>({
defaultValues
});
const {
fields: inputQ,
append: appendQ,
remove: removeQ
} = useFieldArray({
control,
name: 'q'
});
const importDataTypes: { id: TabType; label: string }[] = [
{ id: 'input', label: '手动输入' },
{ id: 'select', label: '数据集导入' }
];
const [importDataType, setImportDataType] = useState<TabType>(importDataTypes[0].id);
const [importing, setImporting] = useState(false);
// Persist an edited answer text for one data item.
// The request is awaited and failures are caught here, because callers fire this
// without awaiting it and a rejection would otherwise go unhandled.
const updateAnswer = useCallback(async (dataId: string, text: string) => {
  try {
    await putModelDataById({
      dataId,
      text
    });
  } catch (err) {
    console.log(err);
  }
}, []);
const { data: dataList = [] } = useQuery(['getDataList'], getDataList);
const [selectDataId, setSelectDataId] = useState<string[]>([]);
// Submit handler of the import modal.
// - 'input' tab: posts the typed knowledge text with its questions (each question gets a nanoid).
// - 'select' tab: posts the ids of the checked data sets.
// On success it shows a toast, closes the modal, reloads the first page and resets the form.
const sureImportData = useCallback(
  async (e: FormData) => {
    setImporting(true);

    try {
      if (importDataType === 'input') {
        await postModelDataInput({
          modelId: model._id,
          data: [
            {
              text: e.text,
              q: e.q.map((item) => ({
                id: nanoid(),
                text: item.val
              }))
            }
          ]
        });
      } else if (importDataType === 'select') {
        await postModelDataSelect(model._id, selectDataId);
      }

      toast({
        title: '导入数据成功,需要一段时间训练',
        status: 'success'
      });
      onCloseImportModal();
      getData(1, true);
      reset(defaultValues);
    } catch (err) {
      console.log(err);
    } finally {
      // Always release the loading state, whether the import succeeded or failed.
      setImporting(false);
    }
  },
  // selectDataId must be listed: without it the callback keeps the selection from an
  // earlier render and submits stale (possibly empty) ids.
  [getData, importDataType, model._id, onCloseImportModal, reset, selectDataId, toast]
);
return (
<>
<Flex>
<Box fontWeight={'bold'} fontSize={'lg'} flex={1}>
: {total}
</Box>
<Button size={'sm'} onClick={onOpenImportModal}>
</Button>
</Flex>
<ScrollData
h={'100%'}
px={6}
mt={3}
isLoadAll={isLoadAll}
requesting={requesting}
nextPage={nextPage}
>
<TableContainer mt={4}>
<Table variant={'simple'}>
<Thead>
<Tr>
<Th>Question</Th>
<Th>Text</Th>
<Th>Status</Th>
<Th></Th>
</Tr>
</Thead>
<Tbody>
{modelDataList.map((item) => (
<Tr key={item._id}>
<Td>
{item.q.map((item, i) => (
<Box
key={item.id}
fontSize={'xs'}
maxW={'350px'}
whiteSpace={'pre-wrap'}
_notLast={{ mb: 1 }}
>
Q{i + 1}: {item.text}
</Box>
))}
</Td>
<Td w={'350px'}>
<Textarea
w={'100%'}
h={'100%'}
defaultValue={item.text}
fontSize={'xs'}
resize={'both'}
onBlur={(e) => {
const oldVal = modelDataList.find((data) => item._id === data._id)?.text;
if (oldVal !== e.target.value) {
updateAnswer(item._id, e.target.value);
setData((state) =>
state.map((data) => ({
...data,
text: data._id === item._id ? e.target.value : data.text
}))
);
}
}}
></Textarea>
</Td>
<Td>{ModelDataStatusMap[item.status]}</Td>
<Td>
<IconButton
icon={<DeleteIcon />}
variant={'outline'}
colorScheme={'gray'}
aria-label={'delete'}
size={'sm'}
onClick={async () => {
delOneModelData(item._id);
setData((state) => state.filter((data) => data._id !== item._id));
}}
/>
</Td>
</Tr>
))}
</Tbody>
</Table>
</TableContainer>
</ScrollData>
<Modal isOpen={isOpenImportModal} onClose={onCloseImportModal}>
<ModalOverlay />
<ModalContent maxW={'min(900px, 90vw)'} maxH={'80vh'} position={'relative'}>
<Flex alignItems={'center'}>
<ModalHeader whiteSpace={'nowrap'}></ModalHeader>
<Box>
{importDataTypes.map((item) => (
<Button
key={item.id}
size={'sm'}
mr={5}
variant={item.id === importDataType ? 'solid' : 'outline'}
onClick={() => setImportDataType(item.id)}
>
{item.label}
</Button>
))}
</Box>
</Flex>
<ModalCloseButton />
<Box px={6} pb={2} overflowY={'auto'}>
{importDataType === 'input' && (
<>
<Box mb={2}>:</Box>
<Textarea
mb={4}
placeholder="知识点"
rows={3}
maxH={'200px'}
{...register(`text`, {
required: '知识点'
})}
/>
{inputQ.map((item, index) => (
<Box key={item.id} mb={5}>
<Box mb={2}>{index + 1}:</Box>
<Flex>
<Input
placeholder="问法"
{...register(`q.${index}.val`, {
required: '问法不能为空'
})}
></Input>
{inputQ.length > 1 && (
<IconButton
icon={<DeleteIcon />}
aria-label={'delete'}
colorScheme={'gray'}
variant={'unstyled'}
onClick={() => removeQ(index)}
/>
)}
</Flex>
</Box>
))}
</>
)}
{importDataType === 'select' && (
<CheckboxGroup colorScheme="blue" onChange={(e) => setSelectDataId(e as string[])}>
{dataList.map((item) => (
<Box mb={2} key={item._id}>
<Checkbox value={item._id}>
<Box fontWeight={'bold'} as={'span'}>
{item.name}
</Box>
<Box as={'span'} ml={2} fontSize={'sm'}>
({item.totalData})
</Box>
</Checkbox>
</Box>
))}
</CheckboxGroup>
)}
</Box>
<Flex px={6} pt={2} pb={4}>
{importDataType === 'input' && (
<Button
alignSelf={'flex-start'}
variant={'outline'}
onClick={() => appendQ({ val: '' })}
>
</Button>
)}
<Box flex={1}></Box>
<Button variant={'outline'} mr={3} onClick={onCloseImportModal}>
</Button>
<Button isLoading={importing} onClick={handleSubmit(sureImportData)}>
</Button>
</Flex>
</ModalContent>
</Modal>
</>
);
};
export default ModelDataCard;

View File

@@ -21,13 +21,18 @@ import {
import type { ModelSchema } from '@/types/mongoSchema';
import { ModelDataSchema } from '@/types/mongoSchema';
import { ModelDataStatusMap } from '@/constants/model';
import { usePaging } from '@/hooks/usePaging';
import ScrollData from '@/components/ScrollData';
import { getModelDataList, delOneModelData, putModelDataById } from '@/api/model';
import { usePagination } from '@/hooks/usePagination';
import {
getModelDataList,
delOneModelData,
putModelDataById,
getModelSplitDataList
} from '@/api/model';
import { DeleteIcon, RepeatIcon } from '@chakra-ui/icons';
import { useToast } from '@/hooks/useToast';
import { useLoading } from '@/hooks/useLoading';
import dynamic from 'next/dynamic';
import { useQuery } from '@tanstack/react-query';
const InputModel = dynamic(() => import('./InputDataModal'));
const SelectModel = dynamic(() => import('./SelectFileModal'));
@@ -37,16 +42,15 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
const { Loading } = useLoading();
const {
nextPage,
isLoadAll,
requesting,
data: modelDataList,
isLoading,
Pagination,
total,
setData,
getData
} = usePaging<ModelDataSchema>({
getData,
pageNum
} = usePagination<ModelDataSchema>({
api: getModelDataList,
pageSize: 20,
pageSize: 10,
params: {
modelId: model._id
}
@@ -77,6 +81,18 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
onClose: onCloseSelectModal
} = useDisclosure();
// Load the split-data records of the current model.
// The model id is part of the query key so each model has its own cache entry;
// with a static key, results cached for one model could be shown for another.
const { data, refetch } = useQuery(['getModelSplitDataList', model._id], () =>
  getModelSplitDataList(model._id)
);
// Reload the model data table at page `num` (defaults to page 1) and re-run the split-data query.
const refetchData = useCallback(
(num = 1) => {
getData(num);
refetch();
},
[getData, refetch]
);
return (
<>
<Flex>
@@ -91,7 +107,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
aria-label={'refresh'}
variant={'outline'}
mr={4}
onClick={() => getData(1, true)}
onClick={() => refetchData(pageNum)}
/>
<Menu>
<MenuButton as={Button}></MenuButton>
@@ -101,16 +117,9 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
</MenuList>
</Menu>
</Flex>
<ScrollData
h={'100%'}
px={6}
mt={3}
isLoadAll={isLoadAll}
requesting={requesting}
nextPage={nextPage}
position={'relative'}
>
<TableContainer mt={4}>
{data && data.length > 0 && <Box fontSize={'xs'}>{data.length}...</Box>}
<Box mt={4}>
<TableContainer h={'600px'} overflowY={'auto'}>
<Table variant={'simple'}>
<Thead>
<Tr>
@@ -150,12 +159,6 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
const oldVal = modelDataList.find((data) => item._id === data._id)?.text;
if (oldVal !== e.target.value) {
updateAnswer(item._id, e.target.value);
setData((state) =>
state.map((data) => ({
...data,
text: data._id === item._id ? e.target.value : data.text
}))
);
}
}}
></Textarea>
@@ -170,7 +173,7 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
size={'sm'}
onClick={async () => {
  // Wait for the deletion to finish before updating the list; otherwise the refresh
  // can run first and still return the row that is being deleted.
  await delOneModelData(item._id);
  setData((state) => state.filter((data) => data._id !== item._id));
  refetchData(pageNum);
}}
/>
</Td>
@@ -179,21 +182,17 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
</Tbody>
</Table>
</TableContainer>
<Loading loading={requesting} fixed={false} />
</ScrollData>
<Box mt={2} textAlign={'end'}>
<Pagination />
</Box>
</Box>
<Loading loading={isLoading} fixed={false} />
{isOpenInputModal && (
<InputModel
modelId={model._id}
onClose={onCloseInputModal}
onSuccess={() => getData(1, true)}
/>
<InputModel modelId={model._id} onClose={onCloseInputModal} onSuccess={refetchData} />
)}
{isOpenSelectModal && (
<SelectModel
modelId={model._id}
onClose={onCloseSelectModal}
onSuccess={() => getData(1, true)}
/>
<SelectModel modelId={model._id} onClose={onCloseSelectModal} onSuccess={refetchData} />
)}
</>
);

View File

@@ -37,7 +37,7 @@ const SelectFileModal = ({
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
const [fileText, setFileText] = useState('');
// Confirmation dialog shown before a file import starts.
const { openConfirm, ConfirmChild } = useConfirm({
content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!'
content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务将被终止。'
});
const onSelectFile = useCallback(

View File

@@ -11,8 +11,7 @@ import { useGlobalStore } from '@/store/global';
import { useScreen } from '@/hooks/useScreen';
import ModelEditForm from './components/ModelEditForm';
import { useQuery } from '@tanstack/react-query';
// import dynamic from 'next/dynamic';
import ModelDataCard from './components/ModelDataCard';
import dynamic from 'next/dynamic';
const ModelDataCard = dynamic(() => import('./components/ModelDataCard'));
@@ -251,7 +250,6 @@ const ModelDetail = ({ modelId }: { modelId: string }) => {
{canTrain && model._id && (
<Card
p={4}
height={'700px'}
{...media(
{
gridColumnStart: 1,