feat: 拆分数据自定义prompt

This commit is contained in:
archer
2023-04-03 00:37:40 +08:00
parent 16a31de1c7
commit 56ba6fa5f7
7 changed files with 40 additions and 14 deletions

View File

@@ -49,8 +49,8 @@ export const postModelDataInput = (data: {
data: { text: ModelDataSchema['text']; q: ModelDataSchema['q'] }[]; data: { text: ModelDataSchema['text']; q: ModelDataSchema['q'] }[];
}) => POST<number>(`/model/data/pushModelDataInput`, data); }) => POST<number>(`/model/data/pushModelDataInput`, data);
export const postModelDataFileText = (modelId: string, text: string) => export const postModelDataFileText = (data: { modelId: string; text: string; prompt: string }) =>
POST(`/model/data/splitData`, { modelId, text }); POST(`/model/data/splitData`, data);
export const postModelDataJsonData = ( export const postModelDataJsonData = (
modelId: string, modelId: string,

View File

@@ -118,7 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
prompts.unshift({ prompts.unshift({
obj: 'SYSTEM', obj: 'SYSTEM',
value: `${model.systemPrompt} 知识库内容: "${systemPrompt}"` value: `${model.systemPrompt} 知识库内容是最新的,知识库内容为: "${systemPrompt}"`
}); });
// 控制在 tokens 数量,防止超出 // 控制在 tokens 数量,防止超出

View File

@@ -8,8 +8,8 @@ import { encode } from 'gpt-token-utils';
/* 拆分数据成QA */ /* 拆分数据成QA */
export default async function handler(req: NextApiRequest, res: NextApiResponse) { export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try { try {
const { text, modelId } = req.body as { text: string; modelId: string }; const { text, modelId, prompt } = req.body as { text: string; modelId: string; prompt: string };
if (!text || !modelId) { if (!text || !modelId || !prompt) {
throw new Error('参数错误'); throw new Error('参数错误');
} }
await connectToDatabase(); await connectToDatabase();
@@ -62,7 +62,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
userId, userId,
modelId, modelId,
rawText: text, rawText: text,
textList textList,
prompt
}); });
generateQA(); generateQA();

View File

@@ -8,7 +8,8 @@ import {
ModalContent, ModalContent,
ModalHeader, ModalHeader,
ModalCloseButton, ModalCloseButton,
ModalBody ModalBody,
Input
} from '@chakra-ui/react'; } from '@chakra-ui/react';
import { useToast } from '@/hooks/useToast'; import { useToast } from '@/hooks/useToast';
import { useSelectFile } from '@/hooks/useSelectFile'; import { useSelectFile } from '@/hooks/useSelectFile';
@@ -34,6 +35,7 @@ const SelectFileModal = ({
}) => { }) => {
const [selecting, setSelecting] = useState(false); const [selecting, setSelecting] = useState(false);
const { toast } = useToast(); const { toast } = useToast();
const [prompt, setPrompt] = useState('');
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true }); const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
const [fileText, setFileText] = useState(''); const [fileText, setFileText] = useState('');
const { openConfirm, ConfirmChild } = useConfirm({ const { openConfirm, ConfirmChild } = useConfirm({
@@ -83,7 +85,11 @@ const SelectFileModal = ({
const { mutate, isLoading } = useMutation({ const { mutate, isLoading } = useMutation({
mutationFn: async () => { mutationFn: async () => {
if (!fileText) return; if (!fileText) return;
await postModelDataFileText(modelId, fileText); await postModelDataFileText({
modelId,
text: fileText,
prompt: `下面是${prompt || '一段长文本'}`
});
toast({ toast({
title: '导入数据成功,需要一段拆解和训练', title: '导入数据成功,需要一段拆解和训练',
status: 'success' status: 'success'
@@ -102,7 +108,7 @@ const SelectFileModal = ({
return ( return (
<Modal isOpen={true} onClose={onClose} isCentered> <Modal isOpen={true} onClose={onClose} isCentered>
<ModalOverlay /> <ModalOverlay />
<ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={['90vh', '70vh']}> <ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
<ModalHeader></ModalHeader> <ModalHeader></ModalHeader>
<ModalCloseButton /> <ModalCloseButton />
@@ -125,6 +131,17 @@ const SelectFileModal = ({
<Box mt={2}> <Box mt={2}>
{fileText.length} {encode(fileText).length} tokens {fileText.length} {encode(fileText).length} tokens
</Box> </Box>
<Flex w={'100%'} alignItems={'center'} my={4}>
<Box flex={'0 0 auto'} mr={2}>
</Box>
<Input
placeholder="提示词,例如: Laf的介绍/关于gpt4的论文/一段长文本"
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
size={'sm'}
/>
</Flex>
<Box <Box
flex={'1 0 0'} flex={'1 0 0'}
h={0} h={0}

View File

@@ -14,11 +14,6 @@ export async function generateQA(next = false): Promise<any> {
if (global.generatingQA && !next) return; if (global.generatingQA && !next) return;
global.generatingQA = true; global.generatingQA = true;
const systemPrompt: ChatCompletionRequestMessage = {
role: 'system',
content: `总结助手。我会向你发送一段长文本,请从中总结出5至30个问题和答案,答案请尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
};
try { try {
const redis = await connectRedis(); const redis = await connectRedis();
// 找出一个需要生成的 dataItem // 找出一个需要生成的 dataItem
@@ -63,6 +58,13 @@ export async function generateQA(next = false): Promise<any> {
// 获取 openai 请求实例 // 获取 openai 请求实例
const chatAPI = getOpenAIApi(userApiKey || systemKey); const chatAPI = getOpenAIApi(userApiKey || systemKey);
const systemPrompt: ChatCompletionRequestMessage = {
role: 'system',
content: `${
dataItem.prompt || '下面是一段长文本'
},请从中总结出5至30个问题和答案,答案尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
};
// 请求 chatgpt 获取回答 // 请求 chatgpt 获取回答
const response = await chatAPI const response = await chatAPI
.createChatCompletion( .createChatCompletion(

View File

@@ -8,6 +8,11 @@ const SplitDataSchema = new Schema({
ref: 'user', ref: 'user',
required: true required: true
}, },
prompt: {
// 拆分时的提示词
type: String,
required: true
},
modelId: { modelId: {
type: Schema.Types.ObjectId, type: Schema.Types.ObjectId,
ref: 'model', ref: 'model',

View File

@@ -69,6 +69,7 @@ export interface ModelSplitDataSchema {
userId: string; userId: string;
modelId: string; modelId: string;
rawText: string; rawText: string;
prompt: string;
errorText: string; errorText: string;
textList: string[]; textList: string[];
} }