feat: 摘要拆分

This commit is contained in:
archer
2023-03-26 22:09:59 +08:00
parent 888642f154
commit 3e4487ad9a
20 changed files with 397 additions and 83 deletions

View File

@@ -3,12 +3,13 @@ import { RequestPaging } from '../types/index';
import { Obj2Query } from '@/utils/tools'; import { Obj2Query } from '@/utils/tools';
import type { DataListItem } from '@/types/data'; import type { DataListItem } from '@/types/data';
import type { PagingData } from '../types/index'; import type { PagingData } from '../types/index';
import { DataItemSchema } from '@/types/mongoSchema'; import type { DataItemSchema } from '@/types/mongoSchema';
import type { CreateDataProps } from '@/pages/data/components/CreateDataModal';
export const getDataList = (data: RequestPaging) => export const getDataList = (data: RequestPaging) =>
GET<PagingData<DataListItem>>(`/data/getDataList?${Obj2Query(data)}`); GET<PagingData<DataListItem>>(`/data/getDataList?${Obj2Query(data)}`);
export const postData = (name: string) => POST<string>(`/data/postData?name=${name}`); export const postData = (data: CreateDataProps) => POST<string>(`/data/postData`, data);
export const postSplitData = (dataId: string, text: string) => export const postSplitData = (dataId: string, text: string) =>
POST(`/data/splitData`, { dataId, text }); POST(`/data/splitData`, { dataId, text });

6
src/constants/data.ts Normal file
View File

@@ -0,0 +1,6 @@
import type { DataType } from '@/types/data';
// Human-readable (Chinese) display label for every dataset type.
// Typed as Record<DataType, string>, so each member of DataType must have an entry.
export const DataTypeTextMap: Record<DataType, string> = {
// 'QA': label for datasets that are split into question/answer pairs
QA: '问答拆分',
// 'abstract': label for datasets that are condensed into abstracts (summaries)
abstract: '摘要总结'
};

View File

@@ -1,6 +1,8 @@
export enum BillTypeEnum { export enum BillTypeEnum {
chat = 'chat', chat = 'chat',
splitData = 'splitData', splitData = 'splitData',
QA = 'QA',
abstract = 'abstract',
return = 'return' return = 'return'
} }
export enum PageTypeEnum { export enum PageTypeEnum {
@@ -11,6 +13,8 @@ export enum PageTypeEnum {
export const BillTypeMap: Record<`${BillTypeEnum}`, string> = { export const BillTypeMap: Record<`${BillTypeEnum}`, string> = {
[BillTypeEnum.chat]: '对话', [BillTypeEnum.chat]: '对话',
[BillTypeEnum.splitData]: '文本拆分', [BillTypeEnum.splitData]: 'QA拆分',
[BillTypeEnum.QA]: 'QA拆分',
[BillTypeEnum.abstract]: '摘要总结',
[BillTypeEnum.return]: '退款' [BillTypeEnum.return]: '退款'
}; };

View File

@@ -2,11 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response'; import { jsonRes } from '@/service/response';
import { connectToDatabase, Data } from '@/service/mongo'; import { connectToDatabase, Data } from '@/service/mongo';
import { authToken } from '@/service/utils/tools'; import { authToken } from '@/service/utils/tools';
import type { DataType } from '@/types/data';
export default async function handler(req: NextApiRequest, res: NextApiResponse) { export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try { try {
let { name } = req.query as { name: string }; let { name, type } = req.body as { name: string; type: DataType };
if (!name) { if (!name || !type) {
throw new Error('参数错误'); throw new Error('参数错误');
} }
await connectToDatabase(); await connectToDatabase();
@@ -18,7 +19,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// 生成 data 集合 // 生成 data 集合
const data = await Data.create({ const data = await Data.create({
userId, userId,
name name,
type
}); });
jsonRes(res, { jsonRes(res, {

View File

@@ -1,9 +1,11 @@
import type { NextApiRequest, NextApiResponse } from 'next'; import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response'; import { jsonRes } from '@/service/response';
import { connectToDatabase, Data, DataItem } from '@/service/mongo'; import { connectToDatabase, DataItem, Data } from '@/service/mongo';
import { authToken } from '@/service/utils/tools'; import { authToken } from '@/service/utils/tools';
import { generateQA } from '@/service/events/generateQA'; import { generateQA } from '@/service/events/generateQA';
import { generateAbstract } from '@/service/events/generateAbstract';
/* 拆分数据成QA */
export default async function handler(req: NextApiRequest, res: NextApiResponse) { export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try { try {
let { text, dataId } = req.body as { text: string; dataId: string }; let { text, dataId } = req.body as { text: string; dataId: string };
@@ -17,14 +19,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const userId = await authToken(authorization); const userId = await authToken(authorization);
const DataRecord = await Data.findById(dataId);
if (!DataRecord) {
throw new Error('找不到数据集');
}
const dataItems: any[] = []; const dataItems: any[] = [];
// 格式化文本长度 // 每 1000 字符一组
for (let i = 0; i <= text.length / 1000; i++) { for (let i = 0; i <= text.length / 1000; i++) {
dataItems.push({ dataItems.push({
temperature: 0,
userId, userId,
dataId, dataId,
type: DataRecord.type,
text: text.slice(i * 1000, (i + 1) * 1000), text: text.slice(i * 1000, (i + 1) * 1000),
status: 1 status: 1
}); });
@@ -33,10 +41,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// 批量插入数据 // 批量插入数据
await DataItem.insertMany(dataItems); await DataItem.insertMany(dataItems);
generateQA(); try {
generateQA();
generateAbstract();
} catch (error) {
error;
}
jsonRes(res, { jsonRes(res, {
data: dataItems.length data: ''
}); });
} catch (err) { } catch (err) {
jsonRes(res, { jsonRes(res, {

View File

@@ -13,14 +13,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
// await DataItem.updateMany( // await DataItem.updateMany(
// {}, // {},
// { // {
// times: 2 // type: 'QA'
// // times: 2
// } // }
// ); // );
await Data.updateMany( await Data.updateMany(
{}, {},
{ {
isDeleted: false type: 'QA'
} }
); );

View File

@@ -8,10 +8,21 @@ import {
ModalBody, ModalBody,
ModalCloseButton, ModalCloseButton,
Button, Button,
Input Input,
Select,
FormControl,
FormErrorMessage
} from '@chakra-ui/react'; } from '@chakra-ui/react';
import { postData } from '@/api/data'; import { postData } from '@/api/data';
import { useMutation } from '@tanstack/react-query'; import { useMutation } from '@tanstack/react-query';
import { useForm, SubmitHandler } from 'react-hook-form';
import { DataType } from '@/types/data';
import { DataTypeTextMap } from '@/constants/data';
export interface CreateDataProps {
name: string;
type: DataType;
}
const CreateDataModal = ({ const CreateDataModal = ({
onClose, onClose,
@@ -21,9 +32,20 @@ const CreateDataModal = ({
onSuccess: () => void; onSuccess: () => void;
}) => { }) => {
const [inputVal, setInputVal] = useState(''); const [inputVal, setInputVal] = useState('');
const {
getValues,
register,
handleSubmit,
formState: { errors }
} = useForm<CreateDataProps>({
defaultValues: {
name: '',
type: 'abstract'
}
});
const { isLoading, mutate } = useMutation({ const { isLoading, mutate } = useMutation({
mutationFn: (name: string) => postData(name), mutationFn: (e: CreateDataProps) => postData(e),
onSuccess() { onSuccess() {
onSuccess(); onSuccess();
onClose(); onClose();
@@ -37,23 +59,33 @@ const CreateDataModal = ({
<ModalHeader></ModalHeader> <ModalHeader></ModalHeader>
<ModalCloseButton /> <ModalCloseButton />
<ModalBody display={'flex'}> <ModalBody>
<Input <FormControl mb={8} isInvalid={!!errors.name}>
value={inputVal} <Input
onChange={(e) => setInputVal(e.target.value)} placeholder="数据集名称"
placeholder={'数据集名称'} {...register('name', {
></Input> required: '数据集名称不能为空'
})}
/>
<FormErrorMessage position={'absolute'} fontSize="xs">
{!!errors.name && errors.name.message}
</FormErrorMessage>
</FormControl>
<FormControl>
<Select placeholder="数据集类型" {...register('type', {})}>
{Object.entries(DataTypeTextMap).map(([key, value]) => (
<option key={key} value={key}>
{value}
</option>
))}
</Select>
</FormControl>
</ModalBody> </ModalBody>
<ModalFooter> <ModalFooter>
<Button colorScheme={'gray'} onClick={onClose}> <Button colorScheme={'gray'} onClick={onClose}>
</Button> </Button>
<Button <Button ml={3} isLoading={isLoading} onClick={handleSubmit(mutate as any)}>
ml={3}
isDisabled={inputVal === ''}
isLoading={isLoading}
onClick={() => mutate(inputVal)}
>
</Button> </Button>
</ModalFooter> </ModalFooter>

View File

@@ -22,6 +22,7 @@ import { useToast } from '@/hooks/useToast';
import { useLoading } from '@/hooks/useLoading'; import { useLoading } from '@/hooks/useLoading';
import { formatPrice } from '@/utils/user'; import { formatPrice } from '@/utils/user';
import { modelList, ChatModelNameEnum } from '@/constants/model'; import { modelList, ChatModelNameEnum } from '@/constants/model';
import { encode, decode } from 'gpt-token-utils';
const fileExtension = '.txt,.doc,.docx,.pdf,.md'; const fileExtension = '.txt,.doc,.docx,.pdf,.md';
@@ -106,6 +107,7 @@ const ImportDataModal = ({
.join('\n') .join('\n')
.replace(/\n+/g, '\n'); .replace(/\n+/g, '\n');
setFileText(fileTexts); setFileText(fileTexts);
console.log(encode(fileTexts));
} catch (error: any) { } catch (error: any) {
console.log(error); console.log(error);
toast({ toast({
@@ -161,7 +163,9 @@ const ImportDataModal = ({
placeholder={'请粘贴或输入需要处理的文本'} placeholder={'请粘贴或输入需要处理的文本'}
onChange={(e) => setTextInput(e.target.value)} onChange={(e) => setTextInput(e.target.value)}
/> />
<Box mt={2}> {textInput.length} </Box> <Box mt={2}>
{textInput.length} {encode(textInput).length} tokens
</Box>
</> </>
)} )}
{activeTab === 'doc' && ( {activeTab === 'doc' && (
@@ -174,12 +178,15 @@ const ImportDataModal = ({
border={'1px solid '} border={'1px solid '}
borderColor={'blackAlpha.200'} borderColor={'blackAlpha.200'}
borderRadius={'md'} borderRadius={'md'}
fontSize={'sm'}
> >
<Button onClick={onOpen}></Button> <Button onClick={onOpen}></Button>
<Box mt={2}> {fileExtension} </Box> <Box mt={2}> {fileExtension} </Box>
{fileText && ( {fileText && (
<> <>
<Box mt={2}> {fileText.length} </Box> <Box mt={2}>
{fileText.length} {encode(fileText).length} tokens
</Box>
<Box <Box
maxH={'300px'} maxH={'300px'}
w={'100%'} w={'100%'}

View File

@@ -22,7 +22,7 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
return ( return (
<Card py={4} h={'100%'} display={'flex'} flexDirection={'column'}> <Card py={4} h={'100%'} display={'flex'} flexDirection={'column'}>
<Box px={6} fontSize={'xl'} fontWeight={'bold'}> <Box px={6} fontSize={'xl'} fontWeight={'bold'}>
{dataName} {dataName}
</Box> </Box>
<ScrollData <ScrollData
flex={'1 0 0'} flex={'1 0 0'}
@@ -38,8 +38,13 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
<Box key={item._id}> <Box key={item._id}>
{item.result.map((result, i) => ( {item.result.map((result, i) => (
<Box key={i} mb={3}> <Box key={i} mb={3}>
<Box fontWeight={'bold'}>Q: {result.q}</Box> {item.type === 'QA' && (
<Box>A: {result.a}</Box> <>
<Box fontWeight={'bold'}>Q: {result.q}</Box>
<Box>A: {result.a}</Box>
</>
)}
{item.type === 'abstract' && <Box fontSize={'sm'}>{result.abstract}</Box>}
</Box> </Box>
))} ))}
</Box> </Box>

View File

@@ -28,13 +28,14 @@ import { useRouter } from 'next/router';
import { useConfirm } from '@/hooks/useConfirm'; import { useConfirm } from '@/hooks/useConfirm';
import { useRequest } from '@/hooks/useRequest'; import { useRequest } from '@/hooks/useRequest';
import { DataItemSchema } from '@/types/mongoSchema'; import { DataItemSchema } from '@/types/mongoSchema';
import { DataTypeTextMap } from '@/constants/data';
import { customAlphabet } from 'nanoid'; import { customAlphabet } from 'nanoid';
const nanoid = customAlphabet('.,', 1); const nanoid = customAlphabet('.,', 1);
const CreateDataModal = dynamic(() => import('./components/CreateDataModal')); const CreateDataModal = dynamic(() => import('./components/CreateDataModal'));
const ImportDataModal = dynamic(() => import('./components/ImportDataModal')); const ImportDataModal = dynamic(() => import('./components/ImportDataModal'));
export type ExportDataType = 'jsonl'; export type ExportDataType = 'jsonl' | 'txt';
const DataList = () => { const DataList = () => {
const router = useRouter(); const router = useRouter();
@@ -84,21 +85,26 @@ const DataList = () => {
let text = ''; let text = '';
// 生成 jsonl // 生成 jsonl
data.forEach((item) => { data.forEach((item) => {
const result = JSON.stringify({ if (res.type === 'jsonl' && item.q && item.a) {
prompt: `${item.q.toLocaleLowerCase()}${nanoid()}</s>`, const result = JSON.stringify({
completion: ` ${item.a}###` prompt: `${item.q.toLocaleLowerCase()}${nanoid()}</s>`,
}); completion: ` ${item.a}###`
text += `${result}\n`; });
text += `${result}\n`;
} else if (res.type === 'txt' && item.abstract) {
text += `${item.abstract}\n`;
}
}); });
// 去掉最后一个 \n // 去掉最后一个 \n
text = text.substring(0, text.length - 1); text = text.substring(0, text.length - 1);
// 导出为文件 // 导出为文件
const blob = new Blob([text], { type: 'application/json;charset=utf-8' }); const blob = new Blob([text], { type: 'application/json;charset=utf-8' });
// 创建下载链接 // 创建下载链接
const downloadLink = document.createElement('a'); const downloadLink = document.createElement('a');
downloadLink.href = window.URL.createObjectURL(blob); downloadLink.href = window.URL.createObjectURL(blob);
downloadLink.download = 'file.jsonl'; downloadLink.download = `data.${res.type}`;
// 添加链接到页面并触发下载 // 添加链接到页面并触发下载
document.body.appendChild(downloadLink); document.body.appendChild(downloadLink);
@@ -138,6 +144,7 @@ const DataList = () => {
<Thead> <Thead>
<Tr> <Tr>
<Th></Th> <Th></Th>
<Th></Th>
<Th></Th> <Th></Th>
<Th> / </Th> <Th> / </Th>
<Th></Th> <Th></Th>
@@ -158,6 +165,7 @@ const DataList = () => {
}} }}
/> />
</Td> </Td>
<Td>{DataTypeTextMap[item.type || 'QA']}</Td>
<Td>{dayjs(item.createTime).format('YYYY/MM/DD HH:mm')}</Td> <Td>{dayjs(item.createTime).format('YYYY/MM/DD HH:mm')}</Td>
<Td> <Td>
{item.trainingData} / {item.totalData} {item.trainingData} / {item.totalData}
@@ -187,9 +195,18 @@ const DataList = () => {
</MenuButton> </MenuButton>
<MenuList> <MenuList>
<MenuItem onClick={() => handleExportData({ data: item, type: 'jsonl' })}> {item.type === 'QA' && (
jsonl <MenuItem
</MenuItem> onClick={() => handleExportData({ data: item, type: 'jsonl' })}
>
jsonl
</MenuItem>
)}
{item.type === 'abstract' && (
<MenuItem onClick={() => handleExportData({ data: item, type: 'txt' })}>
txt
</MenuItem>
)}
</MenuList> </MenuList>
</Menu> </Menu>

View File

@@ -97,7 +97,7 @@ const ModelEditForm = ({ formHooks }: { formHooks: UseFormReturn<ModelSchema> })
<Box mb={1}></Box> <Box mb={1}></Box>
<Textarea <Textarea
rows={6} rows={6}
maxLength={500} maxLength={-1}
{...register('systemPrompt')} {...register('systemPrompt')}
placeholder={ placeholder={
'模型默认的 prompt 词,通过调整该内容,可以生成一个限定范围的模型。\n\n注意改功能会影响对话的整体朝向' '模型默认的 prompt 词,通过调整该内容,可以生成一个限定范围的模型。\n\n注意改功能会影响对话的整体朝向'

View File

@@ -0,0 +1,177 @@
import { DataItem } from '@/service/mongo';
import { getOpenAIApi } from '@/service/utils/chat';
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
import type { ChatCompletionRequestMessage } from 'openai';
import { DataItemSchema } from '@/types/mongoSchema';
import { ChatModelNameEnum } from '@/constants/model';
import { pushSplitDataBill } from '@/service/events/pushBill';
/**
 * Processes pending `abstract` data items one at a time.
 *
 * Each run picks a single DataItem of type 'abstract' that is not idle (status != 0) and
 * still has attempts left (times > 0), asks the chat model for abstracts at two
 * temperatures, embeds every extracted abstract, stores the results and bills the user when
 * the system key was used. When the run ends (successfully or not) the function schedules
 * itself again, without awaiting, until no matching item is left.
 *
 * @param next - `true` when invoked as the continuation of a running chain; bypasses the
 *   "already running" guard stored in `global.generatingAbstract`.
 * @returns A promise that settles when the current item has been handled. The follow-up run
 *   is fired without being awaited.
 */
export async function generateAbstract(next = false): Promise<any> {
  // Only one chain may be active; external callers (next = false) bail out while it runs.
  if (global.generatingAbstract && !next) return;
  global.generatingAbstract = true;

  const systemPrompt: ChatCompletionRequestMessage = {
    role: 'system',
    content: `我会向你发送一段长文本请从中总结出3~10个摘要尽量详细请按以下格式返回: "(1):"\n"(2):"\n"(3):"\n`
  };

  let dataItem: DataItemSchema | null = null;

  try {
    // Find one item that still needs abstracts generated.
    dataItem = await DataItem.findOne({
      status: { $ne: 0 },
      times: { $gt: 0 },
      type: 'abstract'
    });

    if (!dataItem) {
      console.log('没有需要生成 【摘要】 的数据');
      // Nothing left to do: release the guard and stop the chain.
      global.generatingAbstract = false;
      return;
    }

    // Mark the item as "generating".
    await DataItem.findByIdAndUpdate(dataItem._id, {
      status: 2
    });

    // Resolve the OpenAI key (the user's own key or the system key).
    let userApiKey, systemKey;
    try {
      const key = await getOpenApiKey(dataItem.userId);
      userApiKey = key.userApiKey;
      systemKey = key.systemKey;
    } catch (error) {
      // No usable key (e.g. insufficient balance): set all of this user's records to idle.
      // The filter and the update must be separate arguments; passing a single object
      // would only filter on `status: 0` and update nothing.
      await DataItem.updateMany(
        {
          userId: dataItem.userId
        },
        {
          status: 0
        }
      );
      throw new Error('获取 openai key 失败');
    }

    console.log('正在生成一组摘要, ID:', dataItem._id);

    const startTime = Date.now();

    // OpenAI client instance.
    const chatAPI = getOpenAIApi(userApiKey || systemKey);

    // Request abstracts at two temperatures in parallel; individual failures are tolerated.
    const abstractResponse = await Promise.allSettled(
      [0.5, 1].map((temperature) =>
        chatAPI.createChatCompletion(
          {
            model: ChatModelNameEnum.GPT35,
            temperature: temperature,
            n: 1,
            messages: [
              systemPrompt,
              {
                role: 'user',
                content: dataItem?.text || ''
              }
            ]
          },
          {
            timeout: 120000,
            httpsAgent
          }
        )
      )
    );

    // Keep only the requests that succeeded.
    const successAbstracts = abstractResponse.filter((item) => item.status === 'fulfilled');

    // If every request failed, treat the run as an error so the retry accounting in the
    // catch block applies, instead of marking the item finished with no result.
    if (successAbstracts.length === 0) {
      throw new Error('摘要生成请求全部失败');
    }

    // Raw text of each successful completion.
    const rawContents: string[] = successAbstracts.map(
      (item: any) => item?.value?.data.choices[0].message?.content || ''
    );

    // Extract the individual abstracts from the raw text.
    const splitContents = rawContents.map((content) => splitText(content)).flat();

    // Create an embedding vector for every abstract.
    const vectorResponse = await Promise.allSettled(
      splitContents.map((item) =>
        chatAPI.createEmbedding({
          model: 'text-embedding-ada-002',
          input: item.abstract
        })
      )
    );

    // Pair each abstract with its vector, dropping the ones whose embedding request failed.
    const vectorSuccessResponse = vectorResponse
      .map((item: any, i) => {
        if (item.status !== 'fulfilled') return '';
        return {
          abstract: splitContents[i].abstract,
          abstractVector: item?.value?.data?.data?.[0]?.embedding
        };
      })
      .filter((item) => item);

    // Persist the results and mark the item as finished.
    await DataItem.findByIdAndUpdate(dataItem._id, {
      status: 0,
      $push: {
        rawResponse: {
          $each: rawContents
        },
        result: {
          $each: vectorSuccessResponse
        }
      }
    });

    // Billing: only when the system key was used and at least one abstract was extracted.
    !userApiKey &&
      splitContents.length > 0 &&
      pushSplitDataBill({
        userId: dataItem.userId,
        type: 'abstract',
        text:
          systemPrompt.content +
          dataItem.text +
          rawContents.join('') +
          // Embedding price is 1/10 of gpt35, approximated by appending extra text.
          // NOTE(review): the slice length is derived from the source text length, not from
          // the embedded abstracts - confirm this is the intended approximation.
          rawContents.join('').substring(0, Math.floor(dataItem.text.length / 10))
      });

    console.log(
      '生成摘要成功time:',
      `${(Date.now() - startTime) / 1000}s`,
      '摘要数量:',
      splitContents.length
    );
  } catch (error: any) {
    console.log('error: 生成摘要错误', dataItem?._id);
    console.log('response:', error);

    if (dataItem?._id) {
      await DataItem.findByIdAndUpdate(dataItem._id, {
        // `times` is decremented below, so another attempt is only possible when more than
        // one attempt was left before this failure; otherwise the item goes idle instead of
        // staying in the waiting state with no attempts left.
        status: dataItem.times > 1 ? 1 : 0,
        $inc: {
          // One attempt consumed.
          times: -1
        }
      });
    }
  }

  // Continue with the next pending item (not awaited).
  generateAbstract(true);
}
/**
 * Extracts the numbered abstracts from a model reply.
 *
 * Every occurrence of "(<digits>):" starts an entry; the text captured after the marker
 * (and after any whitespace following it) up to the end of that line becomes one abstract.
 * Entries whose captured text is empty are skipped.
 *
 * @param text - Raw reply text.
 * @returns One `{ abstract }` object per non-empty entry, in order of appearance.
 */
function splitText(text: string) {
  const numberedEntry = /\(\d+\):(\s*)(.*)(\s*)/g;

  return Array.from(text.matchAll(numberedEntry), (hit) => hit[2])
    .filter((body) => Boolean(body))
    .map((body) => ({
      abstract: body as string
    }));
}

View File

@@ -20,7 +20,8 @@ export async function generateQA(next = false): Promise<any> {
// 找出一个需要生成的 dataItem // 找出一个需要生成的 dataItem
dataItem = await DataItem.findOne({ dataItem = await DataItem.findOne({
status: { $ne: 0 }, status: { $ne: 0 },
times: { $gt: 0 } times: { $gt: 0 },
type: 'QA'
}); });
if (!dataItem) { if (!dataItem) {
@@ -49,62 +50,72 @@ export async function generateQA(next = false): Promise<any> {
throw new Error('获取 openai key 失败'); throw new Error('获取 openai key 失败');
} }
console.log('正在生成一QA, ID:', dataItem._id, 'temperature: ', dataItem.temperature / 100); console.log('正在生成一QA, ID:', dataItem._id);
const startTime = Date.now(); const startTime = Date.now();
// 获取 openai 请求实例 // 获取 openai 请求实例
const chatAPI = getOpenAIApi(userApiKey || systemKey); const chatAPI = getOpenAIApi(userApiKey || systemKey);
// 请求 chatgpt 获取回答 // 请求 chatgpt 获取回答
const response = await chatAPI.createChatCompletion( const response = await Promise.allSettled(
{ [0, 0.5, 0.8].map((temperature) =>
model: ChatModelNameEnum.GPT35, chatAPI.createChatCompletion(
temperature: dataItem.temperature / 100,
n: 1,
messages: [
systemPrompt,
{ {
role: 'user', model: ChatModelNameEnum.GPT35,
content: dataItem.text temperature: temperature,
n: 1,
messages: [
systemPrompt,
{
role: 'user',
content: dataItem?.text || ''
}
]
},
{
timeout: 120000,
httpsAgent
} }
] )
}, )
{ );
timeout: 120000, // 过滤出成功的响应
httpsAgent const successResponse = response.filter((item) => item.status === 'fulfilled');
} // 提取响应内容
const rawContents: string[] = successResponse.map(
(item: any) => item?.value?.data.choices[0].message?.content || ''
); );
const content = response.data.choices[0].message?.content;
// 从 content 中提取 QA // 从 content 中提取 QA
const splitResponse = splitText(content || ''); const splitResponses = rawContents.map((content) => splitText(content)).flat();
// 插入数据库,并修改状态 // 插入数据库,并修改状态
await DataItem.findByIdAndUpdate(dataItem._id, { await DataItem.findByIdAndUpdate(dataItem._id, {
status: dataItem.temperature >= 90 ? 0 : 1, // 需要生成 4 组内容。0,0.3,0.6,0.9 status: 0,
temperature: dataItem.temperature >= 90 ? dataItem.temperature : dataItem.temperature + 30,
$push: { $push: {
rawResponse: content, rawResponse: {
$each: rawContents
},
result: { result: {
$each: splitResponse $each: splitResponses
} }
} }
}); });
// 计费 // 计费
!userApiKey && !userApiKey &&
splitResponse.length > 0 && splitResponses.length > 0 &&
pushSplitDataBill({ pushSplitDataBill({
userId: dataItem.userId, userId: dataItem.userId,
text: systemPrompt.content + dataItem.text + content type: 'QA',
text: systemPrompt.content + dataItem.text + rawContents.join('')
}); });
console.log( console.log(
'生成QA成功time:', '生成QA成功time:',
`${(Date.now() - startTime) / 1000}s`, `${(Date.now() - startTime) / 1000}s`,
'QA数量', 'QA数量',
splitResponse.length splitResponses.length
); );
} catch (error: any) { } catch (error: any) {
console.log('error: 生成QA错误', dataItem?._id); console.log('error: 生成QA错误', dataItem?._id);
console.log('response:', error?.response); console.log('response:', error?.response);
// 重置状态
if (dataItem?._id) { if (dataItem?._id) {
await DataItem.findByIdAndUpdate(dataItem._id, { await DataItem.findByIdAndUpdate(dataItem._id, {
status: dataItem.times > 0 ? 1 : 0, // 还有重试次数则可以继续进行 status: dataItem.times > 0 ? 1 : 0, // 还有重试次数则可以继续进行

View File

@@ -2,6 +2,7 @@ import { connectToDatabase, Bill, User } from '../mongo';
import { modelList, ChatModelNameEnum } from '@/constants/model'; import { modelList, ChatModelNameEnum } from '@/constants/model';
import { encode } from 'gpt-token-utils'; import { encode } from 'gpt-token-utils';
import { formatPrice } from '@/utils/user'; import { formatPrice } from '@/utils/user';
import type { DataType } from '@/types/data';
export const pushChatBill = async ({ export const pushChatBill = async ({
modelName, modelName,
@@ -59,7 +60,15 @@ export const pushChatBill = async ({
} }
}; };
export const pushSplitDataBill = async ({ userId, text }: { userId: string; text: string }) => { export const pushSplitDataBill = async ({
userId,
text,
type
}: {
userId: string;
text: string;
type: DataType;
}) => {
await connectToDatabase(); await connectToDatabase();
let billId; let billId;
@@ -83,7 +92,7 @@ export const pushSplitDataBill = async ({ userId, text }: { userId: string; text
// 插入 Bill 记录 // 插入 Bill 记录
const res = await Bill.create({ const res = await Bill.create({
userId, userId,
type: 'splitData', type,
modelName: ChatModelNameEnum.GPT35, modelName: ChatModelNameEnum.GPT35,
textLen: text.length, textLen: text.length,
tokenLen: tokens.length, tokenLen: tokens.length,

View File

@@ -1,5 +1,6 @@
import { Schema, model, models, Model } from 'mongoose'; import { Schema, model, models, Model } from 'mongoose';
import { DataItemSchema as Datatype } from '@/types/mongoSchema'; import { DataSchema as Datatype } from '@/types/mongoSchema';
import { DataTypeTextMap } from '@/constants/data';
const DataSchema = new Schema({ const DataSchema = new Schema({
userId: { userId: {
@@ -15,6 +16,11 @@ const DataSchema = new Schema({
type: Date, type: Date,
default: () => new Date() default: () => new Date()
}, },
type: {
type: String,
required: true,
enum: Object.keys(DataTypeTextMap)
},
isDeleted: { isDeleted: {
type: Boolean, type: Boolean,
default: false default: false

View File

@@ -1,5 +1,6 @@
import type { DataItemSchema as DataItemType } from '@/types/mongoSchema'; import type { DataItemSchema as DataItemType } from '@/types/mongoSchema';
import { Schema, model, models, Model } from 'mongoose'; import { Schema, model, models, Model } from 'mongoose';
import { DataTypeTextMap } from '@/constants/data';
const DataItemSchema = new Schema({ const DataItemSchema = new Schema({
userId: { userId: {
@@ -12,19 +13,23 @@ const DataItemSchema = new Schema({
ref: 'data', ref: 'data',
required: true required: true
}, },
type: {
type: String,
required: true,
enum: Object.keys(DataTypeTextMap)
},
times: { times: {
// 剩余重试次数
type: Number, type: Number,
default: 3 default: 3
}, },
text: { text: {
// 文本内容
type: String, type: String,
required: true required: true
}, },
temperature: {
type: Number,
required: true
},
rawResponse: { rawResponse: {
// 原始拆分结果
type: [String], type: [String],
default: [] default: []
}, },
@@ -33,11 +38,21 @@ const DataItemSchema = new Schema({
{ {
q: { q: {
type: String, type: String,
required: true default: ''
}, },
a: { a: {
type: String, type: String,
required: true default: ''
},
abstract: {
// 摘要
type: String,
default: ''
},
abstractVector: {
// 摘要对应的向量
type: [Number],
default: []
} }
} }
], ],

View File

@@ -1,5 +1,7 @@
import mongoose from 'mongoose'; import mongoose from 'mongoose';
import { generateQA } from './events/generateQA'; import { generateQA } from './events/generateQA';
import { generateAbstract } from './events/generateAbstract';
/** /**
* 连接 MongoDB 数据库 * 连接 MongoDB 数据库
*/ */
@@ -24,8 +26,8 @@ export async function connectToDatabase(): Promise<void> {
global.mongodb = null; global.mongodb = null;
} }
// 递归 QA 生成
generateQA(); generateQA();
generateAbstract();
} }
export * from './models/authCode'; export * from './models/authCode';

2
src/types/data.d.ts vendored
View File

@@ -1,5 +1,7 @@
import type { DataSchema } from './mongoSchema'; import type { DataSchema } from './mongoSchema';
export type DataType = 'QA' | 'abstract';
export interface DataListItem extends DataSchema { export interface DataListItem extends DataSchema {
trainingData: number; trainingData: number;
totalData: number; totalData: number;

View File

@@ -3,6 +3,7 @@ import type { Mongoose } from 'mongoose';
declare global { declare global {
var mongodb: Mongoose | string | null; var mongodb: Mongoose | string | null;
var generatingQA: boolean; var generatingQA: boolean;
var generatingAbstract: boolean;
var QRCode: any; var QRCode: any;
interface Window { interface Window {
['pdfjs-dist/build/pdf']: any; ['pdfjs-dist/build/pdf']: any;

View File

@@ -1,5 +1,6 @@
import type { ChatItemType } from './chat'; import type { ChatItemType } from './chat';
import { ModelStatusEnum, TrainingStatusEnum, ChatModelNameEnum } from '@/constants/model'; import { ModelStatusEnum, TrainingStatusEnum, ChatModelNameEnum } from '@/constants/model';
import type { DataType } from './data';
export type ServiceName = 'openai'; export type ServiceName = 'openai';
@@ -102,19 +103,21 @@ export interface DataSchema {
userId: string; userId: string;
name: string; name: string;
createTime: string; createTime: string;
type: DataType;
} }
export interface DataItemSchema { export interface DataItemSchema {
_id: string; _id: string;
userId: string; userId: string;
dataId: string; dataId: string;
type: DataType;
times: number; times: number;
temperature: number;
text: string; text: string;
rawResponse: string[]; rawResponse: string[];
result: { result: {
q: string; q?: string;
a: string; a?: string;
abstract?: string;
}[]; }[];
status: 0 | 1 | 2; status: 0 | 1 | 2;
} }