Files
FastGPT/src/service/events/generateQA.ts
2023-04-03 00:37:40 +08:00

159 lines
4.5 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { SplitData } from '@/service/mongo';
import { getOpenAIApi } from '@/service/utils/chat';
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
import type { ChatCompletionRequestMessage } from 'openai';
import { ChatModelNameEnum } from '@/constants/model';
import { pushSplitDataBill } from '@/service/events/pushBill';
import { generateVector } from './generateVector';
import { connectRedis } from '../redis';
import { VecModelDataPrefix } from '@/constants/redis';
import { customAlphabet } from 'nanoid';
// Generator for the random suffix of the Redis keys under which QA pairs are stored:
// 12 characters drawn from lowercase letters and digits.
const QA_ID_ALPHABET = 'abcdefghijklmnopqrstuvwxyz1234567890';
const QA_ID_LENGTH = 12;
const nanoid = customAlphabet(QA_ID_ALPHABET, QA_ID_LENGTH);
/**
 * One round of the background QA-generation loop.
 *
 * Picks one SplitData document whose `textList` is non-empty, sends the last
 * chunk of that list to the chat completion API, parses the reply with
 * `splitText`, stores every pair as a Redis hash with status `waiting`, pops
 * the processed chunk, records the bill, and then starts the next round
 * together with `generateVector`.
 *
 * `global.generatingQA` guards against a second concurrent loop: a call with
 * `next = false` returns immediately while the flag is set; the continuation
 * calls made from inside this function pass `next = true` to bypass the guard.
 * The flag is cleared only when no pending document is found.
 *
 * Errors are logged and never rethrown; the round is retried after 5 seconds.
 *
 * @param next - true when invoked as a continuation of the running loop.
 * @returns A promise that resolves with no value once this round has finished
 *          or a retry has been scheduled.
 */
export async function generateQA(next = false): Promise<any> {
  if (global.generatingQA && !next) return;
  global.generatingQA = true;

  try {
    const redis = await connectRedis();

    // Find one document that still has text waiting to be processed.
    const dataItem = await SplitData.findOne({
      textList: { $exists: true, $ne: [] }
    });

    if (!dataItem) {
      console.log('没有需要生成 QA 的数据');
      global.generatingQA = false;
      return;
    }

    // Source text: the last chunk of the list, i.e. the one `$pop: 1` removes.
    const text = dataItem.textList[dataItem.textList.length - 1];
    if (!text) {
      // Drop the invalid (empty) chunk so the next round does not hit it again.
      await SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } });
      throw new Error('无文本');
    }

    // Resolve the API key to use: the user's own key or the system key.
    let userApiKey, systemKey;
    try {
      const key = await getOpenApiKey(dataItem.userId, true);
      userApiKey = key.userApiKey;
      systemKey = key.systemKey;
    } catch (error: any) {
      if (error?.code === 501) {
        // Insufficient balance: clear the pending chunks and record the reason.
        await SplitData.findByIdAndUpdate(dataItem._id, {
          textList: [],
          errorText: error.message
        });
      }
      throw new Error('获取 openai key 失败');
    }

    console.log('正在生成一组QA, ID:', dataItem._id);

    const startTime = Date.now();

    // OpenAI client; the user's key takes precedence over the system key.
    const chatAPI = getOpenAIApi(userApiKey || systemKey);
    const systemPrompt: ChatCompletionRequestMessage = {
      role: 'system',
      content: `${
        dataItem.prompt || '下面是一段长文本'
      },请从中总结出5至30个问题和答案,答案尽量详细,并按以下格式返回: Q1:\nA1:\nQ2:\nA2:\n`
    };

    // Ask the chat model to produce the QA pairs for this chunk.
    const completion = await chatAPI.createChatCompletion(
      {
        model: ChatModelNameEnum.GPT35,
        temperature: 0.4,
        n: 1,
        messages: [
          systemPrompt,
          {
            role: 'user',
            content: text
          }
        ]
      },
      {
        timeout: 120000,
        httpsAgent
      }
    );
    // Raw model reply (kept for billing) and the QA pairs parsed out of it.
    const rawContent = completion?.data.choices[0].message?.content || '';
    const qaList = splitText(rawContent);

    const writeResults = await Promise.allSettled([
      // Remove the chunk that has just been processed.
      SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }),
      // Store each QA pair as a Redis hash.
      ...qaList.map((item) =>
        redis.sendCommand([
          'HMSET',
          `${VecModelDataPrefix}:${nanoid()}`,
          'userId',
          String(dataItem.userId),
          'modelId',
          String(dataItem.modelId),
          'q',
          item.q,
          'text',
          item.a,
          'status',
          'waiting'
        ])
      )
    ]);

    // allSettled never rejects, so failed writes must be surfaced explicitly;
    // otherwise lost QA pairs (or a chunk that was not popped) go unnoticed.
    for (const outcome of writeResults) {
      if (outcome.status === 'rejected') {
        console.log('保存QA数据失败:', outcome.reason);
      }
    }

    console.log(
      '生成QA成功time:',
      `${(Date.now() - startTime) / 1000}s`,
      'QA数量',
      qaList.length
    );

    // Billing: only charged when the system key was used and at least one pair was produced.
    pushSplitDataBill({
      isPay: !userApiKey && qaList.length > 0,
      userId: dataItem.userId,
      type: 'QA',
      text: systemPrompt.content + text + rawContent
    });

    // Continue with the next chunk and kick off vector generation; neither call is awaited.
    generateQA(true);
    generateVector(true);
  } catch (error: any) {
    console.log(error);
    console.log('生成QA错误:', error?.response);

    // Retry after 5 seconds; the flag stays set, so the retry passes next = true.
    setTimeout(() => {
      generateQA(true);
    }, 5000);
  }
}
/**
 * Extracts question/answer pairs from a model reply formatted as
 * `Q1: ... A1: ... Q2: ... A2: ...`.
 *
 * The question is the remainder of the line after `Q<n>:` (leading whitespace
 * and newlines are skipped). The answer is everything after `A<n>:` up to the
 * next `Q<n>:` marker or the end of the text, so answers may span several
 * lines. Both parts are trimmed; pairs with an empty question or answer are
 * dropped.
 *
 * @param text - raw reply text to parse.
 * @returns the pairs in the order they appear; an empty array when none match.
 */
function splitText(text: string): { q: string; a: string }[] {
  // Group 1: question (single line). Group 2: answer, lazily extended until the
  // lookahead sees the next question marker or the end of the input.
  const regex = /Q\d+:\s*(.*)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)/g;
  const result: { q: string; a: string }[] = [];

  for (const match of text.matchAll(regex)) {
    const q = (match[1] ?? '').trim();
    const a = (match[2] ?? '').trim();
    if (q && a) {
      result.push({ q, a });
    }
  }

  return result;
}