perf: 加快拆分QA和生成向量;余额不足提醒

This commit is contained in:
archer
2023-04-05 20:37:37 +08:00
parent dc329041f3
commit fbbc32361b
8 changed files with 89 additions and 93 deletions

View File

@@ -1,7 +1,7 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, DataItem, Data } from '@/service/mongo';
import { connectToDatabase, SplitData } from '@/service/mongo';
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
@@ -10,20 +10,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
}
await connectToDatabase();
// await DataItem.updateMany(
// {},
// {
// type: 'QA'
// // times: 2
// }
// );
const data = await SplitData.aggregate([
{ $match: { textList: { $exists: true, $ne: [] } } },
{ $sample: { size: 1 } }
]);
await Data.updateMany(
{},
{
type: 'QA'
}
);
const dataItem: any = data[0];
const textList: string[] = dataItem.textList.slice(-5);
console.log(textList);
console.log(dataItem.textList.slice(0, -5));
await SplitData.findByIdAndUpdate(dataItem._id, {
textList: dataItem.textList.slice(0, -5)
});
jsonRes(res, {
data: {}

View File

@@ -6,6 +6,9 @@ export const openaiError: Record<string, string> = {
'Too Many Requests': '请求次数太多了,请慢点~',
'Bad Gateway': '网关异常,请重试'
};
// Lookup table of user-facing messages keyed by an error `type` string.
// The only entry, `insufficient_quota`, maps to a message meaning "API balance is insufficient".
export const openaiError2: Record<string, string> = {
insufficient_quota: 'API 余额不足'
};
export const proxyError: Record<string, boolean> = {
ECONNABORTED: true,
ECONNRESET: true

View File

@@ -9,39 +9,35 @@ import { generateVector } from './generateVector';
import { connectRedis } from '../redis';
import { VecModelDataPrefix } from '@/constants/redis';
import { customAlphabet } from 'nanoid';
import { ModelSplitDataSchema } from '@/types/mongoSchema';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
export async function generateQA(): Promise<any> {
// 最多 5 个进程
if (global.generatingQA >= 5) {
console.log('QA 最多5个进程');
return;
}
global.generatingQA++;
export async function generateQA(next = false): Promise<any> {
if (global.generatingQA === true && !next) return;
global.generatingQA = true;
let dataId = null;
try {
const redis = await connectRedis();
// 找出一个需要生成的 dataItem
const dataItem = await SplitData.findOne({
textList: { $exists: true, $ne: [] }
});
const data = await SplitData.aggregate([
{ $match: { textList: { $exists: true, $ne: [] } } },
{ $sample: { size: 1 } }
]);
const dataItem: ModelSplitDataSchema = data[0];
if (!dataItem) {
console.log('没有需要生成 QA 的数据');
global.generatingQA = 0;
global.generatingQA = false;
return;
}
dataId = dataItem._id;
// 源文本
const text = dataItem.textList[dataItem.textList.length - 1];
if (!text) {
await SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }); // 弹出无效文本
throw new Error('无文本');
}
// 获取 5 个源文本
const textList: string[] = dataItem.textList.slice(-5);
// 获取 openapi Key
let userApiKey, systemKey;
@@ -62,7 +58,7 @@ export async function generateQA(): Promise<any> {
throw new Error('获取 openai key 失败');
}
console.log('正在生成一组QA, ID:', dataItem._id);
console.log(`正在生成一组QA, 包含 ${textList.length} 组文本。ID: ${dataItem._id}`);
const startTime = Date.now();
@@ -76,33 +72,50 @@ export async function generateQA(): Promise<any> {
};
// 请求 chatgpt 获取回答
const response = await chatAPI
.createChatCompletion(
{
model: ChatModelNameEnum.GPT35,
temperature: 0.8,
n: 1,
messages: [
systemPrompt,
const response = await Promise.allSettled(
textList.map((text) =>
chatAPI
.createChatCompletion(
{
role: 'user',
content: text
model: ChatModelNameEnum.GPT35,
temperature: 0.8,
n: 1,
messages: [
systemPrompt,
{
role: 'user',
content: text
}
]
},
{
timeout: 180000,
httpsAgent
}
]
},
{
timeout: 180000,
httpsAgent
}
)
.then((res) => ({
rawContent: res?.data.choices[0].message?.content || '', // chatgpt原本的回复
result: splitText(res?.data.choices[0].message?.content || '') // 格式化后的QA对
}))
)
.then((res) => ({
rawContent: res?.data.choices[0].message?.content || '', // chatgpt原本的回复
result: splitText(res?.data.choices[0].message?.content || '') // 格式化后的QA对
}));
);
// 获取成功的回答
const successResponse: {
rawContent: string;
result: {
q: string;
a: string;
}[];
}[] = response.filter((item) => item.status === 'fulfilled').map((item: any) => item.value);
const resultList = successResponse.map((item) => item.result).flat();
await Promise.allSettled([
SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }), // 弹出已经拆分的文本
...response.result.map((item) => {
SplitData.findByIdAndUpdate(dataItem._id, {
textList: dataItem.textList.slice(0, -5)
}), // 删掉后5个数据
...resultList.map((item) => {
// 插入 redis
return redis.sendCommand([
'HMSET',
@@ -125,20 +138,21 @@ export async function generateQA(): Promise<any> {
'生成QA成功time:',
`${(Date.now() - startTime) / 1000}s`,
'QA数量',
response.result.length
resultList.length
);
// 计费
pushSplitDataBill({
isPay: !userApiKey && response.result.length > 0,
isPay: !userApiKey && resultList.length > 0,
userId: dataItem.userId,
type: 'QA',
text: systemPrompt.content + text + response.rawContent
text:
systemPrompt.content +
textList.join('') +
successResponse.map((item) => item.rawContent).join('')
});
global.generatingQA--;
generateQA();
generateQA(true);
generateVector();
} catch (error: any) {
// log
@@ -157,14 +171,13 @@ export async function generateQA(): Promise<any> {
errorText: 'api 余额不足'
});
generateQA();
generateQA(true);
return;
}
setTimeout(() => {
global.generatingQA--;
generateQA();
}, 5000);
generateQA(true);
}, 4000);
}
}

View File

@@ -75,9 +75,7 @@ export async function generateVector(next = false): Promise<any> {
console.log(`生成向量成功: ${dataItem.id}`);
setTimeout(() => {
generateVector(true);
}, 4000);
generateVector(true);
} catch (error: any) {
// log
if (error?.response) {
@@ -88,7 +86,7 @@ export async function generateVector(next = false): Promise<any> {
}
if (dataId && error?.response?.data?.error?.type === 'insufficient_quota') {
console.log('api 余额不足');
console.log('api 余额不足,删除 redis 模型数据');
const redis = await connectRedis();
redis.del(dataId);
generateVector(true);

View File

@@ -27,7 +27,6 @@ export async function connectToDatabase(): Promise<void> {
global.mongodb = null;
}
global.generatingQA = 0;
generateQA();
// generateAbstract();
generateVector(true);

View File

@@ -1,21 +0,0 @@
import { ChatItemType } from '../types/chat';
/** In-memory store of chat messages, keyed by chat window id. */
export const chatWindows = new Map<string, ChatItemType[]>();

/**
 * Get the messages recorded for a chat window.
 *
 * @param id - chat window id
 * @returns the stored message array, or a fresh empty array when nothing is
 *          stored for `id` (the empty array is NOT written back to the store)
 */
export const getWindowMessages = (id: string) => {
  const stored = chatWindows.get(id);
  return stored ? stored : [];
};

/**
 * Append a message to a chat window, creating the window's list on first use.
 *
 * @param id - chat window id
 * @param prompt - message to append
 * @returns the window's message array after the append (the same array
 *          instance that is held in the store)
 */
export const pushWindowMessage = (id: string, prompt: ChatItemType) => {
  let history = chatWindows.get(id);
  if (!history) {
    history = [];
  }
  history.push(prompt);
  // Always write back so a newly created list is registered under `id`.
  chatWindows.set(id, history);
  return history;
};

/**
 * Remove a chat window and all of its messages from the store.
 *
 * @param id - chat window id
 */
export const deleteWindow = (id: string) => {
  chatWindows.delete(id);
};

View File

@@ -1,5 +1,5 @@
import { NextApiResponse } from 'next';
import { openaiError, proxyError } from './errorCode';
import { openaiError, openaiError2, proxyError } from './errorCode';
export interface ResponseType<T = any> {
code: number;
@@ -25,13 +25,19 @@ export const jsonRes = <T = any>(
msg = error;
} else if (proxyError[error?.code]) {
msg = '服务器代理出错';
} else if (openaiError2[error?.response?.data?.error?.type]) {
msg = openaiError2[error?.response?.data?.error?.type];
} else if (openaiError[error?.response?.statusText]) {
msg = openaiError[error.response.statusText];
}
console.log('error->');
console.log('code:', error.code);
console.log('statusText:', error?.response?.statusText);
console.log('msg:', msg);
// request 时候报错
if (error?.response) {
console.log('statusText:', error?.response?.statusText);
console.log('type:', error?.response?.data?.error?.type);
}
}
res.json({

View File

@@ -4,7 +4,7 @@ import type { RedisClientType } from 'redis';
declare global {
var mongodb: Mongoose | string | null;
var redisClient: RedisClientType | null;
var generatingQA: number;
var generatingQA: boolean;
var generatingAbstract: boolean;
var generatingVector: boolean;
var QRCode: any;