feat: qa生成

This commit is contained in:
archer
2023-03-24 01:19:38 +08:00
parent 8b72dca533
commit d8290f0809
15 changed files with 321 additions and 121 deletions

View File

@@ -9,7 +9,7 @@ import { jsonRes } from '@/service/response';
import type { ModelSchema } from '@/types/mongoSchema';
import { PassThrough } from 'stream';
import { ModelList } from '@/constants/model';
import { pushBill } from '@/service/events/bill';
import { pushBill } from '@/service/events/pushChatBill';
/* 发送提示词 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
@@ -98,7 +98,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
}
);
console.log('api response time:', `time: ${(Date.now() - startTime) / 1000}s`);
console.log('api response time:', `${(Date.now() - startTime) / 1000}s`);
// 创建响应流
res.setHeader('Content-Type', 'text/event-stream;charset-utf-8');

View File

@@ -6,7 +6,7 @@ import { getOpenAIApi, authChat } from '@/service/utils/chat';
import { ChatItemType } from '@/types/chat';
import { httpsAgent } from '@/service/utils/tools';
import { ModelList } from '@/constants/model';
import { pushBill } from '@/service/events/bill';
import { pushBill } from '@/service/events/pushChatBill';
/* 发送提示词 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {

View File

@@ -0,0 +1,65 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Data, DataItem } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { generateQA } from '@/service/events/generateQA';
/**
 * Splits an uploaded text into chunks and queues every chunk for QA generation.
 *
 * Request body: `{ text: string; name: string }`; the caller is identified through the
 * `authorization` header. A parent `Data` record is created, then one `DataItem` per
 * (chunk, temperature) combination is inserted with status 1 (waiting), and the
 * background `generateQA` worker is kicked off.
 *
 * Responds with the number of queued items, or with code 500 and the error on failure.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  // Maximum number of characters sent to the model per item.
  const chunkSize = 1000;
  // Every chunk is queued once per temperature.
  const temperatures = [0, 0.2, 0.4, 0.6, 0.8, 1.0];

  try {
    const { text: rawText, name } = req.body as { text: string; name: string };
    if (!rawText || typeof rawText !== 'string' || !name) {
      throw new Error('参数错误');
    }

    // Collapse runs of line breaks so blank lines do not eat into the chunk budget.
    const text = rawText.replace(/\n+/g, '\n');

    await connectToDatabase();

    const { authorization } = req.headers;
    const userId = await authToken(authorization);

    // Create the parent record that groups all chunks of this upload.
    const data = await Data.create({
      userId,
      name
    });

    const dataItems: {
      userId: typeof userId;
      dataId: typeof data._id;
      text: string;
      temperature: number;
      status: number;
    }[] = [];

    // Step by chunk start offset and stop at the end of the text. The previous
    // `i <= text.length / 1000` bound produced an extra empty chunk whenever the length
    // was an exact multiple of the chunk size, and an empty `text` fails the schema's
    // `required` validation, aborting the whole insert.
    for (let start = 0; start < text.length; start += chunkSize) {
      const chunk = text.slice(start, start + chunkSize);
      for (const temperature of temperatures) {
        dataItems.push({
          temperature,
          userId,
          dataId: data._id,
          text: chunk,
          status: 1
        });
      }
    }

    // Bulk insert all queued items.
    await DataItem.insertMany(dataItems);

    // Fire-and-forget: the worker runs in the background; make sure a rejection
    // cannot surface as an unhandled promise rejection.
    void generateQA().catch((error: unknown) => {
      console.log('generateQA error:', error);
    });

    jsonRes(res, {
      data: dataItems.length
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
/**
* Intended to check whether a text comes back in the expected format.
* Currently an empty placeholder: it accepts the text and does nothing with it.
*/
function splitText(text: string) {}

View File

@@ -0,0 +1,29 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Bill } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import type { BillSchema } from '@/types/mongoSchema';
/**
 * One-off migration endpoint: backfills the `type` and `modelName` fields on Bill
 * records that were created before those fields existed.
 *
 * Only documents that are missing a field are updated, so the endpoint is idempotent
 * and cannot overwrite bills that were later written with another type or model.
 *
 * NOTE(review): the endpoint performs no authentication — confirm it is not reachable
 * in production, or guard it before deploying.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await connectToDatabase();

    // The two fields are backfilled independently so that a document which already
    // carries one of them keeps its existing value.
    await Promise.all([
      Bill.updateMany({ type: { $exists: false } }, { $set: { type: 'chat' } }),
      Bill.updateMany(
        { modelName: { $exists: false } },
        { $set: { modelName: 'gpt-3.5-turbo' } }
      )
    ]);

    jsonRes(res, {
      data: {}
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -1,103 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Data } from '@/service/mongo';
import { getOpenAIApi } from '@/service/utils/chat';
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
import type { ChatCompletionRequestMessage, CreateChatCompletionResponse } from 'openai';
import fs from 'fs';
import { v4 } from 'uuid';
/**
 * Generates question/answer pairs from the text sent in the request body.
 *
 * Request body: `{ text: string }`; the caller is identified through the
 * `authorization` header, which also selects the OpenAI key (the user's own key or the
 * platform key). The text is sent to the chat model together with a system prompt
 * that asks for a "Q/A" formatted summary, and the raw model reply is returned.
 *
 * The previous version ignored the request text and always sent a hard-coded sample
 * article; the request text is now what gets summarised.
 *
 * Responds with the model reply as `data`, or with code 500 and the error on failure
 * (including a failed OpenAI request, which used to be swallowed).
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  // Upper bound on the number of characters sent in a single request.
  const maxPromptLength = 2048;

  const systemPrompt: ChatCompletionRequestMessage = {
    role: 'system',
    content: `总结助手。我会向你发送一段长文本请从中总结出10个以上问题和答案并转化成中文请按以下格式返回
"Q1:"
"A1:"
######
"Q2:"
"A2:"
######
`
  };

  try {
    const { text } = req.body as { text: string };
    if (!text || typeof text !== 'string') {
      throw new Error('参数错误');
    }
    const { authorization } = req.headers;

    // Flatten line breaks so the text is sent as one continuous paragraph.
    const replaceText = text.replace(/\n/g, ' ');

    await connectToDatabase();

    const { userApiKey, systemKey } = await getOpenApiKey(authorization);
    const chatAPI = getOpenAIApi(userApiKey || systemKey);

    // Only the first `maxPromptLength` characters are summarised in this request;
    // anything beyond that is not sent.
    const requestPrompt: ChatCompletionRequestMessage = {
      role: 'user',
      content: replaceText.slice(0, maxPromptLength)
    };

    const response = await chatAPI.createChatCompletion(
      {
        model: 'gpt-3.5-turbo',
        temperature: 0.5,
        n: 1,
        messages: [systemPrompt, requestPrompt]
      },
      {
        timeout: 60000,
        httpsAgent
      }
    );

    jsonRes(res, {
      data: response.data.choices[0].message?.content
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
/**
* Intended to check whether a text comes back in the expected format.
* Currently an empty placeholder: it accepts the text and does nothing with it.
*/
function splitText(text: string) {}

View File

@@ -0,0 +1,129 @@
import { DataItem } from '@/service/mongo';
import { getOpenAIApi } from '@/service/utils/chat';
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
import type { ChatCompletionRequestMessage } from 'openai';
import { DataItemSchema } from '@/types/mongoSchema';
import { ChatModelNameEnum } from '@/constants/model';
/**
 * Background worker that turns queued `DataItem` records into question/answer pairs.
 *
 * Each invocation claims one item with status 1 (waiting) and remaining retries, asks
 * the chat model to summarise its text as Q/A pairs, stores the parsed pairs on the
 * item, and then schedules itself again for the next item. The chain stops when no
 * claimable item is left or when the database cannot be reached.
 *
 * @param next `true` only when the worker continues its own chain. External callers
 *   omit it; if a chain is already running (`global.generatingQA`), the call returns
 *   immediately so that only one chain runs per process.
 * @returns A promise that settles once the current item has been handled; the
 *   continuation is not awaited.
 */
export async function generateQA(next = false): Promise<any> {
  if (global.generatingQA && !next) return;
  global.generatingQA = true;

  const systemPrompt: ChatCompletionRequestMessage = {
    role: 'system',
    content: `总结助手。我会向你发送一段长文本请从中总结出10个问题和答案答案请尽量详细请按以下格式返回
"Q1:"
"A1:"
"Q2:"
"A2:"
`
  };

  let dataItem: DataItemSchema | null = null;
  // Set to false when the item's status has already been settled and the generic
  // error handler must not overwrite it.
  let resetStatusOnError = true;

  try {
    // Claim one waiting item atomically: mark it as generating and consume one retry
    // in the same operation, so two workers can never pick the same item. `new: true`
    // returns the updated document, i.e. `times` is the number of retries left.
    dataItem = await DataItem.findOneAndUpdate(
      {
        status: 1,
        times: { $gt: 0 }
      },
      {
        $set: { status: 2 },
        $inc: { times: -1 }
      },
      { new: true }
    );

    if (!dataItem) {
      console.log('没有需要生成 QA 的数据');
      global.generatingQA = false;
      return;
    }

    // Resolve the OpenAI key (the user's own key or the platform key).
    let userApiKey, systemKey;
    try {
      const key = await getOpenApiKey(dataItem.userId);
      userApiKey = key.userApiKey;
      systemKey = key.systemKey;
    } catch (error) {
      // No usable key (e.g. insufficient balance): set all of this user's records to
      // idle so that the worker stops picking them up.
      await DataItem.updateMany(
        { userId: dataItem.userId },
        { $set: { status: 0 } }
      );
      resetStatusOnError = false;
      throw new Error('获取 openai key 失败');
    }

    console.log('正在生成一个QA', dataItem._id);
    const startTime = Date.now();

    const chatAPI = getOpenAIApi(userApiKey || systemKey);

    // Ask the chat model for the Q/A summary of this chunk.
    const response = await chatAPI.createChatCompletion(
      {
        model: ChatModelNameEnum.GPT35,
        temperature: dataItem.temperature,
        n: 1,
        messages: [
          systemPrompt,
          {
            role: 'user',
            content: dataItem.text
          }
        ]
      },
      {
        timeout: 60000,
        httpsAgent
      }
    );
    const content = response.data.choices[0].message?.content;

    // Extract the Q/A pairs from the reply.
    const splitResponse = splitText(content || '');
    if (splitResponse.length === 0) {
      // Treat an unparsable reply as a failed attempt; otherwise the item would stay
      // in the "generating" state forever.
      throw new Error('生成的内容不符合 QA 格式');
    }

    // Store the pairs and mark the item as done (idle).
    await DataItem.findByIdAndUpdate(dataItem._id, {
      $set: { status: 0 },
      $push: {
        result: {
          $each: splitResponse
        }
      }
    });
    console.log('生成成功time:', `${(Date.now() - startTime) / 1000}s`);
  } catch (error: any) {
    console.log('error: 生成QA错误', dataItem?._id);
    console.log('statusText:', error?.response?.statusText);

    if (!dataItem) {
      // Claiming itself failed (e.g. the database is unreachable). Stop the chain
      // instead of retrying immediately in a tight loop.
      global.generatingQA = false;
      return;
    }

    if (resetStatusOnError) {
      try {
        // Put the item back in the queue while retries remain, otherwise park it.
        await DataItem.findByIdAndUpdate(dataItem._id, {
          $set: { status: dataItem.times > 0 ? 1 : 0 }
        });
      } catch (resetError) {
        console.log('error: 生成QA错误', dataItem._id);
        global.generatingQA = false;
        return;
      }
    }
  }

  // Continue with the next item without blocking the caller.
  void generateQA(true);
}
/**
 * Extracts question/answer pairs from a model reply formatted as
 *
 *   Q1: question
 *   A1: answer
 *   Q2: ...
 *
 * Compared with a strict single-line match this parser tolerates:
 * - answers that span several lines (everything up to the next `Qn:` line or the end
 *   of the text belongs to the answer),
 * - any amount of whitespace (or none) after the colon,
 * - blank lines between a question and its answer, and CRLF line endings,
 * - full-width colons (`：`).
 *
 * @param text Raw reply text.
 * @returns The pairs in order of appearance, trimmed; pairs whose question or answer
 *   is empty are dropped. Returns an empty array when nothing matches.
 */
function splitText(text: string) {
  // Group 1: the question (rest of the `Qn:` line). Group 2: the answer, lazily
  // extended until the next line that starts a new `Qn:` label or the end of input.
  const pairPattern =
    /Q\d+[:：]\s*(.+)\r?\n\s*A\d+[:：][ \t]*([\s\S]*?)(?=\r?\n\s*Q\d+[:：]|$)/g;

  const result: { q: string; a: string }[] = [];
  for (const match of text.matchAll(pairPattern)) {
    const q = (match[1] ?? '').trim();
    const a = (match[2] ?? '').trim();
    if (q && a) {
      result.push({ q, a });
    }
  }
  return result;
}

View File

@@ -26,6 +26,8 @@ export const pushBill = async ({
// 插入 Bill 记录
const res = await Bill.create({
userId,
type: 'chat',
modelName: modelItem.model,
chatId,
textLen,
price

View File

@@ -1,4 +1,5 @@
import { Schema, model, models } from 'mongoose';
import { ModelList } from '@/constants/model';
const BillSchema = new Schema({
userId: {
@@ -6,6 +7,16 @@ const BillSchema = new Schema({
ref: 'user',
required: true
},
type: {
type: String,
enum: ['chat', 'generateData', 'return'],
required: true
},
modelName: {
type: String,
enum: ModelList.map((item) => item.model),
required: true
},
chatId: {
type: Schema.Types.ObjectId,
ref: 'chat',

View File

@@ -6,21 +6,13 @@ const DataSchema = new Schema({
ref: 'user',
required: true
},
docId: {
name: {
type: String,
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
q: {
type: String,
required: true
},
a: {
type: String,
required: true
}
});

View File

@@ -0,0 +1,48 @@
import { Schema, model, models } from 'mongoose';
/** Builds the definition of a mandatory string path. */
const requiredString = () => ({ type: String, required: true as const });

/** Builds the definition of a mandatory ObjectId path that references `ref`. */
const requiredRef = (ref: string) => ({
  type: Schema.Types.ObjectId,
  ref,
  required: true as const
});

/**
 * One unit of QA-generation work: a text chunk of an uploaded data set, the
 * temperature to generate with, the remaining retries and the generated pairs.
 */
const DataItemSchema = new Schema({
  // Owner of the item.
  userId: requiredRef('user'),
  // Parent data set the chunk belongs to.
  dataId: requiredRef('data'),
  // Number of generation attempts left.
  times: { type: Number, default: 3 },
  // The text chunk to summarise.
  text: requiredString(),
  // Sampling temperature used for this item.
  temperature: { type: Number, required: true },
  // Generated question/answer pairs.
  result: {
    type: [{ q: requiredString(), a: requiredString() }],
    default: []
  },
  // 0 = idle, 1 = waiting to be generated, 2 = generating
  status: { type: Number, default: 1 }
});

export const DataItem = models['dataItem'] || model('dataItem', DataItemSchema);

View File

@@ -1,5 +1,5 @@
import mongoose from 'mongoose';
import { generateQA } from './events/generateQA';
/**
* 连接 MongoDB 数据库
*/
@@ -23,6 +23,9 @@ export async function connectToDatabase(): Promise<void> {
console.log('error->', 'mongo connect error');
global.mongodb = null;
}
// 递归 QA 生成
generateQA();
}
export * from './models/authCode';
@@ -33,3 +36,4 @@ export * from './models/training';
export * from './models/bill';
export * from './models/pay';
export * from './models/data';
export * from './models/dataItem';

View File

@@ -55,8 +55,7 @@ export const getUserOpenaiKey = async (userId: string) => {
};
/* 获取key如果没有就用平台的用平台记得加账单 */
export const getOpenApiKey = async (authorization?: string) => {
const userId = await authToken(authorization);
export const getOpenApiKey = async (userId: string) => {
const user = await User.findById<UserModelSchema>(userId);
if (!user) return Promise.reject('用户不存在');
@@ -66,7 +65,6 @@ export const getOpenApiKey = async (authorization?: string) => {
// 有自己的key 直接使用
if (userApiKey) {
return {
userId,
userApiKey: await getUserOpenaiKey(userId),
systemKey: ''
};
@@ -78,7 +76,6 @@ export const getOpenApiKey = async (authorization?: string) => {
}
return {
userId,
userApiKey: '',
systemKey: process.env.OPENAIKEY as string
};

View File

@@ -2,6 +2,7 @@ import type { Mongoose } from 'mongoose';
// Ambient declarations for values stored on the Node.js global object.
declare global {
// Database connection state; it is set to null when connecting fails.
var mongodb: Mongoose | string | null;
// True while the generateQA worker chain is running; used to avoid starting a second chain.
var generatingQA: boolean;
// NOTE(review): not used in the code visible here — presumably a QR-code library handle; confirm.
var QRCode: any;
}

View File

@@ -94,3 +94,28 @@ export interface PaySchema {
orderId: string;
status: 'SUCCESS' | 'REFUND' | 'NOTPAY' | 'CLOSED';
}
/**
 * Parent record of one uploaded text; the DataItem records created from the text
 * reference it through `dataId`.
 */
export interface DataSchema {
_id: string;
// Owner of the record.
userId: string;
// Name supplied by the user when uploading the text.
name: string;
// NOTE(review): the Mongoose schema stores this as a Date — presumably a serialised date string here; confirm.
createTime: string;
}
/**
 * One unit of QA-generation work: a text chunk of a data set together with the
 * temperature to generate with and the generated question/answer pairs.
 */
export interface DataItemSchema {
_id: string;
// Owner of the item.
userId: string;
// Parent data set (see DataSchema).
dataId: string;
// Number of generation attempts left (the Mongoose schema defaults it to 3).
times: number;
// Sampling temperature passed to the chat model.
temperature: number;
// The text chunk sent to the chat model.
text: string;
// Question/answer pairs extracted from the model reply.
result: {
q: string;
a: string;
}[];
// 0 = idle, 1 = waiting to be generated, 2 = generating
status: 0 | 1 | 2;
}
/**
 * A DataItem whose `userId` reference has been populated with the full user document.
 *
 * `userId` is removed from the base type before it is redeclared: an interface may not
 * redeclare an inherited property with an incompatible type, and a user document is
 * not assignable to the base interface's `string` id.
 */
export interface DataItemPopulate extends Omit<DataItemSchema, 'userId'> {
  userId: UserModelSchema;
}

View File

@@ -1,6 +1,6 @@
{
"compilerOptions": {
"target": "es5",
"target": "es2015",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,