feat: qa生成

2026-03-26 01:02:28 +08:00 · 2023-03-24 01:19:38 +08:00
parent 8b72dca533
commit d8290f0809
15 changed files with 321 additions and 121 deletions
--- a/src/pages/api/chat/chatGpt.ts
+++ b/src/pages/api/chat/chatGpt.ts
@@ -9,7 +9,7 @@ import { jsonRes } from '@/service/response';
 import type { ModelSchema } from '@/types/mongoSchema';
 import { PassThrough } from 'stream';
 import { ModelList } from '@/constants/model';
-import { pushBill } from '@/service/events/bill';
+import { pushBill } from '@/service/events/pushChatBill';

 /* 发送提示词 */
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
@@ -98,7 +98,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
      }
    );

-    console.log('api response time:', `time: ${(Date.now() - startTime) / 1000}s`);
+    console.log('api response time:', `${(Date.now() - startTime) / 1000}s`);

    // 创建响应流
    res.setHeader('Content-Type', 'text/event-stream;charset-utf-8');
--- a/src/pages/api/chat/gpt3.ts
+++ b/src/pages/api/chat/gpt3.ts
@@ -6,7 +6,7 @@ import { getOpenAIApi, authChat } from '@/service/utils/chat';
 import { ChatItemType } from '@/types/chat';
 import { httpsAgent } from '@/service/utils/tools';
 import { ModelList } from '@/constants/model';
-import { pushBill } from '@/service/events/bill';
+import { pushBill } from '@/service/events/pushChatBill';

 /* 发送提示词 */
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
--- a/src/pages/api/data/splitData.ts
+++ b/src/pages/api/data/splitData.ts
@@ -0,0 +1,65 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@/service/response';
+import { connectToDatabase, Data, DataItem } from '@/service/mongo';
+import { authToken } from '@/service/utils/tools';
+import { generateQA } from '@/service/events/generateQA';
+
+/**
+ * Splits the posted `text` into chunks of at most 1000 characters, stores one
+ * pending DataItem per chunk and temperature under a new Data parent, then
+ * starts QA generation. Responds with the number of DataItems inserted.
+ */
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  try {
+    let { text, name } = req.body as { text: string; name: string };
+    if (!text || !name) {
+      throw new Error('参数错误');
+    }
+    // Collapse consecutive newlines into a single one before chunking.
+    text = text.replace(/\n+/g, '\n');
+    await connectToDatabase();
+
+    const { authorization } = req.headers;
+    const userId = await authToken(authorization);
+
+    // Create the parent Data record that the chunks point to via `dataId`.
+    const data = await Data.create({ userId, name });
+
+    const chunkSize = 1000;
+    const temperatures = [0, 0.2, 0.4, 0.6, 0.8, 1.0];
+    const dataItems: Record<string, unknown>[] = [];
+
+    // Walk the text by chunk start offset. The previous bound,
+    // `i <= text.length / 1000`, emitted an extra empty chunk whenever the
+    // length was an exact multiple of 1000, and the DataItem schema declares
+    // `text` as required, so empty chunks must not be queued.
+    for (let start = 0; start < text.length; start += chunkSize) {
+      const dataItem = {
+        userId,
+        dataId: data._id,
+        text: text.slice(start, start + chunkSize),
+        status: 1
+      };
+
+      // Queue the same chunk once per temperature.
+      temperatures.forEach((temperature) => {
+        dataItems.push({ temperature, ...dataItem });
+      });
+    }
+
+    // Insert all queued items in one batch.
+    await DataItem.insertMany(dataItems);
+
+    // Not awaited: the response is sent without waiting for generation.
+    generateQA();
+
+    jsonRes(res, { data: dataItems.length });
+  } catch (err) {
+    jsonRes(res, { code: 500, error: err });
+  }
+}
+
+/**
+ * Empty, uncalled placeholder; the working parser is splitText in @/service/events/generateQA.
+ */
+function splitText(text: string) {}
--- a/src/pages/api/timer/initBill.ts
+++ b/src/pages/api/timer/initBill.ts
@@ -0,0 +1,29 @@
+// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@/service/response';
+import { connectToDatabase, Bill } from '@/service/mongo';
+import { authToken } from '@/service/utils/tools';
+import type { BillSchema } from '@/types/mongoSchema';
+
+/**
+ * Sets `type` to 'chat' and `modelName` to 'gpt-3.5-turbo' on every Bill.
+ *
+ * NOTE(review): the handler performs no auth check and its empty filter also
+ * overwrites bills that already hold other values — confirm this is intended.
+ */
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  try {
+    await connectToDatabase();
+
+    // The empty filter selects all documents in the collection.
+    const everyBill = {};
+    await Bill.updateMany(everyBill, { type: 'chat', modelName: 'gpt-3.5-turbo' });
+
+    // Reply with an empty data object once the update has finished.
+    const payload = { data: {} };
+    jsonRes(res, payload);
+  } catch (err) {
+    // Hand the error to jsonRes together with code 500.
+    jsonRes(res, { code: 500, error: err });
+  }
+}
--- a/src/pages/api/train/generateQA.ts
+++ b/src/pages/api/train/generateQA.ts
@@ -1,103 +0,0 @@
-import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@/service/response';
-import { connectToDatabase, Data } from '@/service/mongo';
-import { getOpenAIApi } from '@/service/utils/chat';
-import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
-import type { ChatCompletionRequestMessage, CreateChatCompletionResponse } from 'openai';
-import fs from 'fs';
-import { v4 } from 'uuid';
-
-/* 定时删除那些不活跃的内容 */
-export default async function handler(req: NextApiRequest, res: NextApiResponse) {
-  const systemPrompt: ChatCompletionRequestMessage = {
-    role: 'system',
-    content: `总结助手。我会向你发送一段长文本，请从中总结出10个以上问题和答案，并转化成中文！请按以下格式返回：
-"Q1:"
-"A1:"
-######
-"Q2:"
-"A2:"
-######
-`
-  };
-
-  try {
-    const { text } = req.body as { text: string };
-    const { authorization } = req.headers;
-    const replaceText = text.replace(/\n/g, ' ');
-    await connectToDatabase();
-
-    const { userId, userApiKey, systemKey } = await getOpenApiKey(authorization);
-
-    const chatAPI = getOpenAIApi(userApiKey || systemKey);
-
-    const docId = v4();
-
-    const queue: {
-      requestPrompts: ChatCompletionRequestMessage;
-      temperature: number;
-    }[] = [
-      {
-        requestPrompts: {
-          role: 'user',
-          content: `跟 AI ChatGPT 聊了 11 个我过去关心的话题\n原创 刘飞Lufy 刘言飞语 2022-12-05 21:40 发表于浙江\n收录于合集\n#行业观察\n33\n个\n#AIGC\n11\n个\n\n\n\n\n这几天沉浸式体验 ChatGPT（OpenAI发布的一款人工智能对话引擎），突发奇想，想知道它对我曾经聊的话题都怎么看。于是我用一些过去公众号发过的文章主题，跟它进行了一轮对话。\n\n以下是我们聊天的内容原始记录，没有做文字篡改，为方便没有用截图，做了文本排版。各位可以直观体会下它的智能程度。\n\n\n（本文内容都是在以上这样的界面里完成的）\n\n\n——\n\n参考：微信搜索的未来\n\n刘飞：\n\n微信搜索现在的使用量大吗？\n\nChatGPT：\n\n微信搜索是一项非常流行的功能，它可以帮助用户快速搜索到想要的信息。由于微信是中国最流行的社交软件，微信搜索的使用量也非常大。根据官方数据，截至2022年，微信的日活跃用户数量已经超过10亿，其中，每天都会有大量用户使用微信搜索功能搜索各种信息。因此，微信搜索的使用量非常大。\n\n刘飞：\n\n跟百度搜索比呢？\n\nChatGPT：\n\n微信搜索和百度搜索都是帮助用户快速搜索到想要的信息的工具。但是，两者有着明显的不同。\n\n首先，微信搜索和百度搜索的使用场景不同。微信搜索主要用于社交场景，因此，它更多地用于搜索好友、群聊、文章等信息，而百度搜索则更加注重为用户提供更丰富的信息，因此它更多地用于搜索各种知识信息和新闻信息。\n\n\n其次，微信搜索和百度搜索的搜索结果也不同。微信搜索的搜索结果主要是来自用户自己的微信聊天记录和朋友圈发布的信息，而百度搜索的搜索结果则是来自互联网上的各种信息，包括网站、新闻、论坛等。因此，微信搜索的搜索结果更加个性化，而百度搜索的搜索结果则更加全面。\n\n总之，微信搜索和百度搜索都是帮助用户快速搜索到想要的信息的工具，但是它们在使用场景、搜索结果等方面存在明显的差异。\n\n刘飞：\n\n那字节跳动的搜索跟它们相比呢？\n\nChatGPT：\n\n字节跳动的搜索也是一款帮助用户快速搜索到想要的信息的工具。但是，字节跳动的搜索主要是针对短视频内容而设计的，因此，它的搜索结果主要是来自抖音、头条等短视频平台上的视频内容。与微信搜索和百度搜索相比，字节跳动的搜索更加专注于短视频内容，并且搜索结果也更加丰富多样。\n\n——\n\n\n参考：我们离财务自由的距离\n\n\n刘飞：\n\n怎样才算财务自由？\n\nChatGPT：\n\n财务自由是指一个人在不必工作的情况下，能够通过自己的财富维持生活的质量和生活水平。通常情况下，一个人可以通过节约开支、投资理财等方式来实现财务自由。\n\n具体来说，要达到财务自由，一个人需要符合以下几个条件：\n\n\n实现收入和支出平衡。要想实现财务自由，首先需要通过节约开支来控制支出，并通过提高收入来提升生活水平。\n\n\n建立良好的投资理财计划。要想实现财务自由，需要通过投资理财来增加财富的增长速度，并且要有一个完善的理财计划来指导投资决策。\n\n\n控制风险。在投资理财的过程中，要尽量降低风险，并通过多样化投资来降低投资的风险。`
-        },
-        temperature: 0.5
-      }
-    ];
-
-    /**
-     * 生成QA
-     */
-    const generateQA = async () => {
-      const prompt = queue.pop();
-      if (!prompt) return;
-
-      try {
-        const response = await chatAPI.createChatCompletion(
-          {
-            model: 'gpt-3.5-turbo',
-            temperature: prompt.temperature,
-            n: 1,
-            messages: [systemPrompt, prompt.requestPrompts]
-          },
-          {
-            timeout: 60000,
-            httpsAgent
-          }
-        );
-        return response.data.choices[0].message?.content;
-      } catch (error) {
-        console.log('部分训练出现错误');
-      }
-    };
-    // 格式化文本长度
-    // for (let i = 0; i <= replaceText.length / 2048; i++) {
-    //   const slicePrompts: ChatCompletionRequestMessage = {
-    //     role: 'user',
-    //     content: replaceText.slice(i * 2048, (i + 1) * 2048)
-    //   };
-
-    //   [0.1, 0.3, 0.5, 0.7, 0.9].forEach((temperature) => {
-    //     queue.push({
-    //       temperature,
-    //       requestPrompts: slicePrompts
-    //     });
-    //   });
-    // }
-
-    jsonRes(res, {
-      data: await generateQA()
-    });
-  } catch (err) {
-    jsonRes(res, {
-      code: 500,
-      error: err
-    });
-  }
-}
-
-/**
- * 检查文本是否按格式返回
- */
-function splitText(text: string) {}
--- a/src/service/events/generateQA.ts
+++ b/src/service/events/generateQA.ts
@@ -0,0 +1,129 @@
+import { DataItem } from '@/service/mongo';
+import { getOpenAIApi } from '@/service/utils/chat';
+import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
+import type { ChatCompletionRequestMessage } from 'openai';
+import { DataItemSchema } from '@/types/mongoSchema';
+import { ChatModelNameEnum } from '@/constants/model';
+
+/**
+ * Processes pending DataItems one at a time: picks an item with status 1 and
+ * retries left, asks the chat model for Q/A pairs, stores them, then calls
+ * itself for the next item. `global.generatingQA` stops a second chain from
+ * starting while one is running.
+ * @param next true only for the self-call that continues a running chain.
+ */
+export async function generateQA(next = false): Promise<any> {
+  if (global.generatingQA && !next) return;
+  global.generatingQA = true;
+
+  const systemPrompt: ChatCompletionRequestMessage = {
+    role: 'system',
+    content: `总结助手。我会向你发送一段长文本，请从中总结出10个问题和答案，答案请尽量详细，请按以下格式返回：
+"Q1:"
+"A1:"
+"Q2:"
+"A2:"
+`
+  };
+  let dataItem: DataItemSchema | null = null;
+  // Cleared when a failure should not be retried (no usable OpenAI key).
+  let retryable = true;
+
+  try {
+    // Find one item that is waiting for generation and has retries left.
+    dataItem = await DataItem.findOne({ status: 1, times: { $gt: 0 } });
+
+    if (!dataItem) {
+      console.log('没有需要生成 QA 的数据');
+      global.generatingQA = false;
+      return;
+    }
+
+    // Mark it as generating and use up one retry. The schema's counter is
+    // `times`; the previous `$inc: { time: -1 }` named a different field.
+    await DataItem.findByIdAndUpdate(dataItem._id, { status: 2, $inc: { times: -1 } });
+
+    // Resolve the OpenAI key for the item's owner.
+    let userApiKey, systemKey;
+    try {
+      const key = await getOpenApiKey(dataItem.userId);
+      userApiKey = key.userApiKey;
+      systemKey = key.systemKey;
+    } catch (error) {
+      // No key available: set all of this user's items to idle. updateMany takes
+      // the filter and the update separately; the previous call passed
+      // `status: 0` inside the filter and supplied no update at all.
+      retryable = false;
+      await DataItem.updateMany({ userId: dataItem.userId }, { status: 0 });
+      throw new Error('获取 openai key 失败');
+    }
+
+    console.log('正在生成一个QA', dataItem._id);
+    const startTime = Date.now();
+
+    // Ask the chat model for Q/A pairs built from the item's text.
+    const chatAPI = getOpenAIApi(userApiKey || systemKey);
+    const response = await chatAPI.createChatCompletion(
+      {
+        model: ChatModelNameEnum.GPT35,
+        temperature: dataItem.temperature,
+        n: 1,
+        messages: [systemPrompt, { role: 'user', content: dataItem.text }]
+      },
+      { timeout: 60000, httpsAgent }
+    );
+    const content = response.data.choices[0].message?.content;
+
+    const splitResponse = splitText(content || '');
+    if (splitResponse.length === 0) {
+      // An unparsable reply used to leave the item in status 2 indefinitely;
+      // throw so the catch block resets its status.
+      throw new Error('QA format error');
+    }
+
+    // Append the pairs to `result` and mark the item idle.
+    await DataItem.findByIdAndUpdate(dataItem._id, {
+      status: 0,
+      $push: { result: { $each: splitResponse } }
+    });
+    console.log('生成成功，time:', `${(Date.now() - startTime) / 1000}s`);
+  } catch (error: any) {
+    console.log('error: 生成QA错误', dataItem?._id);
+    console.log('statusText:', error?.response?.statusText);
+    if (!dataItem) {
+      // The lookup itself failed: end the chain rather than retry in a tight loop.
+      global.generatingQA = false;
+      return;
+    }
+    if (retryable) {
+      // `dataItem.times` was read before this attempt's decrement, so a retry
+      // is left only when it was above 1. A failed reset is logged, not thrown.
+      try {
+        await DataItem.findByIdAndUpdate(dataItem._id, { status: dataItem.times > 1 ? 1 : 0 });
+      } catch (resetError) {
+        console.log('error: reset QA status failed', dataItem._id);
+      }
+    }
+  }
+
+  generateQA(true);
+}
+
+/**
+ * Pulls question/answer pairs out of a model reply.
+ *
+ * Every match of "Qn: <question>" followed by a newline and "An: <answer>"
+ * becomes one `{ q, a }` entry; a match with an empty question or answer is
+ * skipped. Returns an empty array when nothing matches.
+ */
+function splitText(text: string) {
+  const qaPattern = /Q\d+:\s(.+)?\nA\d+:\s(.+)?/g;
+
+  const pairs = [];
+  for (const [, q, a] of text.matchAll(qaPattern)) {
+    if (!q || !a) continue;
+    pairs.push({ q, a });
+  }
+
+  return pairs;
+}
--- a/src/service/events/pushChatBill.ts
+++ b/src/service/events/pushChatBill.ts
@@ -26,6 +26,8 @@ export const pushBill = async ({
      // 插入 Bill 记录
      const res = await Bill.create({
        userId,
+        type: 'chat',
+        modelName: modelItem.model,
        chatId,
        textLen,
        price
--- a/src/service/models/bill.ts
+++ b/src/service/models/bill.ts
@@ -1,4 +1,5 @@
 import { Schema, model, models } from 'mongoose';
+import { ModelList } from '@/constants/model';

 const BillSchema = new Schema({
  userId: {
@@ -6,6 +7,16 @@ const BillSchema = new Schema({
    ref: 'user',
    required: true
  },
+  type: {
+    type: String,
+    enum: ['chat', 'generateData', 'return'],
+    required: true
+  },
+  modelName: {
+    type: String,
+    enum: ModelList.map((item) => item.model),
+    required: true
+  },
  chatId: {
    type: Schema.Types.ObjectId,
    ref: 'chat',
--- a/src/service/models/data.ts
+++ b/src/service/models/data.ts
@@ -6,21 +6,13 @@ const DataSchema = new Schema({
    ref: 'user',
    required: true
  },
-  docId: {
+  name: {
    type: String,
    required: true
  },
  createTime: {
    type: Date,
    default: () => new Date()
-  },
-  q: {
-    type: String,
-    required: true
-  },
-  a: {
-    type: String,
-    required: true
  }
 });

--- a/src/service/models/dataItem.ts
+++ b/src/service/models/dataItem.ts
@@ -0,0 +1,48 @@
+import { Schema, model, models } from 'mongoose';
+
+/**
+ * Schema for one chunk of uploaded text queued for QA generation at a given
+ * temperature.
+ *
+ * Rows are inserted by the splitData API route and read by generateQA.
+ */
+const DataItemSchema = new Schema({
+  // User that owns the item.
+  userId: { type: Schema.Types.ObjectId, ref: 'user', required: true },
+
+  // Parent `data` document the chunk belongs to.
+  dataId: { type: Schema.Types.ObjectId, ref: 'data', required: true },
+
+  // Retry counter; defaults to 3.
+  times: { type: Number, default: 3 },
+
+  // Text of the chunk.
+  text: { type: String, required: true },
+
+  // Temperature stored with the item.
+  temperature: { type: Number, required: true },
+
+  // Question/answer pairs; starts out empty.
+  result: {
+    type: [
+      {
+        q: {
+          type: String,
+          required: true
+        },
+        a: {
+          type: String,
+          required: true
+        }
+      }
+    ],
+    default: []
+  },
+
+  // 0 = idle, 1 = waiting for generation, 2 = generating.
+  status: { type: Number, default: 1 }
+});
+
+// Reuse the model registered under 'dataItem' when there is one; otherwise
+// create it from the schema.
+export const DataItem = models['dataItem'] || model('dataItem', DataItemSchema);
--- a/src/service/mongo.ts
+++ b/src/service/mongo.ts
@@ -1,5 +1,5 @@
 import mongoose from 'mongoose';
-
+import { generateQA } from './events/generateQA';
 /**
 * 连接 MongoDB 数据库
 */
@@ -23,6 +23,9 @@ export async function connectToDatabase(): Promise<void> {
    console.log('error->', 'mongo connect error');
    global.mongodb = null;
  }
+
+  // 递归 QA 生成
+  generateQA();
 }

 export * from './models/authCode';
@@ -33,3 +36,4 @@ export * from './models/training';
 export * from './models/bill';
 export * from './models/pay';
 export * from './models/data';
+export * from './models/dataItem';
--- a/src/service/utils/tools.ts
+++ b/src/service/utils/tools.ts
@@ -55,8 +55,7 @@ export const getUserOpenaiKey = async (userId: string) => {
 };

 /* 获取key，如果没有就用平台的，用平台记得加账单 */
-export const getOpenApiKey = async (authorization?: string) => {
-  const userId = await authToken(authorization);
+export const getOpenApiKey = async (userId: string) => {
  const user = await User.findById<UserModelSchema>(userId);

  if (!user) return Promise.reject('用户不存在');
@@ -66,7 +65,6 @@ export const getOpenApiKey = async (authorization?: string) => {
  // 有自己的key， 直接使用
  if (userApiKey) {
    return {
-      userId,
      userApiKey: await getUserOpenaiKey(userId),
      systemKey: ''
    };
@@ -78,7 +76,6 @@ export const getOpenApiKey = async (authorization?: string) => {
  }

  return {
-    userId,
    userApiKey: '',
    systemKey: process.env.OPENAIKEY as string
  };
--- a/src/types/index.d.ts
+++ b/src/types/index.d.ts
@@ -2,6 +2,7 @@ import type { Mongoose } from 'mongoose';

 declare global {
  var mongodb: Mongoose | string | null;
+  var generatingQA: boolean; // true while a generateQA chain is running
  var QRCode: any;
 }

--- a/src/types/mongoSchema.d.ts
+++ b/src/types/mongoSchema.d.ts
@@ -94,3 +94,28 @@ export interface PaySchema {
  orderId: string;
  status: 'SUCCESS' | 'REFUND' | 'NOTPAY' | 'CLOSED';
 }
+
+export interface DataSchema { // Parent record that groups the DataItems of one upload.
+  _id: string;
+  userId: string; // id of the owning user
+  name: string; // name supplied when the record is created
+  createTime: string;
+}
+
+export interface DataItemSchema { // One text chunk queued for QA generation.
+  _id: string;
+  userId: string;
+  dataId: string; // id of the parent Data record
+  times: number; // retry counter
+  temperature: number;
+  text: string;
+  result: {
+    q: string;
+    a: string;
+  }[]; // generated question/answer pairs
+  status: 0 | 1 | 2; // 0 idle, 1 waiting for generation, 2 generating
+}
+
+export interface DataItemPopulate extends Omit<DataItemSchema, 'userId'> {
+  userId: UserModelSchema; // user document in place of the id string; Omit avoids the string/object clash
+}
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -1,6 +1,6 @@
 {
  "compilerOptions": {
-    "target": "es5",
+    "target": "es2015",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,