import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, DataItem, Data } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { generateQA } from '@/service/events/generateQA';
import { generateAbstract } from '@/service/events/generateAbstract';
import { encode } from 'gpt-token-utils';

/** Token count at which the accumulated sentences are flushed as one data item. */
const MAX_TOKENS_PER_ITEM = 980;

/**
 * Groups sentences into text segments of roughly `maxTokens` tokens each.
 *
 * Sentences are appended to a buffer; once the buffer's token count reaches
 * `maxTokens` it is emitted and reset. Whatever is left in the buffer after
 * the last sentence is emitted too, so short inputs and trailing text are
 * not lost.
 *
 * @param sentences Sentences in their original order.
 * @param maxTokens Token count at which the current buffer is flushed.
 * @returns The grouped text segments, in order.
 */
function groupSentencesByTokens(sentences: readonly string[], maxTokens: number): string[] {
  const segments: string[] = [];
  let buffer = '';

  for (const sentence of sentences) {
    buffer += sentence;
    if (encode(buffer).length >= maxTokens) {
      segments.push(buffer);
      buffer = '';
    }
  }

  // Flush the remainder that never reached the threshold (skip whitespace-only leftovers).
  if (buffer.trim()) {
    segments.push(buffer);
  }

  return segments;
}

/**
 * Starts a fire-and-forget task and logs any failure instead of dropping it.
 * Works whether the task throws synchronously or returns a rejecting promise.
 */
function startBackgroundTask(name: string, task: () => unknown): void {
  try {
    Promise.resolve(task()).catch((error: unknown) => {
      console.error(`${name} failed`, error);
    });
  } catch (error) {
    console.error(`${name} failed to start`, error);
  }
}

/**
 * Split data into QA.
 *
 * Expects `text` and `dataId` in the request body. The text is normalised to
 * a single line, split into sentences, grouped into segments of roughly
 * `MAX_TOKENS_PER_ITEM` tokens and stored as `DataItem` documents with
 * `status: 1`. `generateQA` and `generateAbstract` are then started without
 * being awaited. Responds with the sentence list and the normalised text, or
 * with `code: 500` and the error on any failure.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { text, dataId } = req.body as { text: string; dataId: string };
    if (!text || !dataId || typeof text !== 'string') {
      throw new Error('参数错误');
    }

    await connectToDatabase();

    const { authorization } = req.headers;
    const userId = await authToken(authorization);

    // NOTE(review): the record is not checked against userId here — confirm
    // whether ownership should be verified before inserting items.
    const DataRecord = await Data.findById(dataId);
    if (!DataRecord) {
      throw new Error('找不到数据集');
    }

    // Collapse real line breaks and literal "\n" sequences into one space.
    // (The previous character class `[\r\n\\n]` also matched every letter "n".)
    const replaceText = text.replace(/(?:\\n|[\r\n])+/g, ' ');

    // Split the text into sentences; the optional terminator keeps a final
    // sentence that has no closing punctuation.
    const chunks: string[] = replaceText.match(/[^!?.。]+[!?.。]?/g) ?? [];

    // Kept loosely typed because the DataItem schema is not visible from this file.
    const dataItems: any[] = groupSentencesByTokens(chunks, MAX_TOKENS_PER_ITEM).map(
      (segment) => ({
        userId,
        dataId,
        type: DataRecord.type,
        text: segment,
        status: 1
      })
    );

    // Bulk-insert the data items (nothing to insert for whitespace-only input).
    if (dataItems.length > 0) {
      await DataItem.insertMany(dataItems);
    }

    // Start processing without blocking the response.
    startBackgroundTask('generateQA', () => generateQA());
    startBackgroundTask('generateAbstract', () => generateAbstract());

    jsonRes(res, {
      data: { chunks, replaceText }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}