From 7fb6f62cf606e50ab0a0d00e50ca993f588328b5 Mon Sep 17 00:00:00 2001 From: archer <545436317@qq.com> Date: Mon, 27 Mar 2023 19:19:47 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E6=96=87=E6=9C=AC=E6=8B=86=E5=88=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pages/api/data/splitData.ts | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/pages/api/data/splitData.ts b/src/pages/api/data/splitData.ts index d6beaae20..f25dd3244 100644 --- a/src/pages/api/data/splitData.ts +++ b/src/pages/api/data/splitData.ts @@ -8,11 +8,10 @@ import { generateAbstract } from '@/service/events/generateAbstract'; /* 拆分数据成QA */ export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { - let { text, dataId } = req.body as { text: string; dataId: string }; + const { text, dataId } = req.body as { text: string; dataId: string }; if (!text || !dataId) { throw new Error('参数错误'); } - text = text.replace(/\n+/g, '\n'); await connectToDatabase(); const { authorization } = req.headers; @@ -24,19 +23,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) if (!DataRecord) { throw new Error('找不到数据集'); } + const replaceText = text.replace(/[\r\n\\n]+/g, ' '); + + // 文本拆分成 chunk + let chunks = replaceText.match(/[^!?.。]+[!?.。]/g) || []; const dataItems: any[] = []; + let splitText = ''; - // 每 1000 字符一组 - for (let i = 0; i <= text.length / 1000; i++) { - dataItems.push({ - userId, - dataId, - type: DataRecord.type, - text: text.slice(i * 1000, (i + 1) * 1000), - status: 1 - }); - } + chunks.forEach((chunk) => { + splitText += chunk; + if (splitText.length >= 980) { + dataItems.push({ + userId, + dataId, + type: DataRecord.type, + text: splitText, + status: 1 + }); + splitText = ''; + } + }); // 批量插入数据 await DataItem.insertMany(dataItems); @@ -49,7 +56,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) } jsonRes(res, { - data: '' + data: { chunks, replaceText } }); } catch (err) { jsonRes(res, {