From 7f26b31f53486532fdd21a322c4f36d191096727 Mon Sep 17 00:00:00 2001 From: archer <545436317@qq.com> Date: Mon, 10 Apr 2023 20:58:23 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20csv=E5=AF=BC=E5=85=A5=E5=8E=BB=E9=87=8D?= =?UTF-8?q?;=E6=96=87=E6=A1=A3=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- public/docs/csvSelect.md | 3 +- public/docs/versionIntro.md | 6 ++-- src/api/model.ts | 2 +- src/pages/api/model/data/pushModelDataCsv.ts | 36 ++++++++++++++++--- .../detail/components/SelectCsvModal.tsx | 11 +++--- src/pages/openapi/index.tsx | 5 +-- 6 files changed, 46 insertions(+), 17 deletions(-) diff --git a/public/docs/csvSelect.md b/public/docs/csvSelect.md index c82e5160e..257b98137 100644 --- a/public/docs/csvSelect.md +++ b/public/docs/csvSelect.md @@ -1,4 +1,5 @@ -接受一个csv文件,表格头包含 question 和 answer。question 代表问题,answer 代表答案。 +接受一个csv文件,表格头包含 question 和 answer。question 代表问题,answer 代表答案。 +导入前会进行去重,如果问题和答案完全相同,则不会被导入,所以最终导入的内容可能会比文件的内容少。 | question | answer | | --- | --- | | 什么是 laf | laf 是一个云函数开发平台…… | diff --git a/public/docs/versionIntro.md b/public/docs/versionIntro.md index 2748fba08..50ea8b612 100644 --- a/public/docs/versionIntro.md +++ b/public/docs/versionIntro.md @@ -1,3 +1,3 @@ -## Fast GPT V2.5 -* 内容压缩,替换中文标点符号和多余符号,减少一些上下文tokens。 -* 优化 QA 拆分记账。 \ No newline at end of file +## Fast GPT V2.7 +* FastGpt Api 允许你将 Fast Gpt 的部分功能通过 api 的形式,将知识库接入到自己的应用中,例如:飞书、企业微信、客服助手. +* 通过 csv 文件导入和导出你的问答对。你可以将你的 csv 文件放置在飞书文档上,以便团队共享。 \ No newline at end of file diff --git a/src/api/model.ts b/src/api/model.ts index 286553f4d..a98c5ac00 100644 --- a/src/api/model.ts +++ b/src/api/model.ts @@ -91,7 +91,7 @@ export const postModelDataSplitData = (data: { modelId: string; text: string; pr * json导入数据 */ export const postModelDataCsvData = (modelId: string, data: string[][]) => - POST(`/model/data/pushModelDataCsv`, { modelId, data: data }); + POST(`/model/data/pushModelDataCsv`, { modelId, data: data }); /** * 更新模型数据 diff --git a/src/pages/api/model/data/pushModelDataCsv.ts b/src/pages/api/model/data/pushModelDataCsv.ts index d060941bc..425c3ba99 100644 --- a/src/pages/api/model/data/pushModelDataCsv.ts +++ b/src/pages/api/model/data/pushModelDataCsv.ts @@ -3,9 +3,9 @@ import { jsonRes } from '@/service/response'; import { connectToDatabase, Model } from '@/service/mongo'; import { authToken } from '@/service/utils/tools'; import { generateVector } from '@/service/events/generateVector'; -import { vectorToBuffer, formatVector } from '@/utils/tools'; import { connectRedis } from '@/service/redis'; import { VecModelDataPrefix, ModelDataStatusEnum } from '@/constants/redis'; +import { VecModelDataIdx } from '@/constants/redis'; import { customAlphabet } from 'nanoid'; const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12); @@ -41,9 +41,35 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< throw new Error('无权操作该模型'); } + // 去重 + const searchRes = await Promise.allSettled( + data.map(async ([q, a]) => { + try { + q = q.replace(/\\n/g, '\n'); + a = a.replace(/\\n/g, '\n'); + const redisSearch = await redis.ft.search(VecModelDataIdx, `@q:${q} @text:${a}`, { + RETURN: ['q', 'text'] + }); + if (redisSearch.total > 0) { + return Promise.reject('已经存在'); + } + } catch (error) { + error; + } + return Promise.resolve({ + q, + a + }); + }) + ); + + const filterData = searchRes + .filter((item) => item.status === 'fulfilled') + .map<{ q: string; a: string }>((item: any) => item.value); + // 插入 redis const insertRedisRes = await Promise.allSettled( - data.map((item) => { + filterData.map((item) => { return redis.sendCommand([ 'HMSET', `${VecModelDataPrefix}:${nanoid()}`, @@ -52,9 +78,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< 'modelId', String(modelId), 'q', - item[0], + item.q, 'text', - item[1], + item.a, 'status', ModelDataStatusEnum.waiting ]); @@ -64,7 +90,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< generateVector(); jsonRes(res, { - data: insertRedisRes.filter((item) => item.status === 'rejected').length + data: insertRedisRes.filter((item) => item.status === 'fulfilled').length }); } catch (err) { jsonRes(res, { diff --git a/src/pages/model/detail/components/SelectCsvModal.tsx b/src/pages/model/detail/components/SelectCsvModal.tsx index da01d03e1..041b14120 100644 --- a/src/pages/model/detail/components/SelectCsvModal.tsx +++ b/src/pages/model/detail/components/SelectCsvModal.tsx @@ -64,10 +64,11 @@ const SelectJsonModal = ({ const { mutate, isLoading } = useMutation({ mutationFn: async () => { if (!fileData) return; - await postModelDataCsvData(modelId, fileData); + const res = await postModelDataCsvData(modelId, fileData); toast({ - title: '导入数据成功,需要一段时间训练', - status: 'success' + title: `导入数据成功,最终导入: ${res || 0} 条数据。需要一段时间训练`, + status: 'success', + duration: 4000 }); onClose(); onSuccess(); @@ -90,7 +91,7 @@ const SelectJsonModal = ({ - + 一共 {fileData.length} 组数据 - + {fileData.map((item, index) => ( diff --git a/src/pages/openapi/index.tsx b/src/pages/openapi/index.tsx index 603c36aa4..ab17b2fa0 100644 --- a/src/pages/openapi/index.tsx +++ b/src/pages/openapi/index.tsx @@ -55,10 +55,11 @@ const OpenApi = () => { <> - Open Api + FastGpt Api - Open Api 允许你将 Fast Gpt 的部分功能通过 api 的形式接入到自己的应用中。请注意保管你的 Api + FastGpt Api 允许你将 Fast Gpt 的部分功能通过 api + 的形式接入到自己的应用中,例如:飞书、企业微信、客服助手。请注意保管你的 Api Key,不要泄露!