From e860c56b7783cfaa8945e378171faafa202d2217 Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Mon, 3 Mar 2025 12:49:13 +0800 Subject: [PATCH] perf: delete dataset (#3949) * fix: collection list count * fix: collection list count * ai proxy ui * perf: delete dataset * perf: add dataset text index * update doc --- .../zh-cn/docs/development/upgrading/4823.md | 3 +- .../service/common/file/image/controller.ts | 2 +- .../core/dataset/collection/controller.ts | 95 ++++++++++--------- packages/service/core/dataset/controller.ts | 44 ++++----- .../core/dataset/data/dataTextSchema.ts | 1 + .../account/model/Log/index.tsx | 2 +- .../app/src/pages/api/core/dataset/delete.ts | 12 +-- 7 files changed, 85 insertions(+), 74 deletions(-) diff --git a/docSite/content/zh-cn/docs/development/upgrading/4823.md b/docSite/content/zh-cn/docs/development/upgrading/4823.md index 14c72f345..446245b1e 100644 --- a/docSite/content/zh-cn/docs/development/upgrading/4823.md +++ b/docSite/content/zh-cn/docs/development/upgrading/4823.md @@ -50,4 +50,5 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4823' \ 2. 暂时移除 md 阅读优化,避免链接分割错误。 3. 离开团队时,未刷新成员列表。 4. PPTX 编码错误,导致解析失败。 -5. 删除知识库单条数据时,全文索引未跟随删除。 \ No newline at end of file +5. 删除知识库单条数据时,全文索引未跟随删除。 +6. 修复 Mongo Dataset text 索引在查询数据时未生效。 \ No newline at end of file diff --git a/packages/service/common/file/image/controller.ts b/packages/service/common/file/image/controller.ts index f368e45e0..c2772bcde 100644 --- a/packages/service/common/file/image/controller.ts +++ b/packages/service/common/file/image/controller.ts @@ -118,7 +118,7 @@ export async function delImgByRelatedId({ }: { teamId: string; relateIds: string[]; - session: ClientSession; + session?: ClientSession; }) { if (relateIds.length === 0) return; diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts index 02686ab21..159e62354 100644 --- a/packages/service/core/dataset/collection/controller.ts +++ b/packages/service/core/dataset/collection/controller.ts @@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema'; import { hashStr } from '@fastgpt/global/common/string/tools'; import { addDays } from 'date-fns'; import { MongoDatasetDataText } from '../data/dataTextSchema'; +import { delay, retryFn } from '@fastgpt/global/common/system/utils'; export const createCollectionAndInsertData = async ({ dataset, @@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({ relatedImgId?: string; }; }[]; - session: ClientSession; + session?: ClientSession; }) => { if (collections.length === 0) return; @@ -282,47 +283,55 @@ export async function delCollection({ const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId)))); const collectionIds = collections.map((item) => String(item._id)); - // Delete training data - await MongoDatasetTraining.deleteMany({ - teamId, - datasetId: { $in: datasetIds }, - collectionId: { $in: collectionIds } + await retryFn(async () => { + await Promise.all([ + // Delete training data + MongoDatasetTraining.deleteMany({ + teamId, + datasetId: { $in: datasetIds }, + collectionId: { $in: collectionIds } + }), + // Delete dataset_data_texts + MongoDatasetDataText.deleteMany({ + teamId, + datasetId: { $in: datasetIds }, + collectionId: { $in: collectionIds } + }), + // Delete dataset_datas + MongoDatasetData.deleteMany({ + teamId, + datasetId: { $in: datasetIds }, + collectionId: { $in: collectionIds } + }), + ...(delImg + ? [ + delImgByRelatedId({ + teamId, + relateIds: collections + .map((item) => item?.metadata?.relatedImgId || '') + .filter(Boolean) + }) + ] + : []), + ...(delFile + ? [ + delFileByFileIdList({ + bucketName: BucketNameEnum.dataset, + fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean) + }) + ] + : []), + // Delete vector data + deleteDatasetDataVector({ teamId, datasetIds, collectionIds }) + ]); + + // delete collections + await MongoDatasetCollection.deleteMany( + { + teamId, + _id: { $in: collectionIds } + }, + { session } + ); }); - - if (delImg) { - await delImgByRelatedId({ - teamId, - relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean), - session - }); - } - if (delFile) { - await delFileByFileIdList({ - bucketName: BucketNameEnum.dataset, - fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean) - }); - } - - // Delete dataset_datas - await MongoDatasetData.deleteMany( - { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }, - { session } - ); - // Delete dataset_data_texts - await MongoDatasetDataText.deleteMany( - { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } }, - { session } - ); - - // delete collections - await MongoDatasetCollection.deleteMany( - { - teamId, - _id: { $in: collectionIds } - }, - { session } - ); - - // no session delete: delete files, vector data - await deleteDatasetDataVector({ teamId, datasetIds, collectionIds }); } diff --git a/packages/service/core/dataset/controller.ts b/packages/service/core/dataset/controller.ts index 87f8f78b5..06be050a9 100644 --- a/packages/service/core/dataset/controller.ts +++ b/packages/service/core/dataset/controller.ts @@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema'; import { deleteDatasetDataVector } from '../../common/vectorStore/controller'; import { MongoDatasetDataText } from './data/dataTextSchema'; import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset'; +import { retryFn } from '@fastgpt/global/common/system/utils'; /* ============= dataset ========== */ /* find all datasetId by top datasetId */ @@ -78,40 +79,39 @@ export async function delDatasetRelevantData({ const datasetIds = datasets.map((item) => item._id); - // delete training data - await MongoDatasetTraining.deleteMany({ - teamId, - datasetId: { $in: datasetIds } - }); - // Get _id, teamId, fileId, metadata.relatedImgId for all collections const collections = await MongoDatasetCollection.find( { teamId, datasetId: { $in: datasetIds } }, - '_id teamId datasetId fileId metadata', - { session } + '_id teamId datasetId fileId metadata' ).lean(); - // Delete Image and file - await delCollectionRelatedSource({ collections, session }); + await retryFn(async () => { + await Promise.all([ + // delete training data + MongoDatasetTraining.deleteMany({ + teamId, + datasetId: { $in: datasetIds } + }), + //Delete dataset_data_texts + MongoDatasetDataText.deleteMany({ + teamId, + datasetId: { $in: datasetIds } + }), + //delete dataset_datas + MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }), + // Delete Image and file + delCollectionRelatedSource({ collections }), + // Delete vector data + deleteDatasetDataVector({ teamId, datasetIds }) + ]); + }); // delete collections await MongoDatasetCollection.deleteMany({ teamId, datasetId: { $in: datasetIds } }).session(session); - - // No session delete: - // Delete dataset_data_texts - await MongoDatasetDataText.deleteMany({ - teamId, - datasetId: { $in: datasetIds } - }); - // delete dataset_datas - await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }); - - // Delete vector data - await deleteDatasetDataVector({ teamId, datasetIds }); } diff --git a/packages/service/core/dataset/data/dataTextSchema.ts b/packages/service/core/dataset/data/dataTextSchema.ts index 8bf1ba5d8..ae85b3ef3 100644 --- a/packages/service/core/dataset/data/dataTextSchema.ts +++ b/packages/service/core/dataset/data/dataTextSchema.ts @@ -40,6 +40,7 @@ try { default_language: 'none' } ); + DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 }); DatasetDataTextSchema.index({ dataId: 1 }, { unique: true }); } catch (error) { console.log(error); diff --git a/projects/app/src/pageComponents/account/model/Log/index.tsx b/projects/app/src/pageComponents/account/model/Log/index.tsx index 6da4e2954..7b65f435b 100644 --- a/projects/app/src/pageComponents/account/model/Log/index.tsx +++ b/projects/app/src/pageComponents/account/model/Log/index.tsx @@ -404,7 +404,7 @@ const LogDetail = ({ data, onClose }: { data: LogDetailType; onClose: () => void )} {detailData?.response_body && ( - + Response Body {detailData?.response_body} diff --git a/projects/app/src/pages/api/core/dataset/delete.ts b/projects/app/src/pages/api/core/dataset/delete.ts index 4e7c47388..9f4edae54 100644 --- a/projects/app/src/pages/api/core/dataset/delete.ts +++ b/projects/app/src/pages/api/core/dataset/delete.ts @@ -34,17 +34,17 @@ async function handler(req: NextApiRequest) { }); const datasetIds = datasets.map((d) => d._id); + // delete collection.tags + await MongoDatasetCollectionTags.deleteMany({ + teamId, + datasetId: { $in: datasetIds } + }); + // delete all dataset.data and pg data await mongoSessionRun(async (session) => { // delete dataset data await delDatasetRelevantData({ datasets, session }); - // delete collection.tags - await MongoDatasetCollectionTags.deleteMany({ - teamId, - datasetId: { $in: datasetIds } - }).session(session); - // delete dataset await MongoDataset.deleteMany( {