perf: delete dataset (#3949)

* fix: collection list count

* fix: collection list count

* ai proxy ui

* perf: delete dataset

* perf: add dataset text index

* update doc
This commit is contained in:
Archer
2025-03-03 12:49:13 +08:00
committed by GitHub
parent efac5312b4
commit e860c56b77
7 changed files with 85 additions and 74 deletions

View File

@@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { addDays } from 'date-fns';
import { MongoDatasetDataText } from '../data/dataTextSchema';
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
export const createCollectionAndInsertData = async ({
dataset,
@@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({
relatedImgId?: string;
};
}[];
session: ClientSession;
session?: ClientSession;
}) => {
if (collections.length === 0) return;
@@ -282,47 +283,55 @@ export async function delCollection({
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
const collectionIds = collections.map((item) => String(item._id));
// Delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
await retryFn(async () => {
await Promise.all([
// Delete training data
MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_datas
MongoDatasetData.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
...(delImg
? [
delImgByRelatedId({
teamId,
relateIds: collections
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean)
})
]
: []),
...(delFile
? [
delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
})
]
: []),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
]);
// delete collections
await MongoDatasetCollection.deleteMany(
{
teamId,
_id: { $in: collectionIds }
},
{ session }
);
});
if (delImg) {
await delImgByRelatedId({
teamId,
relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean),
session
});
}
if (delFile) {
await delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
});
}
// Delete dataset_datas
await MongoDatasetData.deleteMany(
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany(
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// delete collections
await MongoDatasetCollection.deleteMany(
{
teamId,
_id: { $in: collectionIds }
},
{ session }
);
// no session delete: delete files, vector data
await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
}

View File

@@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { retryFn } from '@fastgpt/global/common/system/utils';
/* ============= dataset ========== */
/* find all datasetId by top datasetId */
@@ -78,40 +79,39 @@ export async function delDatasetRelevantData({
const datasetIds = datasets.map((item) => item._id);
// delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds }
},
'_id teamId datasetId fileId metadata',
{ session }
'_id teamId datasetId fileId metadata'
).lean();
// Delete Image and file
await delCollectionRelatedSource({ collections, session });
await retryFn(async () => {
await Promise.all([
// delete training data
MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
//Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
//delete dataset_datas
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
// Delete Image and file
delCollectionRelatedSource({ collections }),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds })
]);
});
// delete collections
await MongoDatasetCollection.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}).session(session);
// No session delete:
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// delete dataset_datas
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });
// Delete vector data
await deleteDatasetDataVector({ teamId, datasetIds });
}

View File

@@ -40,6 +40,7 @@ try {
default_language: 'none'
}
);
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
} catch (error) {
console.log(error);