mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 12:20:34 +00:00
perf: delete dataset (#3949)
* fix: collection list count
* fix: collection list count
* ai proxy ui
* perf: delete dataset
* perf: add dataset text index
* update doc
This commit is contained in:
@@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { addDays } from 'date-fns';
|
||||
import { MongoDatasetDataText } from '../data/dataTextSchema';
|
||||
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
export const createCollectionAndInsertData = async ({
|
||||
dataset,
|
||||
@@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({
|
||||
relatedImgId?: string;
|
||||
};
|
||||
}[];
|
||||
session: ClientSession;
|
||||
session?: ClientSession;
|
||||
}) => {
|
||||
if (collections.length === 0) return;
|
||||
|
||||
@@ -282,47 +283,55 @@ export async function delCollection({
|
||||
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
|
||||
const collectionIds = collections.map((item) => String(item._id));
|
||||
|
||||
// Delete training data
|
||||
await MongoDatasetTraining.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds },
|
||||
collectionId: { $in: collectionIds }
|
||||
await retryFn(async () => {
|
||||
await Promise.all([
|
||||
// Delete training data
|
||||
MongoDatasetTraining.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds },
|
||||
collectionId: { $in: collectionIds }
|
||||
}),
|
||||
// Delete dataset_data_texts
|
||||
MongoDatasetDataText.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds },
|
||||
collectionId: { $in: collectionIds }
|
||||
}),
|
||||
// Delete dataset_datas
|
||||
MongoDatasetData.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds },
|
||||
collectionId: { $in: collectionIds }
|
||||
}),
|
||||
...(delImg
|
||||
? [
|
||||
delImgByRelatedId({
|
||||
teamId,
|
||||
relateIds: collections
|
||||
.map((item) => item?.metadata?.relatedImgId || '')
|
||||
.filter(Boolean)
|
||||
})
|
||||
]
|
||||
: []),
|
||||
...(delFile
|
||||
? [
|
||||
delFileByFileIdList({
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
|
||||
})
|
||||
]
|
||||
: []),
|
||||
// Delete vector data
|
||||
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
|
||||
]);
|
||||
|
||||
// delete collections
|
||||
await MongoDatasetCollection.deleteMany(
|
||||
{
|
||||
teamId,
|
||||
_id: { $in: collectionIds }
|
||||
},
|
||||
{ session }
|
||||
);
|
||||
});
|
||||
|
||||
if (delImg) {
|
||||
await delImgByRelatedId({
|
||||
teamId,
|
||||
relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean),
|
||||
session
|
||||
});
|
||||
}
|
||||
if (delFile) {
|
||||
await delFileByFileIdList({
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
|
||||
});
|
||||
}
|
||||
|
||||
// Delete dataset_datas
|
||||
await MongoDatasetData.deleteMany(
|
||||
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
|
||||
{ session }
|
||||
);
|
||||
// Delete dataset_data_texts
|
||||
await MongoDatasetDataText.deleteMany(
|
||||
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
|
||||
{ session }
|
||||
);
|
||||
|
||||
// delete collections
|
||||
await MongoDatasetCollection.deleteMany(
|
||||
{
|
||||
teamId,
|
||||
_id: { $in: collectionIds }
|
||||
},
|
||||
{ session }
|
||||
);
|
||||
|
||||
// no session delete: delete files, vector data
|
||||
await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
|
||||
}
|
||||
|
@@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema';
|
||||
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
|
||||
import { MongoDatasetDataText } from './data/dataTextSchema';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
/* ============= dataset ========== */
|
||||
/* find all datasetId by top datasetId */
|
||||
@@ -78,40 +79,39 @@ export async function delDatasetRelevantData({
|
||||
|
||||
const datasetIds = datasets.map((item) => item._id);
|
||||
|
||||
// delete training data
|
||||
await MongoDatasetTraining.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
});
|
||||
|
||||
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
|
||||
const collections = await MongoDatasetCollection.find(
|
||||
{
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
},
|
||||
'_id teamId datasetId fileId metadata',
|
||||
{ session }
|
||||
'_id teamId datasetId fileId metadata'
|
||||
).lean();
|
||||
|
||||
// Delete Image and file
|
||||
await delCollectionRelatedSource({ collections, session });
|
||||
await retryFn(async () => {
|
||||
await Promise.all([
|
||||
// delete training data
|
||||
MongoDatasetTraining.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
}),
|
||||
//Delete dataset_data_texts
|
||||
MongoDatasetDataText.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
}),
|
||||
//delete dataset_datas
|
||||
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
|
||||
// Delete Image and file
|
||||
delCollectionRelatedSource({ collections }),
|
||||
// Delete vector data
|
||||
deleteDatasetDataVector({ teamId, datasetIds })
|
||||
]);
|
||||
});
|
||||
|
||||
// delete collections
|
||||
await MongoDatasetCollection.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
}).session(session);
|
||||
|
||||
// No session delete:
|
||||
// Delete dataset_data_texts
|
||||
await MongoDatasetDataText.deleteMany({
|
||||
teamId,
|
||||
datasetId: { $in: datasetIds }
|
||||
});
|
||||
// delete dataset_datas
|
||||
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });
|
||||
|
||||
// Delete vector data
|
||||
await deleteDatasetDataVector({ teamId, datasetIds });
|
||||
}
|
||||
|
@@ -40,6 +40,7 @@ try {
|
||||
default_language: 'none'
|
||||
}
|
||||
);
|
||||
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
|
||||
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
Reference in New Issue
Block a user