4.6.7-alpha commit (#743)
Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
@@ -1,87 +1,2 @@
import { MongoDatasetData } from './schema';
import { MongoDatasetTraining } from '../training/schema';
import { delFileByFileIdList, delFileByMetadata } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { MongoDatasetCollection } from '../collection/schema';
import { delay } from '@fastgpt/global/common/system/utils';
import { delImgByFileIdList } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';

/* delete all data by datasetIds */
export async function delDatasetRelevantData({ datasetIds }: { datasetIds: string[] }) {
  datasetIds = datasetIds.map((item) => String(item));

  // delete training data (a training task may still be running)
  await MongoDatasetTraining.deleteMany({
    datasetId: { $in: datasetIds }
  });

  await delay(2000);

  // delete dataset.datas
  await MongoDatasetData.deleteMany({ datasetId: { $in: datasetIds } });
  // delete pg data
  await deleteDatasetDataVector({ datasetIds });

  // delete collections
  await MongoDatasetCollection.deleteMany({
    datasetId: { $in: datasetIds }
  });

  // delete related files
  await Promise.all(
    datasetIds.map((id) => delFileByMetadata({ bucketName: BucketNameEnum.dataset, datasetId: id }))
  );
}
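For orientation, here is a minimal sketch of how a caller might use `delDatasetRelevantData` when a dataset is removed. The surrounding handler, the `MongoDataset` import path, and the call order are assumptions for illustration, not part of this commit:

// Hypothetical caller (illustration only, not from this commit).
// Assumes a MongoDataset model exists for the dataset documents themselves.
import { MongoDataset } from '../schema';
import { delDatasetRelevantData } from './controller';

export async function deleteDatasetById(datasetId: string) {
  // clear training queue, dataset.datas, vectors, collections and files first
  await delDatasetRelevantData({ datasetIds: [datasetId] });
  // then remove the dataset document itself
  await MongoDataset.findByIdAndDelete(datasetId);
}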
/**
 * delete all data by collectionIds
 */
export async function delCollectionRelevantData({
  collectionIds,
  fileIds
}: {
  collectionIds: string[];
  fileIds: string[];
}) {
  collectionIds = collectionIds.filter(Boolean).map((item) => String(item));
  const filterFileIds = fileIds.filter(Boolean).map((item) => String(item));

  // delete training data
  await MongoDatasetTraining.deleteMany({
    collectionId: { $in: collectionIds }
  });

  await delay(2000);

  // delete dataset.datas
  await MongoDatasetData.deleteMany({ collectionId: { $in: collectionIds } });
  // delete pg data
  await deleteDatasetDataVector({ collectionIds });

  // delete collections
  await MongoDatasetCollection.deleteMany({
    _id: { $in: collectionIds }
  });

  // delete files and images
  await Promise.all([
    delImgByFileIdList(filterFileIds),
    delFileByFileIdList({
      bucketName: BucketNameEnum.dataset,
      fileIdList: filterFileIds
    })
  ]);
}
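`delCollectionRelevantData` expects the caller to pass the GridFS `fileIds` alongside the `collectionIds`. A sketch of how those might be gathered, assuming the collection documents carry a `fileId` field (an assumption based on the rest of the codebase, not shown in this diff):

// Hypothetical caller (illustration only, not from this commit).
import { MongoDatasetCollection } from '../collection/schema';
import { delCollectionRelevantData } from './controller';

export async function deleteCollections(collectionIds: string[]) {
  // look up the GridFS file ids referenced by these collections (fileId is assumed)
  const collections = await MongoDatasetCollection.find(
    { _id: { $in: collectionIds } },
    'fileId'
  ).lean();

  await delCollectionRelevantData({
    collectionIds,
    // empty values are filtered out inside delCollectionRelevantData
    fileIds: collections.map((item) => String(item.fileId || ''))
  });
}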
/**
 * delete one data by mongoDataId
 */
export async function delDatasetDataByDataId({
  collectionId,
  mongoDataId
}: {
  collectionId: string;
  mongoDataId: string;
}) {
  await deleteDatasetDataVector({ collectionId, dataIds: [mongoDataId] });
  await MongoDatasetData.findByIdAndDelete(mongoDataId);
}
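One detail worth noting: `delDatasetDataByDataId` removes the vector-store entries before the MongoDB document, the reverse of the bulk helpers above. Presumably this keeps vector search from returning hits whose `dataset.datas` record is already gone; if the vector delete fails, the mongo record survives and the call can simply be retried.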
@@ -10,7 +10,7 @@ import { DatasetColCollectionName } from '../collection/schema';
import {
  DatasetDataIndexTypeEnum,
  DatasetDataIndexTypeMap
-} from '@fastgpt/global/core/dataset/constant';
+} from '@fastgpt/global/core/dataset/constants';

export const DatasetDataCollectionName = 'dataset.datas';
@@ -71,6 +71,7 @@ const DatasetDataSchema = new Schema({
    ],
    default: []
  },

  updateTime: {
    type: Date,
    default: () => new Date()
@@ -85,13 +86,18 @@ const DatasetDataSchema = new Schema({
});

try {
-  DatasetDataSchema.index({ teamId: 1 });
-  DatasetDataSchema.index({ datasetId: 1 });
-  DatasetDataSchema.index({ collectionId: 1 });
-  DatasetDataSchema.index({ updateTime: -1 });
-  DatasetDataSchema.index({ collectionId: 1, q: 1, a: 1 });
+  // same data check
+  DatasetDataSchema.index({ teamId: 1, collectionId: 1, q: 1, a: 1 }, { background: true });
+  // list collection and count data; list data
+  DatasetDataSchema.index(
+    { teamId: 1, datasetId: 1, collectionId: 1, chunkIndex: 1, updateTime: -1 },
+    { background: true }
+  );
  // full text index
-  DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' });
+  DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' }, { background: true });
+  // Recall vectors after data matching
+  DatasetDataSchema.index({ teamId: 1, datasetId: 1, 'indexes.dataId': 1 }, { background: true });
+  DatasetDataSchema.index({ updateTime: 1 }, { background: true });
} catch (error) {
  console.log(error);
}
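The new compound text index puts equality prefixes (`teamId`, `datasetId`) ahead of the `'text'` key on `fullTextToken`, so full-text recall is always scoped to one team and one dataset. A minimal query sketch under that assumption; the function name, score projection and limit are illustrative, not from this commit:

// Hypothetical query (illustration only, not from this commit).
import { MongoDatasetData } from './schema';

export async function fullTextRecall(teamId: string, datasetId: string, searchText: string) {
  // equality on the index prefix fields + $text search over fullTextToken
  return MongoDatasetData.find(
    { teamId, datasetId, $text: { $search: searchText } },
    { score: { $meta: 'textScore' } }
  )
    .sort({ score: { $meta: 'textScore' } })
    .limit(30)
    .lean();
}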