4.6.7-alpha commit (#743)

Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-01-19 11:17:28 +08:00
committed by GitHub
parent 8ee7407c4c
commit c031e6dcc9
324 changed files with 8509 additions and 4757 deletions

View File

@@ -1,87 +1,2 @@
import { MongoDatasetData } from './schema';
import { MongoDatasetTraining } from '../training/schema';
import { delFileByFileIdList, delFileByMetadata } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { MongoDatasetCollection } from '../collection/schema';
import { delay } from '@fastgpt/global/common/system/utils';
import { delImgByFileIdList } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';
/* delete all data by datasetIds */
/**
 * Delete every piece of data that belongs to the given datasets:
 * queued training jobs, dataset.datas documents, vector-store rows,
 * collection documents and related gridfs files.
 */
export async function delDatasetRelevantData({ datasetIds }: { datasetIds: string[] }) {
  // Normalize ObjectId-like values to plain strings for the queries below.
  const ids = datasetIds.map((item) => String(item));

  // Remove training data first (there could be a training mission in flight).
  await MongoDatasetTraining.deleteMany({
    datasetId: { $in: ids }
  });
  // Grace period so in-flight training workers notice the deletion.
  await delay(2000);

  // Remove the dataset.datas documents.
  await MongoDatasetData.deleteMany({ datasetId: { $in: ids } });

  // Remove the corresponding vectors (pg vector store).
  await deleteDatasetDataVector({ datasetIds: ids });

  // Remove the collection documents.
  await MongoDatasetCollection.deleteMany({
    datasetId: { $in: ids }
  });

  // Finally remove related files, one request per dataset, in parallel.
  await Promise.all(
    ids.map((datasetId) =>
      delFileByMetadata({ bucketName: BucketNameEnum.dataset, datasetId })
    )
  );
}
/**
* delete all data by collectionIds
*/
/**
 * Delete all data tied to the given collections: training jobs,
 * dataset.datas documents, vectors, the collection documents themselves,
 * and any associated files/images.
 */
export async function delCollectionRelevantData({
  collectionIds,
  fileIds
}: {
  collectionIds: string[];
  fileIds: string[];
}) {
  // Drop falsy entries and coerce ObjectId-like values to strings.
  const colIds = collectionIds.filter(Boolean).map((item) => String(item));
  const filterFileIds = fileIds.filter(Boolean).map((item) => String(item));

  // Remove queued training data for these collections.
  await MongoDatasetTraining.deleteMany({
    collectionId: { $in: colIds }
  });
  // Grace period so in-flight training workers notice the deletion.
  await delay(2000);

  // Remove the dataset.datas documents.
  await MongoDatasetData.deleteMany({ collectionId: { $in: colIds } });

  // Remove the corresponding vectors (pg vector store).
  await deleteDatasetDataVector({ collectionIds: colIds });

  // Remove the collection documents themselves.
  await MongoDatasetCollection.deleteMany({
    _id: { $in: colIds }
  });

  // Remove files and images concurrently — they are independent stores.
  await Promise.all([
    delImgByFileIdList(filterFileIds),
    delFileByFileIdList({
      bucketName: BucketNameEnum.dataset,
      fileIdList: filterFileIds
    })
  ]);
}
/**
* delete one data by mongoDataId
*/
/**
 * Delete a single data item by its mongo id: first its vectors in the
 * vector store, then the mongo document itself.
 */
export async function delDatasetDataByDataId({
  collectionId,
  mongoDataId
}: {
  collectionId: string;
  mongoDataId: string;
}) {
  // Vectors go first so a crash between the two steps cannot leave
  // orphaned vectors pointing at a deleted mongo document.
  await deleteDatasetDataVector({ collectionId, dataIds: [mongoDataId] });
  // Then drop the mongo record.
  await MongoDatasetData.findByIdAndDelete(mongoDataId);
}

View File

@@ -10,7 +10,7 @@ import { DatasetColCollectionName } from '../collection/schema';
import {
DatasetDataIndexTypeEnum,
DatasetDataIndexTypeMap
} from '@fastgpt/global/core/dataset/constant';
} from '@fastgpt/global/core/dataset/constants';
export const DatasetDataCollectionName = 'dataset.datas';
@@ -71,6 +71,7 @@ const DatasetDataSchema = new Schema({
],
default: []
},
updateTime: {
type: Date,
default: () => new Date()
@@ -85,13 +86,18 @@ const DatasetDataSchema = new Schema({
});
try {
  // NOTE(review): the diff view had merged the pre- and post-commit index
  // definitions; the superseded single-field indexes and the old
  // `{ datasetId, fullTextToken: 'text' }` index are removed here. MongoDB
  // permits only ONE text index per collection, so keeping both text
  // indexes would make index creation fail.

  // same data check
  DatasetDataSchema.index({ teamId: 1, collectionId: 1, q: 1, a: 1 }, { background: true });
  // list collection and count data; list data
  DatasetDataSchema.index(
    { teamId: 1, datasetId: 1, collectionId: 1, chunkIndex: 1, updateTime: -1 },
    { background: true }
  );
  // full text index (only one text index is allowed per collection)
  DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' }, { background: true });
  // Recall vectors after data matching
  DatasetDataSchema.index({ teamId: 1, datasetId: 1, 'indexes.dataId': 1 }, { background: true });
  // Time-ordered scans / cleanup by update time.
  DatasetDataSchema.index({ updateTime: 1 }, { background: true });
} catch (error) {
  // Index registration is best-effort at schema definition time; surface
  // the problem without crashing module load.
  console.log(error);
}