perf: delete dataset (#3949)

* fix: collection list count

* fix: collection list count

* ai proxy ui

* perf: delete dataset

* perf: add dataset text index

* update doc
This commit is contained in:
Archer
2025-03-03 12:49:13 +08:00
committed by GitHub
parent efac5312b4
commit e860c56b77
7 changed files with 85 additions and 74 deletions

View File

@@ -50,4 +50,5 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4823' \
2. 暂时移除 md 阅读优化,避免链接分割错误。
3. 离开团队时,未刷新成员列表。
4. PPTX 编码错误,导致解析失败。
5. 删除知识库单条数据时,全文索引未跟随删除。
6. 修复 Mongo Dataset text 索引在查询数据时未生效。

View File

@@ -118,7 +118,7 @@ export async function delImgByRelatedId({
}: { }: {
teamId: string; teamId: string;
relateIds: string[]; relateIds: string[];
session: ClientSession; session?: ClientSession;
}) { }) {
if (relateIds.length === 0) return; if (relateIds.length === 0) return;

View File

@@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema';
import { hashStr } from '@fastgpt/global/common/string/tools'; import { hashStr } from '@fastgpt/global/common/string/tools';
import { addDays } from 'date-fns'; import { addDays } from 'date-fns';
import { MongoDatasetDataText } from '../data/dataTextSchema'; import { MongoDatasetDataText } from '../data/dataTextSchema';
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
export const createCollectionAndInsertData = async ({ export const createCollectionAndInsertData = async ({
dataset, dataset,
@@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({
relatedImgId?: string; relatedImgId?: string;
}; };
}[]; }[];
session: ClientSession; session?: ClientSession;
}) => { }) => {
if (collections.length === 0) return; if (collections.length === 0) return;
@@ -282,47 +283,55 @@ export async function delCollection({
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId)))); const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
const collectionIds = collections.map((item) => String(item._id)); const collectionIds = collections.map((item) => String(item._id));
// Delete training data await retryFn(async () => {
await MongoDatasetTraining.deleteMany({ await Promise.all([
teamId, // Delete training data
datasetId: { $in: datasetIds }, MongoDatasetTraining.deleteMany({
collectionId: { $in: collectionIds } teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
// Delete dataset_datas
MongoDatasetData.deleteMany({
teamId,
datasetId: { $in: datasetIds },
collectionId: { $in: collectionIds }
}),
...(delImg
? [
delImgByRelatedId({
teamId,
relateIds: collections
.map((item) => item?.metadata?.relatedImgId || '')
.filter(Boolean)
})
]
: []),
...(delFile
? [
delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
})
]
: []),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
]);
// delete collections
await MongoDatasetCollection.deleteMany(
{
teamId,
_id: { $in: collectionIds }
},
{ session }
);
}); });
if (delImg) {
await delImgByRelatedId({
teamId,
relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean),
session
});
}
if (delFile) {
await delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
});
}
// Delete dataset_datas
await MongoDatasetData.deleteMany(
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany(
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
{ session }
);
// delete collections
await MongoDatasetCollection.deleteMany(
{
teamId,
_id: { $in: collectionIds }
},
{ session }
);
// no session delete: delete files, vector data
await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
} }

View File

@@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorStore/controller'; import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
import { MongoDatasetDataText } from './data/dataTextSchema'; import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset'; import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { retryFn } from '@fastgpt/global/common/system/utils';
/* ============= dataset ========== */ /* ============= dataset ========== */
/* find all datasetId by top datasetId */ /* find all datasetId by top datasetId */
@@ -78,40 +79,39 @@ export async function delDatasetRelevantData({
const datasetIds = datasets.map((item) => item._id); const datasetIds = datasets.map((item) => item._id);
// delete training data
await MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// Get _id, teamId, fileId, metadata.relatedImgId for all collections // Get _id, teamId, fileId, metadata.relatedImgId for all collections
const collections = await MongoDatasetCollection.find( const collections = await MongoDatasetCollection.find(
{ {
teamId, teamId,
datasetId: { $in: datasetIds } datasetId: { $in: datasetIds }
}, },
'_id teamId datasetId fileId metadata', '_id teamId datasetId fileId metadata'
{ session }
).lean(); ).lean();
// Delete Image and file await retryFn(async () => {
await delCollectionRelatedSource({ collections, session }); await Promise.all([
// delete training data
MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
//Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
//delete dataset_datas
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
// Delete Image and file
delCollectionRelatedSource({ collections }),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds })
]);
});
// delete collections // delete collections
await MongoDatasetCollection.deleteMany({ await MongoDatasetCollection.deleteMany({
teamId, teamId,
datasetId: { $in: datasetIds } datasetId: { $in: datasetIds }
}).session(session); }).session(session);
// No session delete:
// Delete dataset_data_texts
await MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// delete dataset_datas
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });
// Delete vector data
await deleteDatasetDataVector({ teamId, datasetIds });
} }

View File

@@ -40,6 +40,7 @@ try {
default_language: 'none' default_language: 'none'
} }
); );
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true }); DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
} catch (error) { } catch (error) {
console.log(error); console.log(error);

View File

@@ -404,7 +404,7 @@ const LogDetail = ({ data, onClose }: { data: LogDetailType; onClose: () => void
</GridItem> </GridItem>
)} )}
{detailData?.response_body && ( {detailData?.response_body && (
<GridItem display={'flex'} borderBottomWidth="1px" borderRightWidth="1px" colSpan={2}> <GridItem display={'flex'} colSpan={2}>
<Title>Response Body</Title> <Title>Response Body</Title>
<Container>{detailData?.response_body}</Container> <Container>{detailData?.response_body}</Container>
</GridItem> </GridItem>

View File

@@ -34,17 +34,17 @@ async function handler(req: NextApiRequest) {
}); });
const datasetIds = datasets.map((d) => d._id); const datasetIds = datasets.map((d) => d._id);
// delete collection.tags
await MongoDatasetCollectionTags.deleteMany({
teamId,
datasetId: { $in: datasetIds }
});
// delete all dataset.data and pg data // delete all dataset.data and pg data
await mongoSessionRun(async (session) => { await mongoSessionRun(async (session) => {
// delete dataset data // delete dataset data
await delDatasetRelevantData({ datasets, session }); await delDatasetRelevantData({ datasets, session });
// delete collection.tags
await MongoDatasetCollectionTags.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}).session(session);
// delete dataset // delete dataset
await MongoDataset.deleteMany( await MongoDataset.deleteMany(
{ {