mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 12:20:34 +00:00
perf: delete dataset (#3949)
* fix: collection list count
* fix: collection list count
* ai proxy ui
* perf: delete dataset
* perf: add dataset text index
* update doc
This commit is contained in:
@@ -51,3 +51,4 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4823' \
|
|||||||
3. 离开团队时,未刷新成员列表。
|
3. 离开团队时,未刷新成员列表。
|
||||||
4. PPTX 编码错误,导致解析失败。
|
4. PPTX 编码错误,导致解析失败。
|
||||||
5. 删除知识库单条数据时,全文索引未跟随删除。
|
5. 删除知识库单条数据时,全文索引未跟随删除。
|
||||||
|
6. 修复 Mongo Dataset text 索引在查询数据时未生效。
|
@@ -118,7 +118,7 @@ export async function delImgByRelatedId({
|
|||||||
}: {
|
}: {
|
||||||
teamId: string;
|
teamId: string;
|
||||||
relateIds: string[];
|
relateIds: string[];
|
||||||
session: ClientSession;
|
session?: ClientSession;
|
||||||
}) {
|
}) {
|
||||||
if (relateIds.length === 0) return;
|
if (relateIds.length === 0) return;
|
||||||
|
|
||||||
|
@@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema';
|
|||||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||||
import { addDays } from 'date-fns';
|
import { addDays } from 'date-fns';
|
||||||
import { MongoDatasetDataText } from '../data/dataTextSchema';
|
import { MongoDatasetDataText } from '../data/dataTextSchema';
|
||||||
|
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
|
||||||
|
|
||||||
export const createCollectionAndInsertData = async ({
|
export const createCollectionAndInsertData = async ({
|
||||||
dataset,
|
dataset,
|
||||||
@@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({
|
|||||||
relatedImgId?: string;
|
relatedImgId?: string;
|
||||||
};
|
};
|
||||||
}[];
|
}[];
|
||||||
session: ClientSession;
|
session?: ClientSession;
|
||||||
}) => {
|
}) => {
|
||||||
if (collections.length === 0) return;
|
if (collections.length === 0) return;
|
||||||
|
|
||||||
@@ -282,47 +283,55 @@ export async function delCollection({
|
|||||||
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
|
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
|
||||||
const collectionIds = collections.map((item) => String(item._id));
|
const collectionIds = collections.map((item) => String(item._id));
|
||||||
|
|
||||||
// Delete training data
|
await retryFn(async () => {
|
||||||
await MongoDatasetTraining.deleteMany({
|
await Promise.all([
|
||||||
teamId,
|
// Delete training data
|
||||||
datasetId: { $in: datasetIds },
|
MongoDatasetTraining.deleteMany({
|
||||||
collectionId: { $in: collectionIds }
|
teamId,
|
||||||
|
datasetId: { $in: datasetIds },
|
||||||
|
collectionId: { $in: collectionIds }
|
||||||
|
}),
|
||||||
|
// Delete dataset_data_texts
|
||||||
|
MongoDatasetDataText.deleteMany({
|
||||||
|
teamId,
|
||||||
|
datasetId: { $in: datasetIds },
|
||||||
|
collectionId: { $in: collectionIds }
|
||||||
|
}),
|
||||||
|
// Delete dataset_datas
|
||||||
|
MongoDatasetData.deleteMany({
|
||||||
|
teamId,
|
||||||
|
datasetId: { $in: datasetIds },
|
||||||
|
collectionId: { $in: collectionIds }
|
||||||
|
}),
|
||||||
|
...(delImg
|
||||||
|
? [
|
||||||
|
delImgByRelatedId({
|
||||||
|
teamId,
|
||||||
|
relateIds: collections
|
||||||
|
.map((item) => item?.metadata?.relatedImgId || '')
|
||||||
|
.filter(Boolean)
|
||||||
|
})
|
||||||
|
]
|
||||||
|
: []),
|
||||||
|
...(delFile
|
||||||
|
? [
|
||||||
|
delFileByFileIdList({
|
||||||
|
bucketName: BucketNameEnum.dataset,
|
||||||
|
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
|
||||||
|
})
|
||||||
|
]
|
||||||
|
: []),
|
||||||
|
// Delete vector data
|
||||||
|
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
|
||||||
|
]);
|
||||||
|
|
||||||
|
// delete collections
|
||||||
|
await MongoDatasetCollection.deleteMany(
|
||||||
|
{
|
||||||
|
teamId,
|
||||||
|
_id: { $in: collectionIds }
|
||||||
|
},
|
||||||
|
{ session }
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (delImg) {
|
|
||||||
await delImgByRelatedId({
|
|
||||||
teamId,
|
|
||||||
relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean),
|
|
||||||
session
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if (delFile) {
|
|
||||||
await delFileByFileIdList({
|
|
||||||
bucketName: BucketNameEnum.dataset,
|
|
||||||
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete dataset_datas
|
|
||||||
await MongoDatasetData.deleteMany(
|
|
||||||
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
|
|
||||||
{ session }
|
|
||||||
);
|
|
||||||
// Delete dataset_data_texts
|
|
||||||
await MongoDatasetDataText.deleteMany(
|
|
||||||
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
|
|
||||||
{ session }
|
|
||||||
);
|
|
||||||
|
|
||||||
// delete collections
|
|
||||||
await MongoDatasetCollection.deleteMany(
|
|
||||||
{
|
|
||||||
teamId,
|
|
||||||
_id: { $in: collectionIds }
|
|
||||||
},
|
|
||||||
{ session }
|
|
||||||
);
|
|
||||||
|
|
||||||
// no session delete: delete files, vector data
|
|
||||||
await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
|
|
||||||
}
|
}
|
||||||
|
@@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema';
|
|||||||
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
|
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
|
||||||
import { MongoDatasetDataText } from './data/dataTextSchema';
|
import { MongoDatasetDataText } from './data/dataTextSchema';
|
||||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||||
|
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||||
|
|
||||||
/* ============= dataset ========== */
|
/* ============= dataset ========== */
|
||||||
/* find all datasetId by top datasetId */
|
/* find all datasetId by top datasetId */
|
||||||
@@ -78,40 +79,39 @@ export async function delDatasetRelevantData({
|
|||||||
|
|
||||||
const datasetIds = datasets.map((item) => item._id);
|
const datasetIds = datasets.map((item) => item._id);
|
||||||
|
|
||||||
// delete training data
|
|
||||||
await MongoDatasetTraining.deleteMany({
|
|
||||||
teamId,
|
|
||||||
datasetId: { $in: datasetIds }
|
|
||||||
});
|
|
||||||
|
|
||||||
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
|
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
|
||||||
const collections = await MongoDatasetCollection.find(
|
const collections = await MongoDatasetCollection.find(
|
||||||
{
|
{
|
||||||
teamId,
|
teamId,
|
||||||
datasetId: { $in: datasetIds }
|
datasetId: { $in: datasetIds }
|
||||||
},
|
},
|
||||||
'_id teamId datasetId fileId metadata',
|
'_id teamId datasetId fileId metadata'
|
||||||
{ session }
|
|
||||||
).lean();
|
).lean();
|
||||||
|
|
||||||
// Delete Image and file
|
await retryFn(async () => {
|
||||||
await delCollectionRelatedSource({ collections, session });
|
await Promise.all([
|
||||||
|
// delete training data
|
||||||
|
MongoDatasetTraining.deleteMany({
|
||||||
|
teamId,
|
||||||
|
datasetId: { $in: datasetIds }
|
||||||
|
}),
|
||||||
|
//Delete dataset_data_texts
|
||||||
|
MongoDatasetDataText.deleteMany({
|
||||||
|
teamId,
|
||||||
|
datasetId: { $in: datasetIds }
|
||||||
|
}),
|
||||||
|
//delete dataset_datas
|
||||||
|
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
|
||||||
|
// Delete Image and file
|
||||||
|
delCollectionRelatedSource({ collections }),
|
||||||
|
// Delete vector data
|
||||||
|
deleteDatasetDataVector({ teamId, datasetIds })
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
// delete collections
|
// delete collections
|
||||||
await MongoDatasetCollection.deleteMany({
|
await MongoDatasetCollection.deleteMany({
|
||||||
teamId,
|
teamId,
|
||||||
datasetId: { $in: datasetIds }
|
datasetId: { $in: datasetIds }
|
||||||
}).session(session);
|
}).session(session);
|
||||||
|
|
||||||
// No session delete:
|
|
||||||
// Delete dataset_data_texts
|
|
||||||
await MongoDatasetDataText.deleteMany({
|
|
||||||
teamId,
|
|
||||||
datasetId: { $in: datasetIds }
|
|
||||||
});
|
|
||||||
// delete dataset_datas
|
|
||||||
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });
|
|
||||||
|
|
||||||
// Delete vector data
|
|
||||||
await deleteDatasetDataVector({ teamId, datasetIds });
|
|
||||||
}
|
}
|
||||||
|
@@ -40,6 +40,7 @@ try {
|
|||||||
default_language: 'none'
|
default_language: 'none'
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
|
||||||
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
|
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
|
@@ -404,7 +404,7 @@ const LogDetail = ({ data, onClose }: { data: LogDetailType; onClose: () => void
|
|||||||
</GridItem>
|
</GridItem>
|
||||||
)}
|
)}
|
||||||
{detailData?.response_body && (
|
{detailData?.response_body && (
|
||||||
<GridItem display={'flex'} borderBottomWidth="1px" borderRightWidth="1px" colSpan={2}>
|
<GridItem display={'flex'} colSpan={2}>
|
||||||
<Title>Response Body</Title>
|
<Title>Response Body</Title>
|
||||||
<Container>{detailData?.response_body}</Container>
|
<Container>{detailData?.response_body}</Container>
|
||||||
</GridItem>
|
</GridItem>
|
||||||
|
@@ -34,17 +34,17 @@ async function handler(req: NextApiRequest) {
|
|||||||
});
|
});
|
||||||
const datasetIds = datasets.map((d) => d._id);
|
const datasetIds = datasets.map((d) => d._id);
|
||||||
|
|
||||||
|
// delete collection.tags
|
||||||
|
await MongoDatasetCollectionTags.deleteMany({
|
||||||
|
teamId,
|
||||||
|
datasetId: { $in: datasetIds }
|
||||||
|
});
|
||||||
|
|
||||||
// delete all dataset.data and pg data
|
// delete all dataset.data and pg data
|
||||||
await mongoSessionRun(async (session) => {
|
await mongoSessionRun(async (session) => {
|
||||||
// delete dataset data
|
// delete dataset data
|
||||||
await delDatasetRelevantData({ datasets, session });
|
await delDatasetRelevantData({ datasets, session });
|
||||||
|
|
||||||
// delete collection.tags
|
|
||||||
await MongoDatasetCollectionTags.deleteMany({
|
|
||||||
teamId,
|
|
||||||
datasetId: { $in: datasetIds }
|
|
||||||
}).session(session);
|
|
||||||
|
|
||||||
// delete dataset
|
// delete dataset
|
||||||
await MongoDataset.deleteMany(
|
await MongoDataset.deleteMany(
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user