mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 20:37:48 +00:00

Co-authored-by: Mufei <327958099@qq.com> Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
97 lines
3.0 KiB
TypeScript
97 lines
3.0 KiB
TypeScript
import type { NextApiRequest, NextApiResponse } from 'next';
|
||
import { jsonRes } from '@fastgpt/service/common/response';
|
||
import { connectToDatabase } from '@/service/mongo';
|
||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||
import { addLog } from '@fastgpt/service/common/system/log';
|
||
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||
import { addHours } from 'date-fns';
|
||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||
|
||
/*
|
||
检测无效的 Mongo 数据
|
||
异常情况:
|
||
1. 训练过程删除知识库,可能导致还会有新的数据插入,导致无效。
|
||
*/
|
||
|
||
let deleteAmount = 0;
|
||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||
try {
|
||
const { startHour = 3, endHour = 1 } = req.body as { startHour?: number; endHour?: number };
|
||
await authCert({ req, authRoot: true });
|
||
await connectToDatabase();
|
||
|
||
// start: now - maxDay, end: now - endHour
|
||
const start = addHours(new Date(), -startHour);
|
||
const end = addHours(new Date(), -endHour);
|
||
deleteAmount = 0;
|
||
|
||
await checkInvalidCollection(start, end);
|
||
|
||
jsonRes(res, {
|
||
data: deleteAmount,
|
||
message: 'success'
|
||
});
|
||
} catch (error) {
|
||
addLog.error(`check Invalid user error`, error);
|
||
|
||
jsonRes(res, {
|
||
code: 500,
|
||
error
|
||
});
|
||
}
|
||
}
|
||
|
||
export async function checkInvalidCollection(start: Date, end: Date) {
|
||
// 1. 获取时间范围的所有data
|
||
const rows = await MongoDatasetData.find(
|
||
{
|
||
updateTime: {
|
||
$gte: start,
|
||
$lte: end
|
||
}
|
||
},
|
||
'_id teamId collectionId'
|
||
).lean();
|
||
|
||
// 2. 合并所有的collectionId
|
||
const map = new Map<string, { teamId: string; collectionId: string }>();
|
||
for (const item of rows) {
|
||
const collectionId = String(item.collectionId);
|
||
if (!map.has(collectionId)) {
|
||
map.set(collectionId, { teamId: item.teamId, collectionId });
|
||
}
|
||
}
|
||
const list = Array.from(map.values());
|
||
console.log('total collections', list.length);
|
||
let index = 0;
|
||
|
||
for await (const item of list) {
|
||
try {
|
||
// 3. 查看该collection是否存在,不存在,则删除对应的数据
|
||
const collection = await MongoDatasetCollection.findOne({ _id: item.collectionId });
|
||
if (!collection) {
|
||
const result = await Promise.all([
|
||
MongoDatasetTraining.deleteMany({
|
||
teamId: item.teamId,
|
||
collectionId: item.collectionId
|
||
}),
|
||
MongoDatasetData.deleteMany({
|
||
teamId: item.teamId,
|
||
collectionId: item.collectionId
|
||
}),
|
||
deleteDatasetDataVector({
|
||
teamId: item.teamId,
|
||
collectionIds: [String(item.collectionId)]
|
||
})
|
||
]);
|
||
console.log(result);
|
||
console.log('collection is not found', item);
|
||
continue;
|
||
}
|
||
} catch (error) {}
|
||
console.log(++index);
|
||
}
|
||
}
|