mirror of
https://github.com/labring/FastGPT.git
synced 2025-08-03 13:38:00 +00:00
4.7.1-alpha (#1120)
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -2,12 +2,14 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import {
|
||||
checkInvalidDatasetFiles,
|
||||
checkInvalidDatasetData,
|
||||
checkInvalidVector
|
||||
} from '@/service/common/system/cronTask';
|
||||
|
||||
let deleteImageAmount = 0;
|
||||
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
||||
@@ -60,11 +62,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
(async () => {
|
||||
try {
|
||||
console.log('执行脏数据清理任务');
|
||||
const end = addHours(new Date(), -1);
|
||||
// 360天 ~ 2小时前
|
||||
const end = addHours(new Date(), -2);
|
||||
const start = addHours(new Date(), -360 * 24);
|
||||
await checkFiles(start, end);
|
||||
await checkInvalidDatasetFiles(start, end);
|
||||
await checkInvalidImg(start, end);
|
||||
await checkInvalidCollection(start, end);
|
||||
await checkInvalidDatasetData(start, end);
|
||||
await checkInvalidVector(start, end);
|
||||
console.log('执行脏数据清理任务完毕');
|
||||
} catch (error) {
|
||||
|
@@ -1,98 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
|
||||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
|
||||
import { addHours } from 'date-fns';
|
||||
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
|
||||
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
|
||||
let deleteImageAmount = 0;
|
||||
export async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
||||
const images = await MongoImage.find(
|
||||
{
|
||||
createTime: {
|
||||
$gte: start,
|
||||
$lte: end
|
||||
},
|
||||
'metadata.relatedId': { $exists: true }
|
||||
},
|
||||
'_id teamId metadata'
|
||||
);
|
||||
console.log('total images', images.length);
|
||||
let index = 0;
|
||||
|
||||
for await (const image of images) {
|
||||
try {
|
||||
// 1. 检测是否有对应的集合
|
||||
const collection = await MongoDatasetCollection.findOne(
|
||||
{
|
||||
teamId: image.teamId,
|
||||
'metadata.relatedImgId': image.metadata?.relatedId
|
||||
},
|
||||
'_id'
|
||||
);
|
||||
|
||||
if (!collection) {
|
||||
await image.deleteOne();
|
||||
deleteImageAmount++;
|
||||
}
|
||||
|
||||
index++;
|
||||
|
||||
index % 100 === 0 && console.log(index);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
||||
}
|
||||
|
||||
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
await authCert({ req, authRoot: true });
|
||||
|
||||
// 检查 usage 是否有记录
|
||||
const totalUsages = await MongoUsage.countDocuments();
|
||||
if (totalUsages === 0) {
|
||||
// 重命名 bills 集合成 usages
|
||||
await connectionMongo.connection.db.renameCollection('bills', 'usages', {
|
||||
// 强制
|
||||
dropTarget: true
|
||||
});
|
||||
}
|
||||
|
||||
(async () => {
|
||||
try {
|
||||
console.log('执行脏数据清理任务');
|
||||
const end = addHours(new Date(), -1);
|
||||
const start = addHours(new Date(), -360 * 24);
|
||||
await checkFiles(start, end);
|
||||
await checkInvalidImg(start, end);
|
||||
await checkInvalidCollection(start, end);
|
||||
await checkInvalidVector(start, end);
|
||||
console.log('执行脏数据清理任务完毕');
|
||||
} catch (error) {
|
||||
console.log('执行脏数据清理任务出错了');
|
||||
}
|
||||
})();
|
||||
|
||||
jsonRes(res, {
|
||||
message: 'success'
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
@@ -4,7 +4,7 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authFile } from '@fastgpt/service/support/permission/auth/file';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
|
||||
@@ -19,7 +19,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
|
||||
const { teamId } = await authFile({ req, authToken: true, fileId });
|
||||
|
||||
const { rawText } = await readFileContent({
|
||||
const { rawText } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
|
@@ -90,6 +90,7 @@ export async function initSystemConfig() {
|
||||
// get config from database
|
||||
const config: FastGPTConfigFileType = {
|
||||
feConfigs: {
|
||||
...fileRes?.feConfigs,
|
||||
...defaultFeConfigs,
|
||||
...(dbConfig.feConfigs || {}),
|
||||
isPlus: !!FastGPTProUrl
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
@@ -36,7 +36,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
});
|
||||
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContent({
|
||||
const { rawText, filename } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
|
@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import {
|
||||
delFileByFileIdList,
|
||||
readFileContent
|
||||
readFileContentFromMongo
|
||||
} from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
@@ -47,7 +47,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
});
|
||||
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContent({
|
||||
const { rawText, filename } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
|
@@ -4,7 +4,7 @@ import { connectToDatabase } from '@/service/mongo';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { authFile } from '@fastgpt/service/support/permission/auth/file';
|
||||
import { PostPreviewFilesChunksProps } from '@/global/core/dataset/api';
|
||||
import { readFileContent } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
|
||||
@@ -28,7 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
|
||||
const fileId = String(file._id);
|
||||
|
||||
const { rawText } = await readFileContent({
|
||||
const { rawText } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
@@ -53,7 +53,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
if (type === ImportDataSourceEnum.csvTable) {
|
||||
const { file, teamId } = await authFile({ req, authToken: true, fileId: sourceId });
|
||||
const fileId = String(file._id);
|
||||
const { rawText } = await readFileContent({
|
||||
const { rawText } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId,
|
||||
|
69
projects/app/src/pages/api/lafApi/[...path].ts
Normal file
69
projects/app/src/pages/api/lafApi/[...path].ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { request } from 'https';
|
||||
import { FastGPTProUrl } from '@fastgpt/service/common/system/constants';
|
||||
import url from 'url';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { path = [], ...query } = req.query as any;
|
||||
|
||||
const queryStr = new URLSearchParams(query).toString();
|
||||
const requestPath = queryStr
|
||||
? `/${path?.join('/')}?${new URLSearchParams(query).toString()}`
|
||||
: `/${path?.join('/')}`;
|
||||
|
||||
if (!requestPath) {
|
||||
throw new Error('url is empty');
|
||||
}
|
||||
|
||||
const lafEnv = global.feConfigs?.lafEnv;
|
||||
|
||||
if (!lafEnv) {
|
||||
throw new Error('lafEnv is empty');
|
||||
}
|
||||
|
||||
const parsedUrl = url.parse(lafEnv);
|
||||
delete req.headers?.cookie;
|
||||
delete req.headers?.host;
|
||||
delete req.headers?.origin;
|
||||
|
||||
const requestResult = request({
|
||||
protocol: parsedUrl.protocol,
|
||||
hostname: parsedUrl.hostname,
|
||||
port: parsedUrl.port,
|
||||
path: requestPath,
|
||||
method: req.method,
|
||||
headers: req.headers,
|
||||
timeout: 30000
|
||||
});
|
||||
|
||||
req.pipe(requestResult);
|
||||
|
||||
requestResult.on('response', (response) => {
|
||||
Object.keys(response.headers).forEach((key) => {
|
||||
// @ts-ignore
|
||||
res.setHeader(key, response.headers[key]);
|
||||
});
|
||||
response.statusCode && res.writeHead(response.statusCode);
|
||||
response.pipe(res);
|
||||
});
|
||||
requestResult.on('error', (e) => {
|
||||
res.send(e);
|
||||
res.end();
|
||||
});
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: false
|
||||
}
|
||||
};
|
@@ -11,7 +11,7 @@ import { MongoTeamMember } from '@fastgpt/service/support/user/team/teamMemberSc
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
await connectToDatabase();
|
||||
const { avatar, timezone, openaiAccount } = req.body as UserUpdateParams;
|
||||
const { avatar, timezone, openaiAccount, lafAccount } = req.body as UserUpdateParams;
|
||||
|
||||
const { tmbId } = await authCert({ req, authToken: true });
|
||||
const tmb = await MongoTeamMember.findById(tmbId);
|
||||
@@ -47,7 +47,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
{
|
||||
...(avatar && { avatar }),
|
||||
...(timezone && { timezone }),
|
||||
openaiAccount: openaiAccount?.key ? openaiAccount : null
|
||||
openaiAccount: openaiAccount?.key ? openaiAccount : null,
|
||||
lafAccount: lafAccount?.token ? lafAccount : null
|
||||
}
|
||||
);
|
||||
|
||||
|
@@ -1,91 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import {
|
||||
delFileByFileIdList,
|
||||
getGFSCollection
|
||||
} from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import { addHours } from 'date-fns';
|
||||
|
||||
/*
|
||||
check dataset.files data. If there is no match in dataset.collections, delete it
|
||||
可能异常情况
|
||||
1. 上传了文件,未成功创建集合
|
||||
*/
|
||||
let deleteFileAmount = 0;
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
const { startHour = 24, endHour = 1 } = req.body as {
|
||||
startHour?: number;
|
||||
endHour?: number;
|
||||
limit?: number;
|
||||
};
|
||||
await authCert({ req, authRoot: true });
|
||||
await connectToDatabase();
|
||||
|
||||
// start: now - maxDay, end: now - 3 day
|
||||
const start = addHours(new Date(), -startHour);
|
||||
const end = addHours(new Date(), -endHour);
|
||||
deleteFileAmount = 0;
|
||||
console.log(start, end);
|
||||
|
||||
await checkFiles(start, end);
|
||||
|
||||
jsonRes(res, {
|
||||
data: deleteFileAmount,
|
||||
message: 'success'
|
||||
});
|
||||
} catch (error) {
|
||||
addLog.error(`check valid dataset files error`, error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export async function checkFiles(start: Date, end: Date) {
|
||||
const collection = getGFSCollection('dataset');
|
||||
const where = {
|
||||
uploadDate: { $gte: start, $lte: end }
|
||||
};
|
||||
|
||||
// 1. get all file _id
|
||||
const files = await collection
|
||||
.find(where, {
|
||||
projection: {
|
||||
metadata: 1,
|
||||
_id: 1
|
||||
}
|
||||
})
|
||||
.toArray();
|
||||
console.log('total files', files.length);
|
||||
|
||||
let index = 0;
|
||||
for await (const file of files) {
|
||||
try {
|
||||
// 2. find fileId in dataset.collections
|
||||
const hasCollection = await MongoDatasetCollection.countDocuments({
|
||||
teamId: file.metadata.teamId,
|
||||
fileId: file._id
|
||||
});
|
||||
|
||||
// 3. if not found, delete file
|
||||
if (hasCollection === 0) {
|
||||
await delFileByFileIdList({ bucketName: 'dataset', fileIdList: [String(file._id)] });
|
||||
console.log('delete file', file._id);
|
||||
deleteFileAmount++;
|
||||
}
|
||||
index++;
|
||||
index % 100 === 0 && console.log(index);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
}
|
||||
console.log(`检测完成,共删除 ${deleteFileAmount} 个无效文件`);
|
||||
}
|
@@ -1,96 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { addHours } from 'date-fns';
|
||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
|
||||
|
||||
/*
|
||||
检测无效的 Mongo 数据
|
||||
异常情况:
|
||||
1. 训练过程删除知识库,可能导致还会有新的数据插入,导致无效。
|
||||
*/
|
||||
|
||||
let deleteAmount = 0;
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
const { startHour = 3, endHour = 1 } = req.body as { startHour?: number; endHour?: number };
|
||||
await authCert({ req, authRoot: true });
|
||||
await connectToDatabase();
|
||||
|
||||
// start: now - maxDay, end: now - endHour
|
||||
const start = addHours(new Date(), -startHour);
|
||||
const end = addHours(new Date(), -endHour);
|
||||
deleteAmount = 0;
|
||||
|
||||
await checkInvalidCollection(start, end);
|
||||
|
||||
jsonRes(res, {
|
||||
data: deleteAmount,
|
||||
message: 'success'
|
||||
});
|
||||
} catch (error) {
|
||||
addLog.error(`check Invalid user error`, error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export async function checkInvalidCollection(start: Date, end: Date) {
|
||||
// 1. 获取时间范围的所有data
|
||||
const rows = await MongoDatasetData.find(
|
||||
{
|
||||
updateTime: {
|
||||
$gte: start,
|
||||
$lte: end
|
||||
}
|
||||
},
|
||||
'_id teamId collectionId'
|
||||
).lean();
|
||||
|
||||
// 2. 合并所有的collectionId
|
||||
const map = new Map<string, { teamId: string; collectionId: string }>();
|
||||
for (const item of rows) {
|
||||
const collectionId = String(item.collectionId);
|
||||
if (!map.has(collectionId)) {
|
||||
map.set(collectionId, { teamId: item.teamId, collectionId });
|
||||
}
|
||||
}
|
||||
const list = Array.from(map.values());
|
||||
console.log('total collections', list.length);
|
||||
let index = 0;
|
||||
|
||||
for await (const item of list) {
|
||||
try {
|
||||
// 3. 查看该collection是否存在,不存在,则删除对应的数据
|
||||
const collection = await MongoDatasetCollection.findOne({ _id: item.collectionId });
|
||||
if (!collection) {
|
||||
const result = await Promise.all([
|
||||
MongoDatasetTraining.deleteMany({
|
||||
teamId: item.teamId,
|
||||
collectionId: item.collectionId
|
||||
}),
|
||||
MongoDatasetData.deleteMany({
|
||||
teamId: item.teamId,
|
||||
collectionId: item.collectionId
|
||||
}),
|
||||
deleteDatasetDataVector({
|
||||
teamId: item.teamId,
|
||||
collectionIds: [String(item.collectionId)]
|
||||
})
|
||||
]);
|
||||
console.log(result);
|
||||
console.log('collection is not found', item);
|
||||
continue;
|
||||
}
|
||||
} catch (error) {}
|
||||
console.log(++index);
|
||||
}
|
||||
}
|
@@ -1,86 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import {
|
||||
deleteDatasetDataVector,
|
||||
getVectorDataByTime
|
||||
} from '@fastgpt/service/common/vectorStore/controller';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { addHours } from 'date-fns';
|
||||
|
||||
/*
|
||||
检测无效的 Vector 数据.
|
||||
异常情况:
|
||||
1. 插入数据时,vector成功,mongo失败
|
||||
2. 更新数据,也会有插入 vector
|
||||
*/
|
||||
|
||||
let deletedVectorAmount = 0;
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
const { startHour = 5, endHour = 1 } = req.body as { startHour?: number; endHour?: number };
|
||||
await authCert({ req, authRoot: true });
|
||||
await connectToDatabase();
|
||||
|
||||
// start: now - maxDay, end: now - endHour
|
||||
const start = addHours(new Date(), -startHour);
|
||||
const end = addHours(new Date(), -endHour);
|
||||
deletedVectorAmount = 0;
|
||||
|
||||
await checkInvalidVector(start, end);
|
||||
|
||||
jsonRes(res, {
|
||||
data: deletedVectorAmount,
|
||||
message: 'success'
|
||||
});
|
||||
} catch (error) {
|
||||
addLog.error(`check Invalid user error`, error);
|
||||
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export async function checkInvalidVector(start: Date, end: Date) {
|
||||
// 1. get all vector data
|
||||
const rows = await getVectorDataByTime(start, end);
|
||||
console.log('total data', rows.length);
|
||||
|
||||
let index = 0;
|
||||
|
||||
for await (const item of rows) {
|
||||
if (!item.teamId || !item.datasetId || !item.id) {
|
||||
console.log('error data', item);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
// 2. find dataset.data
|
||||
const hasData = await MongoDatasetData.countDocuments({
|
||||
teamId: item.teamId,
|
||||
datasetId: item.datasetId,
|
||||
'indexes.dataId': item.id
|
||||
});
|
||||
|
||||
// 3. if not found, delete vector
|
||||
if (hasData === 0) {
|
||||
await deleteDatasetDataVector({
|
||||
teamId: item.teamId,
|
||||
id: item.id
|
||||
});
|
||||
console.log('delete vector data', item.id);
|
||||
deletedVectorAmount++;
|
||||
}
|
||||
|
||||
index++;
|
||||
index % 100 === 0 && console.log(index);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`检测完成,共删除 ${deletedVectorAmount} 个无效 向量 数据`);
|
||||
}
|
Reference in New Issue
Block a user