mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-15 07:31:19 +00:00
feat: operation index (#5056)
* feat: operation index * fix: delete update vector * perf: Clear invalid data * perf: index * perf: clear invalid data * index
This commit is contained in:
@@ -21,4 +21,5 @@ weight: 787
|
|||||||
|
|
||||||
1. 对话日志,日期范围选择问题。
|
1. 对话日志,日期范围选择问题。
|
||||||
2. API 调用时,传入的 system 提示词可能会重复。
|
2. API 调用时,传入的 system 提示词可能会重复。
|
||||||
3. AI 对话/工具调用,未选择文件链接时,也会从历史记录读取文件。
|
3. AI 对话/工具调用,未选择文件链接时,也会从历史记录读取文件。
|
||||||
|
4. 手动更新知识库索引时,错误地删除旧索引,导致手动索引无效。
|
@@ -23,6 +23,12 @@ try {
|
|||||||
ImageSchema.index({ type: 1 });
|
ImageSchema.index({ type: 1 });
|
||||||
// delete related img
|
// delete related img
|
||||||
ImageSchema.index({ teamId: 1, 'metadata.relatedId': 1 });
|
ImageSchema.index({ teamId: 1, 'metadata.relatedId': 1 });
|
||||||
|
|
||||||
|
// Cron clear invalid img
|
||||||
|
ImageSchema.index(
|
||||||
|
{ createTime: 1 },
|
||||||
|
{ partialFilterExpression: { 'metadata.relatedId': { $exists: true } } }
|
||||||
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
}
|
}
|
||||||
|
@@ -132,6 +132,12 @@ try {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Clear invalid image
|
||||||
|
DatasetCollectionSchema.index({
|
||||||
|
teamId: 1,
|
||||||
|
'metadata.relatedImgId': 1
|
||||||
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
}
|
}
|
||||||
|
@@ -103,12 +103,14 @@ try {
|
|||||||
});
|
});
|
||||||
// Recall vectors after data matching
|
// Recall vectors after data matching
|
||||||
DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
|
DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
|
||||||
DatasetDataSchema.index({ updateTime: 1 });
|
|
||||||
// rebuild data
|
// rebuild data
|
||||||
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
|
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
|
||||||
|
|
||||||
// 为查询 initJieba 字段不存在的数据添加索引
|
// 为查询 initJieba 字段不存在的数据添加索引
|
||||||
DatasetDataSchema.index({ initJieba: 1, updateTime: 1 });
|
DatasetDataSchema.index({ initJieba: 1, updateTime: 1 });
|
||||||
|
|
||||||
|
// Cron clear invalid data
|
||||||
|
DatasetDataSchema.index({ updateTime: 1 });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
}
|
}
|
||||||
|
@@ -6,7 +6,7 @@ import {
|
|||||||
TeamMemberCollectionName
|
TeamMemberCollectionName
|
||||||
} from '@fastgpt/global/support/user/team/constant';
|
} from '@fastgpt/global/support/user/team/constant';
|
||||||
|
|
||||||
export const OperationLogCollectionName = 'operationLog';
|
export const OperationLogCollectionName = 'operationLogs';
|
||||||
|
|
||||||
const OperationLogSchema = new Schema({
|
const OperationLogSchema = new Schema({
|
||||||
tmbId: {
|
tmbId: {
|
||||||
@@ -34,6 +34,9 @@ const OperationLogSchema = new Schema({
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
OperationLogSchema.index({ teamId: 1, tmbId: 1, event: 1 });
|
||||||
|
OperationLogSchema.index({ timestamp: 1 }, { expireAfterSeconds: 14 * 24 * 60 * 60 }); // Auto delete after 14 days
|
||||||
|
|
||||||
export const MongoOperationLog = getMongoLogModel<OperationLogSchema>(
|
export const MongoOperationLog = getMongoLogModel<OperationLogSchema>(
|
||||||
OperationLogCollectionName,
|
OperationLogCollectionName,
|
||||||
OperationLogSchema
|
OperationLogSchema
|
||||||
|
@@ -2,16 +2,20 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
|||||||
import { jsonRes } from '@fastgpt/service/common/response';
|
import { jsonRes } from '@fastgpt/service/common/response';
|
||||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||||
import { addHours } from 'date-fns';
|
import { addHours } from 'date-fns';
|
||||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
|
||||||
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
|
||||||
import {
|
import {
|
||||||
checkInvalidDatasetFiles,
|
checkInvalidDatasetFiles,
|
||||||
checkInvalidDatasetData,
|
checkInvalidDatasetData,
|
||||||
checkInvalidVector
|
checkInvalidVector
|
||||||
} from '@/service/common/system/cronTask';
|
} from '@/service/common/system/cronTask';
|
||||||
|
import dayjs from 'dayjs';
|
||||||
|
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||||
|
import { NextAPI } from '@/service/middleware/entry';
|
||||||
|
import { useIPFrequencyLimit } from '@fastgpt/service/common/middle/reqFrequencyLimit';
|
||||||
|
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||||
|
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
|
||||||
|
|
||||||
let deleteImageAmount = 0;
|
let deleteImageAmount = 0;
|
||||||
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
async function checkInvalidImg(start: Date, end: Date) {
|
||||||
const images = await MongoImage.find(
|
const images = await MongoImage.find(
|
||||||
{
|
{
|
||||||
createTime: {
|
createTime: {
|
||||||
@@ -52,8 +56,8 @@ async function checkInvalidImg(start: Date, end: Date, limit = 50) {
|
|||||||
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
|
async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
deleteImageAmount = 0;
|
||||||
try {
|
try {
|
||||||
await authCert({ req, authRoot: true });
|
await authCert({ req, authRoot: true });
|
||||||
const { start = -2, end = -360 * 24 } = req.body as { start: number; end: number };
|
const { start = -2, end = -360 * 24 } = req.body as { start: number; end: number };
|
||||||
@@ -61,13 +65,37 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
(async () => {
|
(async () => {
|
||||||
try {
|
try {
|
||||||
console.log('执行脏数据清理任务');
|
console.log('执行脏数据清理任务');
|
||||||
// 360天 ~ 2小时前
|
|
||||||
const endTime = addHours(new Date(), start);
|
// Split time range into 6-hour chunks to avoid processing too much data at once
|
||||||
const startTime = addHours(new Date(), end);
|
const totalHours = Math.abs(start - end);
|
||||||
await checkInvalidDatasetFiles(startTime, endTime);
|
const chunkHours = 6;
|
||||||
await checkInvalidImg(startTime, endTime);
|
const chunks = Math.ceil(totalHours / chunkHours);
|
||||||
await checkInvalidDatasetData(startTime, endTime);
|
|
||||||
await checkInvalidVector(startTime, endTime);
|
console.log(
|
||||||
|
`Total time range: ${totalHours} hours, split into ${chunks} chunks of ${chunkHours} hours each`
|
||||||
|
);
|
||||||
|
|
||||||
|
for (let i = 0; i < chunks; i++) {
|
||||||
|
const chunkStart = start - i * chunkHours;
|
||||||
|
const chunkEnd = Math.max(start - (i + 1) * chunkHours, end);
|
||||||
|
|
||||||
|
const chunkEndTime = addHours(new Date(), chunkStart);
|
||||||
|
const chunkStartTime = addHours(new Date(), chunkEnd);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`Processing chunk ${i + 1}/${chunks}: ${dayjs(chunkStartTime).format(
|
||||||
|
'YYYY-MM-DD HH:mm'
|
||||||
|
)} to ${dayjs(chunkEndTime).format('YYYY-MM-DD HH:mm')}`
|
||||||
|
);
|
||||||
|
|
||||||
|
await retryFn(() => checkInvalidDatasetFiles(chunkStartTime, chunkEndTime));
|
||||||
|
await retryFn(() => checkInvalidImg(chunkStartTime, chunkEndTime));
|
||||||
|
await retryFn(() => checkInvalidDatasetData(chunkStartTime, chunkEndTime));
|
||||||
|
await retryFn(() => checkInvalidVector(chunkStartTime, chunkEndTime));
|
||||||
|
|
||||||
|
console.log(`Chunk ${i + 1}/${chunks} completed`);
|
||||||
|
}
|
||||||
|
|
||||||
console.log('执行脏数据清理任务完毕');
|
console.log('执行脏数据清理任务完毕');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log('执行脏数据清理任务出错了');
|
console.log('执行脏数据清理任务出错了');
|
||||||
@@ -86,3 +114,5 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export default NextAPI(useIPFrequencyLimit({ id: 'admin-api', seconds: 60, limit: 1 }), handler);
|
||||||
|
@@ -145,16 +145,20 @@ export async function checkInvalidDatasetData(start: Date, end: Date) {
|
|||||||
datasetId: item.datasetId,
|
datasetId: item.datasetId,
|
||||||
collectionId: item.collectionId
|
collectionId: item.collectionId
|
||||||
});
|
});
|
||||||
await MongoDatasetDataText.deleteMany({
|
|
||||||
teamId: item.teamId,
|
await Promise.all([
|
||||||
datasetId: item.datasetId,
|
MongoDatasetDataText.deleteMany({
|
||||||
collectionId: item.collectionId
|
teamId: item.teamId,
|
||||||
});
|
datasetId: item.datasetId,
|
||||||
await deleteDatasetDataVector({
|
collectionId: item.collectionId
|
||||||
teamId: item.teamId,
|
}),
|
||||||
datasetIds: [item.datasetId],
|
deleteDatasetDataVector({
|
||||||
collectionIds: [item.collectionId]
|
teamId: item.teamId,
|
||||||
});
|
datasetIds: [item.datasetId],
|
||||||
|
collectionIds: [item.collectionId]
|
||||||
|
})
|
||||||
|
]);
|
||||||
|
|
||||||
await MongoDatasetData.deleteMany({
|
await MongoDatasetData.deleteMany({
|
||||||
teamId: item.teamId,
|
teamId: item.teamId,
|
||||||
datasetId: item.datasetId,
|
datasetId: item.datasetId,
|
||||||
|
@@ -318,6 +318,11 @@ export async function updateData2Dataset({
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const deleteVectorIdList = patchResult
|
||||||
|
.filter((item) => item.type === 'delete' || item.type === 'update')
|
||||||
|
.map((item) => item.index.dataId)
|
||||||
|
.filter(Boolean) as string[];
|
||||||
|
|
||||||
// 4. Update mongo updateTime(便于脏数据检查器识别)
|
// 4. Update mongo updateTime(便于脏数据检查器识别)
|
||||||
const updateTime = mongoData.updateTime;
|
const updateTime = mongoData.updateTime;
|
||||||
mongoData.updateTime = new Date();
|
mongoData.updateTime = new Date();
|
||||||
@@ -377,14 +382,10 @@ export async function updateData2Dataset({
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Delete vector
|
// Delete vector
|
||||||
const deleteIdList = patchResult
|
if (deleteVectorIdList.length > 0) {
|
||||||
.filter((item) => item.type === 'delete' || item.type === 'update')
|
|
||||||
.map((item) => item.index.dataId)
|
|
||||||
.filter(Boolean) as string[];
|
|
||||||
if (deleteIdList.length > 0) {
|
|
||||||
await deleteDatasetDataVector({
|
await deleteDatasetDataVector({
|
||||||
teamId: mongoData.teamId,
|
teamId: mongoData.teamId,
|
||||||
idList: deleteIdList
|
idList: deleteVectorIdList
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
Reference in New Issue
Block a user