Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker flie
This commit is contained in:
Archer
2024-04-01 19:01:26 +08:00
committed by GitHub
parent f9d266a6af
commit 21288d1736
90 changed files with 2707 additions and 1678 deletions

View File

@@ -0,0 +1,86 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
let deleteImageAmount = 0;
async function checkInvalidImg(start: Date, end: Date, limit = 50) {
const images = await MongoImage.find(
{
createTime: {
$gte: start,
$lte: end
},
'metadata.relatedId': { $exists: true }
},
'_id teamId metadata'
);
console.log('total images', images.length);
let index = 0;
for await (const image of images) {
try {
// 1. 检测是否有对应的集合
const collection = await MongoDatasetCollection.findOne(
{
teamId: image.teamId,
'metadata.relatedImgId': image.metadata?.relatedId
},
'_id'
);
if (!collection) {
await image.deleteOne();
deleteImageAmount++;
}
index++;
index % 100 === 0 && console.log(index);
} catch (error) {
console.log(error);
}
}
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
}
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
try {
await connectToDatabase();
await authCert({ req, authRoot: true });
(async () => {
try {
console.log('执行脏数据清理任务');
const end = addHours(new Date(), -1);
const start = addHours(new Date(), -360 * 24);
await checkFiles(start, end);
await checkInvalidImg(start, end);
await checkInvalidCollection(start, end);
await checkInvalidVector(start, end);
console.log('执行脏数据清理任务完毕');
} catch (error) {
console.log('执行脏数据清理任务出错了');
}
})();
jsonRes(res, {
message: 'success'
});
} catch (error) {
console.log(error);
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -6,9 +6,52 @@ import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
let deleteImageAmount = 0;
export async function checkInvalidImg(start: Date, end: Date, limit = 50) {
const images = await MongoImage.find(
{
createTime: {
$gte: start,
$lte: end
},
'metadata.relatedId': { $exists: true }
},
'_id teamId metadata'
);
console.log('total images', images.length);
let index = 0;
for await (const image of images) {
try {
// 1. 检测是否有对应的集合
const collection = await MongoDatasetCollection.findOne(
{
teamId: image.teamId,
'metadata.relatedImgId': image.metadata?.relatedId
},
'_id'
);
if (!collection) {
await image.deleteOne();
deleteImageAmount++;
}
index++;
index % 100 === 0 && console.log(index);
} catch (error) {
console.log(error);
}
}
console.log(`检测完成,共删除 ${deleteImageAmount} 个无效图片`);
}
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {

View File

@@ -2,13 +2,6 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoUsage } from '@fastgpt/service/support/wallet/usage/schema';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { checkFiles } from '../timerTask/dataset/checkInValidDatasetFiles';
import { addHours } from 'date-fns';
import { checkInvalid as checkInvalidImg } from '../timerTask/dataset/checkInvalidDatasetImage';
import { checkInvalidCollection } from '../timerTask/dataset/checkInvalidMongoCollection';
import { checkInvalidVector } from '../timerTask/dataset/checkInvalidVector';
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
import { PluginTypeEnum } from '@fastgpt/global/core/plugin/constants';