mirror of
https://github.com/labring/FastGPT.git
synced 2026-03-10 01:01:23 +08:00
V4.14.4 features (#6036)
* feat: add query optimize and bill (#6021) * add query optimize and bill * perf: query extension * fix: embe model * remove log * remove log * fix: test --------- Co-authored-by: xxyyh <2289112474@qq> Co-authored-by: archer <545436317@qq.com> * feat: notice (#6013) * feat: record user's language * feat: notice points/dataset indexes; support count limit; update docker-compose.yml * fix: ts error * feat: send auth code i18n * chore: dataset notice limit * chore: adjust * fix: ts * fix: countLimit race condition; i18n en-prefix locale fallback to en --------- Co-authored-by: archer <545436317@qq.com> * perf: comment * perf: send inform code * fix: type error (#6029) * feat: add ip region for chat logs (#6010) * feat: add ip region for chat logs * refactor: use Geolite2.mmdb * fix: export chat logs * fix: return location directly * test: add unit test * perf: log show ip data * adjust commercial plans (#6008) * plan frontend * plan limit * coupon * discount coupon * fix * type * fix audit * type * plan name * legacy plan * track * feat: add discount coupon * fix * fix discount coupon * openapi * type * type * env * api type * fix * fix: simple agent plugin input & agent dashboard card (#6034) * refactor: remove gridfs (#6031) * fix: replace gridfs multer operations with s3 compatible ops * wip: s3 features * refactor: remove gridfs * fix * perf: mock test * doc * doc * doc * fix: test * fix: s3 * fix: mock s3 * remove invalid config * fix: init query extension * initv4144 (#6037) * chore: initv4144 * fix * version * fix: new plans (#6039) * fix: new plans * qr modal tip * fix: buffer raw text filename (#6040) * fix: initv4144 (#6041) * fix: pay refresh (#6042) * fix: migration shell * rename collection * clear timerlock * clear timerlock * perf: faq * perf: bill schema * fix: openapi * doc * fix: share var render * feat: delete dataset queue * plan usage display (#6043) * plan usage display * text * fix * fix: ts * perf: remove invalid code * perf: init shell * doc * 
perf: rename field * perf: avatar presign * init * custom plan text (#6045) * fix plans * fix * fixed * computed --------- Co-authored-by: archer <545436317@qq.com> * init shell * plan text & price page back button (#6046) * init * index * delete dataset * delete dataset * perf: delete dataset * init --------- Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com> Co-authored-by: xxyyh <2289112474@qq> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: Roy <whoeverimf5@gmail.com> Co-authored-by: heheer <heheer@sealos.io>
This commit is contained in:
@@ -1,180 +0,0 @@
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
import { connectionMongo, Types } from '../../mongo';
|
||||
import { MongoRawTextBufferSchema, bucketName } from './schema';
|
||||
import { addLog } from '../../system/log';
|
||||
import { setCron } from '../../system/cron';
|
||||
import { checkTimerLock } from '../../system/timerLock/utils';
|
||||
import { TimerIdEnum } from '../../system/timerLock/constants';
|
||||
import { gridFsStream2Buffer } from '../../file/gridfs/utils';
|
||||
import { readRawContentFromBuffer } from '../../../worker/function';
|
||||
|
||||
const getGridBucket = () => {
|
||||
return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
|
||||
bucketName: bucketName
|
||||
});
|
||||
};
|
||||
|
||||
export const addRawTextBuffer = async ({
|
||||
sourceId,
|
||||
sourceName,
|
||||
text,
|
||||
expiredTime
|
||||
}: {
|
||||
sourceId: string;
|
||||
sourceName: string;
|
||||
text: string;
|
||||
expiredTime: Date;
|
||||
}) => {
|
||||
const gridBucket = getGridBucket();
|
||||
const metadata = {
|
||||
sourceId,
|
||||
sourceName,
|
||||
expiredTime
|
||||
};
|
||||
|
||||
const buffer = Buffer.from(text);
|
||||
|
||||
const fileSize = buffer.length;
|
||||
// 单块大小:尽可能大,但不超过 14MB,不小于128KB
|
||||
const chunkSizeBytes = (() => {
|
||||
// 计算理想块大小:文件大小 ÷ 目标块数(10)。 并且每个块需要小于 14MB
|
||||
const idealChunkSize = Math.min(Math.ceil(fileSize / 10), 14 * 1024 * 1024);
|
||||
|
||||
// 确保块大小至少为128KB
|
||||
const minChunkSize = 128 * 1024; // 128KB
|
||||
|
||||
// 取理想块大小和最小块大小中的较大值
|
||||
let chunkSize = Math.max(idealChunkSize, minChunkSize);
|
||||
|
||||
// 将块大小向上取整到最接近的64KB的倍数,使其更整齐
|
||||
chunkSize = Math.ceil(chunkSize / (64 * 1024)) * (64 * 1024);
|
||||
|
||||
return chunkSize;
|
||||
})();
|
||||
|
||||
const uploadStream = gridBucket.openUploadStream(sourceId, {
|
||||
metadata,
|
||||
chunkSizeBytes
|
||||
});
|
||||
|
||||
return retryFn(async () => {
|
||||
return new Promise((resolve, reject) => {
|
||||
uploadStream.end(buffer);
|
||||
uploadStream.on('finish', () => {
|
||||
resolve(uploadStream.id);
|
||||
});
|
||||
uploadStream.on('error', (error) => {
|
||||
addLog.error('addRawTextBuffer error', error);
|
||||
resolve('');
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
export const getRawTextBuffer = async (sourceId: string) => {
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
return retryFn(async () => {
|
||||
const bufferData = await MongoRawTextBufferSchema.findOne(
|
||||
{
|
||||
'metadata.sourceId': sourceId
|
||||
},
|
||||
'_id metadata'
|
||||
).lean();
|
||||
if (!bufferData) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Read file content
|
||||
const downloadStream = gridBucket.openDownloadStream(new Types.ObjectId(bufferData._id));
|
||||
|
||||
const fileBuffers = await gridFsStream2Buffer(downloadStream);
|
||||
|
||||
const rawText = await (async () => {
|
||||
if (fileBuffers.length < 10000000) {
|
||||
return fileBuffers.toString('utf8');
|
||||
} else {
|
||||
return (
|
||||
await readRawContentFromBuffer({
|
||||
extension: 'txt',
|
||||
encoding: 'utf8',
|
||||
buffer: fileBuffers
|
||||
})
|
||||
).rawText;
|
||||
}
|
||||
})();
|
||||
|
||||
return {
|
||||
text: rawText,
|
||||
sourceName: bufferData.metadata?.sourceName || ''
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
export const deleteRawTextBuffer = async (sourceId: string): Promise<boolean> => {
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
return retryFn(async () => {
|
||||
const buffer = await MongoRawTextBufferSchema.findOne({ 'metadata.sourceId': sourceId });
|
||||
if (!buffer) {
|
||||
return false;
|
||||
}
|
||||
|
||||
await gridBucket.delete(new Types.ObjectId(buffer._id));
|
||||
return true;
|
||||
});
|
||||
};
|
||||
|
||||
export const updateRawTextBufferExpiredTime = async ({
|
||||
sourceId,
|
||||
expiredTime
|
||||
}: {
|
||||
sourceId: string;
|
||||
expiredTime: Date;
|
||||
}) => {
|
||||
return retryFn(async () => {
|
||||
return MongoRawTextBufferSchema.updateOne(
|
||||
{ 'metadata.sourceId': sourceId },
|
||||
{ $set: { 'metadata.expiredTime': expiredTime } }
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
export const clearExpiredRawTextBufferCron = async () => {
|
||||
const gridBucket = getGridBucket();
|
||||
|
||||
const clearExpiredRawTextBuffer = async () => {
|
||||
addLog.debug('Clear expired raw text buffer start');
|
||||
|
||||
const data = await MongoRawTextBufferSchema.find(
|
||||
{
|
||||
'metadata.expiredTime': { $lt: new Date() }
|
||||
},
|
||||
'_id'
|
||||
).lean();
|
||||
|
||||
for (const item of data) {
|
||||
try {
|
||||
await gridBucket.delete(new Types.ObjectId(item._id));
|
||||
} catch (error) {
|
||||
addLog.error('Delete expired raw text buffer error', error);
|
||||
}
|
||||
}
|
||||
addLog.debug('Clear expired raw text buffer end');
|
||||
};
|
||||
|
||||
setCron('*/10 * * * *', async () => {
|
||||
if (
|
||||
await checkTimerLock({
|
||||
timerId: TimerIdEnum.clearExpiredRawTextBuffer,
|
||||
lockMinuted: 9
|
||||
})
|
||||
) {
|
||||
try {
|
||||
await clearExpiredRawTextBuffer();
|
||||
} catch (error) {
|
||||
addLog.error('clearExpiredRawTextBufferCron error', error);
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
@@ -1,22 +0,0 @@
|
||||
import { getMongoModel, type Types, Schema } from '../../mongo';
|
||||
|
||||
// GridFS bucket name for buffered raw text; the model below maps onto the
// bucket's `.files` collection so metadata can be queried with Mongoose.
export const bucketName = 'buffer_rawtext';

// Schema over the GridFS files documents — only the metadata subdocument is
// declared; GridFS manages the remaining file fields itself.
const RawTextBufferSchema = new Schema({
  metadata: {
    sourceId: { type: String, required: true },
    sourceName: { type: String, required: true },
    expiredTime: { type: Date, required: true }
  }
});
// Hashed index: sourceId is only ever looked up by equality.
RawTextBufferSchema.index({ 'metadata.sourceId': 'hashed' });
// Descending expiredTime index serves the cleanup cron's $lt range scan.
RawTextBufferSchema.index({ 'metadata.expiredTime': -1 });

// NOTE: despite the name, this is a Mongoose model (not a schema), bound to
// the `buffer_rawtext.files` collection.
export const MongoRawTextBufferSchema = getMongoModel<{
  _id: Types.ObjectId;
  metadata: {
    sourceId: string;
    sourceName: string;
    expiredTime: Date;
  };
}>(`${bucketName}.files`, RawTextBufferSchema);
|
||||
Reference in New Issue
Block a user