perf: buffer;fix: back up split (#4913)

* perf: buffer

* fix: back up split

* fix: app limit

* doc
This commit is contained in:
Archer
2025-05-28 18:18:25 +08:00
committed by GitHub
parent 802de11363
commit a171c7b11c
11 changed files with 208 additions and 93 deletions

View File

@@ -6,13 +6,13 @@ import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoDatasetFileSchema;
@@ -225,13 +225,11 @@ export const readFileContentFromMongo = async ({
}> => {
const bufferId = `${fileId}-${customPdfParse}`;
// read buffer
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: bufferId }, undefined, {
...readFromSecondary
}).lean();
const fileBuffer = await getRawTextBuffer(bufferId);
if (fileBuffer) {
return {
rawText: fileBuffer.rawText,
filename: fileBuffer.metadata?.filename || ''
rawText: fileBuffer.text,
filename: fileBuffer?.sourceName
};
}
@@ -265,16 +263,13 @@ export const readFileContentFromMongo = async ({
}
});
// < 14M
if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
MongoRawTextBuffer.create({
sourceId: bufferId,
rawText,
metadata: {
filename: file.filename
}
});
}
// Add buffer
addRawTextBuffer({
sourceId: bufferId,
sourceName: file.filename,
text: rawText,
expiredTime: addMinutes(new Date(), 20)
});
return {
rawText,

View File

@@ -1,16 +1,16 @@
import { Schema, getMongoModel } from '../../mongo';
const DatasetFileSchema = new Schema({});
const ChatFileSchema = new Schema({});
const DatasetFileSchema = new Schema({
metadata: Object
});
const ChatFileSchema = new Schema({
metadata: Object
});
try {
DatasetFileSchema.index({ uploadDate: -1 });
DatasetFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
} catch (error) {
console.log(error);
}
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);