Mirror of https://github.com/labring/FastGPT.git
4.8.5 test (#1805)
* perf: revert tip
* feat: create copy app
* perf: file stream read
* perf: read directory over 100 files
* perf: index
* fix: team chat api error
* lock
* fix: i18n file
@@ -8,7 +8,8 @@ import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
 import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
 import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
 import { readRawContentByFileBuffer } from '../read/utils';
-import { gridFsStream2Buffer } from './utils';
+import { gridFsStream2Buffer, stream2Encoding } from './utils';
+import { addLog } from '../../system/log';
 
 export function getGFSCollection(bucket: `${BucketNameEnum}`) {
   MongoFileSchema;
@@ -44,8 +45,11 @@ export async function uploadFile({
   const stats = await fsp.stat(path);
   if (!stats.isFile()) return Promise.reject(`${path} is not a file`);
 
+  const { stream: readStream, encoding } = await stream2Encoding(fs.createReadStream(path));
+
   metadata.teamId = teamId;
   metadata.tmbId = tmbId;
+  metadata.encoding = encoding;
 
   // create a gridfs bucket
   const bucket = getGridBucket(bucketName);
@@ -57,7 +61,7 @@ export async function uploadFile({
 
   // save to gridfs
   await new Promise((resolve, reject) => {
-    fs.createReadStream(path)
+    readStream
       .pipe(stream as any)
       .on('finish', resolve)
       .on('error', reject);
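
At upload time the file stream is now sampled for its text encoding before the same bytes are piped into GridFS, and the result is stored in the file's metadata. Below is a minimal, self-contained sketch of that tee-and-sample pattern; the name sampleHead is illustrative, while the real helper, stream2Encoding, is added in utils.ts further down.

import { PassThrough, Readable } from 'stream';

// Tee a stream: pipe it into a PassThrough that preserves the full
// content, while 'data' listeners on the source collect the first
// `bytes` bytes for sniffing.
const sampleHead = (stream: NodeJS.ReadableStream, bytes = 200) => {
  const copy = stream.pipe(new PassThrough());
  const head = new Promise<Buffer>((resolve, reject) => {
    let buf = Buffer.from([]);
    stream.on('data', (chunk: Buffer) => {
      if (buf.length < bytes) {
        buf = Buffer.concat([buf, chunk]);
        if (buf.length >= bytes) resolve(buf);
      }
    });
    stream.on('end', () => resolve(buf)); // source smaller than the sample size
    stream.on('error', reject);
  });
  return { head, copy };
};

// Usage: sniff the head, then consume the untouched copy.
const { head, copy } = sampleHead(Readable.from([Buffer.from('hello world')]));
head.then((buf) => console.log('sampled bytes:', buf.length));
copy.on('data', (chunk: Buffer) => console.log('copy still yields:', chunk.toString()));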
@@ -113,19 +117,8 @@ export async function getDownloadStream({
   fileId: string;
 }) {
   const bucket = getGridBucket(bucketName);
-  const encodeStream = bucket.openDownloadStream(new Types.ObjectId(fileId), { end: 100 });
-  const rawStream = bucket.openDownloadStream(new Types.ObjectId(fileId));
-
-  /* get encoding */
-  const buffer = await gridFsStream2Buffer(encodeStream);
-
-  const encoding = detectFileEncoding(buffer);
-
-  return {
-    fileStream: rawStream,
-    encoding
-    // encoding: 'utf-8'
-  };
+  return bucket.openDownloadStream(new Types.ObjectId(fileId));
 }
 
 export const readFileContentFromMongo = async ({
@@ -150,9 +143,8 @@ export const readFileContentFromMongo = async ({
       filename: fileBuffer.metadata?.filename || ''
     };
   }
-  const start = Date.now();
 
-  const [file, { encoding, fileStream }] = await Promise.all([
+  const [file, fileStream] = await Promise.all([
     getFileById({ bucketName, fileId }),
     getDownloadStream({ bucketName, fileId })
   ]);
@@ -163,8 +155,11 @@ export const readFileContentFromMongo = async ({
 
   const extension = file?.filename?.split('.')?.pop()?.toLowerCase() || '';
 
+  const start = Date.now();
   const fileBuffers = await gridFsStream2Buffer(fileStream);
-  // console.log('get file buffer', Date.now() - start);
+  addLog.debug('get file buffer', { time: Date.now() - start });
+
+  const encoding = file?.metadata?.encoding || detectFileEncoding(fileBuffers);
 
   const { rawText } = await readRawContentByFileBuffer({
     extension,
@@ -177,7 +172,8 @@ export const readFileContentFromMongo = async ({
     }
   });
 
-  if (rawText.trim()) {
+  // < 14M
+  if (fileBuffers.length < 14 * 1024 * 1024 && rawText.trim()) {
     MongoRawTextBuffer.create({
       sourceId: fileId,
       rawText,
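
Two details of the read path above are worth spelling out: the encoding stamped into GridFS metadata at upload is now preferred, with detectFileEncoding over the full buffer kept only as a fallback (e.g. for files uploaded before this change), and the raw-text cache write is skipped once the file buffer reaches 14 MiB. The cap is presumably there to keep the cached document under MongoDB's 16 MB BSON document limit; a sketch of the guard, with MAX_RAW_TEXT_BUFFER as an illustrative name:

// 14 * 1024 * 1024 = 14,680,064 bytes (14 MiB), leaving headroom below
// MongoDB's 16 MB document cap. The rationale is inferred, not stated
// in the commit.
const MAX_RAW_TEXT_BUFFER = 14 * 1024 * 1024;

const shouldCacheRawText = (fileBuffers: Buffer, rawText: string): boolean =>
  fileBuffers.length < MAX_RAW_TEXT_BUFFER && rawText.trim() !== '';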
@@ -1,3 +1,6 @@
+import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
+import { PassThrough } from 'stream';
+
 export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
   return new Promise<Buffer>((resolve, reject) => {
     let tmpBuffer: Buffer = Buffer.from([]);
@@ -13,3 +16,38 @@ export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
     });
   });
 };
+
+export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
+  const start = Date.now();
+  const copyStream = stream.pipe(new PassThrough());
+
+  /* get encoding */
+  const buffer = await (() => {
+    return new Promise<Buffer>((resolve, reject) => {
+      let tmpBuffer: Buffer = Buffer.from([]);
+
+      stream.on('data', (chunk) => {
+        if (tmpBuffer.length < 200) {
+          tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
+
+          if (tmpBuffer.length >= 200) {
+            resolve(tmpBuffer);
+          }
+        }
+      });
+      stream.on('end', () => {
+        resolve(tmpBuffer);
+      });
+      stream.on('error', (err) => {
+        reject(err);
+      });
+    });
+  })();
+
+  const enc = detectFileEncoding(buffer);
+  console.log('Get encoding time', Date.now() - start, enc);
+  return {
+    encoding: enc,
+    stream: copyStream
+  };
+};
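
A usage sketch for the new helper, assuming a caller inside the same package (the file path is illustrative):

import fs from 'fs';
import { stream2Encoding } from './utils';

const demo = async () => {
  // stream2Encoding reads the head of the source stream to detect the
  // encoding; the stream it returns is the PassThrough copy, which
  // still carries the complete file content.
  const { stream, encoding } = await stream2Encoding(fs.createReadStream('./sample.txt'));
  console.log('detected encoding:', encoding);
  stream.pipe(process.stdout);
};

demo().catch(console.error);

Since the whole source is piped into the PassThrough, data accumulates in the copy's internal buffer until the caller consumes it; pipe backpressure caps that at the stream's high-water mark, so large files simply pause until the copy is read.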
@@ -15,12 +15,12 @@
     "decompress": "^4.2.1",
     "domino-ext": "^2.1.4",
     "encoding": "^0.1.13",
-    "lodash": "^4.17.21",
+    "file-type": "^19.0.0",
     "iconv-lite": "^0.6.3",
     "joplin-turndown-plugin-gfm": "^1.0.12",
     "json5": "^2.2.3",
     "jsonwebtoken": "^9.0.2",
+    "lodash": "^4.17.21",
     "mammoth": "^1.6.0",
     "mongoose": "^7.0.2",
     "multer": "1.4.5-lts.1",
@@ -39,10 +39,10 @@
     "@types/cookie": "^0.5.2",
     "@types/decompress": "^4.2.7",
     "@types/jsonwebtoken": "^9.0.3",
+    "@types/lodash": "^4.14.191",
     "@types/multer": "^1.4.10",
     "@types/node-cron": "^3.0.11",
     "@types/papaparse": "5.3.7",
-    "@types/lodash": "^4.14.191",
     "@types/pg": "^8.6.6",
     "@types/tunnel": "^0.0.4",
     "@types/turndown": "^5.0.4"