Mirror of https://github.com/labring/FastGPT.git, synced 2025-07-25 06:14:06 +00:00

If Buffer.concat() is called frequently, especially while processing large amounts of data, the repeated memory allocation can hurt performance. In testing, uploading and parsing a PDF larger than 100 MB left the /api/core/dataset/collection/create/fileId endpoint unresponsive for a long time; other endpoints hung as well, making the whole service unavailable. Concatenating the chunks once, at the end of the stream, resolves the issue.
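For context, the difference comes down to how often a new backing buffer is allocated and copied. Below is a minimal sketch of the two approaches; the collectByRepeatedConcat / collectBySingleConcat helper names are illustrative and not part of the repository:

import { Readable } from 'stream';

// Anti-pattern: re-allocates and copies the accumulated buffer on every chunk,
// so total copy work grows quadratically with file size.
const collectByRepeatedConcat = (stream: Readable) =>
  new Promise<Buffer>((resolve, reject) => {
    let acc = Buffer.alloc(0);
    stream.on('data', (chunk: Buffer) => {
      acc = Buffer.concat([acc, chunk]); // new allocation + full copy per chunk
    });
    stream.on('end', () => resolve(acc));
    stream.on('error', reject);
  });

// Fix: keep chunks in an array and allocate/copy once at the end (linear copy work).
const collectBySingleConcat = (stream: Readable) =>
  new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];
    let totalLength = 0;
    stream.on('data', (chunk: Buffer) => {
      chunks.push(chunk);
      totalLength += chunk.length;
    });
    stream.on('end', () => resolve(Buffer.concat(chunks, totalLength)));
    stream.on('error', reject);
  });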
57 lines · 1.4 KiB · TypeScript
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { PassThrough } from 'stream';

/* Read an entire GridFS stream into memory. Chunks are collected in an array
   and concatenated exactly once at the end, instead of calling Buffer.concat()
   per chunk. */
export const gridFsStream2Buffer = (stream: NodeJS.ReadableStream) => {
  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];
    let totalLength = 0;

    stream.on('data', (chunk) => {
      chunks.push(chunk);
      totalLength += chunk.length;
    });
    stream.on('end', () => {
      const resultBuffer = Buffer.concat(chunks, totalLength); // single concat
      resolve(resultBuffer);
    });
    stream.on('error', (err) => {
      reject(err);
    });
  });
};

/* Detect the file encoding from the first ~200 bytes while piping the full
   stream through a PassThrough, so the caller still receives the complete data. */
export const stream2Encoding = async (stream: NodeJS.ReadableStream) => {
  const start = Date.now();
  const copyStream = stream.pipe(new PassThrough());

  /* get encoding */
  const buffer = await (() => {
    return new Promise<Buffer>((resolve, reject) => {
      let tmpBuffer: Buffer = Buffer.from([]);

      stream.on('data', (chunk) => {
        if (tmpBuffer.length < 200) {
          tmpBuffer = Buffer.concat([tmpBuffer, chunk]);

          if (tmpBuffer.length >= 200) {
            resolve(tmpBuffer);
          }
        }
      });
      stream.on('end', () => {
        resolve(tmpBuffer);
      });
      stream.on('error', (err) => {
        reject(err);
      });
    });
  })();

  const enc = detectFileEncoding(buffer);

  return {
    encoding: enc,
    stream: copyStream
  };
};
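For reference, a hedged sketch of how these helpers would typically be called against a GridFS download stream. The connection string, database name, bucket name, and the readFile wrapper are assumptions for illustration, not taken from the repository; only gridFsStream2Buffer and stream2Encoding come from the module above.

import { MongoClient, GridFSBucket, ObjectId } from 'mongodb';

// Assumed setup: placeholder URI, db and bucket names.
const client = new MongoClient('mongodb://localhost:27017');
const bucket = new GridFSBucket(client.db('fastgpt'), { bucketName: 'dataset' });

export const readFile = async (fileId: string) => {
  // connect() resolves immediately if the client is already connected.
  await client.connect();

  // openDownloadStream returns a Node readable stream backed by GridFS chunks.
  const downloadStream = bucket.openDownloadStream(new ObjectId(fileId));

  // Detect encoding from the first bytes; the returned PassThrough still carries the full file.
  const { encoding, stream } = await stream2Encoding(downloadStream);

  // Buffer the whole file with a single final concat.
  const buffer = await gridFsStream2Buffer(stream);

  return { encoding, buffer };
};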