Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-23 13:03:50 +00:00)
4.7.1 production (#1173)
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
@@ -9,6 +9,7 @@ import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
 import { ReadFileByBufferParams } from '../read/type';
 import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
 import { readFileRawContent } from '../read/utils';
+import { PassThrough } from 'stream';

 export function getGFSCollection(bucket: `${BucketNameEnum}`) {
   MongoFileSchema;
@@ -113,32 +114,40 @@ export async function getDownloadStream({
   fileId: string;
 }) {
   const bucket = getGridBucket(bucketName);
+  const stream = bucket.openDownloadStream(new Types.ObjectId(fileId));
+  const copyStream = stream.pipe(new PassThrough());

-  return bucket.openDownloadStream(new Types.ObjectId(fileId));
+  /* get encoding */
+  const buffer = await (() => {
+    return new Promise<Buffer>((resolve, reject) => {
+      let tmpBuffer: Buffer = Buffer.from([]);
+
+      stream.on('data', (chunk) => {
+        if (tmpBuffer.length < 20) {
+          tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
+        }
+        if (tmpBuffer.length >= 20) {
+          resolve(tmpBuffer);
+        }
+      });
+      stream.on('end', () => {
+        resolve(tmpBuffer);
+      });
+      stream.on('error', (err) => {
+        reject(err);
+      });
+    });
+  })();
+
+  const encoding = detectFileEncoding(buffer);
+
+  return {
+    fileStream: copyStream,
+    encoding
+    // encoding: 'utf-8'
+  };
 }

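Note: getDownloadStream now tees the GridFS download through a PassThrough copy, samples the first ~20 bytes of the source stream, and returns { fileStream, encoding } instead of the bare stream. A minimal standalone sketch of the same tee-and-sample pattern (the helper name and the sampleSize parameter are illustrative, not part of the commit, which uses its own detectFileEncoding on the sampled bytes):

```ts
import { PassThrough, Readable } from 'stream';

// Forward everything into a PassThrough copy while collecting the first few
// bytes from the source, so the encoding can be guessed before consumers read.
export async function teeWithSample(source: Readable, sampleSize = 20) {
  const fileStream = source.pipe(new PassThrough());

  const sample = await new Promise<Buffer>((resolve, reject) => {
    let tmp = Buffer.from([]);
    source.on('data', (chunk: Buffer) => {
      if (tmp.length < sampleSize) tmp = Buffer.concat([tmp, chunk]);
      if (tmp.length >= sampleSize) resolve(tmp); // extra resolves are no-ops
    });
    source.on('end', () => resolve(tmp)); // file shorter than sampleSize
    source.on('error', reject);
  });

  return { fileStream, sample }; // hand `sample` to an encoding sniffer
}
```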
-export const readFileEncode = async ({
-  bucketName,
-  fileId
-}: {
-  bucketName: `${BucketNameEnum}`;
-  fileId: string;
-}) => {
-  const encodeStream = await getDownloadStream({ bucketName, fileId });
-  let buffers: Buffer = Buffer.from([]);
-  for await (const chunk of encodeStream) {
-    buffers = Buffer.concat([buffers, chunk]);
-    if (buffers.length > 10) {
-      encodeStream.abort();
-      break;
-    }
-  }
-
-  const encoding = detectFileEncoding(buffers);
-
-  return encoding as BufferEncoding;
-};

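The readFileEncode helper above is deleted outright: it opened a second download of the same file just to sniff the encoding from its first bytes, which the reworked getDownloadStream now does on the single shared stream.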
 export const readFileContentFromMongo = async ({
   teamId,
   bucketName,
@@ -162,9 +171,8 @@ export const readFileContentFromMongo = async ({
     };
   }

-  const [file, encoding, fileStream] = await Promise.all([
+  const [file, { encoding, fileStream }] = await Promise.all([
     getFileById({ bucketName, fileId }),
-    readFileEncode({ bucketName, fileId }),
     getDownloadStream({ bucketName, fileId })
   ]);

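Accordingly, readFileContentFromMongo destructures { encoding, fileStream } straight from getDownloadStream and drops the readFileEncode entry from the Promise.all, so each read opens one download stream instead of two.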
@@ -176,12 +184,12 @@ export const readFileContentFromMongo = async ({

   const fileBuffers = await (() => {
     return new Promise<Buffer>((resolve, reject) => {
-      let buffers = Buffer.from([]);
+      let buffer = Buffer.from([]);
       fileStream.on('data', (chunk) => {
-        buffers = Buffer.concat([buffers, chunk]);
+        buffer = Buffer.concat([buffer, chunk]);
       });
       fileStream.on('end', () => {
-        resolve(buffers);
+        resolve(buffer);
       });
       fileStream.on('error', (err) => {
         reject(err);
@@ -6,7 +6,11 @@ export const readPptxRawText = async ({
   buffer,
   encoding
 }: ReadFileByBufferParams): Promise<ReadFileResponse> => {
-  const result = await parseOffice({ buffer, encoding, extension: 'pptx' });
+  const result = await parseOffice({
+    buffer,
+    encoding: encoding as BufferEncoding,
+    extension: 'pptx'
+  });

   return {
     rawText: result
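readPptxRawText keeps passing the incoming encoding to parseOffice, but because ReadFileByBufferParams.encoding is widened to a plain string in type.d.ts below, the call now has to cast it back to BufferEncoding explicitly.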
@@ -1,8 +1,26 @@
 import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
+import iconv from 'iconv-lite';

+const rawEncodingList = [
+  'ascii',
+  'utf8',
+  'utf-8',
+  'utf16le',
+  'utf-16le',
+  'ucs2',
+  'ucs-2',
+  'base64',
+  'base64url',
+  'latin1',
+  'binary',
+  'hex'
+];
+
 // 加载源文件内容 (load the raw file content)
 export const readFileRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
-  const content = buffer.toString(encoding);
+  const content = rawEncodingList.includes(encoding)
+    ? buffer.toString(encoding as BufferEncoding)
+    : iconv.decode(buffer, 'gbk');

   return {
     rawText: content
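readFileRawText no longer assumes the detected encoding is something Node's Buffer can decode: names on the rawEncodingList whitelist go through buffer.toString, everything else (presumably GBK/GB2312 names detected from Chinese text files) is decoded with iconv-lite as GBK. A condensed sketch of the same fallback, using Node's built-in Buffer.isEncoding check in place of the hard-coded list (an alternative, not what the commit ships):

```ts
import iconv from 'iconv-lite';

// Decode with the reported encoding when Node supports it natively,
// otherwise fall back to GBK via iconv-lite.
export const decodeWithFallback = (buffer: Buffer, encoding: string): string =>
  Buffer.isEncoding(encoding)
    ? buffer.toString(encoding as BufferEncoding)
    : iconv.decode(buffer, 'gbk');
```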
packages/service/common/file/read/type.d.ts (vendored, 2 lines changed)
@@ -1,7 +1,7 @@
 export type ReadFileByBufferParams = {
   teamId: string;
   buffer: Buffer;
-  encoding: BufferEncoding;
+  encoding: string;
   metadata?: Record<string, any>;
 };

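Widening encoding from BufferEncoding to string lets the params carry whatever name the encoding detector reports (including values such as 'gbk' that are not valid BufferEncoding members); callers that still need Node's native decoding cast at the call site, as in the pptx and rawText hunks above.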
@@ -103,7 +103,7 @@ export const deleteDatasetDataVector = async (
     }
     return Promise.reject('deleteDatasetData: no where');
   })();
-  console.log(where, '===');

   try {
     await PgClient.delete(PgDatasetTableName, {
       where: [where]
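This hunk appears to drop a stray console.log(where, '===') debug statement from deleteDatasetDataVector; the delete logic itself is unchanged.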
@@ -13,8 +13,11 @@ export const connectPg = async (): Promise<Pool> => {
     max: Number(process.env.DB_MAX_LINK || 20),
     min: 10,
     keepAlive: true,
-    idleTimeoutMillis: 60000,
-    connectionTimeoutMillis: 20000
+    idleTimeoutMillis: 600000,
+    connectionTimeoutMillis: 20000,
+    query_timeout: 30000,
+    statement_timeout: 40000,
+    idle_in_transaction_session_timeout: 60000
   });

   global.pgClient.on('error', async (err) => {
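The shared Postgres pool gets a longer idle window plus explicit query, statement, and idle-in-transaction timeouts, so hung queries and abandoned transactions are cut off instead of holding connections. A sketch of the resulting pool configuration with the values from the diff (connection-string handling simplified; the PG_URL variable name is an assumption, not taken from this hunk):

```ts
import { Pool } from 'pg';

const pool = new Pool({
  connectionString: process.env.PG_URL, // assumed env var name
  max: Number(process.env.DB_MAX_LINK || 20),
  min: 10,
  keepAlive: true,
  idleTimeoutMillis: 600000, // idle clients are released after 10 minutes
  connectionTimeoutMillis: 20000, // give up acquiring a connection after 20 s
  query_timeout: 30000, // client-side per-query timeout
  statement_timeout: 40000, // server-side per-statement timeout
  idle_in_transaction_session_timeout: 60000 // server ends sessions idle inside an open transaction
});
```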
@@ -13,6 +13,7 @@
     "decompress": "^4.2.1",
     "encoding": "^0.1.13",
     "file-type": "^19.0.0",
+    "iconv-lite": "^0.6.3",
     "json5": "^2.2.3",
     "jsonwebtoken": "^9.0.2",
     "mammoth": "^1.6.0",
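The service package.json adds iconv-lite as a direct dependency to back the GBK fallback in readFileRawText above.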