4.7.1 production (#1173)

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Archer authored 2024-04-11 16:30:17 +08:00, committed by GitHub
parent db2dd91f03
commit c314312a57
19 changed files with 199 additions and 120 deletions

View File

@@ -10,6 +10,6 @@ export const formatFileSize = (bytes: number): string => {
   return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
 };
 
-export const detectFileEncoding = (buffers: string | Buffer) => {
-  return (detect(buffers)?.encoding || 'utf-8') as BufferEncoding;
+export const detectFileEncoding = (buffer: Buffer) => {
+  return detect(buffer.slice(0, 200))?.encoding?.toLocaleLowerCase();
 };
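The rewrite narrows the detector's input to the first 200 bytes and drops the hard 'utf-8' fallback, so callers must now cope with an `undefined` result. A minimal sketch of the detection call, assuming `detect` is jschardet's detect (its `{ encoding, confidence }` return shape matches the `detect(...)?.encoding` access above):

```ts
// Sketch only: jschardet is an assumption inferred from the call shape above.
import { detect } from 'jschardet';

const head = Buffer.from('编码检测示例，encoding detection sample').slice(0, 200);
const encoding = detect(head)?.encoding?.toLocaleLowerCase();
console.log(encoding); // e.g. 'utf-8'; undefined if detection fails
```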

View File

@@ -9,6 +9,7 @@ import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
 import { ReadFileByBufferParams } from '../read/type';
 import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
 import { readFileRawContent } from '../read/utils';
+import { PassThrough } from 'stream';
 
 export function getGFSCollection(bucket: `${BucketNameEnum}`) {
   MongoFileSchema;
@@ -113,32 +114,40 @@ export async function getDownloadStream({
   fileId: string;
 }) {
   const bucket = getGridBucket(bucketName);
+  const stream = bucket.openDownloadStream(new Types.ObjectId(fileId));
+  const copyStream = stream.pipe(new PassThrough());
 
-  return bucket.openDownloadStream(new Types.ObjectId(fileId));
+  /* get encoding */
+  const buffer = await (() => {
+    return new Promise<Buffer>((resolve, reject) => {
+      let tmpBuffer: Buffer = Buffer.from([]);
+
+      stream.on('data', (chunk) => {
+        if (tmpBuffer.length < 20) {
+          tmpBuffer = Buffer.concat([tmpBuffer, chunk]);
+        }
+        if (tmpBuffer.length >= 20) {
+          resolve(tmpBuffer);
+        }
+      });
+      stream.on('end', () => {
+        resolve(tmpBuffer);
+      });
+      stream.on('error', (err) => {
+        reject(err);
+      });
+    });
+  })();
+
+  const encoding = detectFileEncoding(buffer);
+
+  return {
+    fileStream: copyStream,
+    encoding
+    // encoding: 'utf-8'
+  };
 }
 
-export const readFileEncode = async ({
-  bucketName,
-  fileId
-}: {
-  bucketName: `${BucketNameEnum}`;
-  fileId: string;
-}) => {
-  const encodeStream = await getDownloadStream({ bucketName, fileId });
-  let buffers: Buffer = Buffer.from([]);
-  for await (const chunk of encodeStream) {
-    buffers = Buffer.concat([buffers, chunk]);
-    if (buffers.length > 10) {
-      encodeStream.abort();
-      break;
-    }
-  }
-
-  const encoding = detectFileEncoding(buffers);
-
-  return encoding as BufferEncoding;
-};
-
 export const readFileContentFromMongo = async ({
   teamId,
   bucketName,
@@ -162,9 +171,8 @@ export const readFileContentFromMongo = async ({
     };
   }
 
-  const [file, encoding, fileStream] = await Promise.all([
+  const [file, { encoding, fileStream }] = await Promise.all([
     getFileById({ bucketName, fileId }),
-    readFileEncode({ bucketName, fileId }),
     getDownloadStream({ bucketName, fileId })
   ]);
@@ -176,12 +184,12 @@ export const readFileContentFromMongo = async ({
   const fileBuffers = await (() => {
     return new Promise<Buffer>((resolve, reject) => {
-      let buffers = Buffer.from([]);
+      let buffer = Buffer.from([]);
       fileStream.on('data', (chunk) => {
-        buffers = Buffer.concat([buffers, chunk]);
+        buffer = Buffer.concat([buffer, chunk]);
       });
       fileStream.on('end', () => {
-        resolve(buffers);
+        resolve(buffer);
       });
       fileStream.on('error', (err) => {
         reject(err);
       });
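Sniffing the head via `'data'` events would normally drain the GridFS stream before `readFileContentFromMongo` gets to read it; piping into a `PassThrough` first keeps an untouched copy flowing to the consumer. A generic sketch of that tee pattern (the helper name is hypothetical):

```ts
// Tee a readable stream: sniff its first bytes while a PassThrough copy
// preserves the full content for whoever consumes `copy` afterwards.
import { PassThrough, Readable } from 'stream';

function teeHead(stream: Readable, headBytes = 20) {
  const copy = stream.pipe(new PassThrough()); // attach before 'data' so no chunk is missed
  const head = new Promise<Buffer>((resolve, reject) => {
    let tmp = Buffer.from([]);
    stream.on('data', (chunk: Buffer) => {
      if (tmp.length < headBytes) tmp = Buffer.concat([tmp, chunk]);
      if (tmp.length >= headBytes) resolve(tmp);
    });
    stream.on('end', () => resolve(tmp)); // short files never reach headBytes
    stream.on('error', reject);
  });
  return { copy, head };
}
```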

View File

@@ -6,7 +6,11 @@ export const readPptxRawText = async ({
   buffer,
   encoding
 }: ReadFileByBufferParams): Promise<ReadFileResponse> => {
-  const result = await parseOffice({ buffer, encoding, extension: 'pptx' });
+  const result = await parseOffice({
+    buffer,
+    encoding: encoding as BufferEncoding,
+    extension: 'pptx'
+  });
 
   return {
     rawText: result

View File

@@ -1,8 +1,26 @@
 import { ReadFileByBufferParams, ReadFileResponse } from './type.d';
+import iconv from 'iconv-lite';
+
+const rawEncodingList = [
+  'ascii',
+  'utf8',
+  'utf-8',
+  'utf16le',
+  'utf-16le',
+  'ucs2',
+  'ucs-2',
+  'base64',
+  'base64url',
+  'latin1',
+  'binary',
+  'hex'
+];
 
 // Load the raw content of the source file
 export const readFileRawText = ({ buffer, encoding }: ReadFileByBufferParams): ReadFileResponse => {
-  const content = buffer.toString(encoding);
+  const content = rawEncodingList.includes(encoding)
+    ? buffer.toString(encoding as BufferEncoding)
+    : iconv.decode(buffer, 'gbk');
 
   return {
     rawText: content
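`readFileRawText` now trusts only encodings Node's `Buffer` can decode natively; anything else the detector reports (in practice usually GBK/GB2312 from Chinese text files) is decoded with iconv-lite. A quick round-trip sketch of that fallback:

```ts
// GBK bytes are mojibake when read as UTF-8, but decode cleanly via iconv-lite.
import iconv from 'iconv-lite';

const gbkBytes = iconv.encode('中文内容', 'gbk'); // simulate a GBK-encoded upload
console.log(gbkBytes.toString('utf-8'));    // garbled replacement characters
console.log(iconv.decode(gbkBytes, 'gbk')); // '中文内容'
```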

View File

@@ -1,7 +1,7 @@
 export type ReadFileByBufferParams = {
   teamId: string;
   buffer: Buffer;
-  encoding: BufferEncoding;
+  encoding: string;
   metadata?: Record<string, any>;
 };

View File

@@ -103,7 +103,7 @@ export const deleteDatasetDataVector = async (
     }
     return Promise.reject('deleteDatasetData: no where');
   })();
-  console.log(where, '===');
+
   try {
     await PgClient.delete(PgDatasetTableName, {
       where: [where]

View File

@@ -13,8 +13,11 @@ export const connectPg = async (): Promise<Pool> => {
       max: Number(process.env.DB_MAX_LINK || 20),
       min: 10,
       keepAlive: true,
-      idleTimeoutMillis: 60000,
-      connectionTimeoutMillis: 20000
+      idleTimeoutMillis: 600000,
+      connectionTimeoutMillis: 20000,
+      query_timeout: 30000,
+      statement_timeout: 40000,
+      idle_in_transaction_session_timeout: 60000
     });
 
     global.pgClient.on('error', async (err) => {
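The new options split the cutoffs between driver and server: `query_timeout` is enforced client-side by node-postgres (the query promise rejects), while `statement_timeout` and `idle_in_transaction_session_timeout` are PostgreSQL settings applied per session. A standalone sketch mirroring the values above, with the semantics as comments (my reading of the pg docs, not part of the diff):

```ts
import { Pool } from 'pg';

const pool = new Pool({
  query_timeout: 30000, // client-side: node-postgres gives up on the query after 30 s
  statement_timeout: 40000, // server-side: PostgreSQL cancels the running statement after 40 s
  idle_in_transaction_session_timeout: 60000 // server-side: terminates sessions idling inside an open transaction
});
```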

View File

@@ -13,6 +13,7 @@
     "decompress": "^4.2.1",
     "encoding": "^0.1.13",
     "file-type": "^19.0.0",
+    "iconv-lite": "^0.6.3",
     "json5": "^2.2.3",
     "jsonwebtoken": "^9.0.2",
     "mammoth": "^1.6.0",

View File

@@ -173,7 +173,6 @@ export function registerLexicalTextEntity<T extends TextNode>(
 
 export function textToEditorState(text: string = '') {
   const paragraph = text?.split('\n');
-
   return JSON.stringify({
     root: {
       children: paragraph.map((p) => {
@@ -206,11 +205,23 @@ export function textToEditorState(text: string = '') {
 }
 
 export function editorStateToText(editor: LexicalEditor) {
-  const stringifiedEditorState = JSON.stringify(editor.getEditorState().toJSON());
-  const parsedEditorState = editor.parseEditorState(stringifiedEditorState);
-  const editorStateTextString = parsedEditorState.read(() => $getRoot().getTextContent());
-
-  return editorStateTextString;
+  const editorStateTextString: string[] = [];
+  const paragraphs = editor.getEditorState().toJSON().root.children;
+
+  paragraphs.forEach((paragraph: any) => {
+    const children = paragraph.children;
+    const paragraphText: string[] = [];
+    children.forEach((child: any) => {
+      if (child.type === 'linebreak') {
+        paragraphText.push(`
+`);
+      } else if (child.text) {
+        paragraphText.push(child.text);
+      }
+    });
+    editorStateTextString.push(paragraphText.join(''));
+  });
+
+  return editorStateTextString.join(`
+`);
 }
 
 const varRegex = /\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}/g;
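`editorStateToText` no longer round-trips through `parseEditorState` and `$getRoot().getTextContent()`; it walks the serialized JSON directly, turning `linebreak` nodes into newlines and joining paragraphs with newlines. A hedged sketch of the node shape it expects (abbreviated; real Lexical states carry more fields per node):

```ts
// Hypothetical serialized state, trimmed to the fields the new code reads.
const serialized = {
  root: {
    children: [
      {
        type: 'paragraph',
        children: [
          { type: 'text', text: 'first line' },
          { type: 'linebreak' },
          { type: 'text', text: 'second line' }
        ]
      },
      { type: 'paragraph', children: [{ type: 'text', text: 'third line' }] }
    ]
  }
};
// Walked as above, this yields: 'first line\nsecond line\nthird line'
```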