perf: backup import (#4866)

* i18n

* remove invalid code

* perf: backup import

* backup tip

* fix: indexsize invalid
This commit is contained in:
Archer
2025-05-22 15:53:51 +08:00
committed by GitHub
parent dd3c251603
commit 88bd3aaa9e
67 changed files with 751 additions and 388 deletions

View File

@@ -210,15 +210,15 @@ export const readFileContentFromMongo = async ({
tmbId,
bucketName,
fileId,
isQAImport = false,
customPdfParse = false
customPdfParse = false,
getFormatText
}: {
teamId: string;
tmbId: string;
bucketName: `${BucketNameEnum}`;
fileId: string;
isQAImport?: boolean;
customPdfParse?: boolean;
getFormatText?: boolean; // Convert data to markdown format wherever possible
}): Promise<{
rawText: string;
filename: string;
@@ -254,8 +254,8 @@ export const readFileContentFromMongo = async ({
// Get raw text
const { rawText } = await readRawContentByFileBuffer({
customPdfParse,
getFormatText,
extension,
isQAImport,
teamId,
tmbId,
buffer: fileBuffers,

View File

@@ -16,6 +16,7 @@ export type readRawTextByLocalFileParams = {
path: string;
encoding: string;
customPdfParse?: boolean;
getFormatText?: boolean;
metadata?: Record<string, any>;
};
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
@@ -27,8 +28,8 @@ export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParam
return readRawContentByFileBuffer({
extension,
isQAImport: false,
customPdfParse: params.customPdfParse,
getFormatText: params.getFormatText,
teamId: params.teamId,
tmbId: params.tmbId,
encoding: params.encoding,
@@ -46,7 +47,7 @@ export const readRawContentByFileBuffer = async ({
encoding,
metadata,
customPdfParse = false,
isQAImport = false
getFormatText = true
}: {
teamId: string;
tmbId: string;
@@ -57,8 +58,10 @@ export const readRawContentByFileBuffer = async ({
metadata?: Record<string, any>;
customPdfParse?: boolean;
isQAImport: boolean;
}): Promise<ReadFileResponse> => {
getFormatText?: boolean;
}): Promise<{
rawText: string;
}> => {
const systemParse = () =>
runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
extension,
@@ -176,16 +179,7 @@ export const readRawContentByFileBuffer = async ({
});
}
if (['csv', 'xlsx'].includes(extension)) {
// qa data
if (isQAImport) {
rawText = rawText || '';
} else {
rawText = formatText || rawText;
}
}
addLog.debug(`Upload file success, time: ${Date.now() - start}ms`);
return { rawText, formatText, imageList };
return { rawText: getFormatText ? formatText || rawText : rawText };
};