4.8.14 test (#3164)

* perf: match base64 image

* perf: register plugins
This commit is contained in:
Archer
2024-11-15 10:35:04 +08:00
committed by GitHub
parent f699061dea
commit 58745f8c35
9 changed files with 76 additions and 44 deletions

View File

@@ -4,12 +4,12 @@ import FormData from 'form-data';
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
import fs from 'fs';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import type { ReadFileResponse } from '../../../worker/readFile/type';
import axios from 'axios';
import { addLog } from '../../system/log';
import { batchRun } from '@fastgpt/global/common/fn/utils';
import { addHours } from 'date-fns';
import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
export type readRawTextByLocalFileParams = {
teamId: string;
@@ -79,6 +79,7 @@ export const readRawContentByFileBuffer = async ({
data: {
page: number;
markdown: string;
duration: number;
};
}>(customReadfileUrl, data, {
timeout: 600000,
@@ -90,10 +91,12 @@ export const readRawContentByFileBuffer = async ({
addLog.info(`Use custom read file service, time: ${Date.now() - start}ms`);
const rawText = response.data.markdown;
const { text, imageList } = matchMdImgTextAndUpload(rawText);
return {
rawText,
formatText: rawText
rawText: text,
formatText: rawText,
imageList
};
};
@@ -120,6 +123,9 @@ export const readRawContentByFileBuffer = async ({
}
});
rawText = rawText.replace(item.uuid, src);
if (formatText) {
formatText = formatText.replace(item.uuid, src);
}
});
}
@@ -128,7 +134,7 @@ export const readRawContentByFileBuffer = async ({
if (isQAImport) {
rawText = rawText || '';
} else {
rawText = formatText || '';
rawText = formatText || rawText;
}
}

View File

@@ -1,5 +1,6 @@
import TurndownService from 'turndown';
import { ImageType } from '../readFile/type';
import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
// @ts-ignore
const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
@@ -24,23 +25,10 @@ export const html2md = (
turndownService.remove(['i', 'script', 'iframe', 'style']);
turndownService.use(turndownPluginGfm.gfm);
const base64Regex = /"(data:image\/[^;]+;base64[^"]+)"/g;
const imageList: ImageType[] = [];
const images = Array.from(html.match(base64Regex) || []);
for (const image of images) {
const uuid = crypto.randomUUID();
const mime = image.split(';')[0].split(':')[1];
const base64 = image.split(',')[1];
html = html.replace(image, uuid);
imageList.push({
uuid,
base64,
mime
});
}
const { text, imageList } = matchMdImgTextAndUpload(html);
return {
rawText: turndownService.turndown(html),
rawText: turndownService.turndown(text),
imageList
};
} catch (error) {