mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
perf: retry to load image; perf: default index check (#4004)
* perf: retry to load image
* perf: default index check
This commit is contained in:
@@ -6,6 +6,7 @@ import { guessBase64ImageType } from '../utils';
|
||||
import { readFromSecondary } from '../../mongo/utils';
|
||||
import { addHours } from 'date-fns';
|
||||
import { imageFileType } from '@fastgpt/global/common/file/constants';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
export const maxImgSize = 1024 * 1024 * 12;
|
||||
const base64MimeRegex = /data:image\/([^\)]+);base64/;
|
||||
@@ -40,13 +41,15 @@ export async function uploadMongoImg({
|
||||
return Promise.reject(`Invalid image file type: ${mime}`);
|
||||
}
|
||||
|
||||
const { _id } = await MongoImage.create({
|
||||
teamId,
|
||||
binary,
|
||||
metadata: Object.assign({ mime }, metadata),
|
||||
shareId,
|
||||
expiredTime: forever ? undefined : addHours(new Date(), 1)
|
||||
});
|
||||
const { _id } = await retryFn(() =>
|
||||
MongoImage.create({
|
||||
teamId,
|
||||
binary,
|
||||
metadata: Object.assign({ mime }, metadata),
|
||||
shareId,
|
||||
expiredTime: forever ? undefined : addHours(new Date(), 1)
|
||||
})
|
||||
);
|
||||
|
||||
return `${process.env.NEXT_PUBLIC_BASE_URL || ''}${imageBaseUrl}${String(_id)}.${extension}`;
|
||||
}
|
||||
|
@@ -2,23 +2,30 @@ import axios from 'axios';
|
||||
import { addLog } from '../../system/log';
|
||||
import { serverRequestBaseUrl } from '../../api/serverRequest';
|
||||
import { getFileContentTypeFromHeader, guessBase64ImageType } from '../utils';
|
||||
import { retryFn } from '@fastgpt/global/common/system/utils';
|
||||
|
||||
export const getImageBase64 = async (url: string) => {
|
||||
addLog.debug(`Load image to base64: ${url}`);
|
||||
|
||||
try {
|
||||
const response = await axios.get(url, {
|
||||
baseURL: serverRequestBaseUrl,
|
||||
responseType: 'arraybuffer',
|
||||
proxy: false
|
||||
});
|
||||
const response = await retryFn(() =>
|
||||
axios.get(url, {
|
||||
baseURL: serverRequestBaseUrl,
|
||||
responseType: 'arraybuffer',
|
||||
proxy: false
|
||||
})
|
||||
);
|
||||
|
||||
const base64 = Buffer.from(response.data, 'binary').toString('base64');
|
||||
const imageType =
|
||||
getFileContentTypeFromHeader(response.headers['content-type']) ||
|
||||
guessBase64ImageType(base64);
|
||||
|
||||
return `data:${imageType};base64,${base64}`;
|
||||
return {
|
||||
completeBase64: `data:${imageType};base64,${base64}`,
|
||||
base64,
|
||||
mime: imageType
|
||||
};
|
||||
} catch (error) {
|
||||
addLog.debug(`Load image to base64 failed: ${url}`);
|
||||
console.log(error);
|
||||
|
@@ -6,11 +6,12 @@ import type { ImageType, ReadFileResponse } from '../../../worker/readFile/type'
|
||||
import axios from 'axios';
|
||||
import { addLog } from '../../system/log';
|
||||
import { batchRun } from '@fastgpt/global/common/system/utils';
|
||||
import { htmlTable2Md, matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
|
||||
import { htmlTable2Md, matchMdImg } from '@fastgpt/global/common/string/markdown';
|
||||
import { createPdfParseUsage } from '../../../support/wallet/usage/controller';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
import { getImageBase64 } from '../image/utils';
|
||||
|
||||
export type readRawTextByLocalFileParams = {
|
||||
teamId: string;
|
||||
@@ -99,7 +100,7 @@ export const readRawContentByFileBuffer = async ({
|
||||
addLog.info(`Custom file parsing is complete, time: ${Date.now() - start}ms`);
|
||||
|
||||
const rawText = response.markdown;
|
||||
const { text, imageList } = matchMdImgTextAndUpload(rawText);
|
||||
const { text, imageList } = matchMdImg(rawText);
|
||||
|
||||
createPdfParseUsage({
|
||||
teamId,
|
||||
@@ -120,8 +121,8 @@ export const readRawContentByFileBuffer = async ({
|
||||
const parseTextImage = async (text: string) => {
|
||||
// Extract image links and convert to base64
|
||||
const imageList: { id: string; url: string }[] = [];
|
||||
const processedText = text.replace(/!\[.*?\]\((http[^)]+)\)/g, (match, url) => {
|
||||
const id = getNanoid();
|
||||
let processedText = text.replace(/!\[.*?\]\((http[^)]+)\)/g, (match, url) => {
|
||||
const id = `IMAGE_${getNanoid()}_IMAGE`;
|
||||
imageList.push({
|
||||
id,
|
||||
url
|
||||
@@ -129,22 +130,24 @@ export const readRawContentByFileBuffer = async ({
|
||||
return ``;
|
||||
});
|
||||
|
||||
// Get base64 from image url
|
||||
let resultImageList: ImageType[] = [];
|
||||
await Promise.all(
|
||||
imageList.map(async (item) => {
|
||||
await batchRun(
|
||||
imageList,
|
||||
async (item) => {
|
||||
try {
|
||||
const response = await axios.get(item.url, { responseType: 'arraybuffer' });
|
||||
const mime = response.headers['content-type'] || 'image/jpeg';
|
||||
const base64 = response.data.toString('base64');
|
||||
const { base64, mime } = await getImageBase64(item.url);
|
||||
resultImageList.push({
|
||||
uuid: item.id,
|
||||
mime,
|
||||
base64
|
||||
});
|
||||
} catch (error) {
|
||||
processedText = processedText.replace(item.id, item.url);
|
||||
addLog.warn(`Failed to get image from ${item.url}: ${getErrText(error)}`);
|
||||
}
|
||||
})
|
||||
},
|
||||
5
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -312,14 +315,14 @@ export const readRawContentByFileBuffer = async ({
|
||||
return await uploadMongoImg({
|
||||
base64Img: `data:${item.mime};base64,${item.base64}`,
|
||||
teamId,
|
||||
// expiredTime: addHours(new Date(), 1),
|
||||
metadata: {
|
||||
...metadata,
|
||||
mime: item.mime
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
return '';
|
||||
addLog.warn('Upload file image error', { error });
|
||||
return 'Upload load image error';
|
||||
}
|
||||
})();
|
||||
rawText = rawText.replace(item.uuid, src);
|
||||
|
@@ -165,7 +165,7 @@ export const loadRequestMessages = async ({
|
||||
try {
|
||||
// If imgUrl is a local path, load image from local, and set url to base64
|
||||
if (imgUrl.startsWith('/') || process.env.MULTIPLE_DATA_TO_BASE64 === 'true') {
|
||||
const base64 = await getImageBase64(imgUrl);
|
||||
const { completeBase64: base64 } = await getImageBase64(imgUrl);
|
||||
|
||||
return {
|
||||
...item,
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import TurndownService from 'turndown';
|
||||
import { ImageType } from '../readFile/type';
|
||||
import { matchMdImgTextAndUpload } from '@fastgpt/global/common/string/markdown';
|
||||
import { matchMdImg } from '@fastgpt/global/common/string/markdown';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
// @ts-ignore
|
||||
const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
|
||||
@@ -46,7 +46,7 @@ export const html2md = (
|
||||
// Base64 img to id, otherwise it will occupy memory when going to md
|
||||
const { processedHtml, images } = processBase64Images(html);
|
||||
const md = turndownService.turndown(processedHtml);
|
||||
const { text, imageList } = matchMdImgTextAndUpload(md);
|
||||
const { text, imageList } = matchMdImg(md);
|
||||
|
||||
return {
|
||||
rawText: text,
|
||||
|
Reference in New Issue
Block a user