mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
4.6.7 first pr (#726)
This commit is contained in:
@@ -4,15 +4,13 @@ export type CompressImgProps = {
|
||||
maxSize?: number;
|
||||
};
|
||||
|
||||
export const compressBase64ImgAndUpload = ({
|
||||
export const compressBase64Img = ({
|
||||
base64Img,
|
||||
maxW = 1080,
|
||||
maxH = 1080,
|
||||
maxSize = 1024 * 500, // 500kb
|
||||
uploadController
|
||||
maxSize = 1024 * 500 // 500kb
|
||||
}: CompressImgProps & {
|
||||
base64Img: string;
|
||||
uploadController: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
return new Promise<string>((resolve, reject) => {
|
||||
const fileType =
|
||||
@@ -54,12 +52,7 @@ export const compressBase64ImgAndUpload = ({
|
||||
return reject('图片太大了');
|
||||
}
|
||||
|
||||
try {
|
||||
const src = await uploadController(compressedDataUrl);
|
||||
resolve(src);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
resolve(compressedDataUrl);
|
||||
};
|
||||
img.onerror = reject;
|
||||
});
|
||||
|
@@ -1,53 +0,0 @@
|
||||
import { uploadMarkdownBase64 } from '@fastgpt/global/common/string/markdown';
|
||||
import { htmlStr2Md } from '../string/markdown';
|
||||
/**
|
||||
* read file raw text
|
||||
*/
|
||||
export const readFileRawText = (file: File) => {
|
||||
return new Promise((resolve: (_: string) => void, reject) => {
|
||||
try {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
resolve(reader.result as string);
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error txt read:', err);
|
||||
reject('Read file error');
|
||||
};
|
||||
reader.readAsText(file);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
export const readMdFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const md = await readFileRawText(file);
|
||||
const rawText = await uploadMarkdownBase64({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
return rawText;
|
||||
};
|
||||
|
||||
export const readHtmlFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const md = htmlStr2Md(await readFileRawText(file));
|
||||
const rawText = await uploadMarkdownBase64({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
|
||||
return rawText;
|
||||
};
|
21
packages/web/common/file/read/html.ts
Normal file
21
packages/web/common/file/read/html.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
import { htmlStr2Md } from '../../string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
|
||||
export const readHtmlFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const { rawText } = await readFileRawText(file);
|
||||
const md = htmlStr2Md(rawText);
|
||||
|
||||
const simpleMd = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
|
||||
return { rawText: rawText };
|
||||
};
|
46
packages/web/common/file/read/index.ts
Normal file
46
packages/web/common/file/read/index.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import { loadFile2Buffer } from '../utils';
|
||||
import { readHtmlFile } from './html';
|
||||
import { readMdFile } from './md';
|
||||
import { readPdfFile } from './pdf';
|
||||
import { readFileRawText } from './rawText';
|
||||
import { readWordFile } from './word';
|
||||
|
||||
export const readFileRawContent = async ({
|
||||
file,
|
||||
uploadBase64Controller
|
||||
}: {
|
||||
file: File;
|
||||
uploadBase64Controller?: (base64: string) => Promise<string>;
|
||||
}): Promise<{
|
||||
rawText: string;
|
||||
}> => {
|
||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||
|
||||
switch (extension) {
|
||||
case 'txt':
|
||||
return readFileRawText(file);
|
||||
case 'md':
|
||||
return readMdFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
case 'html':
|
||||
return readHtmlFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
case 'pdf':
|
||||
const pdf = await loadFile2Buffer({ file });
|
||||
return readPdfFile({ pdf });
|
||||
case 'docx':
|
||||
return readWordFile({
|
||||
file,
|
||||
uploadImgController: uploadBase64Controller
|
||||
});
|
||||
|
||||
default:
|
||||
return {
|
||||
rawText: ''
|
||||
};
|
||||
}
|
||||
};
|
17
packages/web/common/file/read/md.ts
Normal file
17
packages/web/common/file/read/md.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { readFileRawText } from './rawText';
|
||||
|
||||
export const readMdFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const { rawText: md } = await readFileRawText(file);
|
||||
const simpleMd = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController
|
||||
});
|
||||
return { rawText: simpleMd };
|
||||
};
|
64
packages/web/common/file/read/pdf.ts
Normal file
64
packages/web/common/file/read/pdf.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
/* read file to txt */
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
// Shape of a text token returned by pdf.js page.getTextContent().
// transform is the text matrix; transform[5] is the token's y position.
type TokenType = {
  str: string;
  dir: string;
  width: number;
  height: number;
  transform: number[];
  fontName: string;
  hasEOL: boolean;
};

/**
 * Extract plain text from a PDF (given as an ArrayBuffer) using pdf.js.
 * Pages are read concurrently and joined in page order.
 * Returns { rawText } with the concatenated text of all pages.
 */
export const readPdfFile = async ({ pdf }: { pdf: ArrayBuffer }) => {
  // pdf.js needs a worker script; assumed to be served from the app's
  // public /js directory — TODO confirm deployment path.
  pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';

  // Read one page: filter out header/footer tokens, merge EOL-only tokens,
  // and join the remaining token strings into a single string.
  const readPDFPage = async (doc: any, pageNo: number) => {
    const page = await doc.getPage(pageNo);
    const tokenizedText = await page.getTextContent();

    const viewport = page.getViewport({ scale: 1 });
    const pageHeight = viewport.height;
    // NOTE(review): tokens in the top/bottom 5% of the page are assumed to
    // be headers/footers and are dropped — confirm this suits all layouts.
    const headerThreshold = pageHeight * 0.95;
    const footerThreshold = pageHeight * 0.05;

    const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
      return (
        !token.transform ||
        (token.transform[5] < headerThreshold && token.transform[5] > footerThreshold)
      );
    });

    // concat empty string 'hasEOL'
    // Empty-string tokens only carry line-break info: copy their hasEOL flag
    // onto the previous token, then remove them (i-- compensates the splice).
    for (let i = 0; i < pageTexts.length; i++) {
      const item = pageTexts[i];
      if (item.str === '' && pageTexts[i - 1]) {
        pageTexts[i - 1].hasEOL = item.hasEOL;
        pageTexts.splice(i, 1);
        i--;
      }
    }

    page.cleanup();

    return pageTexts
      .map((token) => {
        // Treat an EOL as a paragraph break only when the token also ends
        // with sentence-final punctuation (CJK or Latin) or a newline.
        const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);

        return paragraphEnd ? `${token.str}\n` : token.str;
      })
      .join('');
  };

  const doc = await pdfjsLib.getDocument(pdf).promise;
  // Kick off all page reads concurrently; Promise.all preserves page order.
  const pageTextPromises = [];
  for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
    pageTextPromises.push(readPDFPage(doc, pageNo));
  }
  const pageTexts = await Promise.all(pageTextPromises);

  return {
    rawText: pageTexts.join('')
  };
};
|
22
packages/web/common/file/read/rawText.ts
Normal file
22
packages/web/common/file/read/rawText.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* read file raw text
|
||||
*/
|
||||
export const readFileRawText = (file: File) => {
|
||||
return new Promise<{ rawText: string }>((resolve, reject) => {
|
||||
try {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
resolve({
|
||||
rawText: reader.result as string
|
||||
});
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error txt read:', err);
|
||||
reject('Read file error');
|
||||
};
|
||||
reader.readAsText(file);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
};
|
28
packages/web/common/file/read/word.ts
Normal file
28
packages/web/common/file/read/word.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { htmlStr2Md } from '../../string/markdown';
|
||||
import { loadFile2Buffer } from '../utils';
|
||||
import mammoth from 'mammoth';
|
||||
|
||||
export const readWordFile = async ({
|
||||
file,
|
||||
uploadImgController
|
||||
}: {
|
||||
file: File;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
const buffer = await loadFile2Buffer({ file });
|
||||
|
||||
const { value: html } = await mammoth.convertToHtml({
|
||||
arrayBuffer: buffer
|
||||
});
|
||||
const md = htmlStr2Md(html);
|
||||
|
||||
const rawText = await markdownProcess({
|
||||
rawText: md,
|
||||
uploadImgController: uploadImgController
|
||||
});
|
||||
|
||||
return {
|
||||
rawText
|
||||
};
|
||||
};
|
31
packages/web/common/file/utils.ts
Normal file
31
packages/web/common/file/utils.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
|
||||
export const loadFile2Buffer = ({ file, onError }: { file: File; onError?: (err: any) => void }) =>
|
||||
new Promise<ArrayBuffer>((resolve, reject) => {
|
||||
try {
|
||||
let reader = new FileReader();
|
||||
reader.readAsArrayBuffer(file);
|
||||
reader.onload = async ({ target }) => {
|
||||
if (!target?.result) {
|
||||
onError?.('Load file error');
|
||||
return reject('Load file error');
|
||||
}
|
||||
try {
|
||||
resolve(target.result as ArrayBuffer);
|
||||
} catch (err) {
|
||||
console.log(err, 'Load file error');
|
||||
onError?.(err);
|
||||
|
||||
reject(getErrText(err, 'Load file error'));
|
||||
}
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log(err, 'Load file error');
|
||||
onError?.(err);
|
||||
|
||||
reject(getErrText(err, 'Load file error'));
|
||||
};
|
||||
} catch (error) {
|
||||
reject('The browser does not support file content reading');
|
||||
}
|
||||
});
|
Reference in New Issue
Block a user