4.6.2-production (#518)

This commit is contained in:
Archer
2023-11-26 16:13:45 +08:00
committed by GitHub
parent 3acbf1ab17
commit f818260711
30 changed files with 477 additions and 283 deletions

View File

@@ -33,74 +33,98 @@ export const uploadFiles = ({
* compress image. response base64
* @param maxSize The max size of the compressed image
*/
export const compressImgAndUpload = ({
file,
/**
 * Downscale a base64 (data URL) image on a canvas, re-encode it and upload it.
 *
 * The image is shrunk, keeping its aspect ratio, until it fits inside the
 * `maxW` x `maxH` box; images that already fit are never enlarged. The canvas
 * is re-encoded at quality 0.8 using the MIME type parsed from the data URL
 * prefix (falling back to 'image/jpeg') and handed to `postUploadImg`.
 *
 * @param base64 Source image as a data URL.
 * @param maxW Maximum output width in pixels (default 200).
 * @param maxH Maximum output height in pixels (default 200).
 * @param maxSize Maximum length, in characters, of the re-encoded data URL (default 1024 * 100).
 * @param expiredTime Forwarded unchanged to `postUploadImg`.
 * @returns A promise resolving with whatever `postUploadImg` resolves to.
 *   It rejects with a string message when the image cannot be decoded, when no
 *   2D context is available, or when the re-encoded data URL is longer than
 *   `maxSize`; any other failure (including an upload error) is passed through
 *   as the rejection reason.
 */
export const compressBase64ImgAndUpload = ({
  base64,
  maxW = 200,
  maxH = 200,
  maxSize = 1024 * 100, // 100kb
  expiredTime
}: {
  base64: string;
  maxW?: number;
  maxH?: number;
  maxSize?: number;
  expiredTime?: Date;
}) => {
  return new Promise<string>((resolve, reject) => {
    const fileType = /^data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+).*,/.exec(base64)?.[1] || 'image/jpeg';
    const img = new Image();

    // Without an error handler an undecodable data URL would leave the promise pending forever.
    img.onerror = () => {
      reject('压缩图片异常');
    };

    img.onload = async () => {
      // Everything runs inside one try/catch so that any unexpected throw
      // settles the promise instead of becoming an unhandled rejection.
      try {
        // A single scale factor constrains both axes at once; capping it at 1
        // keeps small images at their original size.
        const scale = Math.min(1, maxW / img.width, maxH / img.height);
        // Canvas dimensions are integers, so round explicitly rather than
        // relying on implicit truncation.
        const width = Math.round(img.width * scale);
        const height = Math.round(img.height * scale);

        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;

        const ctx = canvas.getContext('2d');
        if (!ctx) {
          return reject('压缩图片异常');
        }

        ctx.drawImage(img, 0, 0, width, height);
        const compressedDataUrl = canvas.toDataURL(fileType, 0.8);
        // remove the canvas element
        canvas.remove();

        if (compressedDataUrl.length > maxSize) {
          return reject('图片太大了');
        }

        const src = await postUploadImg(compressedDataUrl, expiredTime);
        resolve(src);
      } catch (error) {
        reject(error);
      }
    };

    // Assign the source only after both handlers are attached.
    img.src = base64;
  });
};
/**
 * Read an image `File` as a base64 data URL with a `FileReader`, then pass it,
 * together with the size and expiry options, to `compressBase64ImgAndUpload`.
 *
 * NOTE(review): this span appears to interleave removed and added lines of a
 * diff (two FileReader/Promise setups and an inline canvas routine are all
 * visible) — confirm the statement order against the committed file.
 *
 * @param file Image file to read.
 * @param maxW Forwarded to `compressBase64ImgAndUpload`.
 * @param maxH Forwarded to `compressBase64ImgAndUpload`.
 * @param maxSize Forwarded to `compressBase64ImgAndUpload`.
 * @param expiredTime Forwarded to `compressBase64ImgAndUpload`.
 * @returns The promise returned by `compressBase64ImgAndUpload`.
 */
export const compressImgFileAndUpload = async ({
file,
maxW,
maxH,
maxSize,
expiredTime
}: {
file: File;
maxW?: number;
maxH?: number;
maxSize?: number;
expiredTime?: Date;
}) =>
new Promise<string>((resolve, reject) => {
const reader = new FileReader();
reader.readAsDataURL(file);
}) => {
const reader = new FileReader();
reader.readAsDataURL(file);
// Wrap the FileReader callbacks in a promise that yields the data URL string.
const base64 = await new Promise<string>((resolve, reject) => {
reader.onload = async () => {
const img = new Image();
// @ts-ignore
img.src = reader.result;
img.onload = async () => {
let width = img.width;
let height = img.height;
// Landscape images are limited by maxW only, all others by maxH only.
if (width > height) {
if (width > maxW) {
height *= maxW / width;
width = maxW;
}
} else {
if (height > maxH) {
width *= maxH / height;
height = maxH;
}
}
const canvas = document.createElement('canvas');
canvas.width = width;
canvas.height = height;
const ctx = canvas.getContext('2d');
if (!ctx) {
return reject('压缩图片异常');
}
ctx.drawImage(img, 0, 0, width, height);
const compressedDataUrl = canvas.toDataURL(file.type, 0.8);
// remove the canvas element
canvas.remove();
if (compressedDataUrl.length > maxSize) {
return reject('图片太大了');
}
// If the upload throws, fall back to the compressed data URL itself.
const src = await (async () => {
try {
const src = await postUploadImg(compressedDataUrl, expiredTime);
return src;
} catch (error) {
return compressedDataUrl;
}
})();
resolve(src);
};
resolve(reader.result as string);
};
reader.onerror = (err) => {
console.log(err);
reject('压缩图片异常');
};
});
// Delegate scaling, re-encoding and upload to the base64 variant.
return compressBase64ImgAndUpload({
base64,
maxW,
maxH,
maxSize,
expiredTime
});
};

View File

@@ -1,6 +1,6 @@
import mammoth from 'mammoth';
import Papa from 'papaparse';
import { postUploadImg } from '@/web/common/file/api';
import { compressBase64ImgAndUpload } from './controller';
/**
* 读取 txt 文件内容
@@ -51,16 +51,30 @@ export const readPdfContent = (file: File) =>
const headerThreshold = pageHeight * 0.07; // 假设页头在页面顶部5%的区域内
const footerThreshold = pageHeight * 0.93; // 假设页脚在页面底部5%的区域内
const pageText = tokenizedText.items
.filter((token: TokenType) => {
return (
!token.transform ||
(token.transform[5] > headerThreshold && token.transform[5] < footerThreshold)
);
const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
return (
!token.transform ||
(token.transform[5] > headerThreshold && token.transform[5] < footerThreshold)
);
});
// concat empty string 'hasEOL'
for (let i = 0; i < pageTexts.length; i++) {
const item = pageTexts[i];
if (item.str === '' && pageTexts[i - 1]) {
pageTexts[i - 1].hasEOL = item.hasEOL;
pageTexts.splice(i, 1);
i--;
}
}
return pageTexts
.map((token) => {
const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);
return paragraphEnd ? `${token.str}\n` : token.str;
})
.map((token: TokenType) => token.str)
.join('');
return pageText;
};
let reader = new FileReader();
@@ -100,10 +114,41 @@ export const readDocContent = (file: File) =>
reader.onload = async ({ target }) => {
if (!target?.result) return reject('读取 doc 文件失败');
try {
const res = await mammoth.extractRawText({
// @ts-ignore
const res = await mammoth.convertToMarkdown({
arrayBuffer: target.result as ArrayBuffer
});
resolve(res?.value);
let rawText: string = res?.value || '';
// match base64, upload and replace it
const base64Regex = /data:image\/[a-zA-Z]+;base64,([^\)]+)/g;
const base64Arr = rawText.match(base64Regex) || [];
// upload base64 and replace it
await Promise.all(
base64Arr.map(async (base64) => {
try {
const str = await compressBase64ImgAndUpload({
base64,
maxW: 800,
maxH: 800,
maxSize: 1024 * 1024 * 2
});
rawText = rawText.replace(base64, str);
} catch (error) {
rawText = rawText.replace(base64, '');
rawText = rawText.replaceAll('![]()', '');
}
})
);
const trimReg = /\s*(!\[.*\]\(.*\))\s*/g;
if (trimReg.test(rawText)) {
rawText = rawText.replace(/\s*(!\[.*\]\(.*\))\s*/g, '$1');
}
resolve(rawText);
} catch (error) {
window.umami?.track('wordReadError', {
err: error?.toString()