mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
fix: upload file (#2992)
* fix: upload file
* chore: remove wasm, support html image parse
* chore: adjust
* chore: move base64match function into htmlstr2md
This commit is contained in:
@@ -1,7 +1,14 @@
|
||||
import TurndownService from 'turndown';
|
||||
import { ImageType } from '../readFile/type';
|
||||
// @ts-ignore
|
||||
const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
|
||||
|
||||
/**
 * Convert an HTML string to Markdown with Turndown, lifting inline base64 images out of the markup.
 *
 * This span is a rendered diff: the pre-change lines (the `string`-returning signature,
 * `return turndownService.turndown(html)` and `return ''`) are interleaved with the
 * post-change lines, and the hunk marker in the middle elides part of the
 * TurndownService options.
 *
 * Post-change behaviour shown here:
 * - each double-quoted `data:image/...;base64...` value found in `html` is replaced by a
 *   freshly generated UUID and recorded in `imageList` as `{ uuid, base64, mime }`;
 * - the rewritten HTML is then converted with `turndownService.turndown`.
 *
 * @param html - HTML source to convert.
 * @returns `rawText` (the Markdown) plus `imageList`. If anything inside the `try` throws,
 *          the error is logged and `{ rawText: '', imageList: [] }` is returned instead.
 */
// Pre-change signature: returned the Markdown string directly.
export const html2md = (html: string): string => {
|
||||
// Post-change signature: returns the Markdown together with the extracted images.
export const html2md = (
|
||||
html: string
|
||||
): {
|
||||
rawText: string;
|
||||
imageList: ImageType[];
|
||||
} => {
|
||||
const turndownService = new TurndownService({
|
||||
headingStyle: 'atx',
|
||||
bulletListMarker: '-',
|
||||
// Hunk boundary: the remaining constructor options are not visible in this view.
@@ -15,12 +22,32 @@ export const html2md = (html: string): string => {
|
||||
|
||||
try {
|
||||
// Drop these elements entirely instead of converting their contents.
turndownService.remove(['i', 'script', 'iframe', 'style']);
|
||||
|
||||
turndownService.use(turndownPluginGfm.gfm);
|
||||
|
||||
// Pre-change return: Markdown string only.
return turndownService.turndown(html);
|
||||
// Matches a double-quoted image data URI. With the `g` flag, `match()` returns the whole
// matches (not the capture group), so every entry still carries its surrounding quotes.
const base64Regex = /"(data:image\/[^;]+;base64[^"]+)"/g;
|
||||
const imageList: ImageType[] = [];
|
||||
const images = Array.from(html.match(base64Regex) || []);
|
||||
for (const image of images) {
|
||||
const uuid = crypto.randomUUID();
|
||||
// Text between `data:` and the first `;`, e.g. `image/png`.
const mime = image.split(';')[0].split(':')[1];
|
||||
// NOTE(review): `image` still ends with the closing `"` of the attribute, so this slice
// appears to leave a trailing quote on `base64` — consider reading capture group 1
// (e.g. via `matchAll`) instead. TODO confirm and fix.
const base64 = image.split(',')[1];
|
||||
// A string pattern replaces only the first remaining occurrence; the quotes are part of
// `image`, so the attribute value becomes the bare uuid.
html = html.replace(image, uuid);
|
||||
imageList.push({
|
||||
uuid,
|
||||
base64,
|
||||
mime
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
rawText: turndownService.turndown(html),
|
||||
imageList
|
||||
};
|
||||
} catch (error) {
|
||||
console.log('html 2 markdown error', error);
|
||||
// Pre-change fallback: empty string.
return '';
|
||||
// Post-change fallback: empty result in the new return shape.
return {
|
||||
rawText: '',
|
||||
imageList: []
|
||||
};
|
||||
}
|
||||
};
|
||||
|
@@ -1,20 +1,39 @@
|
||||
import mammoth from 'mammoth';
|
||||
import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
|
||||
import mammoth, { images } from 'mammoth';
|
||||
import { ReadRawTextByBuffer, ReadFileResponse, ImageType } from '../type';
|
||||
import { html2md } from '../../htmlStr2Md/utils';
|
||||
|
||||
/**
|
||||
* read docx to markdown
|
||||
*/
|
||||
export const readDocsFile = async ({ buffer }: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
|
||||
const imageList: ImageType[] = [];
|
||||
try {
|
||||
const { value: html } = await mammoth.convertToHtml({
|
||||
buffer
|
||||
});
|
||||
const { value: html } = await mammoth.convertToHtml(
|
||||
{
|
||||
buffer
|
||||
},
|
||||
{
|
||||
convertImage: images.imgElement(async (image) => {
|
||||
const imageBase64 = await image.readAsBase64String();
|
||||
const uuid = crypto.randomUUID();
|
||||
const mime = image.contentType;
|
||||
imageList.push({
|
||||
uuid,
|
||||
base64: imageBase64,
|
||||
mime
|
||||
});
|
||||
return {
|
||||
src: uuid
|
||||
};
|
||||
})
|
||||
}
|
||||
);
|
||||
|
||||
const rawText = html2md(html);
|
||||
const { rawText } = html2md(html);
|
||||
|
||||
return {
|
||||
rawText
|
||||
rawText,
|
||||
imageList
|
||||
};
|
||||
} catch (error) {
|
||||
console.log('error doc read:', error);
|
||||
|
@@ -5,9 +5,10 @@ import { html2md } from '../../htmlStr2Md/utils';
|
||||
/**
 * Read an HTML file and return its content as Markdown.
 *
 * Obtains the HTML text via `readFileRawText(params)` and passes it through `html2md`.
 * This span is a rendered diff, so the pre-change lines (which used the string returned by
 * `html2md` directly) appear next to the post-change lines (which destructure
 * `{ rawText, imageList }` and forward both).
 *
 * @param params - Reader input forwarded unchanged to `readFileRawText`.
 * @returns The Markdown as `rawText` and, post-change, the `imageList` produced by `html2md`.
 */
export const readHtmlRawText = async (params: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
|
||||
// `rawText` from the plain-text reader is the HTML source here, hence the rename.
const { rawText: html } = readFileRawText(params);
|
||||
|
||||
// Pre-change line: html2md returned the Markdown string itself.
const rawText = html2md(html);
|
||||
// Post-change line: html2md returns an object carrying the Markdown and the extracted images.
const { rawText, imageList } = html2md(html);
|
||||
|
||||
return {
|
||||
// Pre-change property line.
rawText
|
||||
// Post-change property lines.
rawText,
|
||||
imageList
|
||||
};
|
||||
};
|
||||
|
7
packages/service/worker/readFile/type.d.ts
vendored
7
packages/service/worker/readFile/type.d.ts
vendored
@@ -8,7 +8,14 @@ export type ReadRawTextProps<T> = {
|
||||
|
||||
export type ReadRawTextByBuffer = ReadRawTextProps<Buffer>;
|
||||
|
||||
/**
 * An image lifted out of a parsed document.
 */
export type ImageType = {
|
||||
// Generated identifier that stands in for the image inside the converted document.
uuid: string;
|
||||
// Image payload as a base64 string.
base64: string;
|
||||
// MIME type of the image, e.g. `image/png`.
mime: string;
|
||||
};
|
||||
|
||||
/**
 * Result shape returned by the file readers.
 */
export type ReadFileResponse = {
|
||||
// Extracted text content (Markdown for the HTML and docx readers).
rawText: string;
|
||||
// Optional alternative text form; its exact role is not visible here — TODO confirm against callers.
formatText?: string;
|
||||
// Optional images extracted during parsing; entries are referenced from the text by their `uuid`.
imageList?: ImageType[];
|
||||
};
|
||||
|
Reference in New Issue
Block a user