mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-24 13:53:50 +00:00

* fix: upload file * chore: remove wasm, support html image parse * chore: adjust * chore: move base64match function into htmlstr2md
54 lines
1.3 KiB
TypeScript
54 lines
1.3 KiB
TypeScript
import TurndownService from 'turndown';
|
|
import { ImageType } from '../readFile/type';
|
|
// @ts-ignore
|
|
const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
|
|
|
|
/**
 * Convert an HTML string to Markdown, extracting inline base64 images on the way.
 *
 * Every quoted `data:image/<subtype>;base64,<payload>` value found in `html` is
 * swapped for a freshly generated UUID before conversion, so the resulting
 * Markdown references the image by that UUID instead of embedding the payload.
 * The extracted images are returned next to the Markdown text.
 *
 * @param html - HTML markup to convert.
 * @returns `rawText` is the Markdown produced by Turndown (with the GFM plugin);
 *   `imageList` holds one `{ uuid, base64, mime }` entry per extracted image, in
 *   document order. If anything throws, the error is logged and an empty result
 *   (`rawText: ''`, `imageList: []`) is returned instead of propagating it.
 */
export const html2md = (
  html: string
): {
  rawText: string;
  imageList: ImageType[];
} => {
  const turndownService = new TurndownService({
    headingStyle: 'atx',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full'
  });

  try {
    turndownService.remove(['i', 'script', 'iframe', 'style']);
    turndownService.use(turndownPluginGfm.gfm);

    // Groups: 1 = opening quote (must also close the value), 2 = mime type,
    // 3 = base64 payload. Capturing the pieces directly keeps the surrounding
    // quotes out of the stored payload.
    const base64Regex = /(["'])data:(image\/[^;"']+);base64,([^"']+)\1/g;
    const imageList: ImageType[] = [];

    // Single pass over the markup: each data URI is recorded and replaced by its
    // placeholder UUID. The quotes are written back so the attribute stays
    // well-formed even when it is directly followed by `/>`.
    const htmlWithPlaceholders = html.replace(
      base64Regex,
      (_match: string, quote: string, mime: string, base64: string) => {
        const uuid = crypto.randomUUID();
        imageList.push({
          uuid,
          base64,
          mime
        });
        return `${quote}${uuid}${quote}`;
      }
    );

    return {
      rawText: turndownService.turndown(htmlWithPlaceholders),
      imageList
    };
  } catch (error) {
    // Best effort: a conversion failure yields an empty document rather than an exception.
    console.log('html 2 markdown error', error);
    return {
      rawText: '',
      imageList: []
    };
  }
};
|