mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00

* update: Add type * fix: update import statement for NextApiRequest type * fix: update imports to use type for LexicalEditor and EditorState * Refactor imports to use 'import type' for type-only imports across multiple files - Updated imports in various components and API files to use 'import type' for better clarity and to optimize TypeScript's type checking. - Ensured consistent usage of type imports in files related to chat, dataset, workflow, and user management. - Improved code readability and maintainability by distinguishing between value and type imports. * refactor: remove old ESLint configuration and add new rules - Deleted the old ESLint configuration file from the app project. - Added a new ESLint configuration file with updated rules and settings. - Changed imports to use type-only imports in various files for better clarity and performance. - Updated TypeScript configuration to remove unnecessary options. - Added an ESLint ignore file to exclude build and dependency directories from linting. * fix: update imports to use 'import type' for type-only imports in schema files
63 lines
1.6 KiB
TypeScript
63 lines
1.6 KiB
TypeScript
import TurndownService from 'turndown';
|
|
import { type ImageType } from '../readFile/type';
|
|
import { matchMdImg } from '@fastgpt/global/common/string/markdown';
|
|
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
|
// @ts-ignore
|
|
const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
|
|
|
|
/**
 * Replace inline base64 images in an HTML string with short placeholder ids.
 *
 * Every `src` attribute holding a base64 data URI is rewritten to
 * `src="IMAGE_<id>_IMAGE"` (id generated by `getNanoid(12)`), and the original
 * payload is collected separately so it does not have to travel through the
 * HTML-to-Markdown conversion.
 *
 * Accepted forms:
 * - double- or single-quoted attribute values (`src="…"` / `src='…'`)
 * - optional media-type parameters before the `;base64` marker,
 *   e.g. `data:image/svg+xml;charset=utf-8;base64,…`
 *
 * @param htmlContent Raw HTML text.
 * @returns `processedHtml` with placeholders substituted, and `images` listing
 *          one entry (`uuid`, `base64`, `mime`) per replaced data URI, in
 *          document order.
 */
const processBase64Images = (
  htmlContent: string
): { processedHtml: string; images: ImageType[] } => {
  // Group 1: opening quote (back-referenced as the closing quote)
  // Group 2: MIME type (type/subtype only, parameters are skipped)
  // Group 3: base64 payload
  const base64Regex = /src=(["'])data:([^;,"']+)(?:;[^;,"']+)*;base64,([^"']+)\1/g;
  const images: ImageType[] = [];

  const processedHtml = htmlContent.replace(
    base64Regex,
    (_match: string, _quote: string, mime: string, base64Data: string) => {
      const uuid = `IMAGE_${getNanoid(12)}_IMAGE`;
      images.push({
        uuid,
        base64: base64Data,
        mime
      });
      // Always emit a double-quoted attribute, regardless of the input quoting.
      return `src="${uuid}"`;
    }
  );

  return { processedHtml, images };
};
|
|
|
|
/**
 * Convert an HTML string to Markdown and collect the images found along the way.
 *
 * Steps:
 * 1. Configure Turndown (ATX headings, `-` bullets, fenced code blocks,
 *    inlined links) and register the GFM plugin.
 * 2. Swap inline base64 images for placeholder ids via `processBase64Images`.
 * 3. Run the Turndown conversion, then pass the Markdown through `matchMdImg`.
 *
 * Any exception thrown during these steps is logged and an empty result is
 * returned instead of propagating the error.
 *
 * @param html HTML source text.
 * @returns `rawText` — the `text` produced by `matchMdImg`; `imageList` — the
 *          images extracted from the HTML followed by those reported by
 *          `matchMdImg`.
 */
export const html2md = (
  html: string
): {
  rawText: string;
  imageList: ImageType[];
} => {
  const converter = new TurndownService({
    headingStyle: 'atx',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full'
  });

  try {
    // Register removal rules for these tags, then enable the GFM plugin.
    converter.remove(['i', 'script', 'iframe', 'style']);
    converter.use(turndownPluginGfm.gfm);

    // Replace base64 images with ids first; otherwise the payloads would
    // occupy memory while being converted to Markdown.
    const extracted = processBase64Images(html);
    const markdown = converter.turndown(extracted.processedHtml);
    const matched = matchMdImg(markdown);

    return {
      rawText: matched.text,
      imageList: [...extracted.images, ...matched.imageList]
    };
  } catch (error) {
    console.log('html 2 markdown error', error);

    // Best-effort fallback: report nothing rather than failing the caller.
    return {
      rawText: '',
      imageList: []
    };
  }
};
|