update text splitter (#3020)

This commit is contained in:
Archer
2024-10-30 01:10:35 +08:00
committed by shilin66
parent 78f44b936f
commit 6e84cb5fd9
12 changed files with 83 additions and 72 deletions

View File

@@ -1,8 +1,12 @@
import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
import { WorkerNameEnum, runWorker } from '../../worker/utils';
+ import { ImageType } from '../../worker/readFile/type';
export const htmlToMarkdown = async (html?: string | null) => {
- const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
+ const md = await runWorker<{
+ rawText: string;
+ imageList: ImageType[];
+ }>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
- return simpleMarkdownText(md);
+ return simpleMarkdownText(md.rawText);
};

View File

@@ -34,7 +34,7 @@
"pdfjs-dist": "4.4.168",
"pg": "^8.10.0",
"request-ip": "^3.3.0",
- "tiktoken": "^1.0.15",
+ "tiktoken": "1.0.17",
"tunnel": "^0.0.6",
"turndown": "^7.1.2"
},

View File

@@ -178,11 +178,13 @@ export class WorkerPool<Props = Record<string, any>, Response = any> {
// Worker error (uncaught error): terminate the worker and delete it.
worker.on('error', (err) => {
addLog.warn('Worker error', { err });
console.log(err);
addLog.error('Worker error', err);
this.deleteWorker(workerId);
});
worker.on('messageerror', (err) => {
addLog.warn('Worker error', { err });
console.log(err);
addLog.error('Worker messageerror', err);
this.deleteWorker(workerId);
});