mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-18 01:16:01 +00:00
feat: Text check before synchronization (#689)
* fix: icon * fix: web selector * fix: web selector * perf: link sync * dev doc * chmod doc * perf: git intro * 466 intro * intro img * add json editor (#5) * team limit * websync limit * json editor * text editor * perf: search test * change cq value type * doc * intro img --------- Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -15,7 +15,8 @@ export const cheerioToHtml = ({
|
||||
// get origin url
|
||||
const originUrl = new URL(fetchUrl).origin;
|
||||
|
||||
const selectDom = $(selector || 'body');
|
||||
const usedSelector = selector || 'body';
|
||||
const selectDom = $(usedSelector);
|
||||
|
||||
// remove i element
|
||||
selectDom.find('i,script').remove();
|
||||
@@ -49,7 +50,10 @@ export const cheerioToHtml = ({
|
||||
.get()
|
||||
.join('\n');
|
||||
|
||||
return html;
|
||||
return {
|
||||
html,
|
||||
usedSelector
|
||||
};
|
||||
};
|
||||
export const urlsFetch = async ({
|
||||
urlList,
|
||||
@@ -66,25 +70,25 @@ export const urlsFetch = async ({
|
||||
});
|
||||
|
||||
const $ = cheerio.load(fetchRes.data);
|
||||
|
||||
const md = await htmlToMarkdown(
|
||||
cheerioToHtml({
|
||||
fetchUrl: url,
|
||||
$,
|
||||
selector
|
||||
})
|
||||
);
|
||||
const { html, usedSelector } = cheerioToHtml({
|
||||
fetchUrl: url,
|
||||
$,
|
||||
selector
|
||||
});
|
||||
const md = await htmlToMarkdown(html);
|
||||
|
||||
return {
|
||||
url,
|
||||
content: md
|
||||
content: md,
|
||||
selector: usedSelector
|
||||
};
|
||||
} catch (error) {
|
||||
console.log(error, 'fetch error');
|
||||
|
||||
return {
|
||||
url,
|
||||
content: ''
|
||||
content: '',
|
||||
selector: ''
|
||||
};
|
||||
}
|
||||
})
|
||||
|
@@ -21,6 +21,9 @@ export const htmlToMarkdown = (html?: string | null) =>
|
||||
worker.terminate();
|
||||
reject(err);
|
||||
});
|
||||
worker.on('exit', (code) => {
|
||||
console.log('html 2 md finish', code);
|
||||
});
|
||||
|
||||
worker.postMessage(html);
|
||||
});
|
||||
|
Reference in New Issue
Block a user