feat: Text check before synchronization (#689)

* fix: icon

* fix: web selector

* fix: web selector

* perf: link sync

* dev doc

* chmod doc

* perf: git intro

* 466 intro

* intro img

* add json editor (#5)

* team limit

* websync limit

* json editor

* text editor

* perf: search test

* change cq value type

* doc

* intro img

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-01-04 23:19:24 +08:00
committed by GitHub
parent c2abbb579f
commit 828829011a
64 changed files with 1789 additions and 1489 deletions

View File

@@ -15,7 +15,8 @@ export const cheerioToHtml = ({
// get origin url
const originUrl = new URL(fetchUrl).origin;
const selectDom = $(selector || 'body');
const usedSelector = selector || 'body';
const selectDom = $(usedSelector);
// remove i element
selectDom.find('i,script').remove();
@@ -49,7 +50,10 @@ export const cheerioToHtml = ({
.get()
.join('\n');
return html;
return {
html,
usedSelector
};
};
export const urlsFetch = async ({
urlList,
@@ -66,25 +70,25 @@ export const urlsFetch = async ({
});
const $ = cheerio.load(fetchRes.data);
const md = await htmlToMarkdown(
cheerioToHtml({
fetchUrl: url,
$,
selector
})
);
const { html, usedSelector } = cheerioToHtml({
fetchUrl: url,
$,
selector
});
const md = await htmlToMarkdown(html);
return {
url,
content: md
content: md,
selector: usedSelector
};
} catch (error) {
console.log(error, 'fetch error');
return {
url,
content: ''
content: '',
selector: ''
};
}
})

View File

@@ -21,6 +21,9 @@ export const htmlToMarkdown = (html?: string | null) =>
worker.terminate();
reject(err);
});
worker.on('exit', (code) => {
console.log('html 2 md finish', code);
});
worker.postMessage(html);
});