mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
fix: 网页抓取:正确处理//开头的超链接 (#2803)
Co-authored-by: zhenyiwang <zhenyiwang@intl.zju.edu.cn>
This commit is contained in:
@@ -14,6 +14,7 @@ export const cheerioToHtml = ({
|
|||||||
}) => {
|
}) => {
|
||||||
// get origin url
|
// get origin url
|
||||||
const originUrl = new URL(fetchUrl).origin;
|
const originUrl = new URL(fetchUrl).origin;
|
||||||
|
const protocol = new URL(fetchUrl).protocol; // http: or https:
|
||||||
|
|
||||||
const usedSelector = selector || 'body';
|
const usedSelector = selector || 'body';
|
||||||
const selectDom = $(usedSelector);
|
const selectDom = $(usedSelector);
|
||||||
@@ -32,14 +33,22 @@ export const cheerioToHtml = ({
|
|||||||
// if link,img startWith /, add origin url
|
// if link,img startWith /, add origin url
|
||||||
selectDom.find('a').each((i, el) => {
|
selectDom.find('a').each((i, el) => {
|
||||||
const href = $(el).attr('href');
|
const href = $(el).attr('href');
|
||||||
if (href && href.startsWith('/')) {
|
if (href) {
|
||||||
$(el).attr('href', originUrl + href);
|
if (href.startsWith('//')) {
|
||||||
|
$(el).attr('href', protocol + href);
|
||||||
|
} else if (href.startsWith('/')) {
|
||||||
|
$(el).attr('href', originUrl + href);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
selectDom.find('img').each((i, el) => {
|
selectDom.find('img').each((i, el) => {
|
||||||
const src = $(el).attr('src');
|
const src = $(el).attr('src');
|
||||||
if (src && src.startsWith('/')) {
|
if (src) {
|
||||||
$(el).attr('src', originUrl + src);
|
if (src.startsWith('//')) {
|
||||||
|
$(el).attr('src', protocol + src);
|
||||||
|
} else if (src.startsWith('/')) {
|
||||||
|
$(el).attr('src', originUrl + src);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user