fix: Check the URL to avoid SSRF attacks (#3965)

* fix: Check the URL to avoid SSRF attacks

* Delete docSite/content/zh-cn/docs/development/upgrading/490.md
This commit is contained in:
Archer
2025-03-04 14:45:29 +08:00
committed by GitHub
parent e860c56b77
commit b4dda6a41b
4 changed files with 208 additions and 53 deletions

View File

@@ -2,6 +2,7 @@ import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/ap
import * as cheerio from 'cheerio';
import axios from 'axios';
import { htmlToMarkdown } from './utils';
import { isInternalAddress } from '../system/utils';
export const cheerioToHtml = ({
fetchUrl,
@@ -75,6 +76,16 @@ export const urlsFetch = async ({
const response = await Promise.all(
urlList.map(async (url) => {
const isInternal = isInternalAddress(url);
if (isInternal) {
return {
url,
title: '',
content: 'Cannot fetch internal url',
selector: ''
};
}
try {
const fetchRes = await axios.get(url, {
timeout: 30000

View File

@@ -0,0 +1,63 @@
import { SERVICE_LOCAL_HOST } from './tools';
/**
 * Hostnames of well-known cloud instance-metadata services. A request to any
 * of these hosts is treated as internal regardless of path, port or query.
 */
const METADATA_HOSTNAMES: ReadonlySet<string> = new Set([
  '169.254.169.254', // AWS, Azure, Huawei Cloud
  '100.100.100.200', // Alibaba Cloud
  'metadata.google.internal', // GCP
  'metadata.tencentyun.com' // Tencent Cloud
]);

/**
 * Reports whether an IPv4 address falls in a non-public range.
 * Only the first two octets are needed for every range checked here.
 */
const isNonPublicIpv4 = (first: number, second: number): boolean =>
  first === 0 || // "this network"
  first === 10 || // private (10.0.0.0/8)
  first === 127 || // loopback
  (first === 169 && second === 254) || // link-local
  (first === 172 && second >= 16 && second <= 31) || // private (172.16.0.0/12)
  (first === 192 && second === 168) || // private (192.168.0.0/16)
  (first === 100 && second >= 64 && second <= 127) || // shared address space (100.64.0.0/10)
  first >= 224 || // multicast (224-239) and reserved (240-255)
  // 9.0.x.x and 11.0.x.x were excluded by the original range list; kept for
  // compatibility. NOTE(review): presumably provider-internal ranges - confirm.
  (first === 9 && second === 0) ||
  (first === 11 && second === 0);

/**
 * Reports whether an IPv6 literal (without the surrounding brackets, as
 * serialised by `URL`) is loopback, unspecified, unique-local, link-local,
 * multicast, or an IPv4-mapped address that maps to a non-public IPv4 range.
 */
const isNonPublicIpv6 = (address: string): boolean => {
  const addr = address.toLowerCase();
  if (addr === '::' || addr === '::1') {
    return true;
  }

  // `URL` serialises IPv4-mapped addresses in hex form, e.g. ::ffff:7f00:1.
  const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(addr);
  if (mapped) {
    const high = parseInt(mapped[1] ?? '0', 16);
    return isNonPublicIpv4(high >> 8, high & 0xff);
  }

  const firstHextet = parseInt(addr.split(':')[0] || '0', 16);
  return (
    (firstHextet & 0xfe00) === 0xfc00 || // unique local (fc00::/7)
    (firstHextet & 0xffc0) === 0xfe80 || // link-local (fe80::/10)
    (firstHextet & 0xff00) === 0xff00 // multicast (ff00::/8)
  );
};

/**
 * Decides whether a URL points at an internal / non-public target and must
 * therefore not be fetched on behalf of a user (SSRF protection).
 *
 * The check is purely lexical: the host is taken from the parsed URL and is
 * never resolved through DNS, so a public domain name whose DNS record points
 * at a private address is NOT detected here.
 *
 * @param url - The absolute URL to inspect.
 * @returns `true` when the URL is unparsable, has no host, targets the local
 *   service, localhost, a cloud metadata service, or a non-public IP literal;
 *   `false` for public IP literals and ordinary domain names.
 */
export const isInternalAddress = (url: string): boolean => {
  try {
    const parsedUrl = new URL(url);
    // URL parsing lower-cases the host and normalises IPv4 spellings such as
    // 0x7f.0.0.1 or 2130706433 to dotted decimal. Strip a trailing root dot
    // so "localhost." cannot bypass the name checks below.
    const hostname = parsedUrl.hostname.replace(/\.$/, '');

    // No host at all (file:, data:, mailto:, ...) - nothing public to fetch.
    if (hostname === '') {
      return true;
    }

    // The service's own address. Compared against both forms because
    // SERVICE_LOCAL_HOST may or may not include a port - TODO confirm.
    if (hostname === SERVICE_LOCAL_HOST || parsedUrl.host === SERVICE_LOCAL_HOST) {
      return true;
    }

    if (hostname === 'localhost' || hostname.endsWith('.localhost')) {
      return true;
    }

    if (METADATA_HOSTNAMES.has(hostname)) {
      return true;
    }

    // IPv6 literals are serialised with surrounding brackets.
    if (hostname.startsWith('[')) {
      return isNonPublicIpv6(hostname.slice(1, -1));
    }

    const ipv4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(hostname);
    if (ipv4) {
      const octets = ipv4.slice(1).map(Number);
      // An out-of-range octet is not a valid address; fail closed.
      if (octets.some((octet) => octet > 255)) {
        return true;
      }
      return isNonPublicIpv4(octets[0] ?? 0, octets[1] ?? 0);
    }

    // Ordinary domain name: considered external.
    return false;
  } catch {
    // Unparsable input is rejected as potentially unsafe.
    return true;
  }
};