From 113e8f711f83939b471a8d804cb3db7b7f4b1584 Mon Sep 17 00:00:00 2001 From: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> Date: Sun, 2 Mar 2025 17:50:03 +0800 Subject: [PATCH] add env proxypool (#3939) --- plugins/webcrawler/SPIDER/.env.example | 4 +- .../webcrawler/SPIDER/src/utils/setupPage.ts | 37 +++++-------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/plugins/webcrawler/SPIDER/.env.example b/plugins/webcrawler/SPIDER/.env.example index f97db78a4..0b928ac73 100644 --- a/plugins/webcrawler/SPIDER/.env.example +++ b/plugins/webcrawler/SPIDER/.env.example @@ -18,4 +18,6 @@ MONGODB_URI=mongodb://root:example@mongodb:27017 BLACKLIST = [".gov.cn",".edu.cn"] STD_TTL=3600 -EXPIRE_AFTER_SECONDS=9000 \ No newline at end of file +EXPIRE_AFTER_SECONDS=9000 + +#VALIDATE_PROXY=[{"ip":"","port":},{"ip":"","port":}] \ No newline at end of file diff --git a/plugins/webcrawler/SPIDER/src/utils/setupPage.ts b/plugins/webcrawler/SPIDER/src/utils/setupPage.ts index 9bebb3b0c..e95efa1ec 100644 --- a/plugins/webcrawler/SPIDER/src/utils/setupPage.ts +++ b/plugins/webcrawler/SPIDER/src/utils/setupPage.ts @@ -1,6 +1,8 @@ import { Page } from 'puppeteer'; import randomUseragent from 'random-useragent'; +import dotenv from 'dotenv'; +dotenv.config(); const getRandomUserAgent = () => { return randomUseragent.getRandom(); }; @@ -11,31 +13,10 @@ const getRandomPlatform = () => { }; //代理池 -const validateproxy = [ - { ip: "39.102.210.222", port: 8080 }, - { ip: "8.130.71.75", port: 8080 }, - { ip: "39.102.214.208", port: 9999 }, - { ip: "39.104.59.56", port: 8080 }, - { ip: "8.130.37.235", port: 3128 }, - { ip: "8.138.131.110", port: 8080 }, - { ip: "8.140.105.75", port: 8009 }, - { ip: "114.80.38.120", port: 3081 }, - { ip: "8.148.23.165", port: 8081 }, - { ip: "119.96.72.199", port: 59394 }, - { ip: "120.55.14.137", port: 80 }, - { ip: "47.116.181.146", port: 5060 }, - { ip: "39.102.214.199", port: 3128 }, - { ip: "47.121.183.107", port: 8080 }, - { ip: "39.104.16.201", port: 8080 }, - { ip: "39.102.209.163", port: 10002 }, - { ip: "101.201.76.157", port: 9090 }, - { ip: "122.224.124.26", port: 12080 }, - { ip: "180.105.244.199", port: 1080 }, - { ip: "119.3.113.150", port: 9094 } -]; +const validateproxy = process.env.VALIDATE_PROXY ? JSON.parse(process.env.VALIDATE_PROXY) : []; const getRandomProxy = () => { - return validateproxy[Math.floor(Math.random() * validateproxy.length)]; + return validateproxy.length > 0 ? validateproxy[Math.floor(Math.random() * validateproxy.length)] : null; }; const getRandomLanguages = () => { @@ -49,10 +30,12 @@ const getRandomLanguages = () => { export const setupPage = async (page: Page): Promise => { const proxy = getRandomProxy(); - await page.authenticate({ - username: proxy.ip, - password: proxy.port.toString() - }); + if (proxy) { + await page.authenticate({ + username: proxy.ip, + password: proxy.port.toString() + }); + } await page.evaluateOnNewDocument(() => { const newProto = (navigator as any).__proto__;