Python Sandbox (#4380)

* Python3 Sandbox (#3944)

* update python box (#4251)

* update python box

* Adjust the height of the NodeCode border.

* update python sandbox and add test systemcall bash

* update sandbox

* add VERSION_RELEASE (#4376)

* save empty docx

* fix pythonbox log error

* fix: js template

---------

Co-authored-by: dogfar <37035781+dogfar@users.noreply.github.com>
Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com>
Co-authored-by: gggaaallleee <1293587368@qq.com>
This commit is contained in:
Archer
2025-03-28 13:45:09 +08:00
committed by GitHub
parent 8323c2d27e
commit 565a966d19
23 changed files with 777 additions and 92 deletions

View File

@@ -0,0 +1,80 @@
# Compose project name.
# The legacy top-level `version` key is intentionally not set: it is obsolete
# under the Compose Specification (Compose only warns about it), and this file
# already relies on spec-only features such as the top-level `name` key.
name: spider
# Three services share the spider_net network:
#   searxng - metasearch backend, reached by nodeapp at http://searxng:8080
#   mongodb - database, reached by nodeapp through MONGODB_URI
#   nodeapp - the crawler application, published on host port 3000
#
# Credentials and the access token are read from the environment (or an .env
# file) via ${VAR:-default}; the defaults equal the previously hard-coded
# values, so an unconfigured deployment behaves exactly as before.
services:
  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    platform: linux/amd64
    restart: unless-stopped
    networks:
      - spider_net
    ports:
      - "8080:8080"
    volumes:
      # Host directory ./searxng is mounted read-write at /etc/searxng.
      - ./searxng:/etc/searxng:rw
    environment:
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/
      # Number of uWSGI worker processes.
      - UWSGI_WORKERS=4
      # Number of uWSGI threads.
      - UWSGI_THREADS=4
    cap_drop:
      - ALL

  mongodb:
    container_name: mongodb
    image: mongo:4.4
    restart: unless-stopped
    networks:
      - spider_net
    ports:
      # NOTE(review): this publishes the database on every host interface;
      # confirm that is intended, especially with the default credentials.
      - "27017:27017"
    volumes:
      - mongo-data:/data/db
    environment:
      # MongoDB root user name.
      MONGO_INITDB_ROOT_USERNAME: "${MONGO_ROOT_USERNAME:-root}"
      # MongoDB root user password.
      MONGO_INITDB_ROOT_PASSWORD: "${MONGO_ROOT_PASSWORD:-example}"

  nodeapp:
    container_name: main
    platform: linux/amd64
    # build:
    #   context: .
    image: gggaaallleee/webcrawler-test-new:latest
    ports:
      - "3000:3000"
    networks:
      - spider_net
    depends_on:
      - mongodb
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
    volumes:
      - /dev/shm:/dev/shm
    environment:
      # Access token.
      - ACCESS_TOKEN=${ACCESS_TOKEN:-webcrawler}
      # Websites that cannot be processed and are skipped.
      - DETECT_WEBSITE=zhuanlan.zhihu.com
      # Page-load strategies (JSON); quoted so YAML always reads it as one string.
      - 'STRATEGIES=[{"waitUntil":"networkidle0","timeout":5000},{"waitUntil":"networkidle2","timeout":10000},{"waitUntil":"load","timeout":15000}]'
      - PORT=3000
      # Maximum concurrency.
      - MAX_CONCURRENCY=10
      - NODE_ENV=development
      # Baidu search engine URL.
      - ENGINE_BAIDUURL=https://www.baidu.com/s
      # SearXNG search engine URL (the searxng service above).
      - ENGINE_SEARCHXNGURL=http://searxng:8080/search
      # MongoDB connection URI; uses the same variables as the mongodb service
      # so the two cannot drift apart. Values with URI-reserved characters
      # must be percent-encoded.
      - MONGODB_URI=mongodb://${MONGO_ROOT_USERNAME:-root}:${MONGO_ROOT_PASSWORD:-example}@mongodb:27017
      # Protected domains (JSON list).
      - 'BLACKLIST=[".gov.cn",".edu.cn"]'
      # Standard TTL.
      - STD_TTL=3600
      # Expiration time (seconds).
      - EXPIRE_AFTER_SECONDS=9000
      # Proxy pool:
      # - VALIDATE_PROXY=[{"ip":"","port":},{"ip":"","port":}]
    deploy:
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
# Network joined by the searxng, mongodb and nodeapp services (default settings).
networks:
  spider_net: {}

# Named volume that the mongodb service mounts at /data/db (default settings).
volumes:
  mongo-data: {}

View File

@@ -0,0 +1,6 @@
# Overrides applied on top of SearXNG's default limiter configuration.
# Defaults: https://github.com/searxng/searxng/blob/master/searx/limiter.toml

# Activate the link_token method inside the ip_limit method.
botdetection.ip_limit.link_token = true

View File

@@ -0,0 +1,122 @@
# Instance-wide settings.
general:
  debug: false
  instance_name: searxng
  # Privacy-policy, donation and contact links are all switched off.
  privacypolicy_url: false
  donation_url: false
  contact_url: false
  enable_metrics: true
  open_metrics: ""
# Project links; all of them point at the upstream SearXNG project.
brand:
  new_issue_url: 'https://github.com/searxng/searxng/issues/new'
  docs_url: 'https://docs.searxng.org/'
  public_instances: 'https://searx.space'
  wiki_url: 'https://github.com/searxng/searxng/wiki'
  issue_url: 'https://github.com/searxng/searxng/issues'
# Search behaviour. `search` must be declared exactly once: a repeated
# top-level key is invalid YAML and most parsers silently keep only the last
# occurrence. All search options, including both output formats, live here.
search:
  safe_search: 0
  autocomplete: ""
  autocomplete_min: 4
  default_lang: "auto"
  ban_time_on_fail: 5
  max_ban_time_on_fail: 120
  formats:
    - html
    - json

# HTTP server settings.
server:
  port: 8080
  bind_address: "0.0.0.0"
  base_url: false
  limiter: false
  public_instance: false
  # NOTE(review): "example" looks like a placeholder; confirm a real secret is
  # supplied before the instance is exposed.
  secret_key: "example"
  image_proxy: false
  http_protocol_version: "1.0"
  method: "POST"
  default_http_headers:
    X-Content-Type-Options: nosniff
    X-Download-Options: noopen
    X-Robots-Tag: noindex, nofollow
    Referrer-Policy: no-referrer

redis:
  url: false

# Web UI settings.
ui:
  static_path: ""
  static_use_hash: false
  templates_path: ""
  default_theme: simple
  default_locale: ""
  query_in_title: false
  infinite_scroll: false
  center_alignment: false
  theme_args:
    simple_style: auto
  # NOTE(review): the nesting level of the two keys below is assumed to be
  # directly under `ui`; confirm against the deployed file.
  # Enable the cn category.
  enabled_categories: [cn, en, general, images]
  # Or define the category display order.
  categories_order: [cn, en, general, images]

# Outgoing requests to the search engines.
outgoing:
  request_timeout: 30.0
  max_request_timeout: 40.0
  pool_connections: 200
  pool_maxsize: 50
  enable_http2: false
  retries: 5

# Engines, grouped into the custom categories `cn` and `en`.
engines:
  - name: bing
    engine: bing
    disabled: false
    categories: cn
  # - name: bilibili
  #   engine: bilibili
  #   shortcut: bil
  #   disabled: false
  #   categories: cn
  # Baidu is queried through the generic json_engine; the *_query keys name
  # the fields read from each result.
  - name: baidu
    engine: json_engine
    paging: true
    first_page_num: 0
    search_url: 'https://www.baidu.com/s?tn=json&wd={query}&pn={pageno}&rn=50'
    url_query: url
    title_query: title
    content_query: abs
    categories: cn
  - name: "360search"
    engine: "360search"
    disabled: false
    categories: cn
  # NOTE(review): the entries below have no `engine:` key, unlike bing and
  # 360search above; confirm SearXNG can resolve them without it.
  - name: sogou
    disabled: false
    categories: cn
  - name: google
    disabled: false
    categories: en
  - name: yahoo
    disabled: false
    categories: en
  - name: duckduckgo
    disabled: false
    categories: en
# DOI resolver name -> resolver base URL.
doi_resolvers:
  oadoi.org: "https://oadoi.org/"
  doi.org: "https://doi.org/"
  doai.io: "https://dissem.in/"
  sci-hub.se: "https://sci-hub.se/"
  sci-hub.st: "https://sci-hub.st/"
  sci-hub.ru: "https://sci-hub.ru/"

# Selects the `oadoi.org` entry of doi_resolvers above.
default_doi_resolver: "oadoi.org"