---
# Compose project for the "spider" stack.
# The top-level `version` key is intentionally absent: the Compose
# Specification marks it obsolete and Compose v2 ignores it with a warning.
name: spider

services:
  # SearXNG metasearch engine. Published on host port 8080; containers on
  # spider_net reach it by the service name "searxng".
  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    platform: linux/amd64
    restart: unless-stopped
    networks:
      - spider_net
    ports:
      - "8080:8080"
    volumes:
      # The local ./searxng directory is mounted read-write at /etc/searxng.
      - ./searxng:/etc/searxng:rw
    environment:
      # Host part comes from SEARXNG_HOSTNAME, falling back to "localhost".
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/
      - UWSGI_WORKERS=4  # number of uWSGI worker processes
      - UWSGI_THREADS=4  # number of uWSGI threads
    cap_drop:
      - ALL
    # Dropping every capability without re-adding any can stop an entrypoint
    # that starts as root from chown-ing its config and switching user.
    # NOTE(review): the upstream searxng-docker compose file pairs
    # `cap_drop: ALL` with exactly these three; confirm they are still needed
    # for the image tag in use.
    cap_add:
      - CHOWN
      - SETGID
      - SETUID

  # MongoDB server; nodeapp connects to it through its MONGODB_URI setting.
  mongodb:
    image: mongo:4.4
    container_name: mongodb
    restart: unless-stopped
    ports:
      - "27017:27017"
    networks:
      - spider_net
    volumes:
      # Database files live in the named volume "mongo-data".
      - mongo-data:/data/db
    environment:
      # Root account credentials. They must stay in sync with the
      # user:password pair embedded in nodeapp's MONGODB_URI.
      - MONGO_INITDB_ROOT_USERNAME=root
      - MONGO_INITDB_ROOT_PASSWORD=example

  # Web-crawler application container (container name "main"), published on
  # host port 3000.
  nodeapp:
    container_name: main
    platform: linux/amd64
    # build:
    #   context: .
    image: gggaaallleee/webcrawler-test-new:latest
    ports:
      - "3000:3000"
    networks:
      - spider_net
    depends_on:
      - mongodb
      # ENGINE_SEARCHXNGURL below targets the searxng service, so it is
      # declared as a dependency and started together with this one.
      - searxng
    logging:
      # Keep a single json-file log of at most 1 MB.
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
    volumes:
      # Shares the host's /dev/shm with the container.
      # NOTE(review): presumably for a headless browser - confirm.
      - /dev/shm:/dev/shm
    environment:
      - ACCESS_TOKEN=webcrawler  # access token
      # Websites that cannot be processed and are skipped.
      - DETECT_WEBSITE=zhuanlan.zhihu.com
      # Page-load strategies (JSON array of waitUntil/timeout pairs).
      - 'STRATEGIES=[{"waitUntil":"networkidle0","timeout":5000},{"waitUntil":"networkidle2","timeout":10000},{"waitUntil":"load","timeout":15000}]'
      - PORT=3000
      - MAX_CONCURRENCY=10  # maximum concurrency
      - NODE_ENV=development
      - ENGINE_BAIDUURL=https://www.baidu.com/s  # Baidu search engine URL
      - ENGINE_SEARCHXNGURL=http://searxng:8080/search  # SearXNG search URL
      # MongoDB connection URI; credentials match the mongodb service.
      - MONGODB_URI=mongodb://root:example@mongodb:27017
      - 'BLACKLIST=[".gov.cn",".edu.cn"]'  # protected domains
      - STD_TTL=3600  # standard TTL (seconds)
      - EXPIRE_AFTER_SECONDS=9000  # expiration time (seconds)
      # Proxy pool (disabled template; fill in ip/port before enabling):
      # - VALIDATE_PROXY=[{"ip":"","port":},{"ip":"","port":}]
    deploy:
      resources:
        limits:
          memory: 4G
          cpus: '2.0'

# Network shared by all three services; no options are set.
networks:
  spider_net: {}

# Named volume holding MongoDB's /data/db; no options are set.
volumes:
  mongo-data: {}