From d23c72690e122ab3ae4541f9b40471b8dfa9a7ef Mon Sep 17 00:00:00 2001 From: yanzhicong <143411362+mryanzhicong@users.noreply.github.com> Date: Mon, 13 Apr 2026 20:41:33 +0800 Subject: [PATCH] feat: add openGauss DataVec as vector database backend (#6666) * feat: add openGauss DataVec as vector database backend Add openGauss with DataVec extension as a new vector storage option alongside PGVector and Milvus. Includes vector DB controller, Docker Compose deployment configs (CN/Global), deploy generation scripts, and test templates. * test: add opengauss vectorDB integration entry * test: adjust vector env template for opengauss run * fix: ts --------- Co-authored-by: archer <545436317@qq.com> --- deploy/docker/cn/docker-compose.opengauss.yml | 355 ++++++++++++++++++ .../global/docker-compose.opengauss.yml | 355 ++++++++++++++++++ deploy/init.mjs | 20 +- deploy/templates/vector/opengauss.txt | 20 + .../docker/cn/docker-compose.opengauss.yml | 355 ++++++++++++++++++ .../global/docker-compose.opengauss.yml | 355 ++++++++++++++++++ packages/service/common/vectorDB/constants.ts | 1 + .../service/common/vectorDB/controller.ts | 10 +- .../common/vectorDB/opengauss/controller.ts | 201 ++++++++++ .../common/vectorDB/opengauss/index.ts | 208 ++++++++++ packages/service/type/env.ts | 1 + projects/app/.env.template | 4 +- test/.env.test.tempalte | 4 +- test/integrationTest/vectorDB/README.md | 1 + .../opengauss/index.integration.test.ts | 15 + 15 files changed, 1901 insertions(+), 4 deletions(-) create mode 100644 deploy/docker/cn/docker-compose.opengauss.yml create mode 100644 deploy/docker/global/docker-compose.opengauss.yml create mode 100644 deploy/templates/vector/opengauss.txt create mode 100644 document/public/deploy/docker/cn/docker-compose.opengauss.yml create mode 100644 document/public/deploy/docker/global/docker-compose.opengauss.yml create mode 100644 packages/service/common/vectorDB/opengauss/controller.ts create mode 100644 packages/service/common/vectorDB/opengauss/index.ts create mode 100644 test/integrationTest/vectorDB/opengauss/index.integration.test.ts diff --git a/deploy/docker/cn/docker-compose.opengauss.yml b/deploy/docker/cn/docker-compose.opengauss.yml new file mode 100644 index 0000000000..59e23038bb --- /dev/null +++ b/deploy/docker/cn/docker-compose.opengauss.yml @@ -0,0 +1,355 @@ +# 用于部署的 docker-compose 文件: +# - FastGPT 端口映射为 3000:3000 +# - FastGPT-mcp-server 端口映射 3005:3000 +# - 建议修改账密后再运行 + +# root 默认密码(重启后会强制重置该密码成环境变量值) +x-default-root-psw: &x-default-root-psw '1234' +# 系统最高密钥凭证 +x-system-key: &x-system-key 'fastgpt-xxx' +# plugin auth token +x-plugin-auth-token: &x-plugin-auth-token 'token' +# code sandbox token +x-code-sandbox-token: &x-code-sandbox-token 'codesandbox' +# aiproxy token +x-aiproxy-token: &x-aiproxy-token 'token' +# 数据库连接相关配置 +x-share-db-config: &x-share-db-config + MONGODB_URI: mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin + DB_MAX_LINK: 100 + REDIS_URL: redis://default:mypassword@redis:6379 + # @see https://doc.fastgpt.cn/docs/self-host/config/object-storage + STORAGE_VENDOR: minio # minio | aws-s3 | cos | oss + STORAGE_REGION: us-east-1 + STORAGE_ACCESS_KEY_ID: minioadmin + STORAGE_SECRET_ACCESS_KEY: minioadmin + STORAGE_PUBLIC_BUCKET: fastgpt-public + STORAGE_PRIVATE_BUCKET: fastgpt-private + STORAGE_EXTERNAL_ENDPOINT: http://192.168.0.2:9000 # 一个服务器和客户端均可访问到存储桶的地址,可以是固定的宿主机 IP 或者域名,注意不要填写成 127.0.0.1 或者 localhost 等本地回环地址(因为容器里无法使用) + STORAGE_S3_ENDPOINT: http://fastgpt-minio:9000 # 协议://域名(IP):端口 + STORAGE_S3_FORCE_PATH_STYLE: true + STORAGE_S3_MAX_RETRIES: 3 +# Log 配置 +x-log-config: &x-log-config + LOG_ENABLE_CONSOLE: true + LOG_CONSOLE_LEVEL: debug + LOG_ENABLE_OTEL: false + LOG_OTEL_LEVEL: info + LOG_OTEL_URL: http://localhost:4318/v1/logs + +# 向量库相关配置(openGauss DataVec,使用独立环境变量) +x-vec-config: &x-vec-config + OPENGAUSS_URL: postgresql://gaussdb:FastGPT@123@opengauss:5432/fastgpt + +version: '3.3' +services: + # Vector DB - openGauss + vectorDB: + image: opengauss/opengauss:7.0.0-RC1 + container_name: opengauss + restart: always + privileged: true + networks: + - fastgpt + environment: + # 这里的配置只有首次运行生效。修改后,重启镜像是不会生效的。需要把持久化数据删除再重启,才有效果 + - GS_USERNAME=gaussdb # 默认会创建 gaussdb 用户 + - GS_PASSWORD=FastGPT@123 # 密码必须包含大写、小写、数字和特殊字符,且长度不少于8位 + - GS_DB=fastgpt # 默认会创建 postgres 数据库,这里以 fastgpt 为例 + volumes: + - ./opengauss/data:/var/lib/opengauss + healthcheck: + test: ['CMD-SHELL', 'su - omm -c "gsql -d postgres -p 5432 -c \"SELECT 1\""'] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s + mongo: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mongo:5.0.32 # cpu 不支持 AVX 时候使用 4.4.29 + container_name: mongo + restart: always + networks: + - fastgpt + command: mongod --keyFile /data/mongodb.key --replSet rs0 + environment: + - MONGO_INITDB_ROOT_USERNAME=myusername + - MONGO_INITDB_ROOT_PASSWORD=mypassword + volumes: + - ./mongo/data:/data/db + healthcheck: + test: ['CMD', 'mongo', '-u', 'myusername', '-p', 'mypassword', '--authenticationDatabase', 'admin', '--eval', "db.adminCommand('ping')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + entrypoint: + - bash + - -c + - | + openssl rand -base64 128 > /data/mongodb.key + chmod 400 /data/mongodb.key + chown 999:999 /data/mongodb.key + echo 'const isInited = rs.status().ok === 1 + if(!isInited){ + rs.initiate({ + _id: "rs0", + members: [ + { _id: 0, host: "mongo:27017" } + ] + }) + }' > /data/initReplicaSet.js + # 启动MongoDB服务 + exec docker-entrypoint.sh "$$@" & + + # 等待MongoDB服务启动 + until mongo -u myusername -p mypassword --authenticationDatabase admin --eval "print('waited for connection')"; do + echo "Waiting for MongoDB to start..." + sleep 2 + done + + # 执行初始化副本集的脚本 + mongo -u myusername -p mypassword --authenticationDatabase admin /data/initReplicaSet.js + + # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 + wait $$! + redis: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/redis:7.2-alpine + container_name: redis + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 4gb --maxmemory-policy noeviction + healthcheck: + test: ['CMD', 'redis-cli', '-a', 'mypassword', 'ping'] + interval: 10s + timeout: 3s + retries: 3 + start_period: 30s + volumes: + - ./redis/data:/data + fastgpt-minio: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/minio:RELEASE.2025-09-07T16-13-09Z + container_name: fastgpt-minio + restart: always + ports: + - 9000:9000 + - 9001:9001 + networks: + - fastgpt + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + volumes: + - ./fastgpt-minio:/data + command: server /data --console-address ":9001" + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] + interval: 30s + timeout: 20s + retries: 3 + + fastgpt: + container_name: fastgpt + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.14.9.3 # git + ports: + - 3000:3000 + networks: + - fastgpt + depends_on: + - mongo + - code-sandbox + - vectorDB + restart: always + environment: + <<: [*x-share-db-config, *x-vec-config, *x-log-config] + # ==================== 基础配置 ==================== + # 前端外部可访问的地址,用于自动补全文件资源路径。例如 https:fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。 + FE_DOMAIN: http://localhost:3000 + # root key(最高权限) + ROOT_KEY: *x-system-key + # root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。 + DEFAULT_ROOT_PSW: *x-default-root-psw + # 数据库最大连接数 + DB_MAX_LINK: 5 + # 自动同步索引(0 表示不同步) + SYNC_INDEX: 1 + TOKEN_KEY: fastgpt + # 文件阅读时的密钥 + FILE_TOKEN_KEY: filetokenkey + # 密钥加密 key + AES256_SECRET_KEY: fastgptsecret + # 强制将图片转成 base64 传递给模型 + MULTIPLE_DATA_TO_BASE64: true + + # ==================== 服务地址与集成 ==================== + # plugin 地址 + PLUGIN_BASE_URL: http://fastgpt-plugin:3000 + PLUGIN_TOKEN: *x-plugin-auth-token + # code-sandbox 地址 + SANDBOX_URL: http://code-sandbox:3000 + SANDBOX_TOKEN: *x-code-sandbox-token + # AI Proxy 的地址,如果配了该地址,优先使用 + AIPROXY_API_ENDPOINT: http://aiproxy:3000 + # AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY + AIPROXY_API_TOKEN: *x-aiproxy-token + + # ==================== 日志与监控 ==================== + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-client + + # ==================== 安全与运行限制 ==================== + # 启动 IP 限流(true);部分接口启用 IP 限流策略以防止异常请求 + USE_IP_LIMIT: false + # 工作流最大运行次数,避免极端死循环 + WORKFLOW_MAX_RUN_TIMES: 1000 + # 循环最大运行次数,避免极端死循环 + WORKFLOW_MAX_LOOP_TIMES: 100 + # 服务器接收请求的最大大小(MB) + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 启用内网 IP 检查 + CHECK_INTERNAL_IP: false + # ==================== 上传与账号策略 ==================== + # 最大上传文件大小(MB) + UPLOAD_FILE_MAX_SIZE: 1000 + # 最大上传文件数量 + UPLOAD_FILE_MAX_AMOUNT: 1000 + # LLM 请求追踪保留时长(小时) + LLM_REQUEST_TRACKING_RETENTION_HOURS: 6 + # ==================== 功能开关与特殊配置 ==================== + # 自定义跨域;不配置时默认允许所有跨域(逗号分割) + ALLOWED_ORIGINS: + # HTML 转 Markdown 最大字符数(超过后不执行转换) + MAX_HTML_TRANSFORM_CHARS: 1000000 + volumes: + - ./config.json:/app/data/config.json + code-sandbox: + container_name: code-sandbox + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.14.9.3 + networks: + - fastgpt + restart: always + environment: + <<: [*x-log-config] + LOG_OTEL_SERVICE_NAME: fastgpt-code-sandbox + SANDBOX_TOKEN: *x-code-sandbox-token + # ===== Resource Limits ===== + # Execution timeout per request (ms) + SANDBOX_MAX_TIMEOUT: 60000 + # Maximum allowed memory per user code execution (MB) + # Note: System automatically adds 50MB for runtime overhead + # Actual process limit = SANDBOX_MAX_MEMORY_MB + 50MB + SANDBOX_MAX_MEMORY_MB: 256 + + # ===== Process Pool ===== + # Number of pre-warmed worker processes (JS + Python) + SANDBOX_POOL_SIZE: 20 + + # ===== Network Request Limits ===== + # Whether to check if the request is to a private network + CHECK_INTERNAL_IP: false + # Maximum number of HTTP requests per execution + SANDBOX_REQUEST_MAX_COUNT: 30 + # Timeout for each outbound HTTP request (ms) + SANDBOX_REQUEST_TIMEOUT: 60000 + # Maximum response body size for outbound requests + SANDBOX_REQUEST_MAX_RESPONSE_MB: 10 + # Maximum request body size for outbound requests (MB) + SANDBOX_REQUEST_MAX_BODY_MB: 5 + + # ===== Module Control ===== + # JS allowed modules whitelist (comma-separated) + SANDBOX_JS_ALLOWED_MODULES: lodash,dayjs,moment,uuid,crypto-js,qs,url,querystring + # Python allowed modules whitelist (comma-separated) + SANDBOX_PYTHON_ALLOWED_MODULES: math,cmath,decimal,fractions,random,statistics,collections,array,heapq,bisect,queue,copy,itertools,functools,operator,string,re,difflib,textwrap,unicodedata,codecs,datetime,time,calendar,_strptime,json,csv,base64,binascii,struct,hashlib,hmac,secrets,uuid,typing,abc,enum,dataclasses,contextlib,pprint,weakref,numpy,pandas,matplotlib + + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + fastgpt-mcp-server: + container_name: fastgpt-mcp-server + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.14.9 + networks: + - fastgpt + ports: + - 3005:3000 + restart: always + environment: + <<: [*x-log-config] + FASTGPT_ENDPOINT: http://fastgpt:3000 + fastgpt-plugin: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-plugin:v0.5.5 + container_name: fastgpt-plugin + restart: always + networks: + - fastgpt + environment: + <<: [*x-share-db-config, *x-log-config] + AUTH_TOKEN: *x-plugin-auth-token + # 工具网络请求,最大请求和响应体 + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 最大 API 请求体大小 + MAX_API_SIZE: 10 + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-plugin + depends_on: + fastgpt-minio: + condition: service_healthy + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + # AI Proxy + aiproxy: + image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.3.5 + container_name: aiproxy + restart: unless-stopped + depends_on: + aiproxy_pg: + condition: service_healthy + networks: + - fastgpt + - aiproxy + environment: + # 对应 fastgpt 里的AIPROXY_API_TOKEN + ADMIN_KEY: *x-aiproxy-token + # 错误日志详情保存时间(小时) + LOG_DETAIL_STORAGE_HOURS: 1 + # 数据库连接地址 + SQL_DSN: postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy + # 最大重试次数 + RETRY_TIMES: 3 + # 不需要计费 + BILLING_ENABLED: false + # 不需要严格检测模型 + DISABLE_MODEL_CONFIG: true + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status'] + interval: 5s + timeout: 5s + retries: 10 + aiproxy_pg: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:0.8.0-pg15 # docker hub + restart: unless-stopped + container_name: aiproxy_pg + volumes: + - ./aiproxy_pg:/var/lib/postgresql/data + networks: + - aiproxy + environment: + TZ: Asia/Shanghai + POSTGRES_USER: postgres + POSTGRES_DB: aiproxy + POSTGRES_PASSWORD: aiproxy + healthcheck: + test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy'] + interval: 5s + timeout: 5s + retries: 10 +networks: + fastgpt: + aiproxy: + vector: diff --git a/deploy/docker/global/docker-compose.opengauss.yml b/deploy/docker/global/docker-compose.opengauss.yml new file mode 100644 index 0000000000..8caed82105 --- /dev/null +++ b/deploy/docker/global/docker-compose.opengauss.yml @@ -0,0 +1,355 @@ +# 用于部署的 docker-compose 文件: +# - FastGPT 端口映射为 3000:3000 +# - FastGPT-mcp-server 端口映射 3005:3000 +# - 建议修改账密后再运行 + +# root 默认密码(重启后会强制重置该密码成环境变量值) +x-default-root-psw: &x-default-root-psw '1234' +# 系统最高密钥凭证 +x-system-key: &x-system-key 'fastgpt-xxx' +# plugin auth token +x-plugin-auth-token: &x-plugin-auth-token 'token' +# code sandbox token +x-code-sandbox-token: &x-code-sandbox-token 'codesandbox' +# aiproxy token +x-aiproxy-token: &x-aiproxy-token 'token' +# 数据库连接相关配置 +x-share-db-config: &x-share-db-config + MONGODB_URI: mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin + DB_MAX_LINK: 100 + REDIS_URL: redis://default:mypassword@redis:6379 + # @see https://doc.fastgpt.cn/docs/self-host/config/object-storage + STORAGE_VENDOR: minio # minio | aws-s3 | cos | oss + STORAGE_REGION: us-east-1 + STORAGE_ACCESS_KEY_ID: minioadmin + STORAGE_SECRET_ACCESS_KEY: minioadmin + STORAGE_PUBLIC_BUCKET: fastgpt-public + STORAGE_PRIVATE_BUCKET: fastgpt-private + STORAGE_EXTERNAL_ENDPOINT: http://192.168.0.2:9000 # 一个服务器和客户端均可访问到存储桶的地址,可以是固定的宿主机 IP 或者域名,注意不要填写成 127.0.0.1 或者 localhost 等本地回环地址(因为容器里无法使用) + STORAGE_S3_ENDPOINT: http://fastgpt-minio:9000 # 协议://域名(IP):端口 + STORAGE_S3_FORCE_PATH_STYLE: true + STORAGE_S3_MAX_RETRIES: 3 +# Log 配置 +x-log-config: &x-log-config + LOG_ENABLE_CONSOLE: true + LOG_CONSOLE_LEVEL: debug + LOG_ENABLE_OTEL: false + LOG_OTEL_LEVEL: info + LOG_OTEL_URL: http://localhost:4318/v1/logs + +# 向量库相关配置(openGauss DataVec,使用独立环境变量) +x-vec-config: &x-vec-config + OPENGAUSS_URL: postgresql://gaussdb:FastGPT@123@opengauss:5432/fastgpt + +version: '3.3' +services: + # Vector DB - openGauss + vectorDB: + image: opengauss/opengauss:7.0.0-RC1 + container_name: opengauss + restart: always + privileged: true + networks: + - fastgpt + environment: + # 这里的配置只有首次运行生效。修改后,重启镜像是不会生效的。需要把持久化数据删除再重启,才有效果 + - GS_USERNAME=gaussdb # 默认会创建 gaussdb 用户 + - GS_PASSWORD=FastGPT@123 # 密码必须包含大写、小写、数字和特殊字符,且长度不少于8位 + - GS_DB=fastgpt # 默认会创建 postgres 数据库,这里以 fastgpt 为例 + volumes: + - ./opengauss/data:/var/lib/opengauss + healthcheck: + test: ['CMD-SHELL', 'su - omm -c "gsql -d postgres -p 5432 -c \"SELECT 1\""'] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s + mongo: + image: mongo:5.0.32 # cpu 不支持 AVX 时候使用 4.4.29 + container_name: mongo + restart: always + networks: + - fastgpt + command: mongod --keyFile /data/mongodb.key --replSet rs0 + environment: + - MONGO_INITDB_ROOT_USERNAME=myusername + - MONGO_INITDB_ROOT_PASSWORD=mypassword + volumes: + - ./mongo/data:/data/db + healthcheck: + test: ['CMD', 'mongo', '-u', 'myusername', '-p', 'mypassword', '--authenticationDatabase', 'admin', '--eval', "db.adminCommand('ping')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + entrypoint: + - bash + - -c + - | + openssl rand -base64 128 > /data/mongodb.key + chmod 400 /data/mongodb.key + chown 999:999 /data/mongodb.key + echo 'const isInited = rs.status().ok === 1 + if(!isInited){ + rs.initiate({ + _id: "rs0", + members: [ + { _id: 0, host: "mongo:27017" } + ] + }) + }' > /data/initReplicaSet.js + # 启动MongoDB服务 + exec docker-entrypoint.sh "$$@" & + + # 等待MongoDB服务启动 + until mongo -u myusername -p mypassword --authenticationDatabase admin --eval "print('waited for connection')"; do + echo "Waiting for MongoDB to start..." + sleep 2 + done + + # 执行初始化副本集的脚本 + mongo -u myusername -p mypassword --authenticationDatabase admin /data/initReplicaSet.js + + # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 + wait $$! + redis: + image: redis:7.2-alpine + container_name: redis + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 4gb --maxmemory-policy noeviction + healthcheck: + test: ['CMD', 'redis-cli', '-a', 'mypassword', 'ping'] + interval: 10s + timeout: 3s + retries: 3 + start_period: 30s + volumes: + - ./redis/data:/data + fastgpt-minio: + image: minio/minio:RELEASE.2025-09-07T16-13-09Z + container_name: fastgpt-minio + restart: always + ports: + - 9000:9000 + - 9001:9001 + networks: + - fastgpt + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + volumes: + - ./fastgpt-minio:/data + command: server /data --console-address ":9001" + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] + interval: 30s + timeout: 20s + retries: 3 + + fastgpt: + container_name: fastgpt + image: ghcr.io/labring/fastgpt:v4.14.9.3 # git + ports: + - 3000:3000 + networks: + - fastgpt + depends_on: + - mongo + - code-sandbox + - vectorDB + restart: always + environment: + <<: [*x-share-db-config, *x-vec-config, *x-log-config] + # ==================== 基础配置 ==================== + # 前端外部可访问的地址,用于自动补全文件资源路径。例如 https:fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。 + FE_DOMAIN: http://localhost:3000 + # root key(最高权限) + ROOT_KEY: *x-system-key + # root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。 + DEFAULT_ROOT_PSW: *x-default-root-psw + # 数据库最大连接数 + DB_MAX_LINK: 5 + # 自动同步索引(0 表示不同步) + SYNC_INDEX: 1 + TOKEN_KEY: fastgpt + # 文件阅读时的密钥 + FILE_TOKEN_KEY: filetokenkey + # 密钥加密 key + AES256_SECRET_KEY: fastgptsecret + # 强制将图片转成 base64 传递给模型 + MULTIPLE_DATA_TO_BASE64: true + + # ==================== 服务地址与集成 ==================== + # plugin 地址 + PLUGIN_BASE_URL: http://fastgpt-plugin:3000 + PLUGIN_TOKEN: *x-plugin-auth-token + # code-sandbox 地址 + SANDBOX_URL: http://code-sandbox:3000 + SANDBOX_TOKEN: *x-code-sandbox-token + # AI Proxy 的地址,如果配了该地址,优先使用 + AIPROXY_API_ENDPOINT: http://aiproxy:3000 + # AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY + AIPROXY_API_TOKEN: *x-aiproxy-token + + # ==================== 日志与监控 ==================== + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-client + + # ==================== 安全与运行限制 ==================== + # 启动 IP 限流(true);部分接口启用 IP 限流策略以防止异常请求 + USE_IP_LIMIT: false + # 工作流最大运行次数,避免极端死循环 + WORKFLOW_MAX_RUN_TIMES: 1000 + # 循环最大运行次数,避免极端死循环 + WORKFLOW_MAX_LOOP_TIMES: 100 + # 服务器接收请求的最大大小(MB) + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 启用内网 IP 检查 + CHECK_INTERNAL_IP: false + # ==================== 上传与账号策略 ==================== + # 最大上传文件大小(MB) + UPLOAD_FILE_MAX_SIZE: 1000 + # 最大上传文件数量 + UPLOAD_FILE_MAX_AMOUNT: 1000 + # LLM 请求追踪保留时长(小时) + LLM_REQUEST_TRACKING_RETENTION_HOURS: 6 + # ==================== 功能开关与特殊配置 ==================== + # 自定义跨域;不配置时默认允许所有跨域(逗号分割) + ALLOWED_ORIGINS: + # HTML 转 Markdown 最大字符数(超过后不执行转换) + MAX_HTML_TRANSFORM_CHARS: 1000000 + volumes: + - ./config.json:/app/data/config.json + code-sandbox: + container_name: code-sandbox + image: ghcr.io/labring/fastgpt-sandbox:v4.14.9.3 + networks: + - fastgpt + restart: always + environment: + <<: [*x-log-config] + LOG_OTEL_SERVICE_NAME: fastgpt-code-sandbox + SANDBOX_TOKEN: *x-code-sandbox-token + # ===== Resource Limits ===== + # Execution timeout per request (ms) + SANDBOX_MAX_TIMEOUT: 60000 + # Maximum allowed memory per user code execution (MB) + # Note: System automatically adds 50MB for runtime overhead + # Actual process limit = SANDBOX_MAX_MEMORY_MB + 50MB + SANDBOX_MAX_MEMORY_MB: 256 + + # ===== Process Pool ===== + # Number of pre-warmed worker processes (JS + Python) + SANDBOX_POOL_SIZE: 20 + + # ===== Network Request Limits ===== + # Whether to check if the request is to a private network + CHECK_INTERNAL_IP: false + # Maximum number of HTTP requests per execution + SANDBOX_REQUEST_MAX_COUNT: 30 + # Timeout for each outbound HTTP request (ms) + SANDBOX_REQUEST_TIMEOUT: 60000 + # Maximum response body size for outbound requests + SANDBOX_REQUEST_MAX_RESPONSE_MB: 10 + # Maximum request body size for outbound requests (MB) + SANDBOX_REQUEST_MAX_BODY_MB: 5 + + # ===== Module Control ===== + # JS allowed modules whitelist (comma-separated) + SANDBOX_JS_ALLOWED_MODULES: lodash,dayjs,moment,uuid,crypto-js,qs,url,querystring + # Python allowed modules whitelist (comma-separated) + SANDBOX_PYTHON_ALLOWED_MODULES: math,cmath,decimal,fractions,random,statistics,collections,array,heapq,bisect,queue,copy,itertools,functools,operator,string,re,difflib,textwrap,unicodedata,codecs,datetime,time,calendar,_strptime,json,csv,base64,binascii,struct,hashlib,hmac,secrets,uuid,typing,abc,enum,dataclasses,contextlib,pprint,weakref,numpy,pandas,matplotlib + + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + fastgpt-mcp-server: + container_name: fastgpt-mcp-server + image: ghcr.io/labring/fastgpt-mcp_server:v4.14.9 + networks: + - fastgpt + ports: + - 3005:3000 + restart: always + environment: + <<: [*x-log-config] + FASTGPT_ENDPOINT: http://fastgpt:3000 + fastgpt-plugin: + image: ghcr.io/labring/fastgpt-plugin:v0.5.5 + container_name: fastgpt-plugin + restart: always + networks: + - fastgpt + environment: + <<: [*x-share-db-config, *x-log-config] + AUTH_TOKEN: *x-plugin-auth-token + # 工具网络请求,最大请求和响应体 + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 最大 API 请求体大小 + MAX_API_SIZE: 10 + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-plugin + depends_on: + fastgpt-minio: + condition: service_healthy + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + # AI Proxy + aiproxy: + image: ghcr.io/labring/aiproxy:v0.3.5 + container_name: aiproxy + restart: unless-stopped + depends_on: + aiproxy_pg: + condition: service_healthy + networks: + - fastgpt + - aiproxy + environment: + # 对应 fastgpt 里的AIPROXY_API_TOKEN + ADMIN_KEY: *x-aiproxy-token + # 错误日志详情保存时间(小时) + LOG_DETAIL_STORAGE_HOURS: 1 + # 数据库连接地址 + SQL_DSN: postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy + # 最大重试次数 + RETRY_TIMES: 3 + # 不需要计费 + BILLING_ENABLED: false + # 不需要严格检测模型 + DISABLE_MODEL_CONFIG: true + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status'] + interval: 5s + timeout: 5s + retries: 10 + aiproxy_pg: + image: pgvector/pgvector:0.8.0-pg15 # docker hub + restart: unless-stopped + container_name: aiproxy_pg + volumes: + - ./aiproxy_pg:/var/lib/postgresql/data + networks: + - aiproxy + environment: + TZ: Asia/Shanghai + POSTGRES_USER: postgres + POSTGRES_DB: aiproxy + POSTGRES_PASSWORD: aiproxy + healthcheck: + test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy'] + interval: 5s + timeout: 5s + retries: 10 +networks: + fastgpt: + aiproxy: + vector: diff --git a/deploy/init.mjs b/deploy/init.mjs index b7bba3dfe4..7bb01bd1ec 100644 --- a/deploy/init.mjs +++ b/deploy/init.mjs @@ -18,7 +18,8 @@ const VectorEnum = { milvus: 'milvus', zilliz: 'zilliz', ob: 'ob', - seekdb: 'seekdb' + seekdb: 'seekdb', + opengauss: 'opengauss' }; // make sure the cwd @@ -110,6 +111,12 @@ init_sql: `, extra: `` }, + opengauss: { + db: '', + config: `\ + OPENGAUSS_URL: postgresql://gaussdb:FastGPT@123@opengauss:5432/fastgpt`, + extra: '' + }, }; /** @@ -155,6 +162,9 @@ const replace = (source, region, vec) => { const seekdb = fs.readFileSync(path.join(process.cwd(), 'templates', 'vector', 'seekdb.txt')); vector.seekdb.db = String(seekdb); + + const opengauss = fs.readFileSync(path.join(process.cwd(), 'templates', 'vector', 'opengauss.txt')); + vector.opengauss.db = String(opengauss); } const generateDevFile = async () => { @@ -226,6 +236,14 @@ const generateProdFile = async () => { fs.promises.writeFile( path.join(process.cwd(), 'docker', 'global', 'docker-compose.seekdb.yml'), replace(template, 'global', VectorEnum.seekdb) + ), + fs.promises.writeFile( + path.join(process.cwd(), 'docker', 'cn', 'docker-compose.opengauss.yml'), + replace(template, 'cn', VectorEnum.opengauss) + ), + fs.promises.writeFile( + path.join(process.cwd(), 'docker', 'global', 'docker-compose.opengauss.yml'), + replace(template, 'global', VectorEnum.opengauss) ) ]); diff --git a/deploy/templates/vector/opengauss.txt b/deploy/templates/vector/opengauss.txt new file mode 100644 index 0000000000..e770669d03 --- /dev/null +++ b/deploy/templates/vector/opengauss.txt @@ -0,0 +1,20 @@ + vectorDB: + image: opengauss/opengauss:7.0.0-RC1 + container_name: opengauss + restart: always + privileged: true + networks: + - fastgpt + environment: + # 这里的配置只有首次运行生效。修改后,重启镜像是不会生效的。需要把持久化数据删除再重启,才有效果 + - GS_USERNAME=gaussdb # 默认会创建 gaussdb 用户 + - GS_PASSWORD=FastGPT@123 # 密码必须包含大写、小写、数字和特殊字符,且长度不少于8位 + - GS_DB=fastgpt # 默认会创建 postgres 数据库,这里以 fastgpt 为例 + volumes: + - ./opengauss/data:/var/lib/opengauss + healthcheck: + test: ['CMD-SHELL', 'su - omm -c "gsql -d postgres -p 5432 -c \"SELECT 1\""'] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s \ No newline at end of file diff --git a/document/public/deploy/docker/cn/docker-compose.opengauss.yml b/document/public/deploy/docker/cn/docker-compose.opengauss.yml new file mode 100644 index 0000000000..59e23038bb --- /dev/null +++ b/document/public/deploy/docker/cn/docker-compose.opengauss.yml @@ -0,0 +1,355 @@ +# 用于部署的 docker-compose 文件: +# - FastGPT 端口映射为 3000:3000 +# - FastGPT-mcp-server 端口映射 3005:3000 +# - 建议修改账密后再运行 + +# root 默认密码(重启后会强制重置该密码成环境变量值) +x-default-root-psw: &x-default-root-psw '1234' +# 系统最高密钥凭证 +x-system-key: &x-system-key 'fastgpt-xxx' +# plugin auth token +x-plugin-auth-token: &x-plugin-auth-token 'token' +# code sandbox token +x-code-sandbox-token: &x-code-sandbox-token 'codesandbox' +# aiproxy token +x-aiproxy-token: &x-aiproxy-token 'token' +# 数据库连接相关配置 +x-share-db-config: &x-share-db-config + MONGODB_URI: mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin + DB_MAX_LINK: 100 + REDIS_URL: redis://default:mypassword@redis:6379 + # @see https://doc.fastgpt.cn/docs/self-host/config/object-storage + STORAGE_VENDOR: minio # minio | aws-s3 | cos | oss + STORAGE_REGION: us-east-1 + STORAGE_ACCESS_KEY_ID: minioadmin + STORAGE_SECRET_ACCESS_KEY: minioadmin + STORAGE_PUBLIC_BUCKET: fastgpt-public + STORAGE_PRIVATE_BUCKET: fastgpt-private + STORAGE_EXTERNAL_ENDPOINT: http://192.168.0.2:9000 # 一个服务器和客户端均可访问到存储桶的地址,可以是固定的宿主机 IP 或者域名,注意不要填写成 127.0.0.1 或者 localhost 等本地回环地址(因为容器里无法使用) + STORAGE_S3_ENDPOINT: http://fastgpt-minio:9000 # 协议://域名(IP):端口 + STORAGE_S3_FORCE_PATH_STYLE: true + STORAGE_S3_MAX_RETRIES: 3 +# Log 配置 +x-log-config: &x-log-config + LOG_ENABLE_CONSOLE: true + LOG_CONSOLE_LEVEL: debug + LOG_ENABLE_OTEL: false + LOG_OTEL_LEVEL: info + LOG_OTEL_URL: http://localhost:4318/v1/logs + +# 向量库相关配置(openGauss DataVec,使用独立环境变量) +x-vec-config: &x-vec-config + OPENGAUSS_URL: postgresql://gaussdb:FastGPT@123@opengauss:5432/fastgpt + +version: '3.3' +services: + # Vector DB - openGauss + vectorDB: + image: opengauss/opengauss:7.0.0-RC1 + container_name: opengauss + restart: always + privileged: true + networks: + - fastgpt + environment: + # 这里的配置只有首次运行生效。修改后,重启镜像是不会生效的。需要把持久化数据删除再重启,才有效果 + - GS_USERNAME=gaussdb # 默认会创建 gaussdb 用户 + - GS_PASSWORD=FastGPT@123 # 密码必须包含大写、小写、数字和特殊字符,且长度不少于8位 + - GS_DB=fastgpt # 默认会创建 postgres 数据库,这里以 fastgpt 为例 + volumes: + - ./opengauss/data:/var/lib/opengauss + healthcheck: + test: ['CMD-SHELL', 'su - omm -c "gsql -d postgres -p 5432 -c \"SELECT 1\""'] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s + mongo: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mongo:5.0.32 # cpu 不支持 AVX 时候使用 4.4.29 + container_name: mongo + restart: always + networks: + - fastgpt + command: mongod --keyFile /data/mongodb.key --replSet rs0 + environment: + - MONGO_INITDB_ROOT_USERNAME=myusername + - MONGO_INITDB_ROOT_PASSWORD=mypassword + volumes: + - ./mongo/data:/data/db + healthcheck: + test: ['CMD', 'mongo', '-u', 'myusername', '-p', 'mypassword', '--authenticationDatabase', 'admin', '--eval', "db.adminCommand('ping')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + entrypoint: + - bash + - -c + - | + openssl rand -base64 128 > /data/mongodb.key + chmod 400 /data/mongodb.key + chown 999:999 /data/mongodb.key + echo 'const isInited = rs.status().ok === 1 + if(!isInited){ + rs.initiate({ + _id: "rs0", + members: [ + { _id: 0, host: "mongo:27017" } + ] + }) + }' > /data/initReplicaSet.js + # 启动MongoDB服务 + exec docker-entrypoint.sh "$$@" & + + # 等待MongoDB服务启动 + until mongo -u myusername -p mypassword --authenticationDatabase admin --eval "print('waited for connection')"; do + echo "Waiting for MongoDB to start..." + sleep 2 + done + + # 执行初始化副本集的脚本 + mongo -u myusername -p mypassword --authenticationDatabase admin /data/initReplicaSet.js + + # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 + wait $$! + redis: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/redis:7.2-alpine + container_name: redis + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 4gb --maxmemory-policy noeviction + healthcheck: + test: ['CMD', 'redis-cli', '-a', 'mypassword', 'ping'] + interval: 10s + timeout: 3s + retries: 3 + start_period: 30s + volumes: + - ./redis/data:/data + fastgpt-minio: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/minio:RELEASE.2025-09-07T16-13-09Z + container_name: fastgpt-minio + restart: always + ports: + - 9000:9000 + - 9001:9001 + networks: + - fastgpt + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + volumes: + - ./fastgpt-minio:/data + command: server /data --console-address ":9001" + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] + interval: 30s + timeout: 20s + retries: 3 + + fastgpt: + container_name: fastgpt + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.14.9.3 # git + ports: + - 3000:3000 + networks: + - fastgpt + depends_on: + - mongo + - code-sandbox + - vectorDB + restart: always + environment: + <<: [*x-share-db-config, *x-vec-config, *x-log-config] + # ==================== 基础配置 ==================== + # 前端外部可访问的地址,用于自动补全文件资源路径。例如 https:fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。 + FE_DOMAIN: http://localhost:3000 + # root key(最高权限) + ROOT_KEY: *x-system-key + # root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。 + DEFAULT_ROOT_PSW: *x-default-root-psw + # 数据库最大连接数 + DB_MAX_LINK: 5 + # 自动同步索引(0 表示不同步) + SYNC_INDEX: 1 + TOKEN_KEY: fastgpt + # 文件阅读时的密钥 + FILE_TOKEN_KEY: filetokenkey + # 密钥加密 key + AES256_SECRET_KEY: fastgptsecret + # 强制将图片转成 base64 传递给模型 + MULTIPLE_DATA_TO_BASE64: true + + # ==================== 服务地址与集成 ==================== + # plugin 地址 + PLUGIN_BASE_URL: http://fastgpt-plugin:3000 + PLUGIN_TOKEN: *x-plugin-auth-token + # code-sandbox 地址 + SANDBOX_URL: http://code-sandbox:3000 + SANDBOX_TOKEN: *x-code-sandbox-token + # AI Proxy 的地址,如果配了该地址,优先使用 + AIPROXY_API_ENDPOINT: http://aiproxy:3000 + # AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY + AIPROXY_API_TOKEN: *x-aiproxy-token + + # ==================== 日志与监控 ==================== + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-client + + # ==================== 安全与运行限制 ==================== + # 启动 IP 限流(true);部分接口启用 IP 限流策略以防止异常请求 + USE_IP_LIMIT: false + # 工作流最大运行次数,避免极端死循环 + WORKFLOW_MAX_RUN_TIMES: 1000 + # 循环最大运行次数,避免极端死循环 + WORKFLOW_MAX_LOOP_TIMES: 100 + # 服务器接收请求的最大大小(MB) + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 启用内网 IP 检查 + CHECK_INTERNAL_IP: false + # ==================== 上传与账号策略 ==================== + # 最大上传文件大小(MB) + UPLOAD_FILE_MAX_SIZE: 1000 + # 最大上传文件数量 + UPLOAD_FILE_MAX_AMOUNT: 1000 + # LLM 请求追踪保留时长(小时) + LLM_REQUEST_TRACKING_RETENTION_HOURS: 6 + # ==================== 功能开关与特殊配置 ==================== + # 自定义跨域;不配置时默认允许所有跨域(逗号分割) + ALLOWED_ORIGINS: + # HTML 转 Markdown 最大字符数(超过后不执行转换) + MAX_HTML_TRANSFORM_CHARS: 1000000 + volumes: + - ./config.json:/app/data/config.json + code-sandbox: + container_name: code-sandbox + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.14.9.3 + networks: + - fastgpt + restart: always + environment: + <<: [*x-log-config] + LOG_OTEL_SERVICE_NAME: fastgpt-code-sandbox + SANDBOX_TOKEN: *x-code-sandbox-token + # ===== Resource Limits ===== + # Execution timeout per request (ms) + SANDBOX_MAX_TIMEOUT: 60000 + # Maximum allowed memory per user code execution (MB) + # Note: System automatically adds 50MB for runtime overhead + # Actual process limit = SANDBOX_MAX_MEMORY_MB + 50MB + SANDBOX_MAX_MEMORY_MB: 256 + + # ===== Process Pool ===== + # Number of pre-warmed worker processes (JS + Python) + SANDBOX_POOL_SIZE: 20 + + # ===== Network Request Limits ===== + # Whether to check if the request is to a private network + CHECK_INTERNAL_IP: false + # Maximum number of HTTP requests per execution + SANDBOX_REQUEST_MAX_COUNT: 30 + # Timeout for each outbound HTTP request (ms) + SANDBOX_REQUEST_TIMEOUT: 60000 + # Maximum response body size for outbound requests + SANDBOX_REQUEST_MAX_RESPONSE_MB: 10 + # Maximum request body size for outbound requests (MB) + SANDBOX_REQUEST_MAX_BODY_MB: 5 + + # ===== Module Control ===== + # JS allowed modules whitelist (comma-separated) + SANDBOX_JS_ALLOWED_MODULES: lodash,dayjs,moment,uuid,crypto-js,qs,url,querystring + # Python allowed modules whitelist (comma-separated) + SANDBOX_PYTHON_ALLOWED_MODULES: math,cmath,decimal,fractions,random,statistics,collections,array,heapq,bisect,queue,copy,itertools,functools,operator,string,re,difflib,textwrap,unicodedata,codecs,datetime,time,calendar,_strptime,json,csv,base64,binascii,struct,hashlib,hmac,secrets,uuid,typing,abc,enum,dataclasses,contextlib,pprint,weakref,numpy,pandas,matplotlib + + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + fastgpt-mcp-server: + container_name: fastgpt-mcp-server + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-mcp_server:v4.14.9 + networks: + - fastgpt + ports: + - 3005:3000 + restart: always + environment: + <<: [*x-log-config] + FASTGPT_ENDPOINT: http://fastgpt:3000 + fastgpt-plugin: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-plugin:v0.5.5 + container_name: fastgpt-plugin + restart: always + networks: + - fastgpt + environment: + <<: [*x-share-db-config, *x-log-config] + AUTH_TOKEN: *x-plugin-auth-token + # 工具网络请求,最大请求和响应体 + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 最大 API 请求体大小 + MAX_API_SIZE: 10 + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-plugin + depends_on: + fastgpt-minio: + condition: service_healthy + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + # AI Proxy + aiproxy: + image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.3.5 + container_name: aiproxy + restart: unless-stopped + depends_on: + aiproxy_pg: + condition: service_healthy + networks: + - fastgpt + - aiproxy + environment: + # 对应 fastgpt 里的AIPROXY_API_TOKEN + ADMIN_KEY: *x-aiproxy-token + # 错误日志详情保存时间(小时) + LOG_DETAIL_STORAGE_HOURS: 1 + # 数据库连接地址 + SQL_DSN: postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy + # 最大重试次数 + RETRY_TIMES: 3 + # 不需要计费 + BILLING_ENABLED: false + # 不需要严格检测模型 + DISABLE_MODEL_CONFIG: true + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status'] + interval: 5s + timeout: 5s + retries: 10 + aiproxy_pg: + image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:0.8.0-pg15 # docker hub + restart: unless-stopped + container_name: aiproxy_pg + volumes: + - ./aiproxy_pg:/var/lib/postgresql/data + networks: + - aiproxy + environment: + TZ: Asia/Shanghai + POSTGRES_USER: postgres + POSTGRES_DB: aiproxy + POSTGRES_PASSWORD: aiproxy + healthcheck: + test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy'] + interval: 5s + timeout: 5s + retries: 10 +networks: + fastgpt: + aiproxy: + vector: diff --git a/document/public/deploy/docker/global/docker-compose.opengauss.yml b/document/public/deploy/docker/global/docker-compose.opengauss.yml new file mode 100644 index 0000000000..8caed82105 --- /dev/null +++ b/document/public/deploy/docker/global/docker-compose.opengauss.yml @@ -0,0 +1,355 @@ +# 用于部署的 docker-compose 文件: +# - FastGPT 端口映射为 3000:3000 +# - FastGPT-mcp-server 端口映射 3005:3000 +# - 建议修改账密后再运行 + +# root 默认密码(重启后会强制重置该密码成环境变量值) +x-default-root-psw: &x-default-root-psw '1234' +# 系统最高密钥凭证 +x-system-key: &x-system-key 'fastgpt-xxx' +# plugin auth token +x-plugin-auth-token: &x-plugin-auth-token 'token' +# code sandbox token +x-code-sandbox-token: &x-code-sandbox-token 'codesandbox' +# aiproxy token +x-aiproxy-token: &x-aiproxy-token 'token' +# 数据库连接相关配置 +x-share-db-config: &x-share-db-config + MONGODB_URI: mongodb://myusername:mypassword@mongo:27017/fastgpt?authSource=admin + DB_MAX_LINK: 100 + REDIS_URL: redis://default:mypassword@redis:6379 + # @see https://doc.fastgpt.cn/docs/self-host/config/object-storage + STORAGE_VENDOR: minio # minio | aws-s3 | cos | oss + STORAGE_REGION: us-east-1 + STORAGE_ACCESS_KEY_ID: minioadmin + STORAGE_SECRET_ACCESS_KEY: minioadmin + STORAGE_PUBLIC_BUCKET: fastgpt-public + STORAGE_PRIVATE_BUCKET: fastgpt-private + STORAGE_EXTERNAL_ENDPOINT: http://192.168.0.2:9000 # 一个服务器和客户端均可访问到存储桶的地址,可以是固定的宿主机 IP 或者域名,注意不要填写成 127.0.0.1 或者 localhost 等本地回环地址(因为容器里无法使用) + STORAGE_S3_ENDPOINT: http://fastgpt-minio:9000 # 协议://域名(IP):端口 + STORAGE_S3_FORCE_PATH_STYLE: true + STORAGE_S3_MAX_RETRIES: 3 +# Log 配置 +x-log-config: &x-log-config + LOG_ENABLE_CONSOLE: true + LOG_CONSOLE_LEVEL: debug + LOG_ENABLE_OTEL: false + LOG_OTEL_LEVEL: info + LOG_OTEL_URL: http://localhost:4318/v1/logs + +# 向量库相关配置(openGauss DataVec,使用独立环境变量) +x-vec-config: &x-vec-config + OPENGAUSS_URL: postgresql://gaussdb:FastGPT@123@opengauss:5432/fastgpt + +version: '3.3' +services: + # Vector DB - openGauss + vectorDB: + image: opengauss/opengauss:7.0.0-RC1 + container_name: opengauss + restart: always + privileged: true + networks: + - fastgpt + environment: + # 这里的配置只有首次运行生效。修改后,重启镜像是不会生效的。需要把持久化数据删除再重启,才有效果 + - GS_USERNAME=gaussdb # 默认会创建 gaussdb 用户 + - GS_PASSWORD=FastGPT@123 # 密码必须包含大写、小写、数字和特殊字符,且长度不少于8位 + - GS_DB=fastgpt # 默认会创建 postgres 数据库,这里以 fastgpt 为例 + volumes: + - ./opengauss/data:/var/lib/opengauss + healthcheck: + test: ['CMD-SHELL', 'su - omm -c "gsql -d postgres -p 5432 -c \"SELECT 1\""'] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s + mongo: + image: mongo:5.0.32 # cpu 不支持 AVX 时候使用 4.4.29 + container_name: mongo + restart: always + networks: + - fastgpt + command: mongod --keyFile /data/mongodb.key --replSet rs0 + environment: + - MONGO_INITDB_ROOT_USERNAME=myusername + - MONGO_INITDB_ROOT_PASSWORD=mypassword + volumes: + - ./mongo/data:/data/db + healthcheck: + test: ['CMD', 'mongo', '-u', 'myusername', '-p', 'mypassword', '--authenticationDatabase', 'admin', '--eval', "db.adminCommand('ping')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + entrypoint: + - bash + - -c + - | + openssl rand -base64 128 > /data/mongodb.key + chmod 400 /data/mongodb.key + chown 999:999 /data/mongodb.key + echo 'const isInited = rs.status().ok === 1 + if(!isInited){ + rs.initiate({ + _id: "rs0", + members: [ + { _id: 0, host: "mongo:27017" } + ] + }) + }' > /data/initReplicaSet.js + # 启动MongoDB服务 + exec docker-entrypoint.sh "$$@" & + + # 等待MongoDB服务启动 + until mongo -u myusername -p mypassword --authenticationDatabase admin --eval "print('waited for connection')"; do + echo "Waiting for MongoDB to start..." + sleep 2 + done + + # 执行初始化副本集的脚本 + mongo -u myusername -p mypassword --authenticationDatabase admin /data/initReplicaSet.js + + # 等待docker-entrypoint.sh脚本执行的MongoDB服务进程 + wait $$! + redis: + image: redis:7.2-alpine + container_name: redis + networks: + - fastgpt + restart: always + command: | + redis-server --requirepass mypassword --loglevel warning --maxclients 10000 --appendonly yes --save 60 10 --maxmemory 4gb --maxmemory-policy noeviction + healthcheck: + test: ['CMD', 'redis-cli', '-a', 'mypassword', 'ping'] + interval: 10s + timeout: 3s + retries: 3 + start_period: 30s + volumes: + - ./redis/data:/data + fastgpt-minio: + image: minio/minio:RELEASE.2025-09-07T16-13-09Z + container_name: fastgpt-minio + restart: always + ports: + - 9000:9000 + - 9001:9001 + networks: + - fastgpt + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + volumes: + - ./fastgpt-minio:/data + command: server /data --console-address ":9001" + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] + interval: 30s + timeout: 20s + retries: 3 + + fastgpt: + container_name: fastgpt + image: ghcr.io/labring/fastgpt:v4.14.9.3 # git + ports: + - 3000:3000 + networks: + - fastgpt + depends_on: + - mongo + - code-sandbox + - vectorDB + restart: always + environment: + <<: [*x-share-db-config, *x-vec-config, *x-log-config] + # ==================== 基础配置 ==================== + # 前端外部可访问的地址,用于自动补全文件资源路径。例如 https:fastgpt.cn,不能填 localhost。这个值可以不填,不填则发给模型的图片会是一个相对路径,而不是全路径,模型可能伪造Host。 + FE_DOMAIN: http://localhost:3000 + # root key(最高权限) + ROOT_KEY: *x-system-key + # root 密码,用户名为: root。如果需要修改 root 密码,直接修改这个环境变量,并重启即可。 + DEFAULT_ROOT_PSW: *x-default-root-psw + # 数据库最大连接数 + DB_MAX_LINK: 5 + # 自动同步索引(0 表示不同步) + SYNC_INDEX: 1 + TOKEN_KEY: fastgpt + # 文件阅读时的密钥 + FILE_TOKEN_KEY: filetokenkey + # 密钥加密 key + AES256_SECRET_KEY: fastgptsecret + # 强制将图片转成 base64 传递给模型 + MULTIPLE_DATA_TO_BASE64: true + + # ==================== 服务地址与集成 ==================== + # plugin 地址 + PLUGIN_BASE_URL: http://fastgpt-plugin:3000 + PLUGIN_TOKEN: *x-plugin-auth-token + # code-sandbox 地址 + SANDBOX_URL: http://code-sandbox:3000 + SANDBOX_TOKEN: *x-code-sandbox-token + # AI Proxy 的地址,如果配了该地址,优先使用 + AIPROXY_API_ENDPOINT: http://aiproxy:3000 + # AI Proxy 的 Admin Token,与 AI Proxy 中的环境变量 ADMIN_KEY + AIPROXY_API_TOKEN: *x-aiproxy-token + + # ==================== 日志与监控 ==================== + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-client + + # ==================== 安全与运行限制 ==================== + # 启动 IP 限流(true);部分接口启用 IP 限流策略以防止异常请求 + USE_IP_LIMIT: false + # 工作流最大运行次数,避免极端死循环 + WORKFLOW_MAX_RUN_TIMES: 1000 + # 循环最大运行次数,避免极端死循环 + WORKFLOW_MAX_LOOP_TIMES: 100 + # 服务器接收请求的最大大小(MB) + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 启用内网 IP 检查 + CHECK_INTERNAL_IP: false + # ==================== 上传与账号策略 ==================== + # 最大上传文件大小(MB) + UPLOAD_FILE_MAX_SIZE: 1000 + # 最大上传文件数量 + UPLOAD_FILE_MAX_AMOUNT: 1000 + # LLM 请求追踪保留时长(小时) + LLM_REQUEST_TRACKING_RETENTION_HOURS: 6 + # ==================== 功能开关与特殊配置 ==================== + # 自定义跨域;不配置时默认允许所有跨域(逗号分割) + ALLOWED_ORIGINS: + # HTML 转 Markdown 最大字符数(超过后不执行转换) + MAX_HTML_TRANSFORM_CHARS: 1000000 + volumes: + - ./config.json:/app/data/config.json + code-sandbox: + container_name: code-sandbox + image: ghcr.io/labring/fastgpt-sandbox:v4.14.9.3 + networks: + - fastgpt + restart: always + environment: + <<: [*x-log-config] + LOG_OTEL_SERVICE_NAME: fastgpt-code-sandbox + SANDBOX_TOKEN: *x-code-sandbox-token + # ===== Resource Limits ===== + # Execution timeout per request (ms) + SANDBOX_MAX_TIMEOUT: 60000 + # Maximum allowed memory per user code execution (MB) + # Note: System automatically adds 50MB for runtime overhead + # Actual process limit = SANDBOX_MAX_MEMORY_MB + 50MB + SANDBOX_MAX_MEMORY_MB: 256 + + # ===== Process Pool ===== + # Number of pre-warmed worker processes (JS + Python) + SANDBOX_POOL_SIZE: 20 + + # ===== Network Request Limits ===== + # Whether to check if the request is to a private network + CHECK_INTERNAL_IP: false + # Maximum number of HTTP requests per execution + SANDBOX_REQUEST_MAX_COUNT: 30 + # Timeout for each outbound HTTP request (ms) + SANDBOX_REQUEST_TIMEOUT: 60000 + # Maximum response body size for outbound requests + SANDBOX_REQUEST_MAX_RESPONSE_MB: 10 + # Maximum request body size for outbound requests (MB) + SANDBOX_REQUEST_MAX_BODY_MB: 5 + + # ===== Module Control ===== + # JS allowed modules whitelist (comma-separated) + SANDBOX_JS_ALLOWED_MODULES: lodash,dayjs,moment,uuid,crypto-js,qs,url,querystring + # Python allowed modules whitelist (comma-separated) + SANDBOX_PYTHON_ALLOWED_MODULES: math,cmath,decimal,fractions,random,statistics,collections,array,heapq,bisect,queue,copy,itertools,functools,operator,string,re,difflib,textwrap,unicodedata,codecs,datetime,time,calendar,_strptime,json,csv,base64,binascii,struct,hashlib,hmac,secrets,uuid,typing,abc,enum,dataclasses,contextlib,pprint,weakref,numpy,pandas,matplotlib + + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + fastgpt-mcp-server: + container_name: fastgpt-mcp-server + image: ghcr.io/labring/fastgpt-mcp_server:v4.14.9 + networks: + - fastgpt + ports: + - 3005:3000 + restart: always + environment: + <<: [*x-log-config] + FASTGPT_ENDPOINT: http://fastgpt:3000 + fastgpt-plugin: + image: ghcr.io/labring/fastgpt-plugin:v0.5.5 + container_name: fastgpt-plugin + restart: always + networks: + - fastgpt + environment: + <<: [*x-share-db-config, *x-log-config] + AUTH_TOKEN: *x-plugin-auth-token + # 工具网络请求,最大请求和响应体 + SERVICE_REQUEST_MAX_CONTENT_LENGTH: 10 + # 最大 API 请求体大小 + MAX_API_SIZE: 10 + # 传递给 OTLP 收集器的服务名称 + LOG_OTEL_SERVICE_NAME: fastgpt-plugin + depends_on: + fastgpt-minio: + condition: service_healthy + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/health'] + interval: 30s + timeout: 20s + retries: 3 + # AI Proxy + aiproxy: + image: ghcr.io/labring/aiproxy:v0.3.5 + container_name: aiproxy + restart: unless-stopped + depends_on: + aiproxy_pg: + condition: service_healthy + networks: + - fastgpt + - aiproxy + environment: + # 对应 fastgpt 里的AIPROXY_API_TOKEN + ADMIN_KEY: *x-aiproxy-token + # 错误日志详情保存时间(小时) + LOG_DETAIL_STORAGE_HOURS: 1 + # 数据库连接地址 + SQL_DSN: postgres://postgres:aiproxy@aiproxy_pg:5432/aiproxy + # 最大重试次数 + RETRY_TIMES: 3 + # 不需要计费 + BILLING_ENABLED: false + # 不需要严格检测模型 + DISABLE_MODEL_CONFIG: true + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3000/api/status'] + interval: 5s + timeout: 5s + retries: 10 + aiproxy_pg: + image: pgvector/pgvector:0.8.0-pg15 # docker hub + restart: unless-stopped + container_name: aiproxy_pg + volumes: + - ./aiproxy_pg:/var/lib/postgresql/data + networks: + - aiproxy + environment: + TZ: Asia/Shanghai + POSTGRES_USER: postgres + POSTGRES_DB: aiproxy + POSTGRES_PASSWORD: aiproxy + healthcheck: + test: ['CMD', 'pg_isready', '-U', 'postgres', '-d', 'aiproxy'] + interval: 5s + timeout: 5s + retries: 10 +networks: + fastgpt: + aiproxy: + vector: diff --git a/packages/service/common/vectorDB/constants.ts b/packages/service/common/vectorDB/constants.ts index 87e917a951..302ee3a525 100644 --- a/packages/service/common/vectorDB/constants.ts +++ b/packages/service/common/vectorDB/constants.ts @@ -2,6 +2,7 @@ export const DatasetVectorDbName = 'fastgpt'; export const DatasetVectorTableName = 'modeldata'; export const PG_ADDRESS = process.env.PG_URL; +export const OPENGAUSS_ADDRESS = process.env.OPENGAUSS_URL; export const OCEANBASE_ADDRESS = process.env.OCEANBASE_URL; export const SEEKDB_ADDRESS = process.env.SEEKDB_URL; export const MILVUS_ADDRESS = process.env.MILVUS_ADDRESS; diff --git a/packages/service/common/vectorDB/controller.ts b/packages/service/common/vectorDB/controller.ts index d6b8b577c3..c77af4db30 100644 --- a/packages/service/common/vectorDB/controller.ts +++ b/packages/service/common/vectorDB/controller.ts @@ -2,10 +2,17 @@ import { PgVectorCtrl } from './pg'; import { ObVectorCtrl } from './oceanbase'; import { SeekVectorCtrl } from './seekdb'; +import { OpenGaussVectorCtrl } from './opengauss'; import { getVectorsByText } from '../../core/ai/embedding'; import type { VectorControllerType, InsertVectorControllerPropsType } from './type'; import { type EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.schema'; -import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS, SEEKDB_ADDRESS } from './constants'; +import { + MILVUS_ADDRESS, + PG_ADDRESS, + OPENGAUSS_ADDRESS, + OCEANBASE_ADDRESS, + SEEKDB_ADDRESS +} from './constants'; import { MilvusCtrl } from './milvus'; import { setRedisCache, @@ -23,6 +30,7 @@ const getVectorObj = (): VectorControllerType => { if (OCEANBASE_ADDRESS) return new ObVectorCtrl({ type: 'oceanbase' }); if (PG_ADDRESS) return new PgVectorCtrl(); if (MILVUS_ADDRESS) return new MilvusCtrl(); + if (OPENGAUSS_ADDRESS) return new OpenGaussVectorCtrl(); return new PgVectorCtrl(); }; diff --git a/packages/service/common/vectorDB/opengauss/controller.ts b/packages/service/common/vectorDB/opengauss/controller.ts new file mode 100644 index 0000000000..5d63829a64 --- /dev/null +++ b/packages/service/common/vectorDB/opengauss/controller.ts @@ -0,0 +1,201 @@ +import { delay } from '@fastgpt/global/common/system/utils'; +import { getLogger, LogCategories } from '../../logger'; +import { Pool } from 'pg'; +import type { QueryResultRow } from 'pg'; +import { OPENGAUSS_ADDRESS } from '../constants'; + +const logger = getLogger(LogCategories.INFRA.VECTOR); + +export const connectOg = async (): Promise => { + if (global.pgClient) { + return global.pgClient; + } + + const pool = new Pool({ + connectionString: OPENGAUSS_ADDRESS, + max: Number(process.env.DB_MAX_LINK || 30), + min: 15, + keepAlive: true, + idleTimeoutMillis: 1800000, + connectionTimeoutMillis: 30000, + query_timeout: 60000, + statement_timeout: 90000, + idle_in_transaction_session_timeout: 60000, + allowExitOnIdle: false, + application_name: 'fastgpt-vector-db' + }); + global.pgClient = pool; + + global.pgClient.on('error', async (err) => { + logger.error('openGauss pool error', { error: err }); + }); + global.pgClient.on('connect', async () => { + logger.info('openGauss pool connected'); + }); + global.pgClient.on('remove', async () => { + logger.warn('openGauss connection removed from pool'); + }); + + try { + await global.pgClient.connect(); + return global.pgClient; + } catch (error) { + logger.error('openGauss connection failed', { error }); + global.pgClient?.removeAllListeners(); + global.pgClient?.end(); + global.pgClient = null; + + await delay(1000); + logger.warn('openGauss reconnecting after failure'); + + return connectOg(); + } +}; + +type WhereProps = (string | [string, string | number])[]; +type GetProps = { + fields?: string[]; + where?: WhereProps; + order?: { field: string; mode: 'DESC' | 'ASC' | string }[]; + limit?: number; + offset?: number; +}; + +type DeleteProps = { + where: WhereProps; +}; + +type ValuesProps = { key: string; value?: string | number }[]; +type UpdateProps = { + values: ValuesProps; + where: WhereProps; +}; +type InsertProps = { + values: ValuesProps[]; +}; + +class OgClass { + private getWhereStr(where?: WhereProps) { + return where + ? `WHERE ${where + .map((item) => { + if (typeof item === 'string') { + return item; + } + const val = typeof item[1] === 'number' ? item[1] : `'${String(item[1])}'`; + return `${item[0]}=${val}`; + }) + .join(' ')}` + : ''; + } + private getUpdateValStr(values: ValuesProps) { + return values + .map((item) => { + const val = + typeof item.value === 'number' + ? item.value + : `'${String(item.value).replace(/\'/g, '"')}'`; + + return `${item.key}=${val}`; + }) + .join(','); + } + private getInsertValStr(values: ValuesProps[]) { + return values + .map( + (items) => + `(${items + .map((item) => + typeof item.value === 'number' + ? item.value + : `'${String(item.value).replace(/\'/g, '"')}'` + ) + .join(',')})` + ) + .join(','); + } + + async query(sql: string) { + const og = await connectOg(); + const start = Date.now(); + return og.query(sql).then((res) => { + const time = Date.now() - start; + + if (time > 1000) { + const safeSql = sql.replace(/'\[[^\]]*?\]'/g, "'[x]'"); + logger.warn('openGauss slow query detected', { + level: 'slow-2', + durationMs: time, + sql: safeSql + }); + } else if (time > 300) { + const safeSql = sql.replace(/'\[[^\]]*?\]'/g, "'[x]'"); + logger.warn('openGauss slow query detected', { + level: 'slow-1', + durationMs: time, + sql: safeSql + }); + } + + return res; + }); + } + + async select(table: string, props: GetProps) { + const sql = `SELECT ${ + !props.fields || props.fields?.length === 0 ? '*' : props.fields?.join(',') + } + FROM ${table} + ${this.getWhereStr(props.where)} + ${ + props.order + ? `ORDER BY ${props.order.map((item) => `${item.field} ${item.mode}`).join(',')}` + : '' + } + LIMIT ${props.limit || 10} OFFSET ${props.offset || 0} + `; + + return this.query(sql); + } + async count(table: string, props: GetProps) { + const sql = `SELECT COUNT(${props?.fields?.[0] || '*'}) + FROM ${table} + ${this.getWhereStr(props.where)} + `; + + return this.query(sql).then((res) => Number(res.rows[0]?.count || 0)); + } + async delete(table: string, props: DeleteProps) { + const sql = `DELETE FROM ${table} ${this.getWhereStr(props.where)}`; + return this.query(sql); + } + async update(table: string, props: UpdateProps) { + if (props.values.length === 0) { + return { + rowCount: 0 + }; + } + + const sql = `UPDATE ${table} SET ${this.getUpdateValStr(props.values)} ${this.getWhereStr( + props.where + )}`; + return this.query(sql); + } + async insert(table: string, props: InsertProps) { + if (props.values.length === 0) { + return { + rowCount: 0, + rows: [] + }; + } + + const fields = props.values[0].map((item) => item.key).join(','); + const sql = `INSERT INTO ${table} (${fields}) VALUES ${this.getInsertValStr( + props.values + )} RETURNING id`; + + return this.query<{ id: string }>(sql); + } +} + +export const OgClient = new OgClass(); diff --git a/packages/service/common/vectorDB/opengauss/index.ts b/packages/service/common/vectorDB/opengauss/index.ts new file mode 100644 index 0000000000..86599c5ba2 --- /dev/null +++ b/packages/service/common/vectorDB/opengauss/index.ts @@ -0,0 +1,208 @@ +/* openGauss DataVec vector crud */ +import { DatasetVectorTableName } from '../constants'; +import { OgClient, connectOg } from './controller'; +import type { VectorControllerType } from '../type'; +import dayjs from 'dayjs'; +import { getLogger, LogCategories } from '../../logger'; + +const logger = getLogger(LogCategories.INFRA.VECTOR); + +export class OpenGaussVectorCtrl implements VectorControllerType { + constructor() {} + init = async () => { + try { + await connectOg(); + await OgClient.query(` + CREATE TABLE IF NOT EXISTS ${DatasetVectorTableName} ( + id BIGSERIAL PRIMARY KEY, + vector VECTOR(1536) NOT NULL, + team_id VARCHAR(50) NOT NULL, + dataset_id VARCHAR(50) NOT NULL, + collection_id VARCHAR(50) NOT NULL, + createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + `); + + await OgClient.query( + `CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${DatasetVectorTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 128);` + ); + await OgClient.query( + `CREATE INDEX CONCURRENTLY IF NOT EXISTS team_dataset_collection_index ON ${DatasetVectorTableName} USING btree(team_id, dataset_id, collection_id);` + ); + await OgClient.query( + `CREATE INDEX CONCURRENTLY IF NOT EXISTS create_time_index ON ${DatasetVectorTableName} USING btree(createtime);` + ); + + logger.info('openGauss DataVec vector initialization completed'); + } catch (error) { + logger.error('openGauss DataVec vector initialization failed', { error }); + } + }; + insert: VectorControllerType['insert'] = async (props) => { + const { teamId, datasetId, collectionId, vectors } = props; + + const values = vectors.map((vector) => [ + { key: 'vector', value: `[${vector}]` }, + { key: 'team_id', value: String(teamId) }, + { key: 'dataset_id', value: String(datasetId) }, + { key: 'collection_id', value: String(collectionId) } + ]); + + const { rowCount, rows } = await OgClient.insert(DatasetVectorTableName, { + values + }); + + if (rowCount === 0) { + return Promise.reject('insertDatasetData: no insert'); + } + + return { + insertIds: rows.map((row) => row.id) + }; + }; + delete: VectorControllerType['delete'] = async (props) => { + const { teamId } = props; + + const teamIdWhere = `team_id='${String(teamId)}' AND`; + + const where = await (() => { + if ('id' in props && props.id) return `${teamIdWhere} id=${props.id}`; + + if ('datasetIds' in props && props.datasetIds) { + const datasetIdWhere = `dataset_id IN (${props.datasetIds + .map((id) => `'${String(id)}'`) + .join(',')})`; + + if ('collectionIds' in props && props.collectionIds) { + return `${teamIdWhere} ${datasetIdWhere} AND collection_id IN (${props.collectionIds + .map((id) => `'${String(id)}'`) + .join(',')})`; + } + + return `${teamIdWhere} ${datasetIdWhere}`; + } + + if ('idList' in props && Array.isArray(props.idList)) { + if (props.idList.length === 0) return; + return `${teamIdWhere} id IN (${props.idList.map((id) => String(id)).join(',')})`; + } + return Promise.reject('deleteDatasetData: no where'); + })(); + + if (!where) return; + + await OgClient.delete(DatasetVectorTableName, { + where: [where] + }); + }; + embRecall: VectorControllerType['embRecall'] = async (props) => { + const { teamId, datasetIds, vector, limit, forbidCollectionIdList, filterCollectionIdList } = + props; + + // Get forbid collection + const formatForbidCollectionIdList = (() => { + if (!filterCollectionIdList) return forbidCollectionIdList; + const list = forbidCollectionIdList + .map((id) => String(id)) + .filter((id) => !filterCollectionIdList.includes(id)); + return list; + })(); + const forbidCollectionSql = + formatForbidCollectionIdList.length > 0 + ? `AND collection_id NOT IN (${formatForbidCollectionIdList.map((id) => `'${id}'`).join(',')})` + : ''; + + // Filter by collectionId + const formatFilterCollectionId = (() => { + if (!filterCollectionIdList) return; + + return filterCollectionIdList + .map((id) => String(id)) + .filter((id) => !forbidCollectionIdList.includes(id)); + })(); + const filterCollectionIdSql = formatFilterCollectionId + ? `AND collection_id IN (${formatFilterCollectionId.map((id) => `'${id}'`).join(',')})` + : ''; + // Empty data + if (formatFilterCollectionId && formatFilterCollectionId.length === 0) { + return { results: [] }; + } + + const results: any = await OgClient.query( + `BEGIN; + SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100}; + SELECT id, collection_id, vector <#> '[${vector}]' AS score + FROM ${DatasetVectorTableName} + WHERE dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')}) + ${filterCollectionIdSql} + ${forbidCollectionSql} + ORDER BY score LIMIT ${limit}; + COMMIT;` + ); + const rows = results?.[results.length - 2]?.rows as { + id: string; + collection_id: string; + score: number; + }[]; + + if (!Array.isArray(rows)) { + return { + results: [] + }; + } + + return { + results: rows.map((item) => ({ + id: String(item.id), + collectionId: item.collection_id, + score: item.score * -1 + })) + }; + }; + + getVectorDataByTime: VectorControllerType['getVectorDataByTime'] = async (start, end) => { + const { rows } = await OgClient.query<{ + id: string; + team_id: string; + dataset_id: string; + }>(`SELECT id, team_id, dataset_id + FROM ${DatasetVectorTableName} + WHERE createtime BETWEEN '${dayjs(start).format('YYYY-MM-DD HH:mm:ss')}' AND '${dayjs( + end + ).format('YYYY-MM-DD HH:mm:ss')}'; + `); + + return rows.map((item) => ({ + id: String(item.id), + teamId: item.team_id, + datasetId: item.dataset_id + })); + }; + getVectorCount: VectorControllerType['getVectorCount'] = async (props) => { + const { teamId, datasetId, collectionId } = props; + + // Build where conditions dynamically + const whereConditions: any[] = []; + + if (teamId) { + whereConditions.push(['team_id', String(teamId)]); + } + + if (datasetId) { + if (whereConditions.length > 0) whereConditions.push('and'); + whereConditions.push(['dataset_id', String(datasetId)]); + } + + if (collectionId) { + if (whereConditions.length > 0) whereConditions.push('and'); + whereConditions.push(['collection_id', String(collectionId)]); + } + + // If no conditions provided, count all + const total = await OgClient.count(DatasetVectorTableName, { + where: whereConditions.length > 0 ? whereConditions : undefined + }); + + return total; + }; +} diff --git a/packages/service/type/env.ts b/packages/service/type/env.ts index b75f9c4203..6077902302 100644 --- a/packages/service/type/env.ts +++ b/packages/service/type/env.ts @@ -21,6 +21,7 @@ declare global { // Vector VECTOR_VQ_LEVEL: string; PG_URL: string; + OPENGAUSS_URL: string; OCEANBASE_URL: string; SEEKDB_URL: string; MILVUS_ADDRESS: string; diff --git a/projects/app/.env.template b/projects/app/.env.template index f9c9951a6e..c828c09134 100644 --- a/projects/app/.env.template +++ b/projects/app/.env.template @@ -108,7 +108,7 @@ MONGODB_URI="mongodb://myusername:mypassword@localhost:27017/fastgpt?authSource= # 日志库 MONGODB_LOG_URI="mongodb://myusername:mypassword@localhost:27017/fastgpt?authSource=admin&directConnection=true" -# 向量库优先级: pg > oceanbase > milvus +# 向量库优先级: pg > oceanbase > milvus > opengauss # 向量量化等级: PG 支持 32/16,OceanBase 支持 32/8/1 VECTOR_VQ_LEVEL=32 @@ -119,6 +119,8 @@ PG_URL=postgresql://username:password@localhost:5432/postgres # Milvus 向量库连接参数 # MILVUS_ADDRESS= # MILVUS_TOKEN= +# openGauss 向量库连接参数 +# OPENGAUSS_URL=postgresql://gaussdb:FastGPT@123@localhost:5432/fastgpt # ==================== 域名与前端 ==================== # 页面地址,用于自动补全相对路径资源的 domain(注意结尾不要带 /) diff --git a/test/.env.test.tempalte b/test/.env.test.tempalte index 1ee228e964..c9e2a42726 100644 --- a/test/.env.test.tempalte +++ b/test/.env.test.tempalte @@ -7,4 +7,6 @@ PG_URL=postgresql://username:password@localhost:6001/postgres SEEKDB_URL=mysql://root:seekdbpassword@127.0.0.1:6003/mysql # Milvus vector database connection MILVUS_ADDRESS=http://localhost:6002 -MILVUS_TOKEN= \ No newline at end of file +MILVUS_TOKEN= +# openGauss vector database connection +OPENGAUSS_URL=postgresql://gaussdb:FastGPT@123@localhost:5432/fastgpt \ No newline at end of file diff --git a/test/integrationTest/vectorDB/README.md b/test/integrationTest/vectorDB/README.md index dcb62e803f..2937a848c4 100644 --- a/test/integrationTest/vectorDB/README.md +++ b/test/integrationTest/vectorDB/README.md @@ -18,6 +18,7 @@ cp test/.env.test.template test/.env.test.local | `PG_URL` | PostgreSQL + pgvector 连接串 | PgVectorCtrl | | `OCEANBASE_URL` | Oceanbase 连接串(后续) | ObVectorCtrl | | `MILVUS_ADDRESS` | Milvus 地址(后续) | MilvusCtrl | +| `OPENGAUSS_URL` | openGauss DataVec 连接串 | OpenGaussVectorCtrl | 未设置对应环境变量时,该驱动的集成测试会**整体跳过**,不会报错。 diff --git a/test/integrationTest/vectorDB/opengauss/index.integration.test.ts b/test/integrationTest/vectorDB/opengauss/index.integration.test.ts new file mode 100644 index 0000000000..0ba5e9c354 --- /dev/null +++ b/test/integrationTest/vectorDB/opengauss/index.integration.test.ts @@ -0,0 +1,15 @@ +import { describe, vi } from 'vitest'; +import { createVectorDBTestSuite } from '../testSuites'; + +// Unmock vector controllers for integration tests +vi.unmock('@fastgpt/service/common/vectorDB/opengauss'); +vi.unmock('@fastgpt/service/common/vectorDB/constants'); + +import { OpenGaussVectorCtrl } from '@fastgpt/service/common/vectorDB/opengauss'; + +const isEnabled = Boolean(process.env.OPENGAUSS_URL); + +describe.skipIf(!isEnabled)('OpenGauss Vector Integration', () => { + const vectorCtrl = new OpenGaussVectorCtrl(); + createVectorDBTestSuite(vectorCtrl); +});