mirror of https://github.com/labring/FastGPT.git

docs
BIN
files/deploy/fastgpt/clash/Country.mmdb
Normal file
Binary file not shown.
BIN
files/deploy/fastgpt/clash/clash-linux-amd64-v3
Normal file
Binary file not shown.
22
files/deploy/fastgpt/clash/config.yaml
Normal file
@@ -0,0 +1,22 @@
mixed-port: 7890
allow-lan: false
bind-address: '*'
mode: rule
log-level: warning
dns:
  enable: true
  ipv6: false
  nameserver:
    - 8.8.8.8
    - 8.8.4.4
  cache-size: 400
proxies:

proxy-groups:
  - { name: '♻️ 自动选择', type: url-test, proxies: [香港V02×1.5, ABC, 印度01, 台湾03, 新加坡02, 新加坡03, 日本01, 日本02, 新加坡01, 美国01, 美国02, 台湾01, 台湾02], url: 'https://api.openai.com', interval: 3600 }
rules:
  - 'DOMAIN-SUFFIX,google.com,♻️ 自动选择'
  - 'DOMAIN-SUFFIX,ai.fastgpt.run,♻️ 自动选择'
  - 'DOMAIN-SUFFIX,openai.com,♻️ 自动选择'
  - 'DOMAIN-SUFFIX,api.openai.com,♻️ 自动选择'
  - 'MATCH,DIRECT'
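
A quick way to confirm the proxy is working once Clash is running (a sketch; assumes the mixed port 7890 from config.yaml above is reachable on localhost):

    # request OpenAI through the Clash mixed port; an HTTP status line means the tunnel works
    curl -x http://127.0.0.1:7890 -I https://api.openai.com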
18
files/deploy/fastgpt/clash/proxy.sh
Normal file
@@ -0,0 +1,18 @@
export ALL_PROXY=socks5://127.0.0.1:7891
export http_proxy=http://127.0.0.1:7890
export https_proxy=http://127.0.0.1:7890
export HTTP_PROXY=http://127.0.0.1:7890
export HTTPS_PROXY=http://127.0.0.1:7890

OLD_PROCESS=$(pgrep clash)
if [ ! -z "$OLD_PROCESS" ]; then
  echo "Killing old process: $OLD_PROCESS"
  kill $OLD_PROCESS
fi
sleep 2

cd /root/fastgpt/clash/fast
rm -f ./nohup.out || true
rm -f ./cache.db || true
nohup ./clash-linux-amd64-v3 -d ./ &
echo "Restart clash fast"
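
Note that the export lines only affect the calling shell when the script is sourced rather than executed, so a typical invocation (an assumption about intended usage) is:

    source /root/fastgpt/clash/proxy.sh   # proxy variables now set in this shell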
10
files/deploy/fastgpt/clash/stop.sh
Normal file
@@ -0,0 +1,10 @@
export ALL_PROXY=''
export http_proxy=''
export https_proxy=''
export HTTP_PROXY=''
export HTTPS_PROXY=''
OLD_PROCESS=$(pgrep clash)
if [ ! -z "$OLD_PROCESS" ]; then
  echo "Killing old process: $OLD_PROCESS"
  kill $OLD_PROCESS
fi
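
As with proxy.sh, clearing the variables only takes effect in the current shell when sourced:

    source /root/fastgpt/clash/stop.sh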
256
files/deploy/fastgpt/docker-compose.yml
Normal file
@@ -0,0 +1,256 @@
# non-host version; does not use the host machine's proxy
version: '3.3'
services:
  pg:
    # image: ankane/pgvector:v0.4.2 # Docker Hub
    image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.4.2 # Alibaba Cloud
    container_name: pg
    restart: always
    ports: # best not to expose these in production
      - 8100:5432
    networks:
      - fastgpt
    environment:
      # These settings only take effect on the first run. Changing them and restarting the container has no effect; delete the persisted data and restart for changes to apply.
      - POSTGRES_USER=fastgpt
      - POSTGRES_PASSWORD=1234
      - POSTGRES_DB=fastgpt
    volumes:
      # the file created earlier
      - ./pg/init.sql:/docker-entrypoint-initdb.d/init.sh
      - ./pg/data:/var/lib/postgresql/data
  mongo:
    # image: mongo:5.0.18
    image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mongo:5.0.18 # Alibaba Cloud
    container_name: mongo
    restart: always
    ports: # best not to expose these in production
      - 27017:27017
    networks:
      - fastgpt
    environment:
      # These settings only take effect on the first run. Changing them and restarting the container has no effect; delete the persisted data and restart for changes to apply.
      - MONGO_INITDB_ROOT_USERNAME=username
      - MONGO_INITDB_ROOT_PASSWORD=password
    volumes:
      - ./mongo/data:/data/db
      - ./mongo/logs:/var/log/mongodb
  fastgpt:
    container_name: fastgpt
    # image: c121914yu/fast-gpt:latest # Docker Hub
    image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:latest # Alibaba Cloud
    # network_mode: host
    ports:
      - 3000:3000
    networks:
      - fastgpt
    depends_on:
      - mongo
      - pg
    restart: always
    environment: # optional variables; remove any you do not need
      - PORT=3000 # port the service listens on; if it is not 3000, change this to the actual port
      - DB_MAX_LINK=5 # max database connections
      # proxy (optional)
      - AXIOS_PROXY_HOST=127.0.0.1
      - AXIOS_PROXY_PORT=7890
      # Email verification-code settings (uses a QQ mailbox); see nodeMail for how to obtain MAILE_CODE
      - MY_MAIL=54545@qq.com
      - MAILE_CODE=1234
      # Alibaba Cloud SMS service (configure at least one of email or SMS)
      - aliAccessKeyId=xxxx
      - aliAccessKeySecret=xxxx
      - aliSignName=xxxxx
      - aliTemplateCode=SMS_xxxx
      # Google reCAPTCHA v3 verification (optional)
      - CLIENT_GOOGLE_VER_TOKEN=xxx
      - SERVICE_GOOGLE_VER_TOKEN=xx
      # token signing secret (any value; used as the login credential)
      - TOKEN_KEY=any
      # root key: highest privilege; lets internal services call each other's APIs
      - ROOT_KEY=root_key
      # must match the mongo image's username/password above
      - MONGODB_URI=mongodb://username:password@mongo:27017/?authSource=admin
      - MONGODB_NAME=fastgpt
      - PG_HOST=pg
      - PG_PORT=5432
      # must match the PG image above
      - PG_USER=fastgpt
      - PG_PASSWORD=1234
      - PG_DB_NAME=fastgpt
      # one-api settings; using one-api to manage keys is recommended
      - ONEAPI_URL=https://xxxxx.cloud.sealos.io/v1
      - ONEAPI_KEY=sk-xxxxxx
      # OpenAI settings: with one-api in use, only OPENAI_BASE_URL below needs filling in (can all be skipped outside mainland China)
      - OPENAIKEY=sk-xxxxx
      - OPENAI_BASE_URL=https://api.openai.com/v1
      - OPENAI_BASE_URL_AUTH=optional auth credential, sent in the header.auth field
  fastgpt-admin:
    image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-admin:latest
    container_name: fastgpt-admin
    restart: always
    ports:
      - 3001:3001
    networks:
      - fastgpt
    depends_on:
      - mongo
      - fastgpt
    environment:
      - MONGODB_URI=mongodb://username:password@mongo:27017/?authSource=admin
      - MONGODB_NAME=fastgpt
      - ADMIN_USER=username
      - ADMIN_PASS=password
      - ADMIN_SECRET=any
      - PARENT_URL=http://fastgpt:3000
      - PARENT_ROOT_KEY=root_key
  keyadmin:
    container_name: keyadmin
    image: justsong/one-api
    restart: always
    ports:
      - 3002:3000
    environment:
      - TZ=Asia/Shanghai
    volumes:
      - /keyadmin:/data
  nginx: # BT Panel (宝塔) users do not need an extra nginx
    image: registry.cn-hangzhou.aliyuncs.com/fastgpt/nginx:alpine3.17
    # image: nginx:alpine3.17
    container_name: nginx
    restart: always
    network_mode: host
    volumes:
      # the files created earlier
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/logs:/var/log/nginx
      # HTTPS certificates; omit if you have none and adjust nginx.conf accordingly
      - ./nginx/ssl/docgpt.key:/ssl/docgpt.key
      - ./nginx/ssl/docgpt.pem:/ssl/docgpt.pem
networks:
  fastgpt:

# host version: not recommended; use the version above instead and relay through a BASE_URL
# version: '3.3'
# services:
#   pg:
#     # image: ankane/pgvector:v0.4.2 # Docker Hub
#     image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.4.2 # Alibaba Cloud
#     container_name: pg
#     restart: always
#     ports: # best not to expose these in production
#       - 8100:5432
#     networks:
#       - fastgpt
#     environment:
#       # These settings only take effect on the first run. Changing them and restarting the container has no effect; delete the persisted data and restart for changes to apply.
#       - POSTGRES_USER=fastgpt
#       - POSTGRES_PASSWORD=1234
#       - POSTGRES_DB=fastgpt
#     volumes:
#       # the file created earlier
#       - ./pg/init.sql:/docker-entrypoint-initdb.d/init.sh
#       - ./pg/data:/var/lib/postgresql/data
#   mongo:
#     # image: mongo:5.0.18
#     image: registry.cn-hangzhou.aliyuncs.com/fastgpt/mongo:5.0.18 # Alibaba Cloud
#     container_name: mongo
#     restart: always
#     ports: # best not to expose these in production
#       - 27017:27017
#     networks:
#       - fastgpt
#     environment:
#       # These settings only take effect on the first run. Changing them and restarting the container has no effect; delete the persisted data and restart for changes to apply.
#       - MONGO_INITDB_ROOT_USERNAME=username
#       - MONGO_INITDB_ROOT_PASSWORD=password
#     volumes:
#       - ./mongo/data:/data/db
#       - ./mongo/logs:/var/log/mongodb
#   fastgpt:
#     # image: ghcr.io/c121914yu/fastgpt:latest # GitHub
#     # image: c121914yu/fast-gpt:latest # Docker Hub
#     image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:latest # Alibaba Cloud
#     network_mode: host
#     restart: always
#     container_name: fastgpt
#     environment: # optional variables; remove any you do not need
#       - PORT=3000 # port the service listens on; if it is not 3000, change this to the actual port
#       - DB_MAX_LINK=15 # max database connections
#       # proxy (optional)
#       - AXIOS_PROXY_HOST=127.0.0.1
#       - AXIOS_PROXY_PORT=7890
#       # Email verification-code settings (uses a QQ mailbox); see nodeMail for how to obtain MAILE_CODE
#       - MY_MAIL=54545@qq.com
#       - MAILE_CODE=1234
#       # Alibaba Cloud SMS service (configure at least one of email or SMS)
#       - aliAccessKeyId=xxxx
#       - aliAccessKeySecret=xxxx
#       - aliSignName=xxxxx
#       - aliTemplateCode=SMS_xxxx
#       # Google reCAPTCHA v3 verification (optional)
#       - CLIENT_GOOGLE_VER_TOKEN=xxx
#       - SERVICE_GOOGLE_VER_TOKEN=xx
#       # token signing secret (any value; used as the login credential)
#       - TOKEN_KEY=xxxx
#       # root key: highest privilege; lets internal services call each other's APIs
#       - ROOT_KEY=xxx
#       # must match the mongo image's username/password above
#       - MONGODB_URI=mongodb://username:password@0.0.0.0:27017/?authSource=admin
#       - MONGODB_NAME=fastgpt
#       - PG_HOST=0.0.0.0
#       - PG_PORT=8100
#       # must match the PG image above
#       - PG_USER=fastgpt
#       - PG_PASSWORD=1234
#       - PG_DB_NAME=fastgpt
#       # one-api settings; using one-api to manage keys is recommended
#       - ONEAPI_URL=https://xxxxx.cloud.sealos.io/v1
#       - ONEAPI_KEY=sk-xxxxx
#       # OpenAI settings: with one-api in use, only OPENAI_BASE_URL below needs filling in (can all be skipped outside mainland China)
#       - OPENAIKEY=sk-xxxxx
#       - OPENAI_BASE_URL=https://api.openai.com/v1
#       - OPENAI_BASE_URL_AUTH=optional auth credential, sent in the header.auth field
#   fastgpt-admin:
#     image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-admin:latest
#     container_name: fastgpt-admin
#     restart: always
#     ports:
#       - 3001:3001
#     networks:
#       - fastgpt
#     depends_on:
#       - mongo
#       - fastgpt
#     environment:
#       - MONGODB_URI=mongodb://username:password@mongo:27017/?authSource=admin
#       - MONGODB_NAME=fastgpt
#       - ADMIN_USER=username
#       - ADMIN_PASS=password
#       - ADMIN_SECRET=any
#       - PARENT_URL=http://fastgpt:3000
#       - PARENT_ROOT_KEY=root_key
#   key-admin:
#     container_name: key-admin
#     image: justsong/one-api
#     restart: always
#     ports:
#       - 3002:3000
#     environment:
#       - TZ=Asia/Shanghai
#     volumes:
#       - /home/ubuntu/data/one-api:/data
#   nginx: # BT Panel (宝塔) users do not need an extra nginx
#     image: registry.cn-hangzhou.aliyuncs.com/fastgpt/nginx:alpine3.17
#     # image: nginx:alpine3.17
#     container_name: nginx
#     restart: always
#     network_mode: host
#     volumes:
#       # the files created earlier
#       - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
#       - ./nginx/logs:/var/log/nginx
#       # HTTPS certificates; omit if you have none and adjust nginx.conf accordingly
#       - ./nginx/ssl/docgpt.key:/ssl/docgpt.key
#       - ./nginx/ssl/docgpt.pem:/ssl/docgpt.pem
# networks:
#   fastgpt:
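
A minimal first bring-up of this stack (a sketch; run from the directory holding docker-compose.yml, and see run.sh later in this commit for the scripted version):

    docker-compose pull    # fetch the images referenced above
    docker-compose up -d   # start pg, mongo, fastgpt, fastgpt-admin, one-api and nginx
    docker-compose ps      # confirm all containers are up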
BIN
files/deploy/fastgpt/docker-compose/docker-compose
Normal file
Binary file not shown.
2
files/deploy/fastgpt/docker-compose/init.sh
Normal file
@@ -0,0 +1,2 @@
sudo cp ./docker-compose /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
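
After running init.sh, a version check confirms the binary is on PATH (a sketch):

    docker-compose --version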
69
files/deploy/fastgpt/nginx/nginx.conf
Normal file
@@ -0,0 +1,69 @@
user nginx;
worker_processes auto;
worker_rlimit_nofile 51200;

events {
    worker_connections 1024;
}

http {
    resolver 8.8.8.8;
    proxy_ssl_server_name on;

    access_log off;
    server_names_hash_bucket_size 512;
    client_header_buffer_size 64k;
    large_client_header_buffers 4 64k;
    client_max_body_size 50M;

    proxy_connect_timeout 240s;
    proxy_read_timeout 240s;
    proxy_buffer_size 128k;
    proxy_buffers 4 256k;

    gzip on;
    gzip_min_length 1k;
    gzip_buffers 4 8k;
    gzip_http_version 1.1;
    gzip_comp_level 6;
    gzip_vary on;
    gzip_types text/plain application/x-javascript text/css application/javascript application/json application/xml;
    gzip_disable "MSIE [1-6]\.";

    open_file_cache max=1000 inactive=1d;
    open_file_cache_valid 30s;
    open_file_cache_min_uses 8;
    open_file_cache_errors off;

    server {
        listen 443 ssl;
        server_name docgpt.ahapocket.cn;
        ssl_certificate /ssl/docgpt.pem;
        ssl_certificate_key /ssl/docgpt.key;
        ssl_session_timeout 5m;

        location / {
            proxy_pass http://localhost:3000;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }
    server {
        listen 80;
        server_name docgpt.ahapocket.cn;
        rewrite ^(.*) https://$server_name$1 permanent;
    }

    server {
        listen 3000;
        server_name 120.0.0.0;

        location / {
            proxy_pass http://localhost:3000;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        }
    }
}
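
Before relying on this config, it can be validated from inside the running container (a sketch; assumes the container_name nginx from docker-compose.yml above):

    docker exec nginx nginx -t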
21
files/deploy/fastgpt/pg/init.sql
Normal file
@@ -0,0 +1,21 @@
set -e
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL

    CREATE EXTENSION IF NOT EXISTS vector;
    -- init table
    CREATE TABLE IF NOT EXISTS modeldata (
        id BIGSERIAL PRIMARY KEY,
        vector VECTOR(1536) NOT NULL,
        user_id VARCHAR(50) NOT NULL,
        kb_id VARCHAR(50) NOT NULL,
        source VARCHAR(100),
        q TEXT NOT NULL,
        a TEXT NOT NULL
    );
    -- index settings; enable as needed
    -- CREATE INDEX IF NOT EXISTS modeldata_userId_index ON modeldata USING HASH (user_id);
    -- CREATE INDEX IF NOT EXISTS modeldata_kbId_index ON modeldata USING HASH (kb_id);
    -- CREATE INDEX IF NOT EXISTS idx_model_data_md5_q_a_user_id_kb_id ON modeldata (md5(q), md5(a), user_id, kb_id);
    -- CREATE INDEX modeldata_id_desc_idx ON modeldata (id DESC);
    -- vector index: see [pg vector](https://github.com/pgvector/pgvector) and tune it to your data volume
EOSQL
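
For reference, a nearest-neighbor lookup against modeldata might look like this (a sketch; $QVEC stands in for a full 1536-dimension pgvector literal such as '[0.01,0.02,...]', kb_id 'kb1' is illustrative, and <-> is pgvector's L2-distance operator):

    PGPASSWORD=1234 psql -h 127.0.0.1 -p 8100 -U fastgpt -d fastgpt -c \
      "SELECT id, q, a FROM modeldata WHERE kb_id = 'kb1' ORDER BY vector <-> '$QVEC' LIMIT 5;"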
19
files/deploy/fastgpt/run.sh
Normal file
@@ -0,0 +1,19 @@
#!/bin/bash
docker-compose pull
docker-compose up -d

echo "Docker Compose re-pull of images complete!"

# remove old local images
images=$(docker images --format "{{.ID}} {{.Repository}}" | grep fastgpt)

# put the image IDs and names into an array
IFS=$'\n' read -rd '' -a image_array <<<"$images"

# walk the array and delete all older images (index 0, the newest, is kept)
for ((i=1; i<${#image_array[@]}; i++))
do
  image=${image_array[$i]}
  image_id=${image%% *}
  docker rmi $image_id
done
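
Typical usage (an assumption; run from the deployment directory so docker-compose picks up the file above):

    chmod +x run.sh && ./run.sh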
243
files/models/ChatGLM2/openai_api.py
Normal file
@@ -0,0 +1,243 @@
# coding=utf-8
import time
import torch
import uvicorn
from pydantic import BaseModel, Field
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from typing import List, Literal, Optional, Union
from transformers import AutoTokenizer, AutoModel
from sse_starlette.sse import EventSourceResponse
from fastapi import Depends, Request
from starlette.status import HTTP_401_UNAUTHORIZED
import argparse
import tiktoken
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import PolynomialFeatures


@asynccontextmanager
async def lifespan(app: FastAPI):  # collects GPU memory
    yield
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ChatMessage(BaseModel):
    role: Literal["user", "assistant", "system"]
    content: str


class DeltaMessage(BaseModel):
    role: Optional[Literal["user", "assistant", "system"]] = None
    content: Optional[str] = None


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    max_length: Optional[int] = None
    stream: Optional[bool] = False


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Literal["stop", "length"]


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Literal["stop", "length"]]


class ChatCompletionResponse(BaseModel):
    model: str
    object: Literal["chat.completion", "chat.completion.chunk"]
    choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]]
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))


async def verify_token(request: Request):
    auth_header = request.headers.get('Authorization')
    if auth_header:
        token_type, _, token = auth_header.partition(' ')
        if token_type.lower() == "bearer" and token == "sk-aaabbbcccdddeeefffggghhhiiijjjkkk":  # configure your token here
            return True
    raise HTTPException(
        status_code=HTTP_401_UNAUTHORIZED,
        detail="Invalid authorization credentials",
    )


class EmbeddingRequest(BaseModel):
    input: List[str]
    model: str


class EmbeddingResponse(BaseModel):
    data: list
    model: str
    object: str
    usage: dict


def num_tokens_from_string(string: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding('cl100k_base')
    num_tokens = len(encoding.encode(string))
    return num_tokens


def expand_features(embedding, target_length):
    poly = PolynomialFeatures(degree=2)
    expanded_embedding = poly.fit_transform(embedding.reshape(1, -1))
    expanded_embedding = expanded_embedding.flatten()
    if len(expanded_embedding) > target_length:
        # if the expanded features exceed the target length, truncate (or reduce dimensions some other way)
        expanded_embedding = expanded_embedding[:target_length]
    elif len(expanded_embedding) < target_length:
        # if the expanded features fall short of the target length, pad (or increase dimensions some other way)
        expanded_embedding = np.pad(expanded_embedding, (0, target_length - len(expanded_embedding)))
    return expanded_embedding


@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest, token: bool = Depends(verify_token)):
    global model, tokenizer

    if request.messages[-1].role != "user":
        raise HTTPException(status_code=400, detail="Invalid request")
    query = request.messages[-1].content

    prev_messages = request.messages[:-1]
    if len(prev_messages) > 0 and prev_messages[0].role == "system":
        query = prev_messages.pop(0).content + query

    history = []
    if len(prev_messages) % 2 == 0:
        for i in range(0, len(prev_messages), 2):
            if prev_messages[i].role == "user" and prev_messages[i+1].role == "assistant":
                history.append([prev_messages[i].content, prev_messages[i+1].content])

    if request.stream:
        generate = predict(query, history, request.model)
        return EventSourceResponse(generate, media_type="text/event-stream")

    response, _ = model.chat(tokenizer, query, history=history)
    choice_data = ChatCompletionResponseChoice(
        index=0,
        message=ChatMessage(role="assistant", content=response),
        finish_reason="stop"
    )

    return ChatCompletionResponse(model=request.model, choices=[choice_data], object="chat.completion")


async def predict(query: str, history: List[List[str]], model_id: str):
    global model, tokenizer

    choice_data = ChatCompletionResponseStreamChoice(
        index=0,
        delta=DeltaMessage(role="assistant"),
        finish_reason=None
    )
    chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
    yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))

    current_length = 0

    for new_response, _ in model.stream_chat(tokenizer, query, history):
        if len(new_response) == current_length:
            continue

        new_text = new_response[current_length:]
        current_length = len(new_response)

        choice_data = ChatCompletionResponseStreamChoice(
            index=0,
            delta=DeltaMessage(content=new_text),
            finish_reason=None
        )
        chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
        yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))

    choice_data = ChatCompletionResponseStreamChoice(
        index=0,
        delta=DeltaMessage(),
        finish_reason="stop"
    )
    chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
    yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
    yield '[DONE]'


@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def get_embeddings(request: EmbeddingRequest, token: bool = Depends(verify_token)):

    # compute the embedding vectors and token counts
    embeddings = [embeddings_model.encode(text) for text in request.input]

    # if an embedding has fewer than 1536 dimensions, expand it to 1536 (via polynomial feature expansion)
    embeddings = [expand_features(embedding, 1536) if len(embedding) < 1536 else embedding for embedding in embeddings]

    # Min-Max normalization
    embeddings = [(embedding - np.min(embedding)) / (np.max(embedding) - np.min(embedding)) if np.max(embedding) != np.min(embedding) else embedding for embedding in embeddings]

    # convert the numpy arrays to plain lists
    embeddings = [embedding.tolist() for embedding in embeddings]
    prompt_tokens = sum(len(text.split()) for text in request.input)
    total_tokens = sum(num_tokens_from_string(text) for text in request.input)

    response = {
        "data": [
            {
                "embedding": embedding,
                "index": index,
                "object": "embedding"
            } for index, embedding in enumerate(embeddings)
        ],
        "model": request.model,
        "object": "list",
        "usage": {
            "prompt_tokens": prompt_tokens,
            "total_tokens": total_tokens,
        }
    }

    return response


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default="16", type=str, help="Model name")
    args = parser.parse_args()

    model_dict = {
        "4": "THUDM/chatglm2-6b-int4",
        "8": "THUDM/chatglm2-6b-int8",
        "16": "THUDM/chatglm2-6b"
    }

    model_name = model_dict.get(args.model_name, "THUDM/chatglm2-6b")

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True).cuda()
    embeddings_model = SentenceTransformer('moka-ai/m3e-large', device='cpu')

    uvicorn.run(app, host='0.0.0.0', port=6006, workers=1)
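
Once started, the server speaks the OpenAI wire format, so it can be exercised with plain curl (a sketch; the bearer token matches the hard-coded value in verify_token above, and the model field is only echoed back in the response):

    curl http://127.0.0.1:6006/v1/chat/completions \
      -H "Authorization: Bearer sk-aaabbbcccdddeeefffggghhhiiijjjkkk" \
      -H "Content-Type: application/json" \
      -d '{"model": "chatglm2-6b", "messages": [{"role": "user", "content": "Hello"}]}'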