mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
Python Sandbox (#4380)
* Python3 Sandbox (#3944) * update python box (#4251) * update python box * Adjust the height of the NodeCode border. * update python sandbox and add test systemcall bash * update sandbox * add VERSION_RELEASE (#4376) * save empty docx * fix pythonbox log error * fix: js template --------- Co-authored-by: dogfar <37035781+dogfar@users.noreply.github.com> Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> Co-authored-by: gggaaallleee <1293587368@qq.com>
This commit is contained in:
@@ -1,8 +1,22 @@
|
||||
# --------- install dependence -----------
|
||||
FROM python:3.11-alpine AS python_base
|
||||
|
||||
# Use the key=value form with no spaces around "=": with spaces, Docker parses
# this as the legacy "ENV <key> <value>" form and the value becomes "= Alpine3.11".
ENV VERSION_RELEASE=Alpine3.11
|
||||
# Install make and g++
|
||||
RUN apk add --no-cache make g++
|
||||
RUN apk add --no-cache make g++ tar wget gperf automake libtool linux-headers
|
||||
|
||||
WORKDIR /app
|
||||
COPY projects/sandbox/requirements.txt /app/requirements.txt
|
||||
RUN wget https://github.com/seccomp/libseccomp/releases/download/v2.5.5/libseccomp-2.5.5.tar.gz && \
|
||||
tar -zxvf libseccomp-2.5.5.tar.gz && \
|
||||
cd libseccomp-2.5.5 && \
|
||||
./configure --prefix=/usr && \
|
||||
make && \
|
||||
make install && \
|
||||
pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple Cython && \
|
||||
pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple -r /app/requirements.txt && \
|
||||
cd src/python && \
|
||||
python setup.py install
|
||||
|
||||
|
||||
FROM node:20.14.0-alpine AS install
|
||||
|
||||
@@ -10,7 +24,7 @@ WORKDIR /app
|
||||
|
||||
ARG proxy
|
||||
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
|
||||
RUN apk add --no-cache make g++
|
||||
RUN apk add --no-cache make g++ python3
|
||||
|
||||
# copy py3.11
|
||||
COPY --from=python_base /usr/local /usr/local
|
||||
@@ -42,9 +56,12 @@ RUN pnpm --filter=sandbox build
|
||||
FROM node:20.14.0-alpine AS runner
|
||||
WORKDIR /app
|
||||
|
||||
RUN apk add --no-cache libffi libffi-dev strace bash
|
||||
COPY --from=python_base /usr/local /usr/local
|
||||
COPY --from=builder /app/node_modules /app/node_modules
|
||||
COPY --from=builder /app/projects/sandbox /app/projects/sandbox
|
||||
|
||||
ENV NODE_ENV=production
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
CMD ["node", "--no-node-snapshot", "projects/sandbox/dist/main.js"]
|
||||
|
2
projects/sandbox/requirements.txt
Normal file
2
projects/sandbox/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
numpy
|
||||
pandas
|
130
projects/sandbox/src/sandbox/constants.ts
Normal file
130
projects/sandbox/src/sandbox/constants.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
export const pythonScript = `
|
||||
import subprocess
|
||||
import json
|
||||
import ast
|
||||
import base64
|
||||
|
||||
def extract_imports(code):
    """Collect every import statement found anywhere in *code*.

    Each imported name becomes its own single-name statement, so
    "import a, b" yields "import a" and "import b". Any "as" alias is
    dropped; only the imported name is kept.

    Args:
        code: Python source text.

    Returns:
        A list of import statement strings, in ast.walk order.

    Raises:
        SyntaxError: If *code* cannot be parsed.
    """
    imports = []
    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            imports.extend(f"import {alias.name}" for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; rebuild the dotted
            # prefix from node.level instead of emitting "from None import x".
            module = "." * node.level + (node.module or "")
            imports.extend(f"from {module} import {alias.name}" for alias in node.names)
    return imports
|
||||
# Python source text that run_pythonCode places ahead of the user's code in the
# child process. When executed it builds a seccomp SyscallFilter whose default
# action is KILL, allows the listed syscalls (plus write() on stdout/stderr
# only), and loads the filter.
# NOTE(review): the numeric rules (307, 318, 334) are raw syscall numbers and
# therefore architecture-specific; confirm the intended target architecture.
seccomp_prefix = """
|
||||
from seccomp import *
|
||||
import sys
|
||||
allowed_syscalls = [
|
||||
"syscall.SYS_ARCH_PRCTL", "syscall.SYS_BRK", "syscall.SYS_CLONE",
|
||||
"syscall.SYS_CLOSE", "syscall.SYS_EPOLL_CREATE1", "syscall.SYS_EXECVE",
|
||||
"syscall.SYS_EXIT", "syscall.SYS_EXIT_GROUP", "syscall.SYS_FCNTL",
|
||||
"syscall.SYS_FSTAT", "syscall.SYS_FUTEX", "syscall.SYS_GETDENTS64",
|
||||
"syscall.SYS_GETEGID", "syscall.SYS_GETEUID", "syscall.SYS_GETGID",
|
||||
"syscall.SYS_GETRANDOM", "syscall.SYS_GETTID", "syscall.SYS_GETUID",
|
||||
"syscall.SYS_IOCTL", "syscall.SYS_LSEEK", "syscall.SYS_LSTAT",
|
||||
"syscall.SYS_MBIND", "syscall.SYS_MEMBARRIER", "syscall.SYS_MMAP",
|
||||
"syscall.SYS_MPROTECT", "syscall.SYS_MUNMAP", "syscall.SYS_OPEN",
|
||||
"syscall.SYS_PREAD64", "syscall.SYS_READ", "syscall.SYS_READLINK",
|
||||
"syscall.SYS_READV", "syscall.SYS_RT_SIGACTION", "syscall.SYS_RT_SIGPROCMASK",
|
||||
"syscall.SYS_SCHED_GETAFFINITY", "syscall.SYS_SET_TID_ADDRESS",
|
||||
"syscall.SYS_STAT", "syscall.SYS_UNAME",
|
||||
"syscall.SYS_MREMAP", "syscall.SYS_RT_SIGRETURN", "syscall.SYS_SETUID",
|
||||
"syscall.SYS_SETGID", "syscall.SYS_GETPID", "syscall.SYS_GETPPID",
|
||||
"syscall.SYS_TGKILL", "syscall.SYS_SCHED_YIELD", "syscall.SYS_SET_ROBUST_LIST",
|
||||
"syscall.SYS_GET_ROBUST_LIST", "syscall.SYS_RSEQ", "syscall.SYS_CLOCK_GETTIME",
|
||||
"syscall.SYS_GETTIMEOFDAY", "syscall.SYS_NANOSLEEP", "syscall.SYS_EPOLL_CTL",
|
||||
"syscall.SYS_CLOCK_NANOSLEEP", "syscall.SYS_PSELECT6", "syscall.SYS_TIME",
|
||||
"syscall.SYS_SIGALTSTACK", "syscall.SYS_MKDIRAT", "syscall.SYS_MKDIR"
|
||||
]
|
||||
allowed_syscalls_tmp = allowed_syscalls
|
||||
L = []
|
||||
for item in allowed_syscalls_tmp:
|
||||
item = item.strip()
|
||||
parts = item.split(".")[1][4:].lower()
|
||||
L.append(parts)
|
||||
f = SyscallFilter(defaction=KILL)
|
||||
for item in L:
|
||||
f.add_rule(ALLOW, item)
|
||||
f.add_rule(ALLOW, "write", Arg(0, EQ, sys.stdout.fileno()))
|
||||
f.add_rule(ALLOW, "write", Arg(0, EQ, sys.stderr.fileno()))
|
||||
f.add_rule(ALLOW, 307)
|
||||
f.add_rule(ALLOW, 318)
|
||||
f.add_rule(ALLOW, 334)
|
||||
f.load()
|
||||
"""
|
||||
|
||||
def remove_print_statements(code):
    """Return *code* with every bare print(...) statement removed.

    Only expression statements that directly call the name "print" are
    removed; other uses of print (assigned, passed as an argument, called as
    an attribute) are left untouched. If removing prints would leave a
    compound statement with an empty suite (for example a function whose only
    statement was a print), a "pass" is inserted so the result stays valid
    Python.

    Args:
        code: Python source text.

    Returns:
        The rewritten source as produced by ast.unparse (formatting and
        comments of the original are not preserved).

    Raises:
        SyntaxError: If *code* cannot be parsed.
    """
    class PrintRemover(ast.NodeTransformer):
        def visit_Expr(self, node):
            call = node.value
            if (
                isinstance(call, ast.Call)
                and isinstance(call.func, ast.Name)
                and call.func.id == "print"
            ):
                return None
            return node

        def generic_visit(self, node):
            # Remember which statement lists were non-empty before the
            # children are visited, so that a list emptied by print removal
            # can be told apart from one that was empty all along (such as a
            # missing "else").
            populated = [
                name
                for name, value in ast.iter_fields(node)
                if isinstance(value, list) and value and isinstance(value[0], ast.stmt)
            ]
            super().generic_visit(node)
            # An empty module is valid; an empty suite anywhere else is not.
            if not isinstance(node, ast.Module):
                for name in populated:
                    if not getattr(node, name):
                        setattr(node, name, [ast.Pass()])
            return node

    tree = ast.parse(code)
    modified_tree = PrintRemover().visit(tree)
    ast.fix_missing_locations(modified_tree)
    return ast.unparse(modified_tree)
|
||||
|
||||
def detect_dangerous_imports(code):
    """Return the first imported module that belongs to a blocked package.

    The top-level package of every "import x.y" and "from x.y import z"
    statement is compared against the blocklist, so submodules such as
    "os.path" are rejected together with "os" itself.

    Args:
        code: Python source text.

    Returns:
        The dotted module name exactly as written in the offending import,
        or None when no blocked module is imported.

    Raises:
        SyntaxError: If *code* cannot be parsed.
    """
    dangerous_modules = {"os", "sys", "subprocess", "shutil", "socket", "ctypes", "multiprocessing", "threading", "pickle"}
    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            names = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; nothing to check then.
            names = [node.module] if node.module else []
        else:
            continue
        for name in names:
            # Compare the root package only, so "os.path" matches "os".
            if name.split(".")[0] in dangerous_modules:
                return name
    return None
|
||||
|
||||
def run_pythonCode(data:dict):
    """Run user code in a python3 child process and return the result of main().

    The user's code must define main(); it is called with the request
    variables as positional arguments, in dictionary order. The child
    program is assembled as: hoisted imports, seccomp_prefix, variable
    definitions, the user's code (with print statements removed), then a call
    to main() whose result is printed and parsed back with ast.literal_eval.

    NOTE: this source is embedded in a JavaScript template literal, so the
    newline escape below is written with a doubled backslash in order to
    reach Python as a single one.

    Args:
        data: Mapping with a "code" entry (Python source text) and a
            "variables" entry (mapping of variable name to value).

    Returns:
        The value returned by main() on success, otherwise a dict of the
        form {"error": message}.
    """
    if not data or "code" not in data or "variables" not in data:
        return {"error": "Invalid request format"}
    variables = data["variables"]
    if not isinstance(variables, dict):
        return {"error": "Invalid request format"}

    import keyword  # local import: only needed to validate variable names

    try:
        code = remove_print_statements(data["code"])
        dangerous_import = detect_dangerous_imports(code)
        imports = extract_imports(code)
    except SyntaxError as e:
        # Report unparsable user code as a normal error result instead of
        # letting the traceback escape.
        return {"error": f"Invalid Python code: {e}"}
    if dangerous_import:
        return {"error": f"Importing {dangerous_import} is not allowed."}

    newline = "\\n"
    var_def = ""
    for k, v in variables.items():
        if not k.isidentifier() or keyword.iskeyword(k):
            return {"error": f"Invalid variable name: {k}"}
        # repr() yields a valid literal for strings too, so quotes, newlines
        # and backslashes inside a value cannot break out of the assignment.
        var_def = var_def + f"{k} = {v!r}" + newline
    output_code = "res = main(" + ", ".join(variables) + ")" + newline + "print(res)"
    code = newline.join([newline.join(imports), seccomp_prefix, var_def, code, output_code])
    try:
        result = subprocess.run(["python3", "-c", code], capture_output=True, text=True, timeout=10)
        # A negative return code means the child was terminated by that signal
        # number; 31 is presumably SIGSYS, raised when the seccomp filter
        # kills the process -- TODO confirm on the target platform.
        if result.returncode == -31:
            return {"error": "Dangerous behavior detected."}
        if result.stderr != "":
            return {"error": result.stderr}

        out = ast.literal_eval(result.stdout.strip())
        return out
    except subprocess.TimeoutExpired:
        return {"error": "Timeout error"}
    except Exception as e:
        return {"error": str(e)}
|
||||
|
||||
`;
|
@@ -1,6 +1,6 @@
|
||||
import { Controller, Post, Body, HttpCode } from '@nestjs/common';
|
||||
import { RunCodeDto } from './dto/create-sandbox.dto';
|
||||
import { runSandbox } from './utils';
|
||||
import { runJsSandbox, runPythonSandbox } from './utils';
|
||||
|
||||
@Controller('sandbox')
|
||||
export class SandboxController {
|
||||
@@ -9,6 +9,12 @@ export class SandboxController {
|
||||
@Post('/js')
|
||||
@HttpCode(200)
|
||||
runJs(@Body() codeProps: RunCodeDto) {
|
||||
return runSandbox(codeProps);
|
||||
return runJsSandbox(codeProps);
|
||||
}
|
||||
|
||||
@Post('/python')
|
||||
@HttpCode(200)
|
||||
runPython(@Body() codeProps: RunCodeDto) {
|
||||
return runPythonSandbox(codeProps);
|
||||
}
|
||||
}
|
||||
|
@@ -6,24 +6,30 @@ import { timeDelay } from './jsFn/delay';
|
||||
import { strToBase64 } from './jsFn/str2Base64';
|
||||
import { createHmac } from './jsFn/crypto';
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import { pythonScript } from './constants';
|
||||
const CustomLogStr = 'CUSTOM_LOG';
|
||||
|
||||
/*
|
||||
Rewrite code to add custom functions: Promise function; Log.
|
||||
*/
|
||||
function getFnCode(code: string) {
|
||||
// rewrite log
|
||||
code = code.replace(/console\.log/g, `${CustomLogStr}`);
|
||||
export const runJsSandbox = async ({
|
||||
code,
|
||||
variables = {}
|
||||
}: RunCodeDto): Promise<RunCodeResponse> => {
|
||||
/*
|
||||
Rewrite code to add custom functions: Promise function; Log.
|
||||
*/
|
||||
function getFnCode(code: string) {
|
||||
// rewrite log
|
||||
code = code.replace(/console\.log/g, `${CustomLogStr}`);
|
||||
|
||||
// Promise function rewrite
|
||||
const rewriteSystemFn = `
|
||||
// Promise function rewrite
|
||||
const rewriteSystemFn = `
|
||||
const thisDelay = (...args) => global_delay.applySyncPromise(undefined,args)
|
||||
`;
|
||||
|
||||
// rewrite delay
|
||||
code = code.replace(/delay\((.*)\)/g, `thisDelay($1)`);
|
||||
// rewrite delay
|
||||
code = code.replace(/delay\((.*)\)/g, `thisDelay($1)`);
|
||||
|
||||
const runCode = `
|
||||
const runCode = `
|
||||
(async() => {
|
||||
try {
|
||||
${rewriteSystemFn}
|
||||
@@ -36,23 +42,18 @@ function getFnCode(code: string) {
|
||||
}
|
||||
})
|
||||
`;
|
||||
return runCode;
|
||||
}
|
||||
return runCode;
|
||||
}
|
||||
// Register global function
|
||||
function registerSystemFn(jail: IsolatedVM.Reference<Record<string | number | symbol, any>>) {
|
||||
return Promise.all([
|
||||
jail.set('global_delay', new Reference(timeDelay)),
|
||||
jail.set('countToken', countToken),
|
||||
jail.set('strToBase64', strToBase64),
|
||||
jail.set('createHmac', createHmac)
|
||||
]);
|
||||
}
|
||||
|
||||
// Register global function
|
||||
function registerSystemFn(jail: IsolatedVM.Reference<Record<string | number | symbol, any>>) {
|
||||
return Promise.all([
|
||||
jail.set('global_delay', new Reference(timeDelay)),
|
||||
jail.set('countToken', countToken),
|
||||
jail.set('strToBase64', strToBase64),
|
||||
jail.set('createHmac', createHmac)
|
||||
]);
|
||||
}
|
||||
|
||||
export const runSandbox = async ({
|
||||
code,
|
||||
variables = {}
|
||||
}: RunCodeDto): Promise<RunCodeResponse> => {
|
||||
const logData = [];
|
||||
|
||||
const isolate = new Isolate({ memoryLimit: 32 });
|
||||
@@ -106,3 +107,50 @@ export const runSandbox = async ({
|
||||
return Promise.reject(err);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Run user Python code through the embedded sandbox script.
 *
 * The request is handed to `run_pythonCode` (defined in `pythonScript`) inside a
 * `python3` child process, and the JSON it prints is returned as `codeReturn`.
 *
 * @param code - Python source that defines `main`.
 * @param variables - Values passed to `main`.
 * @returns `{ codeReturn, log }` on success; `log` is always an empty string.
 * @throws Rejects with an error message when the process fails, the sandbox
 *   reports an error, or the output is not valid JSON.
 */
export const runPythonSandbox = async ({
  code,
  variables = {}
}: RunCodeDto): Promise<RunCodeResponse> => {
  // Ship the request as base64-encoded JSON and decode it with json.loads on the
  // Python side. Interpolating the raw JSON as a Python literal breaks on
  // true / false / null, which are not Python names.
  const payload = Buffer.from(JSON.stringify({ code, variables }), 'utf-8').toString('base64');
  const mainCallCode = `
data = json.loads(base64.b64decode("${payload}").decode("utf-8"))
res = run_pythonCode(data)
print(json.dumps(res))
`;

  const fullCode = [pythonScript, mainCallCode].filter(Boolean).join('\n');

  const pythonProcess = spawn('python3', ['-u', '-c', fullCode]);

  // Keep raw buffers and decode once at the end, so a multi-byte character
  // split across two chunks is not corrupted.
  const stdoutChunks: Buffer[] = [];
  const stderrChunks: Buffer[] = [];

  pythonProcess.stdout.on('data', (data) => stdoutChunks.push(Buffer.from(data)));
  pythonProcess.stderr.on('data', (data) => stderrChunks.push(Buffer.from(data)));

  const stdoutPromise = new Promise<string>((resolve) => {
    // 'error' fires when the process cannot be spawned (e.g. python3 is not
    // installed); without a listener it would be thrown as an uncaught exception.
    pythonProcess.on('error', (err) => {
      resolve(JSON.stringify({ error: `Failed to start python3: ${err.message}` }));
    });
    pythonProcess.on('close', (exitCode) => {
      if (exitCode !== 0) {
        // Never resolve with an empty error string: it is falsy and would be
        // mistaken for a successful result below.
        const stderr = Buffer.concat(stderrChunks).toString('utf-8');
        resolve(
          JSON.stringify({ error: stderr || `Python process exited with code ${exitCode}` })
        );
      } else {
        resolve(Buffer.concat(stdoutChunks).toString('utf-8'));
      }
    });
  });
  const stdout = await stdoutPromise;

  try {
    const parsedOutput = JSON.parse(stdout);
    if (parsedOutput.error) {
      // ast.literal_eval failures are reported by the Python side through the
      // error field, so the hint has to be attached here.
      if (String(parsedOutput.error).includes('malformed node or string')) {
        return Promise.reject(
          `The result should be a parsable variable, such as a list. ${parsedOutput.error}`
        );
      }
      return Promise.reject(parsedOutput.error);
    }
    return { codeReturn: parsedOutput, log: '' };
  } catch (err) {
    const message = String(err);
    if (stdout.includes('malformed node or string on line 1')) {
      return Promise.reject(`The result should be a parsable variable, such as a list. ${stdout}`);
    } else if (
      // The JSON.parse failure text lives in the thrown error, not in stdout.
      message.includes('Unexpected end of JSON input') ||
      stdout.includes('Unexpected end of JSON input')
    ) {
      return Promise.reject(`Not allowed print or ${stdout}`);
    }
    return Promise.reject(`Run failed: ${err}`);
  }
};
|
||||
|
41
projects/sandbox/testSystemCall.sh
Normal file
41
projects/sandbox/testSystemCall.sh
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash
#
# Trace a sample sandbox workload with strace and print every system call it
# used as an "allowed_syscalls = [...]" list of "syscall.SYS_<NAME>" entries.
# Requires strace and python3 (with pandas installed) on PATH.

for tool in strace python3; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "error: $tool is required but was not found" >&2
    exit 1
  fi
done

temp_dir=$(mktemp -d)
trap 'rm -rf "$temp_dir"' EXIT

syscall_table_file="$temp_dir/syscall_table.txt"
code_file="$temp_dir/test_code.py"
strace_log="$temp_dir/strace.log"
syscalls_file="$temp_dir/syscalls.txt"

# The workload must actually call main(); defining it alone would only trace
# the interpreter start-up and the pandas import.
code='
import pandas as pd
def main():
    data = {"Name": ["Alice", "Bob"], "Age": [25, 30]}
    df = pd.DataFrame(data)
    return {
        "head": df.head().to_dict()
    }
print(main())
'

# Dump the syscall name/number table, falling back to the kernel headers when
# ausyscall is unavailable.
# NOTE(review): this table is written but not read anywhere below.
if ! ausyscall --dump > "$syscall_table_file" 2>/dev/null; then
  grep -E '^#define __NR_' /usr/include/asm/unistd_64.h | \
    sed 's/#define __NR_//;s/[ \t]\+/ /g' | \
    awk '{print $1, $2}' > "$syscall_table_file"
fi

printf '%s\n' "$code" > "$code_file"

# -ff writes one log per process/thread as "$strace_log.<pid>".
strace -ff -e trace=all -o "$strace_log" python3 "$code_file" >/dev/null 2>&1

# Each trace line starts with the syscall name; signal and exit markers start
# with "-" or "+" and are therefore skipped by the pattern.
cat "$strace_log"* 2>/dev/null | grep -oE '^[[:alnum:]_]+' | sort -u > "$syscalls_file"

allowed_syscalls=()
while IFS= read -r raw_name; do
  [ -n "$raw_name" ] || continue
  go_name=$(printf '%s' "$raw_name" | tr 'a-z' 'A-Z' | sed 's/-/_/g')
  allowed_syscalls+=("\"syscall.SYS_${go_name}\"")
done < "$syscalls_file"

if [ "${#allowed_syscalls[@]}" -eq 0 ]; then
  echo "error: no system calls were captured; check that strace can trace python3" >&2
  exit 1
fi

echo "allowed_syscalls = ["
printf ' %s,\n' "${allowed_syscalls[@]}" | paste -sd ' \n'
echo "]"
|
Reference in New Issue
Block a user