Python Sandbox (#4380)

* Python3 Sandbox (#3944)

* update python box (#4251)

* update python box

* Adjust the height of the NodeCode border.

* update python sandbox and add test systemcall bash

* update sandbox

* add VERSION_RELEASE (#4376)

* save empty docx

* fix pythonbox log error

* fix: js template

---------

Co-authored-by: dogfar <37035781+dogfar@users.noreply.github.com>
Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com>
Co-authored-by: gggaaallleee <1293587368@qq.com>
This commit is contained in:
Archer
2025-03-28 13:45:09 +08:00
committed by GitHub
parent 8323c2d27e
commit 565a966d19
23 changed files with 777 additions and 92 deletions

View File

@@ -1,8 +1,22 @@
# --------- install dependencies -----------
FROM python:3.11-alpine AS python_base
# Use the KEY=VALUE form. With spaces around "=", ENV falls back to the legacy
# "ENV key value" syntax and the variable is set to the literal "= Alpine3.11".
# NOTE(review): presumably read by the libseccomp Python binding build in this
# stage - confirm the expected value with that build script.
ENV VERSION_RELEASE=Alpine3.11
# Install make and g++
RUN apk add --no-cache make g++
RUN apk add --no-cache make g++ tar wget gperf automake libtool linux-headers
WORKDIR /app
COPY projects/sandbox/requirements.txt /app/requirements.txt
RUN wget https://github.com/seccomp/libseccomp/releases/download/v2.5.5/libseccomp-2.5.5.tar.gz && \
tar -zxvf libseccomp-2.5.5.tar.gz && \
cd libseccomp-2.5.5 && \
./configure --prefix=/usr && \
make && \
make install && \
pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple Cython && \
pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple -r /app/requirements.txt && \
cd src/python && \
python setup.py install
FROM node:20.14.0-alpine AS install
@@ -10,7 +24,7 @@ WORKDIR /app
ARG proxy
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories
RUN apk add --no-cache make g++
RUN apk add --no-cache make g++ python3
# copy py3.11
COPY --from=python_base /usr/local /usr/local
@@ -42,9 +56,12 @@ RUN pnpm --filter=sandbox build
FROM node:20.14.0-alpine AS runner
WORKDIR /app
RUN apk add --no-cache libffi libffi-dev strace bash
COPY --from=python_base /usr/local /usr/local
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/projects/sandbox /app/projects/sandbox
ENV NODE_ENV=production
ENV PATH="/usr/local/bin:${PATH}"
CMD ["node", "--no-node-snapshot", "projects/sandbox/dist/main.js"]

View File

@@ -0,0 +1,2 @@
numpy
pandas

View File

@@ -0,0 +1,130 @@
export const pythonScript = `
import subprocess
import json
import ast
import base64
def extract_imports(code):
    """Return one import statement string per name imported anywhere in code.

    Every ast.Import / ast.ImportFrom node found by walking the whole tree is
    expanded to one statement per imported name; aliases are dropped, so
    "import numpy as np" yields "import numpy".

    Relative imports keep their leading dots and tolerate a missing module
    name, so "from . import x" is reproduced as written instead of becoming
    the invalid statement "from None import x".

    Raises:
        SyntaxError: if code cannot be parsed.
    """
    imports = []
    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            imports.extend(f"import {alias.name}" for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # node.level counts the leading dots; node.module is None for "from . import x"
            source = "." * node.level + (node.module or "")
            imports.extend(f"from {source} import {alias.name}" for alias in node.names)
    return imports
# Source text that run_pythonCode places ahead of the user's code in the child
# interpreter. When executed it builds a seccomp SyscallFilter whose default
# action is KILL and then allows:
#   - every entry of allowed_syscalls, after dropping the "syscall.SYS_" prefix
#     and lower-casing the rest (e.g. "syscall.SYS_BRK" becomes "brk");
#   - write, but only when argument 0 equals the stdout or stderr descriptor;
#   - three syscalls given by raw number (307, 318, 334).
# NOTE(review): raw syscall numbers are architecture specific; presumably these
# target x86_64 - confirm before running on another architecture.
# The text below is runtime data: do not add comments inside the string.
seccomp_prefix = """
from seccomp import *
import sys
allowed_syscalls = [
"syscall.SYS_ARCH_PRCTL", "syscall.SYS_BRK", "syscall.SYS_CLONE",
"syscall.SYS_CLOSE", "syscall.SYS_EPOLL_CREATE1", "syscall.SYS_EXECVE",
"syscall.SYS_EXIT", "syscall.SYS_EXIT_GROUP", "syscall.SYS_FCNTL",
"syscall.SYS_FSTAT", "syscall.SYS_FUTEX", "syscall.SYS_GETDENTS64",
"syscall.SYS_GETEGID", "syscall.SYS_GETEUID", "syscall.SYS_GETGID",
"syscall.SYS_GETRANDOM", "syscall.SYS_GETTID", "syscall.SYS_GETUID",
"syscall.SYS_IOCTL", "syscall.SYS_LSEEK", "syscall.SYS_LSTAT",
"syscall.SYS_MBIND", "syscall.SYS_MEMBARRIER", "syscall.SYS_MMAP",
"syscall.SYS_MPROTECT", "syscall.SYS_MUNMAP", "syscall.SYS_OPEN",
"syscall.SYS_PREAD64", "syscall.SYS_READ", "syscall.SYS_READLINK",
"syscall.SYS_READV", "syscall.SYS_RT_SIGACTION", "syscall.SYS_RT_SIGPROCMASK",
"syscall.SYS_SCHED_GETAFFINITY", "syscall.SYS_SET_TID_ADDRESS",
"syscall.SYS_STAT", "syscall.SYS_UNAME",
"syscall.SYS_MREMAP", "syscall.SYS_RT_SIGRETURN", "syscall.SYS_SETUID",
"syscall.SYS_SETGID", "syscall.SYS_GETPID", "syscall.SYS_GETPPID",
"syscall.SYS_TGKILL", "syscall.SYS_SCHED_YIELD", "syscall.SYS_SET_ROBUST_LIST",
"syscall.SYS_GET_ROBUST_LIST", "syscall.SYS_RSEQ", "syscall.SYS_CLOCK_GETTIME",
"syscall.SYS_GETTIMEOFDAY", "syscall.SYS_NANOSLEEP", "syscall.SYS_EPOLL_CTL",
"syscall.SYS_CLOCK_NANOSLEEP", "syscall.SYS_PSELECT6", "syscall.SYS_TIME",
"syscall.SYS_SIGALTSTACK", "syscall.SYS_MKDIRAT", "syscall.SYS_MKDIR"
]
allowed_syscalls_tmp = allowed_syscalls
L = []
for item in allowed_syscalls_tmp:
item = item.strip()
parts = item.split(".")[1][4:].lower()
L.append(parts)
f = SyscallFilter(defaction=KILL)
for item in L:
f.add_rule(ALLOW, item)
f.add_rule(ALLOW, "write", Arg(0, EQ, sys.stdout.fileno()))
f.add_rule(ALLOW, "write", Arg(0, EQ, sys.stderr.fileno()))
f.add_rule(ALLOW, 307)
f.add_rule(ALLOW, 318)
f.add_rule(ALLOW, 334)
f.load()
"""
def remove_print_statements(code):
    """Return code with every statement-level print(...) call neutralised.

    Only expression statements whose value is a direct call to the bare name
    print are affected; anything else (for example a print nested inside
    another expression) is left alone.

    Each matching statement is replaced by pass rather than deleted. Deleting
    it would leave an empty body when the print was the only statement of a
    function, loop or branch, and the unparsed source would no longer compile.

    Raises:
        SyntaxError: if code cannot be parsed.
    """
    class PrintRemover(ast.NodeTransformer):
        def visit_Expr(self, node):
            call = node.value
            if (
                isinstance(call, ast.Call)
                and isinstance(call.func, ast.Name)
                and call.func.id == "print"
            ):
                # Keep the statement slot occupied so the enclosing body stays valid
                return ast.copy_location(ast.Pass(), node)
            return node

    tree = PrintRemover().visit(ast.parse(code))
    ast.fix_missing_locations(tree)
    return ast.unparse(tree)
def detect_dangerous_imports(code):
    """Return the first imported module that is on the deny list, else None.

    A module is rejected when its top-level package is on the list, so
    "import os.path" and "from os.path import join" are caught as well as
    "import os". The dotted name is returned exactly as written in the code.

    Only import statements are inspected; dynamic imports such as calls to
    __import__ or importlib are not detected here.

    Raises:
        SyntaxError: if code cannot be parsed.
    """
    dangerous_modules = {"os", "sys", "subprocess", "shutil", "socket", "ctypes", "multiprocessing", "threading", "pickle"}
    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            names = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"
            names = [node.module] if node.module else []
        else:
            continue
        for name in names:
            if name.split(".")[0] in dangerous_modules:
                return name
    return None
def run_pythonCode(data:dict):
    """Run user-supplied code in a child interpreter behind the seccomp filter.

    Args:
        data: Mapping with "code" (source text expected to define main) and
            "variables" (mapping of argument name to value; the values are
            handed to main positionally, in mapping order).

    Returns:
        The value the child printed as the result of main, parsed with
        ast.literal_eval, or a dict of the form {"error": message} when the
        request is malformed, the code is rejected, or the run fails.
    """
    if not data or "code" not in data or "variables" not in data:
        return {"error": "Invalid request format"}
    variables = data["variables"]
    if not isinstance(variables, dict):
        return {"error": "Invalid request format"}

    import keyword  # local import: only needed for the name check below

    # Names are spliced into generated source, so they must be plain identifiers.
    # Values are emitted with repr so that quotes, backslashes and line breaks
    # inside strings cannot terminate the literal and inject code.
    assignments = []
    for name, value in variables.items():
        if not isinstance(name, str) or not name.isidentifier() or keyword.iskeyword(name):
            return {"error": f"Invalid variable name: {name}"}
        assignments.append(f"{name} = {value!r}")
    call_line = "res = main(" + ", ".join(variables) + ")"

    try:
        code = remove_print_statements(data["code"])
        dangerous_import = detect_dangerous_imports(code)
        if dangerous_import:
            return {"error": f"Importing {dangerous_import} is not allowed."}
        # The newline escape uses a doubled backslash because this script is
        # embedded in a JavaScript template literal.
        imports = "\\n".join(extract_imports(code))
    except SyntaxError as e:
        # Report unparsable user code as a normal error result
        return {"error": str(e)}

    # Imports are hoisted ahead of the seccomp prefix so they run before the
    # syscall filter is loaded.
    program = "\\n".join(
        [imports, seccomp_prefix, *assignments, "", code, call_line, "print(res)"]
    )
    try:
        result = subprocess.run(["python3", "-c", program], capture_output=True, text=True, timeout=10)
        # A negative return code means the child was killed by that signal;
        # 31 is presumably SIGSYS raised by the seccomp KILL action.
        if result.returncode == -31:
            return {"error": "Dangerous behavior detected."}
        if result.stderr != "":
            return {"error": result.stderr}
        return ast.literal_eval(result.stdout.strip())
    except subprocess.TimeoutExpired:
        return {"error": "Timeout error"}
    except Exception as e:
        return {"error": str(e)}
`;

View File

@@ -1,6 +1,6 @@
import { Controller, Post, Body, HttpCode } from '@nestjs/common';
import { RunCodeDto } from './dto/create-sandbox.dto';
import { runSandbox } from './utils';
import { runJsSandbox, runPythonSandbox } from './utils';
@Controller('sandbox')
export class SandboxController {
@@ -9,6 +9,12 @@ export class SandboxController {
@Post('/js')
@HttpCode(200)
runJs(@Body() codeProps: RunCodeDto) {
return runSandbox(codeProps);
return runJsSandbox(codeProps);
}
@Post('/python')
@HttpCode(200)
runPython(@Body() codeProps: RunCodeDto) {
return runPythonSandbox(codeProps);
}
}

View File

@@ -6,24 +6,30 @@ import { timeDelay } from './jsFn/delay';
import { strToBase64 } from './jsFn/str2Base64';
import { createHmac } from './jsFn/crypto';
import { spawn } from 'child_process';
import { pythonScript } from './constants';
const CustomLogStr = 'CUSTOM_LOG';
/*
Rewrite code to add custom functions: Promise function; Log.
*/
function getFnCode(code: string) {
// rewrite log
code = code.replace(/console\.log/g, `${CustomLogStr}`);
export const runJsSandbox = async ({
code,
variables = {}
}: RunCodeDto): Promise<RunCodeResponse> => {
/*
Rewrite code to add custom functions: Promise function; Log.
*/
function getFnCode(code: string) {
// rewrite log
code = code.replace(/console\.log/g, `${CustomLogStr}`);
// Promise function rewrite
const rewriteSystemFn = `
// Promise function rewrite
const rewriteSystemFn = `
const thisDelay = (...args) => global_delay.applySyncPromise(undefined,args)
`;
// rewrite delay
code = code.replace(/delay\((.*)\)/g, `thisDelay($1)`);
// rewrite delay
code = code.replace(/delay\((.*)\)/g, `thisDelay($1)`);
const runCode = `
const runCode = `
(async() => {
try {
${rewriteSystemFn}
@@ -36,23 +42,18 @@ function getFnCode(code: string) {
}
})
`;
return runCode;
}
return runCode;
}
// Register global function
function registerSystemFn(jail: IsolatedVM.Reference<Record<string | number | symbol, any>>) {
return Promise.all([
jail.set('global_delay', new Reference(timeDelay)),
jail.set('countToken', countToken),
jail.set('strToBase64', strToBase64),
jail.set('createHmac', createHmac)
]);
}
// Register global function
function registerSystemFn(jail: IsolatedVM.Reference<Record<string | number | symbol, any>>) {
return Promise.all([
jail.set('global_delay', new Reference(timeDelay)),
jail.set('countToken', countToken),
jail.set('strToBase64', strToBase64),
jail.set('createHmac', createHmac)
]);
}
export const runSandbox = async ({
code,
variables = {}
}: RunCodeDto): Promise<RunCodeResponse> => {
const logData = [];
const isolate = new Isolate({ memoryLimit: 32 });
@@ -106,3 +107,50 @@ export const runSandbox = async ({
return Promise.reject(err);
}
};
/**
 * Run user Python code through the embedded sandbox script.
 *
 * The sandbox script (pythonScript) is concatenated with a short driver that
 * decodes the request, calls run_pythonCode and prints the result as JSON.
 * The whole program is executed with `python3 -u -c`.
 *
 * @param code      Python source from the request.
 * @param variables Values handed to the sandbox alongside the code.
 * @returns `{ codeReturn, log }` where codeReturn is the parsed JSON printed
 *          by the driver; rejects with a message when the run reports an
 *          error or the output cannot be parsed.
 */
export const runPythonSandbox = async ({
  code,
  variables = {}
}: RunCodeDto): Promise<RunCodeResponse> => {
  // Embed the request as a JSON *string* and decode it with json.loads.
  // Pasting raw JSON as a Python literal breaks on true / false / null,
  // which are not Python names. A JSON-encoded string only uses escapes that
  // Python string literals also understand.
  const payload = JSON.stringify(JSON.stringify({ code, variables }));
  const mainCallCode = `
data = json.loads(${payload})
res = run_pythonCode(data)
print(json.dumps(res))
`;

  const fullCode = [pythonScript, mainCallCode].filter(Boolean).join('\n');

  const stdout = await new Promise<string>((resolve) => {
    const pythonProcess = spawn('python3', ['-u', '-c', fullCode]);
    const stdoutChunks: string[] = [];
    const stderrChunks: string[] = [];

    pythonProcess.stdout.on('data', (data) => stdoutChunks.push(data.toString()));
    pythonProcess.stderr.on('data', (data) => stderrChunks.push(data.toString()));

    // A failed spawn emits 'error'; without a listener it would surface as an
    // unhandled error event instead of a rejected request.
    pythonProcess.on('error', (err) => {
      resolve(JSON.stringify({ error: `Failed to start python3: ${err.message}` }));
    });

    pythonProcess.on('close', (exitCode) => {
      if (exitCode !== 0) {
        // Never report an empty error: an empty string would be treated as success below
        const message = stderrChunks.join('') || `Python process exited with code ${exitCode}`;
        resolve(JSON.stringify({ error: message }));
      } else {
        resolve(stdoutChunks.join(''));
      }
    });
  });

  try {
    const parsedOutput = JSON.parse(stdout);
    // parsedOutput is null when the Python result is None, hence the optional access
    if (parsedOutput?.error) {
      return Promise.reject(parsedOutput.error);
    }
    return { codeReturn: parsedOutput, log: '' };
  } catch (err) {
    // NOTE(review): these checks look at stdout, not at the parse error text - confirm intent.
    if (stdout.includes('malformed node or string on line 1')) {
      return Promise.reject(`The result should be a parsable variable, such as a list. ${stdout}`);
    } else if (stdout.includes('Unexpected end of JSON input')) {
      return Promise.reject(`Not allowed print or ${stdout}`);
    }
    return Promise.reject(`Run failed: ${err}`);
  }
};

View File

@@ -0,0 +1,41 @@
#!/bin/bash
# Trace a sample Python program with strace and print the syscalls it used as
# an "allowed_syscalls = [...]" list of "syscall.SYS_<NAME>" strings.

# All intermediate files live in a temporary directory removed on exit.
temp_dir=$(mktemp -d)
trap 'rm -rf "$temp_dir"' EXIT
syscall_table_file="$temp_dir/syscall_table.txt"
code_file="$temp_dir/test_code.py"
strace_log="$temp_dir/strace.log"
syscalls_file="$temp_dir/syscalls.txt"
# Sample program to trace: it imports pandas and defines main(), which is not
# called anywhere in this text.
code='
import pandas as pd
def main():
data = {"Name": ["Alice", "Bob"], "Age": [25, 30]}
df = pd.DataFrame(data)
return {
"head": df.head().to_dict()
}
'
# Dump the syscall table with ausyscall; if that fails, derive "name number"
# pairs from the __NR_ defines in unistd_64.h instead.
# NOTE(review): nothing below reads $syscall_table_file - confirm it is still needed.
if ! ausyscall --dump > "$syscall_table_file" 2>/dev/null; then
grep -E '^#define __NR_' /usr/include/asm/unistd_64.h | \
sed 's/#define __NR_//;s/[ \t]\+/ /g' | \
awk '{print $1, $2}' > "$syscall_table_file"
fi
echo "$code" > "$code_file"
# Run the sample under strace; its own stdout and stderr are discarded.
strace -ff -e trace=all -o "$strace_log" python3 "$code_file" >/dev/null 2>&1
# The glob picks up every file whose name starts with the log path. The leading
# identifier of each line is taken as the syscall name, then de-duplicated.
cat "$strace_log"* 2>/dev/null | grep -oE '^[[:alnum:]_]+' | sort -u > "$syscalls_file"
allowed_syscalls=()
# Upper-case each name, turn "-" into "_" and wrap it as "syscall.SYS_<NAME>".
while read raw_name; do
go_name=$(echo "$raw_name" | tr 'a-z' 'A-Z' | sed 's/-/_/g')
allowed_syscalls+=("\"syscall.SYS_${go_name}\"")
done < "$syscalls_file"
# Print the collected entries between the list brackets.
echo "allowed_syscalls = ["
printf ' %s,\n' "${allowed_syscalls[@]}" | paste -sd ' \n'
echo "]"