refactor: rename MAXKB_SANDBOX_PYTHON_ALLOW_HOST_REGEXES to MAXKB_SANDBOX_PYTHON_BANNED_HOSTS

This commit is contained in:
liqiang-fit2cloud 2025-11-06 15:41:35 +08:00
parent 1b464c4d8f
commit f0ecad6de1
3 changed files with 59 additions and 86 deletions

View File

@ -28,7 +28,7 @@ class ToolExecutor:
if self.sandbox:
os.system(f"chown -R {self.user}:root {self.sandbox_path}")
self.banned_keywords = CONFIG.get("SANDBOX_PYTHON_BANNED_KEYWORDS", 'nothing_is_banned').split(',');
self.allow_host_regexes = CONFIG.get("SANDBOX_PYTHON_ALLOW_HOST_REGEXES", '');
self.banned_hosts = CONFIG.get("SANDBOX_PYTHON_BANNED_HOSTS", '');
def _createdir(self):
old_mask = os.umask(0o077)
@ -183,7 +183,7 @@ exec({dedent(code)!a})
'cwd': self.sandbox_path,
'env': {
'LD_PRELOAD': '/opt/maxkb-app/sandbox/sandbox.so',
'SANDBOX_ALLOW_HOST_REGEXES': self.allow_host_regexes,
'SANDBOX_BANNED_HOSTS': self.banned_hosts,
},
'transport': 'stdio',
}
@ -203,7 +203,7 @@ exec({dedent(code)!a})
kwargs = {'cwd': BASE_DIR}
kwargs['env'] = {
'LD_PRELOAD': '/opt/maxkb-app/sandbox/sandbox.so',
'SANDBOX_ALLOW_HOST_REGEXES': self.allow_host_regexes,
'SANDBOX_BANNED_HOSTS': self.banned_hosts,
}
subprocess_result = subprocess.run(
['su', '-s', python_directory, '-c', "exec(open('" + exec_python_file + "').read())", self.user],

View File

@ -47,7 +47,7 @@ ENV PATH=/opt/py3/bin:$PATH \
MAXKB_SANDBOX=1 \
MAXKB_SANDBOX_PYTHON_PACKAGE_PATHS="/opt/py3/lib/python3.11/site-packages,/opt/maxkb-app/sandbox/python-packages,/opt/maxkb/python-packages" \
MAXKB_SANDBOX_PYTHON_BANNED_KEYWORDS="subprocess.,system(,exec(,execve(,pty.,eval(,compile(,shutil.,input(,__import__" \
MAXKB_SANDBOX_PYTHON_ALLOW_HOST_REGEXES=".*,!=127\.0\.0\.1,!=localhost,!=maxkb,!=pgsql,!=redis" \
MAXKB_SANDBOX_PYTHON_BANNED_HOSTS="127.0.0.1,localhost,maxkb,pgsql,redis" \
MAXKB_ADMIN_PATH=/admin
EXPOSE 6379

View File

@ -1,125 +1,98 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <dlfcn.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <unistd.h>
#include <sys/socket.h>
#include <errno.h>
// Pointer to the next connect() in symbol lookup order; filled in via dlsym(RTLD_NEXT, ...) on first use.
static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL;
// Pointer to the next getaddrinfo() in symbol lookup order; filled in via dlsym(RTLD_NEXT, ...) on first use.
static int (*real_getaddrinfo)(const char *, const char *, const struct addrinfo *, struct addrinfo **) = NULL;
// Per-thread copy of the most recent node name passed to getaddrinfo().
static __thread char last_resolved_host[256] = {0};
static __thread int last_host_checked = 0; // Whether the host name has already been checked (1 = checked and allowed)
// Name of the environment variable that holds the comma-separated list of banned hosts.
static const char *ENV_NAME = "SANDBOX_BANNED_HOSTS";
/**
 * Check whether `target` satisfies the allow rules given in `env_val`.
 * Returns 0 (deny) when `target` is NULL or when no rules are configured.
 * NOTE(review): only the opening lines of this function are visible here;
 * the remainder is not shown contiguously — confirm against the full file.
 */
static int is_allowed_by_env(const char *target, const char *env_val) {
if (!target) return 0;
if (!env_val || !*env_val) {
// A missing or empty rule list denies everything.
fprintf(stderr, "[sandbox] ❌ No allow rules set — deny all by default\n");
return 0;
}
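/**
 * Check whether target matches any pattern in a comma-separated list.
 * target: the string to test (a host name or an IP address)
 * env_val: the comma-separated pattern list taken from the environment variable
 * Returns 1 = matched, 0 = not matched
 */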
static int match_env_patterns(const char *target, const char *env_val) {
// NOTE(review): this span appears to interleave lines of two different
// implementations (one tracking `allowed`, one tracking `matched`); the
// braces do not balance as shown — confirm against the full file.
if (!target || !env_val || !*env_val) return 0;
// strtok() writes into the string it splits, so tokenize a private copy.
char *patterns = strdup(env_val);
char *token = strtok(patterns, ",");
int allowed = 0;
int matched = 0;
while (token) {
// Trim leading and trailing whitespace
while (*token == ' ' || *token == '\t') token++;
char *end = token + strlen(token) - 1;
while (end > token && (*end == ' ' || *end == '\t')) *end-- = '\0';
if (*token) {
if (strncmp(token, "!=", 2) == 0) {
const char *pattern = token + 2;
regex_t regex;
if (regcomp(&regex, pattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) != 0) {
fprintf(stderr, "[sandbox] ⚠️ Invalid regex ignored: %s\n", pattern);
} else {
if (regexec(&regex, target, 0, NULL, 0) == 0) {
fprintf(stderr, "[sandbox] ❌ Deny %s (matched deny /%s/)\n", target, pattern);
regfree(&regex);
free(patterns);
return 0;
}
regex_t regex;
// Exact match: anchor the token with ^ and $, case-insensitive
char fullpattern[512];
snprintf(fullpattern, sizeof(fullpattern), "^%s$", token);
if (regcomp(&regex, fullpattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) == 0) {
if (regexec(&regex, target, 0, NULL, 0) == 0) {
matched = 1;
regfree(&regex);
break;
}
regfree(&regex);
} else {
regex_t regex;
if (regcomp(&regex, token, REG_EXTENDED | REG_NOSUB | REG_ICASE) != 0) {
fprintf(stderr, "[sandbox] ⚠️ Invalid regex ignored: %s\n", token);
} else {
if (regexec(&regex, target, 0, NULL, 0) == 0)
allowed = 1;
regfree(&regex);
}
fprintf(stderr, "[sandbox] ⚠️ Invalid regex '%s' — allowing host by default\n", token);
}
}
token = strtok(NULL, ",");
}
free(patterns);
return allowed;
return matched;
}
/**
 * Convenience wrapper around the allow-rule check.
 * Reads the rule list from the SANDBOX_ALLOW_HOST_REGEXES environment
 * variable and returns whatever is_allowed_by_env() decides for `host`.
 */
static int check_host(const char *host) {
    return is_allowed_by_env(host, getenv("SANDBOX_ALLOW_HOST_REGEXES"));
}
/**
 * Interposed getaddrinfo() — vets host names before they are resolved.
 *
 * The node name is remembered in last_resolved_host and the per-thread
 * last_host_checked flag is reset. Literal IPv4/IPv6 addresses are passed
 * through unchecked; any other name must be accepted by check_host(),
 * otherwise the lookup fails with EAI_FAIL. A name that passes sets
 * last_host_checked to 1.
 */
int getaddrinfo(const char *node, const char *service,
                const struct addrinfo *hints, struct addrinfo **res) {
    if (real_getaddrinfo == NULL) {
        real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo");
    }

    if (node != NULL) {
        const size_t capacity = sizeof(last_resolved_host);
        strncpy(last_resolved_host, node, capacity - 1);
        last_resolved_host[capacity - 1] = '\0';
        last_host_checked = 0;

        /* A name counts as a host name only if it parses as neither IPv4 nor IPv6. */
        struct in_addr addr4;
        struct in6_addr addr6;
        int is_hostname = inet_pton(AF_INET, node, &addr4) != 1 &&
                          inet_pton(AF_INET6, node, &addr6) != 1;

        if (is_hostname) {
            if (check_host(node) == 0) {
                fprintf(stderr, "[sandbox] 🚫 Blocked DNS lookup for %s\n", node);
                return EAI_FAIL;
            }
            /* Checked and allowed. */
            last_host_checked = 1;
        }
    }

    return real_getaddrinfo(node, service, hints, res);
}
/** Intercept connect() — check the IP (only when no host name was checked) */
/** Intercept connect() — exact-match the IP */
int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
// NOTE(review): this span appears to interleave lines of two different
// implementations (`ip` is declared twice and the braces do not balance
// as shown) — confirm against the full file.
static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL;
if (!real_connect)
real_connect = dlsym(RTLD_NEXT, "connect");
char ip[INET6_ADDRSTRLEN] = {0};
const char *banned_env = getenv(ENV_NAME);
char ip[INET6_ADDRSTRLEN] = {0};
// Render the destination address as text; ip stays empty for other address families.
if (addr->sa_family == AF_INET)
inet_ntop(AF_INET, &((struct sockaddr_in *)addr)->sin_addr, ip, sizeof(ip));
else if (addr->sa_family == AF_INET6)
inet_ntop(AF_INET6, &((struct sockaddr_in6 *)addr)->sin6_addr, ip, sizeof(ip));
// If the host name was already checked and allowed, skip the IP check
if (last_host_checked) {
return real_connect(sockfd, addr, addrlen);
}
// No host name was checked (possibly a direct IP connection, e.g. curl)
if (!check_host(ip)) {
fprintf(stderr, "[sandbox] 🚫 Blocked connect to %s (no domain check)\n", ip);
if (banned_env && match_env_patterns(ip, banned_env)) {
fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned\n", ip);
errno = EACCES;
return -1;
}
return real_connect(sockfd, addr, addrlen);
}
/**
 * Interposed getaddrinfo() — refuses to resolve banned host names.
 *
 * The comma-separated ban list is read on every call from the environment
 * variable named by ENV_NAME. If `node` matches an entry (as decided by
 * match_env_patterns()), the lookup is rejected; otherwise the call is
 * forwarded to the next getaddrinfo() in symbol lookup order.
 *
 * One trailing dot on `node` is ignored for the ban check: "localhost." is
 * the absolute spelling of "localhost" and would otherwise slip past the
 * anchored exact match.
 *
 * @param node     host name or address string to resolve (may be NULL)
 * @param service  service name or port string, forwarded unchanged
 * @param hints    lookup hints, forwarded unchanged
 * @param res      receives the result list, forwarded unchanged
 * @return the real getaddrinfo() result; EAI_FAIL when `node` is banned;
 *         EAI_SYSTEM (errno = ENOSYS) when the real function cannot be found
 */
int getaddrinfo(const char *node, const char *service,
                const struct addrinfo *hints, struct addrinfo **res) {
    static int (*real_getaddrinfo)(const char *, const char *,
                                   const struct addrinfo *, struct addrinfo **) = NULL;
    if (!real_getaddrinfo) {
        real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo");
        if (!real_getaddrinfo) {
            /* Calling through a NULL pointer would crash the sandboxed process. */
            fprintf(stderr, "[sandbox] ⚠️ Unable to locate the real getaddrinfo()\n");
            errno = ENOSYS;
            return EAI_SYSTEM;
        }
    }

    const char *banned_env = getenv(ENV_NAME);
    if (banned_env && node) {
        /* Match against the name without its root-label dot, if any. */
        char host[256];
        const char *candidate = node;
        size_t len = strlen(node);
        if (len > 1 && len <= sizeof(host) && node[len - 1] == '.') {
            memcpy(host, node, len - 1);
            host[len - 1] = '\0';
            candidate = host;
        }
        if (match_env_patterns(candidate, banned_env)) {
            fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned\n", node);
            return EAI_FAIL; /* Simulate a DNS failure */
        }
    }

    return real_getaddrinfo(node, service, hints, res);
}