From f0ecad6de156aa9c58e29c67040746194bbd1243 Mon Sep 17 00:00:00 2001 From: liqiang-fit2cloud Date: Thu, 6 Nov 2025 15:41:35 +0800 Subject: [PATCH] refactor: rename MAXKB_SANDBOX_PYTHON_ALLOW_HOSTS_REGEXES to MAXKB_SANDBOX_PYTHON_BANNED_HOSTS --- apps/common/utils/tool_code.py | 6 +- installer/Dockerfile-base | 2 +- installer/sandbox.c | 137 +++++++++++++-------------------- 3 files changed, 59 insertions(+), 86 deletions(-) diff --git a/apps/common/utils/tool_code.py b/apps/common/utils/tool_code.py index a8c0c1f35..7143dbff7 100644 --- a/apps/common/utils/tool_code.py +++ b/apps/common/utils/tool_code.py @@ -28,7 +28,7 @@ class ToolExecutor: if self.sandbox: os.system(f"chown -R {self.user}:root {self.sandbox_path}") self.banned_keywords = CONFIG.get("SANDBOX_PYTHON_BANNED_KEYWORDS", 'nothing_is_banned').split(','); - self.allow_host_regexes = CONFIG.get("SANDBOX_PYTHON_ALLOW_HOST_REGEXES", ''); + self.banned_hosts = CONFIG.get("SANDBOX_PYTHON_BANNED_HOSTS", ''); def _createdir(self): old_mask = os.umask(0o077) @@ -183,7 +183,7 @@ exec({dedent(code)!a}) 'cwd': self.sandbox_path, 'env': { 'LD_PRELOAD': '/opt/maxkb-app/sandbox/sandbox.so', - 'SANDBOX_ALLOW_HOST_REGEXES': self.allow_host_regexes, + 'SANDBOX_BANNED_HOSTS': self.banned_hosts, }, 'transport': 'stdio', } @@ -203,7 +203,7 @@ exec({dedent(code)!a}) kwargs = {'cwd': BASE_DIR} kwargs['env'] = { 'LD_PRELOAD': '/opt/maxkb-app/sandbox/sandbox.so', - 'SANDBOX_ALLOW_HOST_REGEXES': self.allow_host_regexes, + 'SANDBOX_BANNED_HOSTS': self.banned_hosts, } subprocess_result = subprocess.run( ['su', '-s', python_directory, '-c', "exec(open('" + exec_python_file + "').read())", self.user], diff --git a/installer/Dockerfile-base b/installer/Dockerfile-base index 80a6417e6..ebf6c00ce 100644 --- a/installer/Dockerfile-base +++ b/installer/Dockerfile-base @@ -47,7 +47,7 @@ ENV PATH=/opt/py3/bin:$PATH \ MAXKB_SANDBOX=1 \ 
MAXKB_SANDBOX_PYTHON_PACKAGE_PATHS="/opt/py3/lib/python3.11/site-packages,/opt/maxkb-app/sandbox/python-packages,/opt/maxkb/python-packages" \ MAXKB_SANDBOX_PYTHON_BANNED_KEYWORDS="subprocess.,system(,exec(,execve(,pty.,eval(,compile(,shutil.,input(,__import__" \ - MAXKB_SANDBOX_PYTHON_ALLOW_HOST_REGEXES=".*,!=127\.0\.0\.1,!=localhost,!=maxkb,!=pgsql,!=redis" \ + MAXKB_SANDBOX_PYTHON_BANNED_HOSTS="127.0.0.1,localhost,maxkb,pgsql,redis" \ MAXKB_ADMIN_PATH=/admin EXPOSE 6379 \ No newline at end of file diff --git a/installer/sandbox.c b/installer/sandbox.c index e71a49f3b..a06507c9a 100644 --- a/installer/sandbox.c +++ b/installer/sandbox.c @@ -1,125 +1,98 @@ #define _GNU_SOURCE -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include #include +#include -static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL; -static int (*real_getaddrinfo)(const char *, const char *, const struct addrinfo *, struct addrinfo **) = NULL; -static __thread char last_resolved_host[256] = {0}; -static __thread int last_host_checked = 0; // 标记是否已检查过域名(1=已检查且允许) +static const char *ENV_NAME = "SANDBOX_BANNED_HOSTS"; -/** 检查是否符合允许规则 */ -static int is_allowed_by_env(const char *target, const char *env_val) { - if (!target) return 0; - if (!env_val || !*env_val) { - fprintf(stderr, "[sandbox] ❌ No allow rules set — deny all by default\n"); - return 0; - } +/** + * 精确匹配黑名单 + * target: 待检测字符串 + * env_val: 逗号分隔的黑名单列表 + * 返回 1 = 匹配,0 = 不匹配 + */ +static int match_env_patterns(const char *target, const char *env_val) { + if (!target || !env_val || !*env_val) return 0; char *patterns = strdup(env_val); char *token = strtok(patterns, ","); - int allowed = 0; + int matched = 0; while (token) { + // 去掉前后空格 while (*token == ' ' || *token == '\t') token++; char *end = token + strlen(token) - 1; while (end > token && (*end == ' ' || *end == '\t')) *end-- = '\0'; if (*token) { - if (strncmp(token, "!=", 2) == 0) { - const char 
*pattern = token + 2; - regex_t regex; - if (regcomp(&regex, pattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) != 0) { - fprintf(stderr, "[sandbox] ⚠️ Invalid regex ignored: %s\n", pattern); - } else { - if (regexec(&regex, target, 0, NULL, 0) == 0) { - fprintf(stderr, "[sandbox] ❌ Deny %s (matched deny /%s/)\n", target, pattern); - regfree(&regex); - free(patterns); - return 0; - } + regex_t regex; + // 精确匹配,加 ^ 和 $,忽略大小写 + char fullpattern[512]; + snprintf(fullpattern, sizeof(fullpattern), "^%s$", token); + + if (regcomp(&regex, fullpattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) == 0) { + if (regexec(&regex, target, 0, NULL, 0) == 0) { + matched = 1; regfree(&regex); + break; } + regfree(&regex); } else { - regex_t regex; - if (regcomp(&regex, token, REG_EXTENDED | REG_NOSUB | REG_ICASE) != 0) { - fprintf(stderr, "[sandbox] ⚠️ Invalid regex ignored: %s\n", token); - } else { - if (regexec(&regex, target, 0, NULL, 0) == 0) - allowed = 1; - regfree(&regex); - } + fprintf(stderr, "[sandbox] ⚠️ Invalid regex '%s' — allowing host by default\n", token); } } + token = strtok(NULL, ","); } free(patterns); - return allowed; + return matched; } -/** 检查逻辑封装 */ -static int check_host(const char *host) { - const char *env = getenv("SANDBOX_ALLOW_HOST_REGEXES"); - return is_allowed_by_env(host, env); -} - -/** 拦截 getaddrinfo() — 检查域名 */ -int getaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, struct addrinfo **res) { - if (!real_getaddrinfo) - real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo"); - - if (node) { - strncpy(last_resolved_host, node, sizeof(last_resolved_host) - 1); - last_resolved_host[sizeof(last_resolved_host) - 1] = '\0'; - last_host_checked = 0; - - // 判断是否为纯 IP(跳过 IPv4/IPv6) - struct in_addr ipv4; - struct in6_addr ipv6; - int is_ip = (inet_pton(AF_INET, node, &ipv4) == 1) || - (inet_pton(AF_INET6, node, &ipv6) == 1); - - if (!is_ip) { - if (!check_host(node)) { - fprintf(stderr, "[sandbox] 🚫 Blocked DNS lookup for %s\n", node); - return EAI_FAIL; - } - 
last_host_checked = 1; // 已检查并通过 - } - } - - return real_getaddrinfo(node, service, hints, res); -} - -/** 拦截 connect() — 检查 IP(仅当没检查过域名) */ +/** 拦截 connect() —— 精确匹配 IP */ int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { + static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL; if (!real_connect) real_connect = dlsym(RTLD_NEXT, "connect"); - char ip[INET6_ADDRSTRLEN] = {0}; + const char *banned_env = getenv(ENV_NAME); + char ip[INET6_ADDRSTRLEN] = {0}; if (addr->sa_family == AF_INET) inet_ntop(AF_INET, &((struct sockaddr_in *)addr)->sin_addr, ip, sizeof(ip)); else if (addr->sa_family == AF_INET6) inet_ntop(AF_INET6, &((struct sockaddr_in6 *)addr)->sin6_addr, ip, sizeof(ip)); - // 如果域名已经检查通过,则跳过 IP 检查 - if (last_host_checked) { - return real_connect(sockfd, addr, addrlen); - } - - // 没有检查过域名(可能是 IP 直连,如 curl) - if (!check_host(ip)) { - fprintf(stderr, "[sandbox] 🚫 Blocked connect to %s (no domain check)\n", ip); + if (banned_env && match_env_patterns(ip, banned_env)) { + fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned\n", ip); + errno = EACCES; return -1; } return real_connect(sockfd, addr, addrlen); } + +/** 拦截 getaddrinfo() —— 精确匹配域名 */ +int getaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, struct addrinfo **res) { + static int (*real_getaddrinfo)(const char *, const char *, + const struct addrinfo *, struct addrinfo **) = NULL; + if (!real_getaddrinfo) + real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo"); + + const char *banned_env = getenv(ENV_NAME); + + if (banned_env && node && match_env_patterns(node, banned_env)) { + fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned\n", node); + return EAI_FAIL; // 模拟 DNS 失败 + } + + return real_getaddrinfo(node, service, hints, res); +}