diff --git a/apps/common/utils/tool_code.py b/apps/common/utils/tool_code.py index 902622647..1cd690974 100644 --- a/apps/common/utils/tool_code.py +++ b/apps/common/utils/tool_code.py @@ -27,8 +27,8 @@ class ToolExecutor: else: self.sandbox_path = os.path.join(PROJECT_DIR, 'data', 'sandbox') self.user = None - self.banned_keywords = CONFIG.get("SANDBOX_PYTHON_BANNED_KEYWORDS", 'nothing_is_banned').split(','); self.sandbox_so_path = f'{self.sandbox_path}/sandbox.so' + self.process_timeout_seconds = int(CONFIG.get("SANDBOX_PYTHON_PROCESS_TIMEOUT_SECONDS", '3600')) try: self._init_dir() except Exception as e: @@ -60,21 +60,22 @@ class ToolExecutor: os.system(f"chown -R {self.user}:root {tmp_dir_path}") if os.path.exists(self.sandbox_so_path): os.chmod(self.sandbox_so_path, 0o440) - # 初始化host黑名单 - banned_hosts_file_path = f'{self.sandbox_path}/.SANDBOX_BANNED_HOSTS' - if os.path.exists(banned_hosts_file_path): - os.remove(banned_hosts_file_path) + # 初始化sandbox配置文件 + sandbox_conf_file_path = f'{self.sandbox_path}/.sandbox.conf' + if os.path.exists(sandbox_conf_file_path): + os.remove(sandbox_conf_file_path) + allow_subprocess = CONFIG.get("SANDBOX_PYTHON_ALLOW_SUBPROCESS", '0') banned_hosts = CONFIG.get("SANDBOX_PYTHON_BANNED_HOSTS", '').strip() if banned_hosts: hostname = socket.gethostname() local_ip = socket.gethostbyname(hostname) banned_hosts = f"{banned_hosts},{hostname},{local_ip}" - with open(banned_hosts_file_path, "w") as f: - f.write(banned_hosts) - os.chmod(banned_hosts_file_path, 0o440) + with open(sandbox_conf_file_path, "w") as f: + f.write(f"SANDBOX_PYTHON_BANNED_HOSTS={banned_hosts}\n") + f.write(f"SANDBOX_PYTHON_ALLOW_SUBPROCESS={allow_subprocess}\n") + os.chmod(sandbox_conf_file_path, 0o440) def exec_code(self, code_str, keywords, function_name=None): - self.validate_banned_keywords(code_str) _id = str(uuid.uuid7()) success = '{"code":200,"msg":"成功","data":exec_result}' err = '{"code":500,"msg":str(e),"data":None}' @@ -115,8 +116,6 @@ except 
Exception as e: raise Exception(result.get('msg')) def _generate_mcp_server_code(self, _code, params): - self.validate_banned_keywords(_code) - # 解析代码,提取导入语句和函数定义 try: tree = ast.parse(_code) @@ -230,19 +229,19 @@ exec({dedent(code)!a}) } maxkb_logger.debug(f"Sandbox execute code: {_code}") compressed_and_base64_encoded_code_str = base64.b64encode(gzip.compress(_code.encode())).decode() - subprocess_result = subprocess.run( - ['su', '-s', python_directory, '-c', - f'import base64,gzip; exec(gzip.decompress(base64.b64decode(\'{compressed_and_base64_encoded_code_str}\')).decode())', - self.user], - text=True, - capture_output=True, **kwargs) + try: + subprocess_result = subprocess.run( + ['su', '-s', python_directory, '-c', + f'import base64,gzip; exec(gzip.decompress(base64.b64decode(\'{compressed_and_base64_encoded_code_str}\')).decode())', + self.user], + text=True, + capture_output=True, + timeout=self.process_timeout_seconds, + **kwargs) + except subprocess.TimeoutExpired: + raise Exception(_("Sandbox process execution timeout, consider increasing MAXKB_SANDBOX_PYTHON_PROCESS_TIMEOUT_SECONDS.")) return subprocess_result - def validate_banned_keywords(self, code_str): - matched = next((bad for bad in self.banned_keywords if bad in code_str), None) - if matched: - raise Exception(f"keyword '{matched}' is banned in the tool.") - def validate_mcp_transport(self, code_str): servers = json.loads(code_str) for server, config in servers.items(): diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py index 82f1d7185..7825501e4 100644 --- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py +++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/tti.py @@ -9,6 +9,7 @@ from common.forms import BaseForm, PasswordInputField, SingleSelect, SliderField from models_provider.base_model_provider import BaseModelCredential, 
ValidCode from common.utils.logger import maxkb_logger + class QwenModelParams(BaseForm): """ Parameters class for the Qwen Text-to-Image model. @@ -26,7 +27,8 @@ class QwenModelParams(BaseForm): {'value': '1280*720', 'label': '1280*720'}, ], text_field='label', - value_field='value' + value_field='value', + attrs={'allow-create': True, 'filterable': True} ) n = SliderField( diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py index 6ff912482..2ca3696af 100644 --- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py +++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/model/tti.py @@ -2,7 +2,7 @@ from http import HTTPStatus from typing import Dict -from dashscope import ImageSynthesis +from dashscope import ImageSynthesis, MultiModalConversation from django.utils.translation import gettext from langchain_community.chat_models import ChatTongyi from langchain_core.messages import HumanMessage @@ -46,17 +46,48 @@ class QwenTextToImageModel(MaxKBBaseModel, BaseTextToImage): chat.invoke([HumanMessage([{"type": "text", "text": gettext('Hello')}])]) def generate_image(self, prompt: str, negative_prompt: str = None): - rsp = ImageSynthesis.call(api_key=self.api_key, - model=self.model_name, - base_url='https://dashscope.aliyuncs.com/compatible-mode/v1', - prompt=prompt, - negative_prompt=negative_prompt, - **self.params) - file_urls = [] - if rsp.status_code == HTTPStatus.OK: - for result in rsp.output.results: - file_urls.append(result.url) - else: - maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' % - (rsp.status_code, rsp.code, rsp.message)) - return file_urls + if not self.model_name.startswith("qwen"):  # every non-qwen model keeps the ImageSynthesis path that all models used before this change, so the method always returns a list + rsp = ImageSynthesis.call(api_key=self.api_key, + model=self.model_name, + base_url='https://dashscope.aliyuncs.com/compatible-mode/v1', + prompt=prompt, + negative_prompt=negative_prompt, + **self.params) + 
file_urls = [] + if rsp.status_code == HTTPStatus.OK: + for result in rsp.output.results: + file_urls.append(result.url) + else: + maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' % + (rsp.status_code, rsp.code, rsp.message)) + return file_urls + else:  # qwen image models are served through MultiModalConversation + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": prompt + } + ] + } + ] + rsp = MultiModalConversation.call( + api_key=self.api_key, + model=self.model_name, + messages=messages, + result_format='message', + base_url='https://dashscope.aliyuncs.com/v1', + stream=False, + negative_prompt=negative_prompt, + **self.params + ) + file_urls = [] + if rsp.status_code == HTTPStatus.OK: + for result in rsp.output.choices: + file_urls.append(result.message.content[0].get('image')) + else: + maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' % + (rsp.status_code, rsp.code, rsp.message)) + return file_urls diff --git a/apps/tools/serializers/tool.py b/apps/tools/serializers/tool.py index 64fab3656..c9e1d02b1 100644 --- a/apps/tools/serializers/tool.py +++ b/apps/tools/serializers/tool.py @@ -354,7 +354,6 @@ class ToolSerializer(serializers.Serializer): self.is_valid(raise_exception=True) ToolCreateRequest(data=instance).is_valid(raise_exception=True) # 校验代码是否包括禁止的关键字 - ToolExecutor().validate_banned_keywords(instance.get('code', '')) if instance.get('tool_type') == ToolType.MCP: ToolExecutor().validate_mcp_transport(instance.get('code', '')) @@ -391,7 +390,6 @@ class ToolSerializer(serializers.Serializer): def test_connection(self): self.is_valid(raise_exception=True) # 校验代码是否包括禁止的关键字 - ToolExecutor().validate_banned_keywords(self.data.get('code', '')) ToolExecutor().validate_mcp_transport(self.data.get('code', '')) # 校验mcp json @@ -486,7 +484,6 @@ class ToolSerializer(serializers.Serializer): self.is_valid(raise_exception=True) ToolEditRequest(data=instance).is_valid(raise_exception=True) # 
校验代码是否包括禁止的关键字 - ToolExecutor().validate_banned_keywords(instance.get('code', '')) if instance.get('tool_type') == ToolType.MCP: ToolExecutor().validate_mcp_transport(instance.get('code', '')) diff --git a/installer/Dockerfile-base b/installer/Dockerfile-base index 4e4eccb9f..a772326fb 100644 --- a/installer/Dockerfile-base +++ b/installer/Dockerfile-base @@ -48,7 +48,6 @@ ENV PATH=/opt/py3/bin:$PATH \ MAXKB_SANDBOX=1 \ MAXKB_SANDBOX_HOME=/opt/maxkb-app/sandbox \ MAXKB_SANDBOX_PYTHON_PACKAGE_PATHS="/opt/py3/lib/python3.11/site-packages,/opt/maxkb-app/sandbox/python-packages,/opt/maxkb/python-packages" \ - MAXKB_SANDBOX_PYTHON_BANNED_KEYWORDS="subprocess.,system(,exec(,execve(,pty.,eval(,compile(,shutil.,input(,__import__" \ MAXKB_SANDBOX_PYTHON_BANNED_HOSTS="127.0.0.1,localhost,host.docker.internal,maxkb,pgsql,redis" \ MAXKB_ADMIN_PATH=/admin diff --git a/installer/sandbox.c b/installer/sandbox.c index 9d3a7c928..fecdb27cf 100644 --- a/installer/sandbox.c +++ b/installer/sandbox.c @@ -11,68 +11,91 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include -static const char *BANNED_FILE_NAME = ".SANDBOX_BANNED_HOSTS"; +#define CONFIG_FILE ".sandbox.conf" -/** - * 从 .so 文件所在目录读取 .SANDBOX_BANNED_HOSTS 文件内容 - * 返回 malloc 出的字符串(需 free),读取失败则返回空字符串 - */ -static char *load_banned_hosts() { - Dl_info info; - if (dladdr((void *)load_banned_hosts, &info) == 0 || !info.dli_fname) { - fprintf(stderr, "[sandbox] ⚠️ Unable to locate shared object path — allowing all hosts\n"); - return strdup(""); +static char *banned_hosts = NULL; +static int allow_subprocess = 0; // 默认禁止 + +static void load_sandbox_config() { + Dl_info info; + if (dladdr((void *)load_sandbox_config, &info) == 0 || !info.dli_fname) { + banned_hosts = strdup(""); + allow_subprocess = 0; + return; + } + char so_path[PATH_MAX]; + strncpy(so_path, info.dli_fname, sizeof(so_path)); + so_path[sizeof(so_path) - 1] = '\0'; + char *dir = dirname(so_path); + char 
config_path[PATH_MAX]; + snprintf(config_path, sizeof(config_path), "%s/%s", dir, CONFIG_FILE); + FILE *fp = fopen(config_path, "r"); + if (!fp) { + banned_hosts = strdup(""); + allow_subprocess = 0; + return; + } + char line[4096]; /* same capacity as the 4096-byte buffer of the removed loader, so a long banned-hosts line is not cut off by fgets */ + banned_hosts = strdup(""); + allow_subprocess = 0; + while (fgets(line, sizeof(line), fp)) { + char *key = strtok(line, "="); + char *value = strtok(NULL, "\n"); + if (!key || !value) continue; + while (*key == ' ' || *key == '\t') key++; + char *kend = key + strlen(key) - 1; + while (kend > key && (*kend == ' ' || *kend == '\t')) *kend-- = '\0'; + while (*value == ' ' || *value == '\t') value++; + char *vend = value + strlen(value) - 1; + while (vend > value && (*vend == ' ' || *vend == '\t')) *vend-- = '\0'; + if (strcmp(key, "SANDBOX_PYTHON_BANNED_HOSTS") == 0) { + free(banned_hosts); + banned_hosts = strdup(value); + } else if (strcmp(key, "SANDBOX_PYTHON_ALLOW_SUBPROCESS") == 0) { + allow_subprocess = atoi(value); + } } - - char so_path[PATH_MAX]; - strncpy(so_path, info.dli_fname, sizeof(so_path)); - so_path[sizeof(so_path) - 1] = '\0'; - - char *dir = dirname(so_path); - char file_path[PATH_MAX]; - snprintf(file_path, sizeof(file_path), "%s/%s", dir, BANNED_FILE_NAME); - - FILE *fp = fopen(file_path, "r"); - if (!fp) { - fprintf(stderr, "[sandbox] ⚠️ Cannot open %s — allowing all hosts\n", file_path); - return strdup(""); - } - - char *buf = malloc(4096); - if (!buf) { - fclose(fp); - fprintf(stderr, "[sandbox] ⚠️ Memory allocation failed — allowing all hosts\n"); - return strdup(""); - } - - size_t len = fread(buf, 1, 4095, fp); - buf[len] = '\0'; fclose(fp); - return buf; } - +static void ensure_config_loaded() { + if (!banned_hosts) load_sandbox_config(); +} +static int is_sandbox_user() { + uid_t uid = getuid(); + struct passwd *pw = getpwuid(uid); + if (!pw || !pw->pw_name) { + return 1; // cannot identify the user → treat it as the sandbox user + } + if (strcmp(pw->pw_name, "sandbox") == 0) { + return 1; + } + return 0; +} /** * 精确匹配黑名单 */ static int 
match_env_patterns(const char *target, const char *env_val) { if (!target || !env_val || !*env_val) return 0; - char *patterns = strdup(env_val); char *token = strtok(patterns, ","); int matched = 0; - while (token) { - // 去掉前后空格 while (*token == ' ' || *token == '\t') token++; char *end = token + strlen(token) - 1; while (end > token && (*end == ' ' || *end == '\t')) *end-- = '\0'; - if (*token) { regex_t regex; char fullpattern[512]; snprintf(fullpattern, sizeof(fullpattern), "^%s$", token); - if (regcomp(&regex, fullpattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) == 0) { if (regexec(&regex, target, 0, NULL, 0) == 0) { matched = 1; @@ -80,13 +103,10 @@ static int match_env_patterns(const char *target, const char *env_val) { break; } regfree(&regex); - } else { - fprintf(stderr, "[sandbox] ⚠️ Invalid regex '%s' — allowing host by default\n", token); } } token = strtok(NULL, ","); } - free(patterns); return matched; } @@ -96,22 +116,17 @@ int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL; if (!real_connect) real_connect = dlsym(RTLD_NEXT, "connect"); - - static char *banned_env = NULL; - if (!banned_env) banned_env = load_banned_hosts(); - + ensure_config_loaded(); char ip[INET6_ADDRSTRLEN] = {0}; if (addr->sa_family == AF_INET) inet_ntop(AF_INET, &((struct sockaddr_in *)addr)->sin_addr, ip, sizeof(ip)); else if (addr->sa_family == AF_INET6) inet_ntop(AF_INET6, &((struct sockaddr_in6 *)addr)->sin6_addr, ip, sizeof(ip)); - - if (banned_env && *banned_env && match_env_patterns(ip, banned_env)) { + if (is_sandbox_user() && banned_hosts && *banned_hosts && match_env_patterns(ip, banned_hosts)) { fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned\n", ip); errno = EACCES; // EACCES 的值是 13, 意思是 Permission denied return -1; } - return real_connect(sockfd, addr, addrlen); } @@ -122,23 +137,133 @@ int getaddrinfo(const char *node, const char *service, const struct addrinfo *, 
struct addrinfo **) = NULL; if (!real_getaddrinfo) real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo"); - - static char *banned_env = NULL; - if (!banned_env) banned_env = load_banned_hosts(); - - if (banned_env && *banned_env && node) { + ensure_config_loaded(); + if (banned_hosts && *banned_hosts && node) { // 检测 node 是否是 IP struct in_addr ipv4; struct in6_addr ipv6; int is_ip = (inet_pton(AF_INET, node, &ipv4) == 1) || (inet_pton(AF_INET6, node, &ipv6) == 1); - // 只对“非IP的域名”进行屏蔽 - if (!is_ip && match_env_patterns(node, banned_env)) { + if (is_sandbox_user() && !is_ip && match_env_patterns(node, banned_hosts )) { fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned (DNS blocked)\n", node); return EAI_FAIL; // 模拟 DNS 层禁止 } } - return real_getaddrinfo(node, service, hints, res); } +/* ------------------ 禁止创建子进程------------------ */ +static int allow_create_subprocess() { + ensure_config_loaded(); + return allow_subprocess || !is_sandbox_user(); +} +static int deny() { + fprintf(stderr, "[sandbox] Permission denied to create subprocess in sandbox.\n"); + _exit(1); + return -1; +} +#define RESOLVE_REAL(func) \ + static typeof(func) *real_##func = NULL; \ + if (!real_##func) { \ + real_##func = dlsym(RTLD_NEXT, #func); \ + } + +int execve(const char *filename, char *const argv[], char *const envp[]) { + RESOLVE_REAL(execve); + if (!allow_create_subprocess()) return deny(); + return real_execve(filename, argv, envp); +} + +int execveat(int dirfd, const char *pathname, + char *const argv[], char *const envp[], int flags) { + RESOLVE_REAL(execveat); + if (!allow_create_subprocess()) return deny(); + return real_execveat(dirfd, pathname, argv, envp, flags); +} + +pid_t fork(void) { + RESOLVE_REAL(fork); + if (!allow_create_subprocess()) return deny(); + return real_fork(); +} + +pid_t vfork(void) { + RESOLVE_REAL(vfork); + if (!allow_create_subprocess()) return deny(); + return real_vfork(); +} + +int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, 
...) { + RESOLVE_REAL(clone); + if (!allow_create_subprocess()) return deny(); + va_list ap; + va_start(ap, arg); + long a4 = va_arg(ap, long); + long a5 = va_arg(ap, long), a6 = va_arg(ap, long); /* glibc clone() takes three optional arguments: parent_tid, tls, child_tid */ + va_end(ap); + return real_clone(fn, child_stack, flags, arg, (void *)a4, (void *)a5, (void *)a6); +} + +int posix_spawn(pid_t *pid, const char *path, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *attrp, + char *const argv[], char *const envp[]) { + RESOLVE_REAL(posix_spawn); + if (!allow_create_subprocess()) return deny(); + return real_posix_spawn(pid, path, file_actions, attrp, argv, envp); +} + +int posix_spawnp(pid_t *pid, const char *file, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *attrp, + char *const argv[], char *const envp[]) { + RESOLVE_REAL(posix_spawnp); + if (!allow_create_subprocess()) return deny(); + return real_posix_spawnp(pid, file, file_actions, attrp, argv, envp); +} +int __posix_spawn(pid_t *pid, const char *path, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *attrp, + char *const argv[], char *const envp[]) { + RESOLVE_REAL(__posix_spawn); + if (!allow_create_subprocess()) return deny(); + return real___posix_spawn(pid, path, file_actions, attrp, argv, envp); +} + +int __posix_spawnp(pid_t *pid, const char *file, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *attrp, + char *const argv[], char *const envp[]) { + RESOLVE_REAL(__posix_spawnp); + if (!allow_create_subprocess()) return deny(); + return real___posix_spawnp(pid, file, file_actions, attrp, argv, envp); +} + +int system(const char *command) { + RESOLVE_REAL(system); + if (!allow_create_subprocess()) return deny(); + return real_system(command); +} +int __libc_system(const char *command) { + RESOLVE_REAL(__libc_system); + if (!allow_create_subprocess()) return deny(); + return real___libc_system(command); +} +static long (*real_syscall)(long, ...) = NULL; /* static: keep the cached pointer out of the preloaded library's exported symbols */ +long syscall(long number, ...) 
{ + if (!real_syscall) real_syscall = dlsym(RTLD_NEXT, "syscall"); + va_list ap; + va_start(ap, number); + long a1 = va_arg(ap, long); + long a2 = va_arg(ap, long); + long a3 = va_arg(ap, long); + long a4 = va_arg(ap, long); + long a5 = va_arg(ap, long); + long a6 = va_arg(ap, long); + va_end(ap); + if (number == SYS_execve || number == SYS_execveat || + number == SYS_fork || number == SYS_vfork || number == SYS_clone) { + if (!allow_create_subprocess()) return deny(); + } + return real_syscall(number, a1, a2, a3, a4, a5, a6); +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 9d4dd900f..6b31cc8db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,16 +20,16 @@ dependencies = [ "cffi==1.17.1", "beautifulsoup4==4.13.4", "jieba==0.42.1", - "langchain-openai==0.3.31", - "langchain-anthropic==0.3.17", - "langchain-community==0.3.27", - "langchain-deepseek==0.1.3", - "langchain-google-genai==2.1.7", - "langchain-mcp-adapters==0.1.9", - "langchain-huggingface==0.3.0", - "langchain-ollama==0.3.4", + "langchain-openai==0.3.35", + "langchain-anthropic==0.3.22", + "langchain-community==0.3.31", + "langchain-deepseek==0.1.4", + "langchain-google-genai==2.1.12", + "langchain-mcp-adapters==0.1.13", + "langchain-huggingface==0.3.1", + "langchain-ollama==0.3.10", + "langchain_core==0.3.80", "langgraph==0.5.3", - "langchain_core==0.3.74", "torch==2.8.0", "sentence-transformers==5.0.0", "qianfan==0.4.12.3", @@ -38,7 +38,7 @@ dependencies = [ "boto3==1.39.4", "tencentcloud-sdk-python==3.0.1420", "xinference-client==1.7.1.post1", - "anthropic==0.57.1", + "anthropic==0.74.1", "dashscope==1.23.8", "celery[sqlalchemy]==5.5.3", "django-celery-beat==2.8.1",