diff --git a/apps/common/utils/tool_code.py b/apps/common/utils/tool_code.py index 1cd690974..fa6b79749 100644 --- a/apps/common/utils/tool_code.py +++ b/apps/common/utils/tool_code.py @@ -7,6 +7,8 @@ import os import socket import subprocess import sys +import signal +import time import uuid_utils.compat as uuid from common.utils.logger import maxkb_logger from django.utils.translation import gettext_lazy as _ @@ -84,13 +86,14 @@ class ToolExecutor: python_paths = CONFIG.get_sandbox_python_package_paths().split(',') _exec_code = f""" try: - import sys, json, base64, builtins + import os, sys, json, base64, builtins path_to_exclude = ['/opt/py3/lib/python3.11/site-packages', '/opt/maxkb-app/apps'] sys.path = [p for p in sys.path if p not in path_to_exclude] sys.path += {python_paths} locals_v={'{}'} keywords={keywords} globals_v={'{}'} + os.environ.clear() exec({dedent(code_str)!a}, globals_v, locals_v) f_name, f = {action_function} for local in locals_v: @@ -182,16 +185,14 @@ except Exception as e: python_paths = CONFIG.get_sandbox_python_package_paths().split(',') code = self._generate_mcp_server_code(code_str, params) return f""" -import os -import sys -import logging +import os, sys, logging logging.basicConfig(level=logging.WARNING) logging.getLogger("mcp").setLevel(logging.ERROR) logging.getLogger("mcp.server").setLevel(logging.ERROR) - path_to_exclude = ['/opt/py3/lib/python3.11/site-packages', '/opt/maxkb-app/apps'] sys.path = [p for p in sys.path if p not in path_to_exclude] sys.path += {python_paths} +os.environ.clear() exec({dedent(code)!a}) """ @@ -223,24 +224,40 @@ exec({dedent(code)!a}) return tool_config def _exec_sandbox(self, _code): - kwargs = {'cwd': BASE_DIR} - kwargs['env'] = { + kwargs = {'cwd': BASE_DIR, 'env': { 'LD_PRELOAD': self.sandbox_so_path, - } + }} maxkb_logger.debug(f"Sandbox execute code: {_code}") compressed_and_base64_encoded_code_str = base64.b64encode(gzip.compress(_code.encode())).decode() + cmd = [ + 'su', '-s', python_directory, '-c', + f'import base64,gzip; exec(gzip.decompress(base64.b64decode(\'{compressed_and_base64_encoded_code_str}\')).decode())', + self.user + ] try: - subprocess_result = subprocess.run( - ['su', '-s', python_directory, '-c', - f'import base64,gzip; exec(gzip.decompress(base64.b64decode(\'{compressed_and_base64_encoded_code_str}\')).decode())', - self.user], + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, text=True, - capture_output=True, - timeout=self.process_timeout_seconds, - **kwargs) + **kwargs, + start_new_session=True + ) + proc.wait(timeout=self.process_timeout_seconds) + return subprocess.CompletedProcess( + proc.args, + proc.returncode, + proc.stdout.read(), + proc.stderr.read() + ) except subprocess.TimeoutExpired: + pgid = os.getpgid(proc.pid) + os.killpg(pgid, signal.SIGTERM) #温和终止 + time.sleep(1) #留出短暂时间让进程清理 + if proc.poll() is None: #如果仍未终止,强制终止 + os.killpg(pgid, signal.SIGKILL) + proc.wait() raise Exception(_("Sandbox process execution timeout, consider increasing MAXKB_SANDBOX_PYTHON_PROCESS_TIMEOUT_SECONDS.")) - return subprocess_result def validate_mcp_transport(self, code_str): servers = json.loads(code_str) diff --git a/apps/oss/views/file.py b/apps/oss/views/file.py index b9f6ca67d..185c76c19 100644 --- a/apps/oss/views/file.py +++ b/apps/oss/views/file.py @@ -1,5 +1,8 @@ # coding=utf-8 import base64 +import ipaddress +import socket +from urllib.parse import urlparse import requests from django.utils.translation import gettext_lazy as _ @@ -83,7 +86,17 @@ class GetUrlView(APIView): ) def get(self, request: Request): url = request.query_params.get('url') - response = requests.get(url) + parsed = validate_url(url) + + response = requests.get( + url, + timeout=3, + allow_redirects=False + ) + final_host = urlparse(response.url).hostname + if is_private_ip(final_host): + raise ValueError("Blocked unsafe redirect to internal host") + # 返回状态码 响应内容大小 响应的contenttype 还有字节流 content_type = response.headers.get('Content-Type', '') # 根据内容类型决定如何处理 @@ -99,3 +112,43 @@ class GetUrlView(APIView): 'Content-Type': content_type, 'content': content, }) + + +def is_private_ip(host: str) -> bool: + """检测 IP 是否属于内网、环回、云 metadata 的危险地址""" + try: + ip = ipaddress.ip_address(socket.gethostbyname(host)) + return ( + ip.is_private or + ip.is_loopback or + ip.is_reserved or + ip.is_link_local or + ip.is_multicast + ) + except Exception: + return True + + +def validate_url(url: str): + """验证 URL 是否安全""" + if not url: + raise ValueError("URL is required") + + parsed = urlparse(url) + + # 仅允许 http / https + if parsed.scheme not in ("http", "https"): + raise ValueError("Only http and https are allowed") + + host = parsed.hostname + path = parsed.path + + # 域名不能为空 + if not host: + raise ValueError("Invalid URL") + + # 禁止访问内部、保留、环回、云 metadata + if is_private_ip(host): + raise ValueError("Access to internal IP addresses is blocked") + + return parsed diff --git a/apps/system_manage/serializers/user_resource_permission.py b/apps/system_manage/serializers/user_resource_permission.py index b34b8544e..2efaa5a62 100644 --- a/apps/system_manage/serializers/user_resource_permission.py +++ b/apps/system_manage/serializers/user_resource_permission.py @@ -9,6 +9,7 @@ import json import os +from django.contrib.postgres.fields import ArrayField from django.core.cache import cache from django.db import models from django.db.models import QuerySet, Q, TextField @@ -343,10 +344,13 @@ class ResourceUserPermissionSerializer(serializers.Serializer): "role": models.CharField(), "role_setting.type": models.CharField(), "user_role_relation.workspace_id": models.CharField(), + 'tmp.type_list': ArrayField(models.CharField()), + 'tmp.role_name_list_str': models.CharField() })) nick_name = instance.get('nick_name') username = instance.get('username') + role_name = instance.get('role') permission = instance.get('permission') query_p_list = [None if p == "NOT_AUTH" else p for p in permission] @@ -375,15 +379,31 @@ class ResourceUserPermissionSerializer(serializers.Serializer): **{"u.id__in": QuerySet(workspace_user_role_mapping_model).filter( workspace_id=self.data.get('workspace_id')).values("user_id")}) if is_x_pack_ee: - user_query_set = user_query_set.filter( - **{'role_setting.type': "USER", 'user_role_relation.workspace_id': self.data.get('workspace_id')}) + user_query_set = user_query_set.filter(**{ + "tmp.type_list__contains": ["USER"] + }) + role_name_and_type_query_set = QuerySet(model=get_dynamics_model({ + 'user_role_relation.workspace_id': models.CharField(), + })).filter(**{ + "user_role_relation.workspace_id": self.data.get('workspace_id'), + }) + if role_name: + user_query_set = user_query_set.filter( + **{'tmp.role_name_list_str__icontains': str(role_name)} + ) + + return { + 'workspace_user_resource_permission_query_set': workspace_user_resource_permission_query_set, + 'user_query_set': user_query_set, + 'role_name_and_type_query_set': role_name_and_type_query_set + } else: user_query_set = user_query_set.filter( **{'role': "USER"}) - return { - 'workspace_user_resource_permission_query_set': workspace_user_resource_permission_query_set, - 'user_query_set': user_query_set - } + return { + 'workspace_user_resource_permission_query_set': workspace_user_resource_permission_query_set, + 'user_query_set': user_query_set + } def list(self, instance, with_valid=True): if with_valid: diff --git a/apps/system_manage/sql/get_resource_user_permission_detail_ee.sql b/apps/system_manage/sql/get_resource_user_permission_detail_ee.sql index a90050016..f910e9479 100644 --- a/apps/system_manage/sql/get_resource_user_permission_detail_ee.sql +++ b/apps/system_manage/sql/get_resource_user_permission_detail_ee.sql @@ -1,34 +1,41 @@ SELECT - distinct(u.id), + DISTINCT u.id, u.nick_name, u.username, - case - when - wurp."permission" is null then 'NOT_AUTH' - else wurp."permission" - end + tmp.role_name_list AS role_name, + CASE + WHEN wurp."permission" IS NULL THEN 'NOT_AUTH' + ELSE wurp."permission" + END AS permission FROM public."user" u LEFT JOIN ( SELECT - user_id , - (case - when auth_type = 'ROLE' - and 'ROLE' = any( permission_list) then 'ROLE' - when auth_type = 'RESOURCE_PERMISSION_GROUP' - and 'MANAGE'= any(permission_list) then 'MANAGE' - when auth_type = 'RESOURCE_PERMISSION_GROUP' - and 'VIEW' = any( permission_list) then 'VIEW' - else null - end) as "permission" + user_id, + CASE + WHEN auth_type = 'ROLE' + AND 'ROLE' = ANY(permission_list) THEN 'ROLE' + WHEN auth_type = 'RESOURCE_PERMISSION_GROUP' + AND 'MANAGE' = ANY(permission_list) THEN 'MANAGE' + WHEN auth_type = 'RESOURCE_PERMISSION_GROUP' + AND 'VIEW' = ANY(permission_list) THEN 'VIEW' + ELSE NULL + END AS "permission" FROM workspace_user_resource_permission - ${workspace_user_resource_permission_query_set} - ) wurp -ON - u.id = wurp.user_id -left join user_role_relation user_role_relation -on user_role_relation.user_id = u.id -left join role_setting role_setting -on role_setting.id = user_role_relation.role_id + ${workspace_user_resource_permission_query_set} +) wurp ON u.id = wurp.user_id +LEFT JOIN ( + SELECT + ARRAY_AGG(role_setting.role_name) AS role_name_list, + ARRAY_AGG(role_setting.role_name)::text AS role_name_list_str, + ARRAY_AGG(role_setting.type) AS type_list, + user_role_relation.user_id + FROM user_role_relation user_role_relation + LEFT JOIN role_setting role_setting + ON role_setting.id = user_role_relation.role_id + ${role_name_and_type_query_set} + GROUP BY + user_role_relation.user_id) tmp +ON u.id = tmp.user_id ${user_query_set} \ No newline at end of file diff --git a/apps/system_manage/views/user_resource_permission.py b/apps/system_manage/views/user_resource_permission.py index ba2c56252..68cd61459 100644 --- a/apps/system_manage/views/user_resource_permission.py +++ b/apps/system_manage/views/user_resource_permission.py @@ -196,6 +196,7 @@ class WorkspaceResourceUserPermissionView(APIView): return result.success(ResourceUserPermissionSerializer( data={'workspace_id': workspace_id, "target": target, 'auth_target_type': resource, } ).page({'username': request.query_params.get("username"), + 'role': request.query_params.get("role"), 'nick_name': request.query_params.get("nick_name"), 'permission': request.query_params.getlist("permission[]")}, current_page, page_size, )) diff --git a/apps/tools/serializers/tool.py b/apps/tools/serializers/tool.py index c9e1d02b1..b7be45512 100644 --- a/apps/tools/serializers/tool.py +++ b/apps/tools/serializers/tool.py @@ -431,7 +431,7 @@ class ToolSerializer(serializers.Serializer): @staticmethod def convert_value(name: str, value: str, _type: str, is_required: bool): - if not is_required and value is None: + if not is_required and (value is None or (isinstance(value, str) and len(value.strip()) == 0)): return None try: if _type == 'int': diff --git a/installer/sandbox.c b/installer/sandbox.c index fecdb27cf..570d0c7b1 100644 --- a/installer/sandbox.c +++ b/installer/sandbox.c @@ -19,8 +19,12 @@ #include #include #include +#include +#include #define CONFIG_FILE ".sandbox.conf" +#define KEY_BANNED_HOSTS "SANDBOX_PYTHON_BANNED_HOSTS" +#define KEY_ALLOW_SUBPROCESS "SANDBOX_PYTHON_ALLOW_SUBPROCESS" static char *banned_hosts = NULL; static int allow_subprocess = 0; // 默认禁止 @@ -57,10 +61,10 @@ static void load_sandbox_config() { while (*value == ' ' || *value == '\t') value++; char *vend = value + strlen(value) - 1; while (vend > value && (*vend == ' ' || *vend == '\t')) *vend-- = '\0'; - if (strcmp(key, "SANDBOX_PYTHON_BANNED_HOSTS") == 0) { + if (strcmp(key, KEY_BANNED_HOSTS) == 0) { free(banned_hosts); banned_hosts = strdup(value); - } else if (strcmp(key, "SANDBOX_PYTHON_ALLOW_SUBPROCESS") == 0) { + } else if (strcmp(key, KEY_ALLOW_SUBPROCESS) == 0) { allow_subprocess = atoi(value); } } @@ -158,7 +162,7 @@ static int allow_create_subprocess() { return allow_subprocess || !is_sandbox_user(); } static int deny() { - fprintf(stderr, "[sandbox] Permission denied to create subprocess in sandbox.\n"); + fprintf(stderr, "Permission denied to create subprocess.\n"); _exit(1); return -1; } @@ -167,7 +171,6 @@ static int deny() { if (!real_##func) { \ real_##func = dlsym(RTLD_NEXT, #func); \ } - int execve(const char *filename, char *const argv[], char *const envp[]) { RESOLVE_REAL(execve); if (!allow_create_subprocess()) return deny(); @@ -180,7 +183,21 @@ int execveat(int dirfd, const char *pathname, if (!allow_create_subprocess()) return deny(); return real_execveat(dirfd, pathname, argv, envp, flags); } - +int __execve(const char *filename, char *const argv[], char *const envp[]) { + RESOLVE_REAL(__execve); + if (!allow_create_subprocess()) return deny(); + return real___execve(filename, argv, envp); +} +int execvpe(const char *file, char *const argv[], char *const envp[]) { + RESOLVE_REAL(execvpe); + if (!allow_create_subprocess()) return deny(); + return real_execvpe(file, argv, envp); +} +int __execvpe(const char *file, char *const argv[], char *const envp[]) { + RESOLVE_REAL(__execvpe); + if (!allow_create_subprocess()) return deny(); + return real___execvpe(file, argv, envp); +} pid_t fork(void) { RESOLVE_REAL(fork); if (!allow_create_subprocess()) return deny(); @@ -203,7 +220,11 @@ int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ...) { va_end(ap); return real_clone(fn, child_stack, flags, arg, (void *)a4, (void *)a5); } - +int clone3(struct clone_args *cl_args, size_t size) { + RESOLVE_REAL(clone3); + if (!allow_create_subprocess()) return deny(); + return real_clone3(cl_args, size); +} int posix_spawn(pid_t *pid, const char *path, const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *attrp, @@ -249,9 +270,19 @@ int __libc_system(const char *command) { if (!allow_create_subprocess()) return deny(); return real___libc_system(command); } +pid_t forkpty(int *amaster, char *name, const struct termios *termp, const struct winsize *winp) { + RESOLVE_REAL(forkpty); + if (!allow_create_subprocess()) return deny(); + return real_forkpty(amaster, name, termp, winp); +} +pid_t __forkpty(int *amaster, char *name, const struct termios *termp, const struct winsize *winp) { + RESOLVE_REAL(__forkpty); + if (!allow_create_subprocess()) return deny(); + return real___forkpty(amaster, name, termp, winp); +} long (*real_syscall)(long, ...) = NULL; long syscall(long number, ...) { - if (!real_syscall) real_syscall = dlsym(RTLD_NEXT, "syscall"); + RESOLVE_REAL(syscall); va_list ap; va_start(ap, number); long a1 = va_arg(ap, long); @@ -261,9 +292,20 @@ long syscall(long number, ...) { long a5 = va_arg(ap, long); long a6 = va_arg(ap, long); va_end(ap); - if (number == SYS_execve || number == SYS_execveat || - number == SYS_fork || number == SYS_vfork || number == SYS_clone) { - if (!allow_create_subprocess()) return deny(); + switch (number) { + case SYS_execve: + case SYS_execveat: + case SYS_fork: + case SYS_vfork: + case SYS_clone: + case SYS_clone3: +#ifdef SYS_posix_spawn + case SYS_posix_spawn: +#endif +#ifdef SYS_posix_spawnp + case SYS_posix_spawnp: +#endif + if (!allow_create_subprocess()) return deny(); } return real_syscall(number, a1, a2, a3, a4, a5, a6); } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6b31cc8db..bd5a75fbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ dependencies = [ "xlrd==2.0.2", "xlwt==1.3.0", "pymupdf==1.26.3", - "pypdf==6.1.3", + "pypdf==6.4.0", "pydub==0.25.1", "pysilk==0.0.1", "gunicorn==23.0.0", diff --git a/ui/src/api/user/login.ts b/ui/src/api/user/login.ts index a97ee097d..f3e2270d3 100644 --- a/ui/src/api/user/login.ts +++ b/ui/src/api/user/login.ts @@ -99,7 +99,11 @@ const postLanguage: (data: any, loading?: Ref) => Promise> ) => { return post('/user/language', data, undefined, loading) } - +const samlLogin: (loading?: Ref) => Promise> = ( + loading, +) => { + return get('/saml2', '', loading) +} export default { login, logout, @@ -112,5 +116,6 @@ export default { getDingOauth2Callback, getLarkCallback, getQrSource, - ldapLogin + ldapLogin, + samlLogin } diff --git a/ui/src/components/resource-authorization-drawer/index.vue b/ui/src/components/resource-authorization-drawer/index.vue index 0eb8e76e1..9ae6af2c0 100644 --- a/ui/src/components/resource-authorization-drawer/index.vue +++ b/ui/src/components/resource-authorization-drawer/index.vue @@ -23,6 +23,7 @@ + - + - +