Merge branch 'v2-c' into knowledge_workflow

# Conflicts:
#	apps/common/utils/tool_code.py
This commit is contained in:
CaptainB 2025-11-25 10:40:45 +08:00
commit 757cff4837
7 changed files with 264 additions and 111 deletions

View File

@ -27,8 +27,8 @@ class ToolExecutor:
else:
self.sandbox_path = os.path.join(PROJECT_DIR, 'data', 'sandbox')
self.user = None
self.banned_keywords = CONFIG.get("SANDBOX_PYTHON_BANNED_KEYWORDS", 'nothing_is_banned').split(',');
self.sandbox_so_path = f'{self.sandbox_path}/sandbox.so'
self.process_timeout_seconds = int(CONFIG.get("SANDBOX_PYTHON_PROCESS_TIMEOUT_SECONDS", '3600'))
try:
self._init_dir()
except Exception as e:
@ -60,21 +60,22 @@ class ToolExecutor:
os.system(f"chown -R {self.user}:root {tmp_dir_path}")
if os.path.exists(self.sandbox_so_path):
os.chmod(self.sandbox_so_path, 0o440)
# 初始化host黑名单
banned_hosts_file_path = f'{self.sandbox_path}/.SANDBOX_BANNED_HOSTS'
if os.path.exists(banned_hosts_file_path):
os.remove(banned_hosts_file_path)
# 初始化sandbox配置文件
sandbox_conf_file_path = f'{self.sandbox_path}/.sandbox.conf'
if os.path.exists(sandbox_conf_file_path):
os.remove(sandbox_conf_file_path)
allow_subprocess = CONFIG.get("SANDBOX_PYTHON_ALLOW_SUBPROCESS", '0')
banned_hosts = CONFIG.get("SANDBOX_PYTHON_BANNED_HOSTS", '').strip()
if banned_hosts:
hostname = socket.gethostname()
local_ip = socket.gethostbyname(hostname)
banned_hosts = f"{banned_hosts},{hostname},{local_ip}"
with open(banned_hosts_file_path, "w") as f:
f.write(banned_hosts)
os.chmod(banned_hosts_file_path, 0o440)
with open(sandbox_conf_file_path, "w") as f:
f.write(f"SANDBOX_PYTHON_BANNED_HOSTS={banned_hosts}\n")
f.write(f"SANDBOX_PYTHON_ALLOW_SUBPROCESS={allow_subprocess}\n")
os.chmod(sandbox_conf_file_path, 0o440)
def exec_code(self, code_str, keywords, function_name=None):
self.validate_banned_keywords(code_str)
_id = str(uuid.uuid7())
success = '{"code":200,"msg":"成功","data":exec_result}'
err = '{"code":500,"msg":str(e),"data":None}'
@ -115,8 +116,6 @@ except Exception as e:
raise Exception(result.get('msg'))
def _generate_mcp_server_code(self, _code, params):
self.validate_banned_keywords(_code)
# 解析代码,提取导入语句和函数定义
try:
tree = ast.parse(_code)
@ -230,19 +229,19 @@ exec({dedent(code)!a})
}
maxkb_logger.debug(f"Sandbox execute code: {_code}")
compressed_and_base64_encoded_code_str = base64.b64encode(gzip.compress(_code.encode())).decode()
subprocess_result = subprocess.run(
['su', '-s', python_directory, '-c',
f'import base64,gzip; exec(gzip.decompress(base64.b64decode(\'{compressed_and_base64_encoded_code_str}\')).decode())',
self.user],
text=True,
capture_output=True, **kwargs)
try:
subprocess_result = subprocess.run(
['su', '-s', python_directory, '-c',
f'import base64,gzip; exec(gzip.decompress(base64.b64decode(\'{compressed_and_base64_encoded_code_str}\')).decode())',
self.user],
text=True,
capture_output=True,
timeout=self.process_timeout_seconds,
**kwargs)
except subprocess.TimeoutExpired:
raise Exception(_("Sandbox process execution timeout, consider increasing MAXKB_SANDBOX_PYTHON_PROCESS_TIMEOUT_SECONDS."))
return subprocess_result
def validate_banned_keywords(self, code_str):
    """Reject tool code that contains any configured banned keyword.

    Each entry of ``self.banned_keywords`` is matched as a plain substring
    of ``code_str``.

    Blank entries are skipped: the list is built by splitting a
    comma-separated setting, so an empty setting or a trailing comma yields
    ``''``, and ``'' in code_str`` is always true, which would ban all code.

    Raises:
        Exception: naming the first banned keyword found in ``code_str``.
    """
    matched = next(
        (bad for bad in self.banned_keywords if bad.strip() and bad in code_str),
        None,
    )
    if matched:
        raise Exception(f"keyword '{matched}' is banned in the tool.")
def validate_mcp_transport(self, code_str):
servers = json.loads(code_str)
for server, config in servers.items():

View File

@ -9,6 +9,7 @@ from common.forms import BaseForm, PasswordInputField, SingleSelect, SliderField
from models_provider.base_model_provider import BaseModelCredential, ValidCode
from common.utils.logger import maxkb_logger
class QwenModelParams(BaseForm):
"""
Parameters class for the Qwen Text-to-Image model.
@ -26,7 +27,8 @@ class QwenModelParams(BaseForm):
{'value': '1280*720', 'label': '1280*720'},
],
text_field='label',
value_field='value'
value_field='value',
attrs={'allow-create': True, 'filterable': True}
)
n = SliderField(

View File

@ -2,7 +2,7 @@
from http import HTTPStatus
from typing import Dict
from dashscope import ImageSynthesis
from dashscope import ImageSynthesis, MultiModalConversation
from django.utils.translation import gettext
from langchain_community.chat_models import ChatTongyi
from langchain_core.messages import HumanMessage
@ -46,17 +46,48 @@ class QwenTextToImageModel(MaxKBBaseModel, BaseTextToImage):
chat.invoke([HumanMessage([{"type": "text", "text": gettext('Hello')}])])
def generate_image(self, prompt: str, negative_prompt: str = None):
rsp = ImageSynthesis.call(api_key=self.api_key,
model=self.model_name,
base_url='https://dashscope.aliyuncs.com/compatible-mode/v1',
prompt=prompt,
negative_prompt=negative_prompt,
**self.params)
file_urls = []
if rsp.status_code == HTTPStatus.OK:
for result in rsp.output.results:
file_urls.append(result.url)
else:
maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' %
(rsp.status_code, rsp.code, rsp.message))
return file_urls
if self.model_name.startswith("wan"):
rsp = ImageSynthesis.call(api_key=self.api_key,
model=self.model_name,
base_url='https://dashscope.aliyuncs.com/compatible-mode/v1',
prompt=prompt,
negative_prompt=negative_prompt,
**self.params)
file_urls = []
if rsp.status_code == HTTPStatus.OK:
for result in rsp.output.results:
file_urls.append(result.url)
else:
maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' %
(rsp.status_code, rsp.code, rsp.message))
return file_urls
elif self.model_name.startswith("qwen"):
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": prompt
}
]
}
]
rsp = MultiModalConversation.call(
api_key=self.api_key,
model=self.model_name,
messages=messages,
result_format='message',
base_url='https://dashscope.aliyuncs.com/v1',
stream=False,
negative_prompt=negative_prompt,
**self.params
)
file_urls = []
if rsp.status_code == HTTPStatus.OK:
for result in rsp.output.choices:
file_urls.append(result.message.content[0].get('image'))
else:
maxkb_logger.error('sync_call Failed, status_code: %s, code: %s, message: %s' %
(rsp.status_code, rsp.code, rsp.message))
return file_urls

View File

@ -354,7 +354,6 @@ class ToolSerializer(serializers.Serializer):
self.is_valid(raise_exception=True)
ToolCreateRequest(data=instance).is_valid(raise_exception=True)
# 校验代码是否包括禁止的关键字
ToolExecutor().validate_banned_keywords(instance.get('code', ''))
if instance.get('tool_type') == ToolType.MCP:
ToolExecutor().validate_mcp_transport(instance.get('code', ''))
@ -391,7 +390,6 @@ class ToolSerializer(serializers.Serializer):
def test_connection(self):
self.is_valid(raise_exception=True)
# 校验代码是否包括禁止的关键字
ToolExecutor().validate_banned_keywords(self.data.get('code', ''))
ToolExecutor().validate_mcp_transport(self.data.get('code', ''))
# 校验mcp json
@ -486,7 +484,6 @@ class ToolSerializer(serializers.Serializer):
self.is_valid(raise_exception=True)
ToolEditRequest(data=instance).is_valid(raise_exception=True)
# 校验代码是否包括禁止的关键字
ToolExecutor().validate_banned_keywords(instance.get('code', ''))
if instance.get('tool_type') == ToolType.MCP:
ToolExecutor().validate_mcp_transport(instance.get('code', ''))

View File

@ -48,7 +48,6 @@ ENV PATH=/opt/py3/bin:$PATH \
MAXKB_SANDBOX=1 \
MAXKB_SANDBOX_HOME=/opt/maxkb-app/sandbox \
MAXKB_SANDBOX_PYTHON_PACKAGE_PATHS="/opt/py3/lib/python3.11/site-packages,/opt/maxkb-app/sandbox/python-packages,/opt/maxkb/python-packages" \
MAXKB_SANDBOX_PYTHON_BANNED_KEYWORDS="subprocess.,system(,exec(,execve(,pty.,eval(,compile(,shutil.,input(,__import__" \
MAXKB_SANDBOX_PYTHON_BANNED_HOSTS="127.0.0.1,localhost,host.docker.internal,maxkb,pgsql,redis" \
MAXKB_ADMIN_PATH=/admin

View File

@ -11,68 +11,91 @@
#include <errno.h>
#include <limits.h>
#include <libgen.h>
#include <pwd.h>
#include <stdarg.h>
#include <spawn.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <time.h>
#include <execinfo.h>
#include <dlfcn.h>
static const char *BANNED_FILE_NAME = ".SANDBOX_BANNED_HOSTS";
#define CONFIG_FILE ".sandbox.conf"
/**
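 * Read the .SANDBOX_BANNED_HOSTS file that sits in the same directory as this .so.
 * The returned string is heap-allocated (malloc/strdup); the caller owns it and may free() it.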
*/
static char *load_banned_hosts() {
Dl_info info;
if (dladdr((void *)load_banned_hosts, &info) == 0 || !info.dli_fname) {
fprintf(stderr, "[sandbox] ⚠️ Unable to locate shared object path — allowing all hosts\n");
return strdup("");
static char *banned_hosts = NULL;
static int allow_subprocess = 0; // 默认禁止
static void load_sandbox_config() {
Dl_info info;
if (dladdr((void *)load_sandbox_config, &info) == 0 || !info.dli_fname) {
banned_hosts = strdup("");
allow_subprocess = 0;
return;
}
char so_path[PATH_MAX];
strncpy(so_path, info.dli_fname, sizeof(so_path));
so_path[sizeof(so_path) - 1] = '\0';
char *dir = dirname(so_path);
char config_path[PATH_MAX];
snprintf(config_path, sizeof(config_path), "%s/%s", dir, CONFIG_FILE);
FILE *fp = fopen(config_path, "r");
if (!fp) {
banned_hosts = strdup("");
allow_subprocess = 0;
return;
}
char line[512];
banned_hosts = strdup("");
allow_subprocess = 0;
while (fgets(line, sizeof(line), fp)) {
char *key = strtok(line, "=");
char *value = strtok(NULL, "\n");
if (!key || !value) continue;
while (*key == ' ' || *key == '\t') key++;
char *kend = key + strlen(key) - 1;
while (kend > key && (*kend == ' ' || *kend == '\t')) *kend-- = '\0';
while (*value == ' ' || *value == '\t') value++;
char *vend = value + strlen(value) - 1;
while (vend > value && (*vend == ' ' || *vend == '\t')) *vend-- = '\0';
if (strcmp(key, "SANDBOX_PYTHON_BANNED_HOSTS") == 0) {
free(banned_hosts);
banned_hosts = strdup(value);
} else if (strcmp(key, "SANDBOX_PYTHON_ALLOW_SUBPROCESS") == 0) {
allow_subprocess = atoi(value);
}
}
char so_path[PATH_MAX];
strncpy(so_path, info.dli_fname, sizeof(so_path));
so_path[sizeof(so_path) - 1] = '\0';
char *dir = dirname(so_path);
char file_path[PATH_MAX];
snprintf(file_path, sizeof(file_path), "%s/%s", dir, BANNED_FILE_NAME);
FILE *fp = fopen(file_path, "r");
if (!fp) {
fprintf(stderr, "[sandbox] ⚠️ Cannot open %s — allowing all hosts\n", file_path);
return strdup("");
}
char *buf = malloc(4096);
if (!buf) {
fclose(fp);
fprintf(stderr, "[sandbox] ⚠️ Memory allocation failed — allowing all hosts\n");
return strdup("");
}
size_t len = fread(buf, 1, 4095, fp);
buf[len] = '\0';
fclose(fp);
return buf;
}
/*
 * Load the sandbox configuration on first use.
 * banned_hosts doubles as the "already loaded" marker: while it is still NULL
 * the config loader is invoked, otherwise this is a no-op.
 */
static void ensure_config_loaded() {
    if (banned_hosts != NULL) {
        return;
    }
    load_sandbox_config();
}
/*
 * Tell whether the calling process should be treated as the sandbox user.
 *
 * Returns 1 when the passwd entry for the real UID is named "sandbox", and
 * also when that entry (or its name) cannot be obtained: an unidentifiable
 * user is treated as sandboxed. Returns 0 for any other user.
 */
static int is_sandbox_user() {
    const struct passwd *entry = getpwuid(getuid());
    int unidentified = (entry == NULL || entry->pw_name == NULL);
    return unidentified || strcmp(entry->pw_name, "sandbox") == 0;
}
/**
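 * Return 1 if target fully matches (case-insensitively) any of the
 * comma-separated extended-regex patterns in env_val, otherwise 0.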
*/
static int match_env_patterns(const char *target, const char *env_val) {
if (!target || !env_val || !*env_val) return 0;
char *patterns = strdup(env_val);
char *token = strtok(patterns, ",");
int matched = 0;
while (token) {
// 去掉前后空格
while (*token == ' ' || *token == '\t') token++;
char *end = token + strlen(token) - 1;
while (end > token && (*end == ' ' || *end == '\t')) *end-- = '\0';
if (*token) {
regex_t regex;
char fullpattern[512];
snprintf(fullpattern, sizeof(fullpattern), "^%s$", token);
if (regcomp(&regex, fullpattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) == 0) {
if (regexec(&regex, target, 0, NULL, 0) == 0) {
matched = 1;
@ -80,13 +103,10 @@ static int match_env_patterns(const char *target, const char *env_val) {
break;
}
regfree(&regex);
} else {
fprintf(stderr, "[sandbox] ⚠️ Invalid regex '%s' — allowing host by default\n", token);
}
}
token = strtok(NULL, ",");
}
free(patterns);
return matched;
}
@ -96,22 +116,17 @@ int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL;
if (!real_connect)
real_connect = dlsym(RTLD_NEXT, "connect");
static char *banned_env = NULL;
if (!banned_env) banned_env = load_banned_hosts();
ensure_config_loaded();
char ip[INET6_ADDRSTRLEN] = {0};
if (addr->sa_family == AF_INET)
inet_ntop(AF_INET, &((struct sockaddr_in *)addr)->sin_addr, ip, sizeof(ip));
else if (addr->sa_family == AF_INET6)
inet_ntop(AF_INET6, &((struct sockaddr_in6 *)addr)->sin6_addr, ip, sizeof(ip));
if (banned_env && *banned_env && match_env_patterns(ip, banned_env)) {
if (is_sandbox_user() && banned_hosts && *banned_hosts && match_env_patterns(ip, banned_hosts)) {
fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned\n", ip);
errno = EACCES; // EACCES 的值是 13, 意思是 Permission denied
return -1;
}
return real_connect(sockfd, addr, addrlen);
}
@ -122,23 +137,133 @@ int getaddrinfo(const char *node, const char *service,
const struct addrinfo *, struct addrinfo **) = NULL;
if (!real_getaddrinfo)
real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo");
static char *banned_env = NULL;
if (!banned_env) banned_env = load_banned_hosts();
if (banned_env && *banned_env && node) {
ensure_config_loaded();
if (banned_hosts && *banned_hosts && node) {
// 检测 node 是否是 IP
struct in_addr ipv4;
struct in6_addr ipv6;
int is_ip = (inet_pton(AF_INET, node, &ipv4) == 1) ||
(inet_pton(AF_INET6, node, &ipv6) == 1);
// 只对“非IP的域名”进行屏蔽
if (!is_ip && match_env_patterns(node, banned_env)) {
if (is_sandbox_user() && !is_ip && match_env_patterns(node, banned_hosts )) {
fprintf(stderr, "[sandbox] 🚫 Access to host %s is banned (DNS blocked)\n", node);
return EAI_FAIL; // 模拟 DNS 层禁止
}
}
return real_getaddrinfo(node, service, hints, res);
}
/* ------------------ Deny subprocess creation ------------------ */
/*
 * Decide whether the caller may create a child process.
 * Returns non-zero when the loaded configuration enables subprocesses, or
 * when the current user is not the sandbox user; returns 0 otherwise.
 */
static int allow_create_subprocess() {
    ensure_config_loaded();
    if (allow_subprocess) {
        return 1;
    }
    return !is_sandbox_user();
}
/*
 * Refuse a process-creation attempt: report it on stderr and terminate the
 * calling process immediately with status 1.
 * The trailing return is never reached; it only gives the function a value
 * so wrappers can write `return deny();`.
 */
static int deny() {
    fputs("[sandbox] Permission denied to create subprocess in sandbox.\n", stderr);
    _exit(1);
    return -1;
}
/*
 * RESOLVE_REAL(func): declare a function-local static pointer named
 * real_<func>, typed like <func>, and fill it on first use with the next
 * definition of <func> in library search order (dlsym with RTLD_NEXT).
 * Must be used inside a function body. The pointer stays NULL if dlsym
 * cannot find the symbol; the macro itself does not check for that.
 */
#define RESOLVE_REAL(func) \
static typeof(func) *real_##func = NULL; \
if (!real_##func) { \
real_##func = dlsym(RTLD_NEXT, #func); \
}
/*
 * Interposed execve(): forwards to the real execve when subprocess creation
 * is permitted, otherwise denies the call (deny() terminates the process).
 */
int execve(const char *filename, char *const argv[], char *const envp[]) {
    RESOLVE_REAL(execve);
    if (allow_create_subprocess()) {
        return real_execve(filename, argv, envp);
    }
    return deny();
}
/*
 * Interposed execveat(): forwards to the real execveat when subprocess
 * creation is permitted, otherwise denies the call (deny() terminates the
 * process).
 */
int execveat(int dirfd, const char *pathname,
             char *const argv[], char *const envp[], int flags) {
    RESOLVE_REAL(execveat);
    if (allow_create_subprocess()) {
        return real_execveat(dirfd, pathname, argv, envp, flags);
    }
    return deny();
}
/*
 * Interposed fork(): forwards to the real fork when subprocess creation is
 * permitted, otherwise denies the call (deny() terminates the process).
 */
pid_t fork(void) {
    RESOLVE_REAL(fork);
    if (allow_create_subprocess()) {
        return real_fork();
    }
    return deny();
}
/*
 * Interposed vfork(): denies the call when subprocess creation is not
 * permitted (deny() terminates the process); otherwise creates the child
 * with the real fork().
 *
 * The real vfork() is deliberately NOT called from here. After vfork the
 * child shares the parent's stack, and returning from the function that
 * called vfork is undefined behaviour. Because this wrapper would be that
 * function, the child's return would corrupt the frame the parent resumes
 * in. POSIX allows vfork to behave like fork, so fork is the safe
 * substitute inside an interposer.
 */
pid_t vfork(void) {
    RESOLVE_REAL(fork);
    if (!allow_create_subprocess()) return deny();
    return real_fork();
}
/*
 * Interposed clone(): denies the call when subprocess creation is not
 * permitted (deny() terminates the process); otherwise forwards to the real
 * clone.
 *
 * clone() takes up to three optional trailing arguments, in this order:
 * parent_tid, tls, child_tid. All three are forwarded; dropping child_tid
 * would break callers that use CLONE_CHILD_SETTID / CLONE_CHILD_CLEARTID.
 * Which of them are meaningful depends on `flags`; slots the caller did not
 * supply are read and passed through uninterpreted, as the libc wrapper
 * itself does.
 */
int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ...) {
    RESOLVE_REAL(clone);
    if (!allow_create_subprocess()) return deny();
    va_list ap;
    va_start(ap, arg);
    void *parent_tid = va_arg(ap, void *);
    void *tls = va_arg(ap, void *);
    void *child_tid = va_arg(ap, void *);
    va_end(ap);
    return real_clone(fn, child_stack, flags, arg, parent_tid, tls, child_tid);
}
/*
 * Interposed posix_spawn(): forwards to the real posix_spawn when subprocess
 * creation is permitted, otherwise denies the call (deny() terminates the
 * process).
 */
int posix_spawn(pid_t *pid, const char *path,
                const posix_spawn_file_actions_t *file_actions,
                const posix_spawnattr_t *attrp,
                char *const argv[], char *const envp[]) {
    RESOLVE_REAL(posix_spawn);
    if (allow_create_subprocess()) {
        return real_posix_spawn(pid, path, file_actions, attrp, argv, envp);
    }
    return deny();
}
/*
 * Interposed posix_spawnp(): forwards to the real posix_spawnp when
 * subprocess creation is permitted, otherwise denies the call (deny()
 * terminates the process).
 */
int posix_spawnp(pid_t *pid, const char *file,
                 const posix_spawn_file_actions_t *file_actions,
                 const posix_spawnattr_t *attrp,
                 char *const argv[], char *const envp[]) {
    RESOLVE_REAL(posix_spawnp);
    if (allow_create_subprocess()) {
        return real_posix_spawnp(pid, file, file_actions, attrp, argv, envp);
    }
    return deny();
}
int __posix_spawn(pid_t *pid, const char *path,
const posix_spawn_file_actions_t *file_actions,
const posix_spawnattr_t *attrp,
char *const argv[], char *const envp[]) {
RESOLVE_REAL(__posix_spawn);
if (!allow_create_subprocess()) return deny();
return real___posix_spawn(pid, path, file_actions, attrp, argv, envp);
}
int __posix_spawnp(pid_t *pid, const char *file,
const posix_spawn_file_actions_t *file_actions,
const posix_spawnattr_t *attrp,
char *const argv[], char *const envp[]) {
RESOLVE_REAL(__posix_spawnp);
if (!allow_create_subprocess()) return deny();
return real___posix_spawnp(pid, file, file_actions, attrp, argv, envp);
}
/*
 * Interposed system(): forwards to the real system when subprocess creation
 * is permitted, otherwise denies the call (deny() terminates the process).
 */
int system(const char *command) {
    RESOLVE_REAL(system);
    if (allow_create_subprocess()) {
        return real_system(command);
    }
    return deny();
}
int __libc_system(const char *command) {
RESOLVE_REAL(__libc_system);
if (!allow_create_subprocess()) return deny();
return real___libc_system(command);
}
/* Pointer to the next syscall() in library search order; resolved lazily.
 * Kept static so this preloaded library does not export a helper symbol. */
static long (*real_syscall)(long, ...) = NULL;

/*
 * Return 1 if `number` is a syscall that creates or replaces a process.
 * Each number is guarded individually: some architectures (e.g. aarch64)
 * define no SYS_fork / SYS_vfork, and older headers lack SYS_execveat or
 * SYS_clone3, so unguarded references would not compile there.
 */
static int is_process_creation_syscall(long number) {
    switch (number) {
#ifdef SYS_execve
    case SYS_execve:
#endif
#ifdef SYS_execveat
    case SYS_execveat:
#endif
#ifdef SYS_fork
    case SYS_fork:
#endif
#ifdef SYS_vfork
    case SYS_vfork:
#endif
#ifdef SYS_clone
    case SYS_clone:
#endif
#ifdef SYS_clone3
    case SYS_clone3: /* same capability as clone; otherwise a trivial bypass */
#endif
        return 1;
    default:
        return 0;
    }
}

/*
 * Interposed syscall(): blocks process-creation syscalls issued through the
 * generic syscall() entry point when subprocess creation is not permitted
 * (deny() terminates the process); every other call is forwarded unchanged.
 *
 * Up to six long-sized arguments are read and passed on, whether or not the
 * particular syscall uses all of them.
 */
long syscall(long number, ...) {
    if (!real_syscall) real_syscall = dlsym(RTLD_NEXT, "syscall");
    va_list ap;
    va_start(ap, number);
    long a1 = va_arg(ap, long);
    long a2 = va_arg(ap, long);
    long a3 = va_arg(ap, long);
    long a4 = va_arg(ap, long);
    long a5 = va_arg(ap, long);
    long a6 = va_arg(ap, long);
    va_end(ap);
    if (is_process_creation_syscall(number)) {
        if (!allow_create_subprocess()) return deny();
    }
    return real_syscall(number, a1, a2, a3, a4, a5, a6);
}

View File

@ -20,16 +20,16 @@ dependencies = [
"cffi==1.17.1",
"beautifulsoup4==4.13.4",
"jieba==0.42.1",
"langchain-openai==0.3.31",
"langchain-anthropic==0.3.17",
"langchain-community==0.3.27",
"langchain-deepseek==0.1.3",
"langchain-google-genai==2.1.7",
"langchain-mcp-adapters==0.1.9",
"langchain-huggingface==0.3.0",
"langchain-ollama==0.3.4",
"langchain-openai==0.3.35",
"langchain-anthropic==0.3.22",
"langchain-community==0.3.31",
"langchain-deepseek==0.1.4",
"langchain-google-genai==2.1.12",
"langchain-mcp-adapters==0.1.13",
"langchain-huggingface==0.3.1",
"langchain-ollama==0.3.10",
"langchain_core==0.3.80",
"langgraph==0.5.3",
"langchain_core==0.3.74",
"torch==2.8.0",
"sentence-transformers==5.0.0",
"qianfan==0.4.12.3",
@ -38,7 +38,7 @@ dependencies = [
"boto3==1.39.4",
"tencentcloud-sdk-python==3.0.1420",
"xinference-client==1.7.1.post1",
"anthropic==0.57.1",
"anthropic==0.74.1",
"dashscope==1.23.8",
"celery[sqlalchemy]==5.5.3",
"django-celery-beat==2.8.1",