qemu-seccomp.c (11239B)
1/* 2 * QEMU seccomp mode 2 support with libseccomp 3 * 4 * Copyright IBM, Corp. 2012 5 * 6 * Authors: 7 * Eduardo Otubo <eotubo@br.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16#include "qemu/osdep.h" 17#include "qapi/error.h" 18#include "qemu/config-file.h" 19#include "qemu/option.h" 20#include "qemu/module.h" 21#include <sys/prctl.h> 22#include <seccomp.h> 23#include "sysemu/seccomp.h" 24#include <linux/seccomp.h> 25 26/* For some architectures (notably ARM) cacheflush is not supported until 27 * libseccomp 2.2.3, but configure enforces that we are using a more recent 28 * version on those hosts, so it is OK for this check to be less strict. 29 */ 30#if SCMP_VER_MAJOR >= 3 31 #define HAVE_CACHEFLUSH 32#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2 33 #define HAVE_CACHEFLUSH 34#endif 35 36struct QemuSeccompSyscall { 37 int32_t num; 38 uint8_t set; 39 uint8_t narg; 40 const struct scmp_arg_cmp *arg_cmp; 41}; 42 43const struct scmp_arg_cmp sched_setscheduler_arg[] = { 44 /* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */ 45 { .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE } 46}; 47 48static const struct QemuSeccompSyscall denylist[] = { 49 /* default set of syscalls that should get blocked */ 50 { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT }, 51 { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT }, 52 { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT }, 53 { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT }, 54 { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT }, 55 { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT }, 56 { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT }, 57 { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT }, 58 { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT }, 59 { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT }, 60 { SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT }, 61 { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT }, 62 { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT }, 63 { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT }, 64 { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT }, 65 { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT }, 66 { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT }, 67 { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT }, 68 { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT }, 69 { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT }, 70 { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT }, 71 { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT }, 72 /* obsolete */ 73 { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE }, 74 { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE }, 75 { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE }, 76 { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE }, 77 { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE }, 78 { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE }, 79 { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE }, 80 { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE }, 81 { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE }, 82 { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE }, 83 { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE }, 84 /* privileged */ 85 { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED }, 86 { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED }, 87 { SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED }, 88 { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED }, 89 { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED }, 90 { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED }, 91 { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED }, 92 { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED }, 93 { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED }, 94 { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED }, 95 /* spawn */ 96 { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN }, 97 { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN }, 98 { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN }, 99 /* resource control */ 100 { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL }, 101 { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL }, 102 { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL, 103 ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg }, 104 { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, 105}; 106 107static inline __attribute__((unused)) int 108qemu_seccomp(unsigned int operation, unsigned int flags, void *args) 109{ 110#ifdef __NR_seccomp 111 return syscall(__NR_seccomp, operation, flags, args); 112#else 113 errno = ENOSYS; 114 return -1; 115#endif 116} 117 118static uint32_t qemu_seccomp_get_action(int set) 119{ 120 switch (set) { 121 case QEMU_SECCOMP_SET_DEFAULT: 122 case QEMU_SECCOMP_SET_OBSOLETE: 123 case QEMU_SECCOMP_SET_PRIVILEGED: 124 case QEMU_SECCOMP_SET_SPAWN: { 125#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ 126 defined(SECCOMP_RET_KILL_PROCESS) 127 static int kill_process = -1; 128 if (kill_process == -1) { 129 uint32_t action = SECCOMP_RET_KILL_PROCESS; 130 131 if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { 132 kill_process = 1; 133 } else { 134 kill_process = 0; 135 } 136 } 137 if (kill_process == 1) { 138 return SCMP_ACT_KILL_PROCESS; 139 } 140#endif 141 return SCMP_ACT_TRAP; 142 } 143 144 case QEMU_SECCOMP_SET_RESOURCECTL: 145 return SCMP_ACT_ERRNO(EPERM); 146 147 default: 148 g_assert_not_reached(); 149 } 150} 151 152 153static int seccomp_start(uint32_t seccomp_opts, Error **errp) 154{ 155 int rc = -1; 156 unsigned int i = 0; 157 scmp_filter_ctx ctx; 158 159 ctx = seccomp_init(SCMP_ACT_ALLOW); 160 if (ctx == NULL) { 161 error_setg(errp, "failed to initialize seccomp context"); 162 goto seccomp_return; 163 } 164 165 rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); 166 if (rc != 0) { 167 error_setg_errno(errp, -rc, 168 "failed to set seccomp thread synchronization"); 169 goto seccomp_return; 170 } 171 172 for (i = 0; i < ARRAY_SIZE(denylist); i++) { 173 uint32_t action; 174 if (!(seccomp_opts & denylist[i].set)) { 175 continue; 176 } 177 178 action = qemu_seccomp_get_action(denylist[i].set); 179 rc = seccomp_rule_add_array(ctx, action, denylist[i].num, 180 denylist[i].narg, denylist[i].arg_cmp); 181 if (rc < 0) { 182 error_setg_errno(errp, -rc, 183 "failed to add seccomp denylist rules"); 184 goto seccomp_return; 185 } 186 } 187 188 rc = seccomp_load(ctx); 189 if (rc < 0) { 190 error_setg_errno(errp, -rc, 191 "failed to load seccomp syscall filter in kernel"); 192 } 193 194 seccomp_return: 195 seccomp_release(ctx); 196 return rc < 0 ? -1 : 0; 197} 198 199int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) 200{ 201 if (qemu_opt_get_bool(opts, "enable", false)) { 202 uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT 203 | QEMU_SECCOMP_SET_OBSOLETE; 204 const char *value = NULL; 205 206 value = qemu_opt_get(opts, "obsolete"); 207 if (value) { 208 if (g_str_equal(value, "allow")) { 209 seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE; 210 } else if (g_str_equal(value, "deny")) { 211 /* this is the default option, this if is here 212 * to provide a little bit of consistency for 213 * the command line */ 214 } else { 215 error_setg(errp, "invalid argument for obsolete"); 216 return -1; 217 } 218 } 219 220 value = qemu_opt_get(opts, "elevateprivileges"); 221 if (value) { 222 if (g_str_equal(value, "deny")) { 223 seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; 224 } else if (g_str_equal(value, "children")) { 225 seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; 226 227 /* calling prctl directly because we're 228 * not sure if host has CAP_SYS_ADMIN set*/ 229 if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { 230 error_setg(errp, "failed to set no_new_privs aborting"); 231 return -1; 232 } 233 } else if (g_str_equal(value, "allow")) { 234 /* default value */ 235 } else { 236 error_setg(errp, "invalid argument for elevateprivileges"); 237 return -1; 238 } 239 } 240 241 value = qemu_opt_get(opts, "spawn"); 242 if (value) { 243 if (g_str_equal(value, "deny")) { 244 seccomp_opts |= QEMU_SECCOMP_SET_SPAWN; 245 } else if (g_str_equal(value, "allow")) { 246 /* default value */ 247 } else { 248 error_setg(errp, "invalid argument for spawn"); 249 return -1; 250 } 251 } 252 253 value = qemu_opt_get(opts, "resourcecontrol"); 254 if (value) { 255 if (g_str_equal(value, "deny")) { 256 seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL; 257 } else if (g_str_equal(value, "allow")) { 258 /* default value */ 259 } else { 260 error_setg(errp, "invalid argument for resourcecontrol"); 261 return -1; 262 } 263 } 264 265 if (seccomp_start(seccomp_opts, errp) < 0) { 266 return -1; 267 } 268 } 269 270 return 0; 271} 272 273static QemuOptsList qemu_sandbox_opts = { 274 .name = "sandbox", 275 .implied_opt_name = "enable", 276 .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head), 277 .desc = { 278 { 279 .name = "enable", 280 .type = QEMU_OPT_BOOL, 281 }, 282 { 283 .name = "obsolete", 284 .type = QEMU_OPT_STRING, 285 }, 286 { 287 .name = "elevateprivileges", 288 .type = QEMU_OPT_STRING, 289 }, 290 { 291 .name = "spawn", 292 .type = QEMU_OPT_STRING, 293 }, 294 { 295 .name = "resourcecontrol", 296 .type = QEMU_OPT_STRING, 297 }, 298 { /* end of list */ } 299 }, 300}; 301 302static void seccomp_register(void) 303{ 304 bool add = false; 305 306 /* FIXME: use seccomp_api_get() >= 2 check when released */ 307 308#if defined(SECCOMP_FILTER_FLAG_TSYNC) 309 int check; 310 311 /* check host TSYNC capability, it returns errno == ENOSYS if unavailable */ 312 check = qemu_seccomp(SECCOMP_SET_MODE_FILTER, 313 SECCOMP_FILTER_FLAG_TSYNC, NULL); 314 if (check < 0 && errno == EFAULT) { 315 add = true; 316 } 317#endif 318 319 if (add) { 320 qemu_add_opts(&qemu_sandbox_opts); 321 } 322} 323opts_init(seccomp_register);