cgroup_util.c (11850B)
1/* SPDX-License-Identifier: GPL-2.0 */ 2 3#define _GNU_SOURCE 4 5#include <errno.h> 6#include <fcntl.h> 7#include <linux/limits.h> 8#include <poll.h> 9#include <signal.h> 10#include <stdio.h> 11#include <stdlib.h> 12#include <string.h> 13#include <sys/inotify.h> 14#include <sys/stat.h> 15#include <sys/types.h> 16#include <sys/wait.h> 17#include <unistd.h> 18 19#include "cgroup_util.h" 20#include "../clone3/clone3_selftests.h" 21 22/* Returns read len on success, or -errno on failure. */ 23static ssize_t read_text(const char *path, char *buf, size_t max_len) 24{ 25 ssize_t len; 26 int fd; 27 28 fd = open(path, O_RDONLY); 29 if (fd < 0) 30 return -errno; 31 32 len = read(fd, buf, max_len - 1); 33 34 if (len >= 0) 35 buf[len] = 0; 36 37 close(fd); 38 return len < 0 ? -errno : len; 39} 40 41/* Returns written len on success, or -errno on failure. */ 42static ssize_t write_text(const char *path, char *buf, ssize_t len) 43{ 44 int fd; 45 46 fd = open(path, O_WRONLY | O_APPEND); 47 if (fd < 0) 48 return -errno; 49 50 len = write(fd, buf, len); 51 close(fd); 52 return len < 0 ? -errno : len; 53} 54 55char *cg_name(const char *root, const char *name) 56{ 57 size_t len = strlen(root) + strlen(name) + 2; 58 char *ret = malloc(len); 59 60 snprintf(ret, len, "%s/%s", root, name); 61 62 return ret; 63} 64 65char *cg_name_indexed(const char *root, const char *name, int index) 66{ 67 size_t len = strlen(root) + strlen(name) + 10; 68 char *ret = malloc(len); 69 70 snprintf(ret, len, "%s/%s_%d", root, name, index); 71 72 return ret; 73} 74 75char *cg_control(const char *cgroup, const char *control) 76{ 77 size_t len = strlen(cgroup) + strlen(control) + 2; 78 char *ret = malloc(len); 79 80 snprintf(ret, len, "%s/%s", cgroup, control); 81 82 return ret; 83} 84 85/* Returns 0 on success, or -errno on failure. */ 86int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 87{ 88 char path[PATH_MAX]; 89 ssize_t ret; 90 91 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 92 93 ret = read_text(path, buf, len); 94 return ret >= 0 ? 0 : ret; 95} 96 97int cg_read_strcmp(const char *cgroup, const char *control, 98 const char *expected) 99{ 100 size_t size; 101 char *buf; 102 int ret; 103 104 /* Handle the case of comparing against empty string */ 105 if (!expected) 106 return -1; 107 else 108 size = strlen(expected) + 1; 109 110 buf = malloc(size); 111 if (!buf) 112 return -1; 113 114 if (cg_read(cgroup, control, buf, size)) { 115 free(buf); 116 return -1; 117 } 118 119 ret = strcmp(expected, buf); 120 free(buf); 121 return ret; 122} 123 124int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 125{ 126 char buf[PAGE_SIZE]; 127 128 if (cg_read(cgroup, control, buf, sizeof(buf))) 129 return -1; 130 131 return strstr(buf, needle) ? 0 : -1; 132} 133 134long cg_read_long(const char *cgroup, const char *control) 135{ 136 char buf[128]; 137 138 if (cg_read(cgroup, control, buf, sizeof(buf))) 139 return -1; 140 141 return atol(buf); 142} 143 144long cg_read_key_long(const char *cgroup, const char *control, const char *key) 145{ 146 char buf[PAGE_SIZE]; 147 char *ptr; 148 149 if (cg_read(cgroup, control, buf, sizeof(buf))) 150 return -1; 151 152 ptr = strstr(buf, key); 153 if (!ptr) 154 return -1; 155 156 return atol(ptr + strlen(key)); 157} 158 159long cg_read_lc(const char *cgroup, const char *control) 160{ 161 char buf[PAGE_SIZE]; 162 const char delim[] = "\n"; 163 char *line; 164 long cnt = 0; 165 166 if (cg_read(cgroup, control, buf, sizeof(buf))) 167 return -1; 168 169 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 170 cnt++; 171 172 return cnt; 173} 174 175/* Returns 0 on success, or -errno on failure. */ 176int cg_write(const char *cgroup, const char *control, char *buf) 177{ 178 char path[PATH_MAX]; 179 ssize_t len = strlen(buf), ret; 180 181 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 182 ret = write_text(path, buf, len); 183 return ret == len ? 0 : ret; 184} 185 186int cg_write_numeric(const char *cgroup, const char *control, long value) 187{ 188 char buf[64]; 189 int ret; 190 191 ret = sprintf(buf, "%lu", value); 192 if (ret < 0) 193 return ret; 194 195 return cg_write(cgroup, control, buf); 196} 197 198int cg_find_unified_root(char *root, size_t len) 199{ 200 char buf[10 * PAGE_SIZE]; 201 char *fs, *mount, *type; 202 const char delim[] = "\n\t "; 203 204 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 205 return -1; 206 207 /* 208 * Example: 209 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 210 */ 211 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 212 mount = strtok(NULL, delim); 213 type = strtok(NULL, delim); 214 strtok(NULL, delim); 215 strtok(NULL, delim); 216 strtok(NULL, delim); 217 218 if (strcmp(type, "cgroup2") == 0) { 219 strncpy(root, mount, len); 220 return 0; 221 } 222 } 223 224 return -1; 225} 226 227int cg_create(const char *cgroup) 228{ 229 return mkdir(cgroup, 0755); 230} 231 232int cg_wait_for_proc_count(const char *cgroup, int count) 233{ 234 char buf[10 * PAGE_SIZE] = {0}; 235 int attempts; 236 char *ptr; 237 238 for (attempts = 10; attempts >= 0; attempts--) { 239 int nr = 0; 240 241 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 242 break; 243 244 for (ptr = buf; *ptr; ptr++) 245 if (*ptr == '\n') 246 nr++; 247 248 if (nr >= count) 249 return 0; 250 251 usleep(100000); 252 } 253 254 return -1; 255} 256 257int cg_killall(const char *cgroup) 258{ 259 char buf[PAGE_SIZE]; 260 char *ptr = buf; 261 262 /* If cgroup.kill exists use it. */ 263 if (!cg_write(cgroup, "cgroup.kill", "1")) 264 return 0; 265 266 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 267 return -1; 268 269 while (ptr < buf + sizeof(buf)) { 270 int pid = strtol(ptr, &ptr, 10); 271 272 if (pid == 0) 273 break; 274 if (*ptr) 275 ptr++; 276 else 277 break; 278 if (kill(pid, SIGKILL)) 279 return -1; 280 } 281 282 return 0; 283} 284 285int cg_destroy(const char *cgroup) 286{ 287 int ret; 288 289retry: 290 ret = rmdir(cgroup); 291 if (ret && errno == EBUSY) { 292 cg_killall(cgroup); 293 usleep(100); 294 goto retry; 295 } 296 297 if (ret && errno == ENOENT) 298 ret = 0; 299 300 return ret; 301} 302 303int cg_enter(const char *cgroup, int pid) 304{ 305 char pidbuf[64]; 306 307 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 308 return cg_write(cgroup, "cgroup.procs", pidbuf); 309} 310 311int cg_enter_current(const char *cgroup) 312{ 313 return cg_write(cgroup, "cgroup.procs", "0"); 314} 315 316int cg_enter_current_thread(const char *cgroup) 317{ 318 return cg_write(cgroup, "cgroup.threads", "0"); 319} 320 321int cg_run(const char *cgroup, 322 int (*fn)(const char *cgroup, void *arg), 323 void *arg) 324{ 325 int pid, retcode; 326 327 pid = fork(); 328 if (pid < 0) { 329 return pid; 330 } else if (pid == 0) { 331 char buf[64]; 332 333 snprintf(buf, sizeof(buf), "%d", getpid()); 334 if (cg_write(cgroup, "cgroup.procs", buf)) 335 exit(EXIT_FAILURE); 336 exit(fn(cgroup, arg)); 337 } else { 338 waitpid(pid, &retcode, 0); 339 if (WIFEXITED(retcode)) 340 return WEXITSTATUS(retcode); 341 else 342 return -1; 343 } 344} 345 346pid_t clone_into_cgroup(int cgroup_fd) 347{ 348#ifdef CLONE_ARGS_SIZE_VER2 349 pid_t pid; 350 351 struct __clone_args args = { 352 .flags = CLONE_INTO_CGROUP, 353 .exit_signal = SIGCHLD, 354 .cgroup = cgroup_fd, 355 }; 356 357 pid = sys_clone3(&args, sizeof(struct __clone_args)); 358 /* 359 * Verify that this is a genuine test failure: 360 * ENOSYS -> clone3() not available 361 * E2BIG -> CLONE_INTO_CGROUP not available 362 */ 363 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 364 goto pretend_enosys; 365 366 return pid; 367 368pretend_enosys: 369#endif 370 errno = ENOSYS; 371 return -ENOSYS; 372} 373 374int clone_reap(pid_t pid, int options) 375{ 376 int ret; 377 siginfo_t info = { 378 .si_signo = 0, 379 }; 380 381again: 382 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 383 if (ret < 0) { 384 if (errno == EINTR) 385 goto again; 386 return -1; 387 } 388 389 if (options & WEXITED) { 390 if (WIFEXITED(info.si_status)) 391 return WEXITSTATUS(info.si_status); 392 } 393 394 if (options & WSTOPPED) { 395 if (WIFSTOPPED(info.si_status)) 396 return WSTOPSIG(info.si_status); 397 } 398 399 if (options & WCONTINUED) { 400 if (WIFCONTINUED(info.si_status)) 401 return 0; 402 } 403 404 return -1; 405} 406 407int dirfd_open_opath(const char *dir) 408{ 409 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 410} 411 412#define close_prot_errno(fd) \ 413 if (fd >= 0) { \ 414 int _e_ = errno; \ 415 close(fd); \ 416 errno = _e_; \ 417 } 418 419static int clone_into_cgroup_run_nowait(const char *cgroup, 420 int (*fn)(const char *cgroup, void *arg), 421 void *arg) 422{ 423 int cgroup_fd; 424 pid_t pid; 425 426 cgroup_fd = dirfd_open_opath(cgroup); 427 if (cgroup_fd < 0) 428 return -1; 429 430 pid = clone_into_cgroup(cgroup_fd); 431 close_prot_errno(cgroup_fd); 432 if (pid == 0) 433 exit(fn(cgroup, arg)); 434 435 return pid; 436} 437 438int cg_run_nowait(const char *cgroup, 439 int (*fn)(const char *cgroup, void *arg), 440 void *arg) 441{ 442 int pid; 443 444 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 445 if (pid > 0) 446 return pid; 447 448 /* Genuine test failure. */ 449 if (pid < 0 && errno != ENOSYS) 450 return -1; 451 452 pid = fork(); 453 if (pid == 0) { 454 char buf[64]; 455 456 snprintf(buf, sizeof(buf), "%d", getpid()); 457 if (cg_write(cgroup, "cgroup.procs", buf)) 458 exit(EXIT_FAILURE); 459 exit(fn(cgroup, arg)); 460 } 461 462 return pid; 463} 464 465int get_temp_fd(void) 466{ 467 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 468} 469 470int alloc_pagecache(int fd, size_t size) 471{ 472 char buf[PAGE_SIZE]; 473 struct stat st; 474 int i; 475 476 if (fstat(fd, &st)) 477 goto cleanup; 478 479 size += st.st_size; 480 481 if (ftruncate(fd, size)) 482 goto cleanup; 483 484 for (i = 0; i < size; i += sizeof(buf)) 485 read(fd, buf, sizeof(buf)); 486 487 return 0; 488 489cleanup: 490 return -1; 491} 492 493int alloc_anon(const char *cgroup, void *arg) 494{ 495 size_t size = (unsigned long)arg; 496 char *buf, *ptr; 497 498 buf = malloc(size); 499 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 500 *ptr = 0; 501 502 free(buf); 503 return 0; 504} 505 506int is_swap_enabled(void) 507{ 508 char buf[PAGE_SIZE]; 509 const char delim[] = "\n"; 510 int cnt = 0; 511 char *line; 512 513 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 514 return -1; 515 516 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 517 cnt++; 518 519 return cnt > 1; 520} 521 522int set_oom_adj_score(int pid, int score) 523{ 524 char path[PATH_MAX]; 525 int fd, len; 526 527 sprintf(path, "/proc/%d/oom_score_adj", pid); 528 529 fd = open(path, O_WRONLY | O_APPEND); 530 if (fd < 0) 531 return fd; 532 533 len = dprintf(fd, "%d", score); 534 if (len < 0) { 535 close(fd); 536 return len; 537 } 538 539 close(fd); 540 return 0; 541} 542 543int proc_mount_contains(const char *option) 544{ 545 char buf[4 * PAGE_SIZE]; 546 ssize_t read; 547 548 read = read_text("/proc/mounts", buf, sizeof(buf)); 549 if (read < 0) 550 return read; 551 552 return strstr(buf, option) != NULL; 553} 554 555ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 556{ 557 char path[PATH_MAX]; 558 559 if (!pid) 560 snprintf(path, sizeof(path), "/proc/%s/%s", 561 thread ? "thread-self" : "self", item); 562 else 563 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 564 565 size = read_text(path, buf, size); 566 return size < 0 ? -1 : size; 567} 568 569int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 570{ 571 char buf[PAGE_SIZE]; 572 573 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 574 return -1; 575 576 return strstr(buf, needle) ? 0 : -1; 577} 578 579int clone_into_cgroup_run_wait(const char *cgroup) 580{ 581 int cgroup_fd; 582 pid_t pid; 583 584 cgroup_fd = dirfd_open_opath(cgroup); 585 if (cgroup_fd < 0) 586 return -1; 587 588 pid = clone_into_cgroup(cgroup_fd); 589 close_prot_errno(cgroup_fd); 590 if (pid < 0) 591 return -1; 592 593 if (pid == 0) 594 exit(EXIT_SUCCESS); 595 596 /* 597 * We don't care whether this fails. We only care whether the initial 598 * clone succeeded. 599 */ 600 (void)clone_reap(pid, WEXITED); 601 return 0; 602} 603 604static int __prepare_for_wait(const char *cgroup, const char *filename) 605{ 606 int fd, ret = -1; 607 608 fd = inotify_init1(0); 609 if (fd == -1) 610 return fd; 611 612 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 613 if (ret == -1) { 614 close(fd); 615 fd = -1; 616 } 617 618 return fd; 619} 620 621int cg_prepare_for_wait(const char *cgroup) 622{ 623 return __prepare_for_wait(cgroup, "cgroup.events"); 624} 625 626int memcg_prepare_for_wait(const char *cgroup) 627{ 628 return __prepare_for_wait(cgroup, "memory.events"); 629} 630 631int cg_wait_for(int fd) 632{ 633 int ret = -1; 634 struct pollfd fds = { 635 .fd = fd, 636 .events = POLLIN, 637 }; 638 639 while (true) { 640 ret = poll(&fds, 1, 10000); 641 642 if (ret == -1) { 643 if (errno == EINTR) 644 continue; 645 646 break; 647 } 648 649 if (ret > 0 && fds.revents & POLLIN) { 650 ret = 0; 651 break; 652 } 653 } 654 655 return ret; 656}