builtin-sched.c (92170B)
1// SPDX-License-Identifier: GPL-2.0 2#include "builtin.h" 3#include "perf.h" 4#include "perf-sys.h" 5 6#include "util/cpumap.h" 7#include "util/evlist.h" 8#include "util/evsel.h" 9#include "util/evsel_fprintf.h" 10#include "util/symbol.h" 11#include "util/thread.h" 12#include "util/header.h" 13#include "util/session.h" 14#include "util/tool.h" 15#include "util/cloexec.h" 16#include "util/thread_map.h" 17#include "util/color.h" 18#include "util/stat.h" 19#include "util/string2.h" 20#include "util/callchain.h" 21#include "util/time-utils.h" 22 23#include <subcmd/pager.h> 24#include <subcmd/parse-options.h> 25#include "util/trace-event.h" 26 27#include "util/debug.h" 28#include "util/event.h" 29 30#include <linux/kernel.h> 31#include <linux/log2.h> 32#include <linux/zalloc.h> 33#include <sys/prctl.h> 34#include <sys/resource.h> 35#include <inttypes.h> 36 37#include <errno.h> 38#include <semaphore.h> 39#include <pthread.h> 40#include <math.h> 41#include <api/fs/fs.h> 42#include <perf/cpumap.h> 43#include <linux/time64.h> 44#include <linux/err.h> 45 46#include <linux/ctype.h> 47 48#define PR_SET_NAME 15 /* Set process name */ 49#define MAX_CPUS 4096 50#define COMM_LEN 20 51#define SYM_LEN 129 52#define MAX_PID 1024000 53 54static const char *cpu_list; 55static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 56 57struct sched_atom; 58 59struct task_desc { 60 unsigned long nr; 61 unsigned long pid; 62 char comm[COMM_LEN]; 63 64 unsigned long nr_events; 65 unsigned long curr_event; 66 struct sched_atom **atoms; 67 68 pthread_t thread; 69 sem_t sleep_sem; 70 71 sem_t ready_for_work; 72 sem_t work_done_sem; 73 74 u64 cpu_usage; 75}; 76 77enum sched_event_type { 78 SCHED_EVENT_RUN, 79 SCHED_EVENT_SLEEP, 80 SCHED_EVENT_WAKEUP, 81 SCHED_EVENT_MIGRATION, 82}; 83 84struct sched_atom { 85 enum sched_event_type type; 86 int specific_wait; 87 u64 timestamp; 88 u64 duration; 89 unsigned long nr; 90 sem_t *wait_sem; 91 struct task_desc *wakee; 92}; 93 94#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" 95 96/* task state bitmask, copied from include/linux/sched.h */ 97#define TASK_RUNNING 0 98#define TASK_INTERRUPTIBLE 1 99#define TASK_UNINTERRUPTIBLE 2 100#define __TASK_STOPPED 4 101#define __TASK_TRACED 8 102/* in tsk->exit_state */ 103#define EXIT_DEAD 16 104#define EXIT_ZOMBIE 32 105#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) 106/* in tsk->state again */ 107#define TASK_DEAD 64 108#define TASK_WAKEKILL 128 109#define TASK_WAKING 256 110#define TASK_PARKED 512 111 112enum thread_state { 113 THREAD_SLEEPING = 0, 114 THREAD_WAIT_CPU, 115 THREAD_SCHED_IN, 116 THREAD_IGNORE 117}; 118 119struct work_atom { 120 struct list_head list; 121 enum thread_state state; 122 u64 sched_out_time; 123 u64 wake_up_time; 124 u64 sched_in_time; 125 u64 runtime; 126}; 127 128struct work_atoms { 129 struct list_head work_list; 130 struct thread *thread; 131 struct rb_node node; 132 u64 max_lat; 133 u64 max_lat_start; 134 u64 max_lat_end; 135 u64 total_lat; 136 u64 nb_atoms; 137 u64 total_runtime; 138 int num_merged; 139}; 140 141typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 142 143struct perf_sched; 144 145struct trace_sched_handler { 146 int (*switch_event)(struct perf_sched *sched, struct evsel *evsel, 147 struct perf_sample *sample, struct machine *machine); 148 149 int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel, 150 struct perf_sample *sample, struct machine *machine); 151 152 int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel, 153 struct perf_sample *sample, struct machine *machine); 154 155 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 156 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 157 struct machine *machine); 158 159 int (*migrate_task_event)(struct perf_sched *sched, 160 struct evsel *evsel, 161 struct perf_sample *sample, 162 struct machine *machine); 163}; 164 165#define COLOR_PIDS PERF_COLOR_BLUE 166#define COLOR_CPUS PERF_COLOR_BG_RED 167 168struct perf_sched_map { 169 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 170 struct perf_cpu *comp_cpus; 171 bool comp; 172 struct perf_thread_map *color_pids; 173 const char *color_pids_str; 174 struct perf_cpu_map *color_cpus; 175 const char *color_cpus_str; 176 struct perf_cpu_map *cpus; 177 const char *cpus_str; 178}; 179 180struct perf_sched { 181 struct perf_tool tool; 182 const char *sort_order; 183 unsigned long nr_tasks; 184 struct task_desc **pid_to_task; 185 struct task_desc **tasks; 186 const struct trace_sched_handler *tp_handler; 187 pthread_mutex_t start_work_mutex; 188 pthread_mutex_t work_done_wait_mutex; 189 int profile_cpu; 190/* 191 * Track the current task - that way we can know whether there's any 192 * weird events, such as a task being switched away that is not current. 193 */ 194 struct perf_cpu max_cpu; 195 u32 curr_pid[MAX_CPUS]; 196 struct thread *curr_thread[MAX_CPUS]; 197 char next_shortname1; 198 char next_shortname2; 199 unsigned int replay_repeat; 200 unsigned long nr_run_events; 201 unsigned long nr_sleep_events; 202 unsigned long nr_wakeup_events; 203 unsigned long nr_sleep_corrections; 204 unsigned long nr_run_events_optimized; 205 unsigned long targetless_wakeups; 206 unsigned long multitarget_wakeups; 207 unsigned long nr_runs; 208 unsigned long nr_timestamps; 209 unsigned long nr_unordered_timestamps; 210 unsigned long nr_context_switch_bugs; 211 unsigned long nr_events; 212 unsigned long nr_lost_chunks; 213 unsigned long nr_lost_events; 214 u64 run_measurement_overhead; 215 u64 sleep_measurement_overhead; 216 u64 start_time; 217 u64 cpu_usage; 218 u64 runavg_cpu_usage; 219 u64 parent_cpu_usage; 220 u64 runavg_parent_cpu_usage; 221 u64 sum_runtime; 222 u64 sum_fluct; 223 u64 run_avg; 224 u64 all_runtime; 225 u64 all_count; 226 u64 cpu_last_switched[MAX_CPUS]; 227 struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root; 228 struct list_head sort_list, cmp_pid; 229 bool force; 230 bool skip_merge; 231 struct perf_sched_map map; 232 233 /* options for timehist command */ 234 bool summary; 235 bool summary_only; 236 bool idle_hist; 237 bool show_callchain; 238 unsigned int max_stack; 239 bool show_cpu_visual; 240 bool show_wakeups; 241 bool show_next; 242 bool show_migrations; 243 bool show_state; 244 u64 skipped_samples; 245 const char *time_str; 246 struct perf_time_interval ptime; 247 struct perf_time_interval hist_time; 248}; 249 250/* per thread run time data */ 251struct thread_runtime { 252 u64 last_time; /* time of previous sched in/out event */ 253 u64 dt_run; /* run time */ 254 u64 dt_sleep; /* time between CPU access by sleep (off cpu) */ 255 u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ 256 u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ 257 u64 dt_delay; /* time between wakeup and sched-in */ 258 u64 ready_to_run; /* time of wakeup */ 259 260 struct stats run_stats; 261 u64 total_run_time; 262 u64 total_sleep_time; 263 u64 total_iowait_time; 264 u64 total_preempt_time; 265 u64 total_delay_time; 266 267 int last_state; 268 269 char shortname[3]; 270 bool comm_changed; 271 272 u64 migrations; 273}; 274 275/* per event run time data */ 276struct evsel_runtime { 277 u64 *last_time; /* time this event was last seen per cpu */ 278 u32 ncpu; /* highest cpu slot allocated */ 279}; 280 281/* per cpu idle time data */ 282struct idle_thread_runtime { 283 struct thread_runtime tr; 284 struct thread *last_thread; 285 struct rb_root_cached sorted_root; 286 struct callchain_root callchain; 287 struct callchain_cursor cursor; 288}; 289 290/* track idle times per cpu */ 291static struct thread **idle_threads; 292static int idle_max_cpu; 293static char idle_comm[] = "<idle>"; 294 295static u64 get_nsecs(void) 296{ 297 struct timespec ts; 298 299 clock_gettime(CLOCK_MONOTONIC, &ts); 300 301 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 302} 303 304static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 305{ 306 u64 T0 = get_nsecs(), T1; 307 308 do { 309 T1 = get_nsecs(); 310 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 311} 312 313static void sleep_nsecs(u64 nsecs) 314{ 315 struct timespec ts; 316 317 ts.tv_nsec = nsecs % 999999999; 318 ts.tv_sec = nsecs / 999999999; 319 320 nanosleep(&ts, NULL); 321} 322 323static void calibrate_run_measurement_overhead(struct perf_sched *sched) 324{ 325 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 326 int i; 327 328 for (i = 0; i < 10; i++) { 329 T0 = get_nsecs(); 330 burn_nsecs(sched, 0); 331 T1 = get_nsecs(); 332 delta = T1-T0; 333 min_delta = min(min_delta, delta); 334 } 335 sched->run_measurement_overhead = min_delta; 336 337 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 338} 339 340static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 341{ 342 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 343 int i; 344 345 for (i = 0; i < 10; i++) { 346 T0 = get_nsecs(); 347 sleep_nsecs(10000); 348 T1 = get_nsecs(); 349 delta = T1-T0; 350 min_delta = min(min_delta, delta); 351 } 352 min_delta -= 10000; 353 sched->sleep_measurement_overhead = min_delta; 354 355 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 356} 357 358static struct sched_atom * 359get_new_event(struct task_desc *task, u64 timestamp) 360{ 361 struct sched_atom *event = zalloc(sizeof(*event)); 362 unsigned long idx = task->nr_events; 363 size_t size; 364 365 event->timestamp = timestamp; 366 event->nr = idx; 367 368 task->nr_events++; 369 size = sizeof(struct sched_atom *) * task->nr_events; 370 task->atoms = realloc(task->atoms, size); 371 BUG_ON(!task->atoms); 372 373 task->atoms[idx] = event; 374 375 return event; 376} 377 378static struct sched_atom *last_event(struct task_desc *task) 379{ 380 if (!task->nr_events) 381 return NULL; 382 383 return task->atoms[task->nr_events - 1]; 384} 385 386static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 387 u64 timestamp, u64 duration) 388{ 389 struct sched_atom *event, *curr_event = last_event(task); 390 391 /* 392 * optimize an existing RUN event by merging this one 393 * to it: 394 */ 395 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 396 sched->nr_run_events_optimized++; 397 curr_event->duration += duration; 398 return; 399 } 400 401 event = get_new_event(task, timestamp); 402 403 event->type = SCHED_EVENT_RUN; 404 event->duration = duration; 405 406 sched->nr_run_events++; 407} 408 409static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 410 u64 timestamp, struct task_desc *wakee) 411{ 412 struct sched_atom *event, *wakee_event; 413 414 event = get_new_event(task, timestamp); 415 event->type = SCHED_EVENT_WAKEUP; 416 event->wakee = wakee; 417 418 wakee_event = last_event(wakee); 419 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 420 sched->targetless_wakeups++; 421 return; 422 } 423 if (wakee_event->wait_sem) { 424 sched->multitarget_wakeups++; 425 return; 426 } 427 428 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 429 sem_init(wakee_event->wait_sem, 0, 0); 430 wakee_event->specific_wait = 1; 431 event->wait_sem = wakee_event->wait_sem; 432 433 sched->nr_wakeup_events++; 434} 435 436static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 437 u64 timestamp, u64 task_state __maybe_unused) 438{ 439 struct sched_atom *event = get_new_event(task, timestamp); 440 441 event->type = SCHED_EVENT_SLEEP; 442 443 sched->nr_sleep_events++; 444} 445 446static struct task_desc *register_pid(struct perf_sched *sched, 447 unsigned long pid, const char *comm) 448{ 449 struct task_desc *task; 450 static int pid_max; 451 452 if (sched->pid_to_task == NULL) { 453 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 454 pid_max = MAX_PID; 455 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 456 } 457 if (pid >= (unsigned long)pid_max) { 458 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 459 sizeof(struct task_desc *))) == NULL); 460 while (pid >= (unsigned long)pid_max) 461 sched->pid_to_task[pid_max++] = NULL; 462 } 463 464 task = sched->pid_to_task[pid]; 465 466 if (task) 467 return task; 468 469 task = zalloc(sizeof(*task)); 470 task->pid = pid; 471 task->nr = sched->nr_tasks; 472 strcpy(task->comm, comm); 473 /* 474 * every task starts in sleeping state - this gets ignored 475 * if there's no wakeup pointing to this sleep state: 476 */ 477 add_sched_event_sleep(sched, task, 0, 0); 478 479 sched->pid_to_task[pid] = task; 480 sched->nr_tasks++; 481 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 482 BUG_ON(!sched->tasks); 483 sched->tasks[task->nr] = task; 484 485 if (verbose > 0) 486 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 487 488 return task; 489} 490 491 492static void print_task_traces(struct perf_sched *sched) 493{ 494 struct task_desc *task; 495 unsigned long i; 496 497 for (i = 0; i < sched->nr_tasks; i++) { 498 task = sched->tasks[i]; 499 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 500 task->nr, task->comm, task->pid, task->nr_events); 501 } 502} 503 504static void add_cross_task_wakeups(struct perf_sched *sched) 505{ 506 struct task_desc *task1, *task2; 507 unsigned long i, j; 508 509 for (i = 0; i < sched->nr_tasks; i++) { 510 task1 = sched->tasks[i]; 511 j = i + 1; 512 if (j == sched->nr_tasks) 513 j = 0; 514 task2 = sched->tasks[j]; 515 add_sched_event_wakeup(sched, task1, 0, task2); 516 } 517} 518 519static void perf_sched__process_event(struct perf_sched *sched, 520 struct sched_atom *atom) 521{ 522 int ret = 0; 523 524 switch (atom->type) { 525 case SCHED_EVENT_RUN: 526 burn_nsecs(sched, atom->duration); 527 break; 528 case SCHED_EVENT_SLEEP: 529 if (atom->wait_sem) 530 ret = sem_wait(atom->wait_sem); 531 BUG_ON(ret); 532 break; 533 case SCHED_EVENT_WAKEUP: 534 if (atom->wait_sem) 535 ret = sem_post(atom->wait_sem); 536 BUG_ON(ret); 537 break; 538 case SCHED_EVENT_MIGRATION: 539 break; 540 default: 541 BUG_ON(1); 542 } 543} 544 545static u64 get_cpu_usage_nsec_parent(void) 546{ 547 struct rusage ru; 548 u64 sum; 549 int err; 550 551 err = getrusage(RUSAGE_SELF, &ru); 552 BUG_ON(err); 553 554 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 555 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 556 557 return sum; 558} 559 560static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 561{ 562 struct perf_event_attr attr; 563 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 564 int fd; 565 struct rlimit limit; 566 bool need_privilege = false; 567 568 memset(&attr, 0, sizeof(attr)); 569 570 attr.type = PERF_TYPE_SOFTWARE; 571 attr.config = PERF_COUNT_SW_TASK_CLOCK; 572 573force_again: 574 fd = sys_perf_event_open(&attr, 0, -1, -1, 575 perf_event_open_cloexec_flag()); 576 577 if (fd < 0) { 578 if (errno == EMFILE) { 579 if (sched->force) { 580 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 581 limit.rlim_cur += sched->nr_tasks - cur_task; 582 if (limit.rlim_cur > limit.rlim_max) { 583 limit.rlim_max = limit.rlim_cur; 584 need_privilege = true; 585 } 586 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 587 if (need_privilege && errno == EPERM) 588 strcpy(info, "Need privilege\n"); 589 } else 590 goto force_again; 591 } else 592 strcpy(info, "Have a try with -f option\n"); 593 } 594 pr_err("Error: sys_perf_event_open() syscall returned " 595 "with %d (%s)\n%s", fd, 596 str_error_r(errno, sbuf, sizeof(sbuf)), info); 597 exit(EXIT_FAILURE); 598 } 599 return fd; 600} 601 602static u64 get_cpu_usage_nsec_self(int fd) 603{ 604 u64 runtime; 605 int ret; 606 607 ret = read(fd, &runtime, sizeof(runtime)); 608 BUG_ON(ret != sizeof(runtime)); 609 610 return runtime; 611} 612 613struct sched_thread_parms { 614 struct task_desc *task; 615 struct perf_sched *sched; 616 int fd; 617}; 618 619static void *thread_func(void *ctx) 620{ 621 struct sched_thread_parms *parms = ctx; 622 struct task_desc *this_task = parms->task; 623 struct perf_sched *sched = parms->sched; 624 u64 cpu_usage_0, cpu_usage_1; 625 unsigned long i, ret; 626 char comm2[22]; 627 int fd = parms->fd; 628 629 zfree(&parms); 630 631 sprintf(comm2, ":%s", this_task->comm); 632 prctl(PR_SET_NAME, comm2); 633 if (fd < 0) 634 return NULL; 635again: 636 ret = sem_post(&this_task->ready_for_work); 637 BUG_ON(ret); 638 ret = pthread_mutex_lock(&sched->start_work_mutex); 639 BUG_ON(ret); 640 ret = pthread_mutex_unlock(&sched->start_work_mutex); 641 BUG_ON(ret); 642 643 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 644 645 for (i = 0; i < this_task->nr_events; i++) { 646 this_task->curr_event = i; 647 perf_sched__process_event(sched, this_task->atoms[i]); 648 } 649 650 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 651 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 652 ret = sem_post(&this_task->work_done_sem); 653 BUG_ON(ret); 654 655 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 656 BUG_ON(ret); 657 ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); 658 BUG_ON(ret); 659 660 goto again; 661} 662 663static void create_tasks(struct perf_sched *sched) 664{ 665 struct task_desc *task; 666 pthread_attr_t attr; 667 unsigned long i; 668 int err; 669 670 err = pthread_attr_init(&attr); 671 BUG_ON(err); 672 err = pthread_attr_setstacksize(&attr, 673 (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); 674 BUG_ON(err); 675 err = pthread_mutex_lock(&sched->start_work_mutex); 676 BUG_ON(err); 677 err = pthread_mutex_lock(&sched->work_done_wait_mutex); 678 BUG_ON(err); 679 for (i = 0; i < sched->nr_tasks; i++) { 680 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 681 BUG_ON(parms == NULL); 682 parms->task = task = sched->tasks[i]; 683 parms->sched = sched; 684 parms->fd = self_open_counters(sched, i); 685 sem_init(&task->sleep_sem, 0, 0); 686 sem_init(&task->ready_for_work, 0, 0); 687 sem_init(&task->work_done_sem, 0, 0); 688 task->curr_event = 0; 689 err = pthread_create(&task->thread, &attr, thread_func, parms); 690 BUG_ON(err); 691 } 692} 693 694static void wait_for_tasks(struct perf_sched *sched) 695{ 696 u64 cpu_usage_0, cpu_usage_1; 697 struct task_desc *task; 698 unsigned long i, ret; 699 700 sched->start_time = get_nsecs(); 701 sched->cpu_usage = 0; 702 pthread_mutex_unlock(&sched->work_done_wait_mutex); 703 704 for (i = 0; i < sched->nr_tasks; i++) { 705 task = sched->tasks[i]; 706 ret = sem_wait(&task->ready_for_work); 707 BUG_ON(ret); 708 sem_init(&task->ready_for_work, 0, 0); 709 } 710 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 711 BUG_ON(ret); 712 713 cpu_usage_0 = get_cpu_usage_nsec_parent(); 714 715 pthread_mutex_unlock(&sched->start_work_mutex); 716 717 for (i = 0; i < sched->nr_tasks; i++) { 718 task = sched->tasks[i]; 719 ret = sem_wait(&task->work_done_sem); 720 BUG_ON(ret); 721 sem_init(&task->work_done_sem, 0, 0); 722 sched->cpu_usage += task->cpu_usage; 723 task->cpu_usage = 0; 724 } 725 726 cpu_usage_1 = get_cpu_usage_nsec_parent(); 727 if (!sched->runavg_cpu_usage) 728 sched->runavg_cpu_usage = sched->cpu_usage; 729 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 730 731 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 732 if (!sched->runavg_parent_cpu_usage) 733 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 734 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 735 sched->parent_cpu_usage)/sched->replay_repeat; 736 737 ret = pthread_mutex_lock(&sched->start_work_mutex); 738 BUG_ON(ret); 739 740 for (i = 0; i < sched->nr_tasks; i++) { 741 task = sched->tasks[i]; 742 sem_init(&task->sleep_sem, 0, 0); 743 task->curr_event = 0; 744 } 745} 746 747static void run_one_test(struct perf_sched *sched) 748{ 749 u64 T0, T1, delta, avg_delta, fluct; 750 751 T0 = get_nsecs(); 752 wait_for_tasks(sched); 753 T1 = get_nsecs(); 754 755 delta = T1 - T0; 756 sched->sum_runtime += delta; 757 sched->nr_runs++; 758 759 avg_delta = sched->sum_runtime / sched->nr_runs; 760 if (delta < avg_delta) 761 fluct = avg_delta - delta; 762 else 763 fluct = delta - avg_delta; 764 sched->sum_fluct += fluct; 765 if (!sched->run_avg) 766 sched->run_avg = delta; 767 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 768 769 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 770 771 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 772 773 printf("cpu: %0.2f / %0.2f", 774 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 775 776#if 0 777 /* 778 * rusage statistics done by the parent, these are less 779 * accurate than the sched->sum_exec_runtime based statistics: 780 */ 781 printf(" [%0.2f / %0.2f]", 782 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 783 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 784#endif 785 786 printf("\n"); 787 788 if (sched->nr_sleep_corrections) 789 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 790 sched->nr_sleep_corrections = 0; 791} 792 793static void test_calibrations(struct perf_sched *sched) 794{ 795 u64 T0, T1; 796 797 T0 = get_nsecs(); 798 burn_nsecs(sched, NSEC_PER_MSEC); 799 T1 = get_nsecs(); 800 801 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 802 803 T0 = get_nsecs(); 804 sleep_nsecs(NSEC_PER_MSEC); 805 T1 = get_nsecs(); 806 807 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 808} 809 810static int 811replay_wakeup_event(struct perf_sched *sched, 812 struct evsel *evsel, struct perf_sample *sample, 813 struct machine *machine __maybe_unused) 814{ 815 const char *comm = evsel__strval(evsel, sample, "comm"); 816 const u32 pid = evsel__intval(evsel, sample, "pid"); 817 struct task_desc *waker, *wakee; 818 819 if (verbose > 0) { 820 printf("sched_wakeup event %p\n", evsel); 821 822 printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); 823 } 824 825 waker = register_pid(sched, sample->tid, "<unknown>"); 826 wakee = register_pid(sched, pid, comm); 827 828 add_sched_event_wakeup(sched, waker, sample->time, wakee); 829 return 0; 830} 831 832static int replay_switch_event(struct perf_sched *sched, 833 struct evsel *evsel, 834 struct perf_sample *sample, 835 struct machine *machine __maybe_unused) 836{ 837 const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"), 838 *next_comm = evsel__strval(evsel, sample, "next_comm"); 839 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 840 next_pid = evsel__intval(evsel, sample, "next_pid"); 841 const u64 prev_state = evsel__intval(evsel, sample, "prev_state"); 842 struct task_desc *prev, __maybe_unused *next; 843 u64 timestamp0, timestamp = sample->time; 844 int cpu = sample->cpu; 845 s64 delta; 846 847 if (verbose > 0) 848 printf("sched_switch event %p\n", evsel); 849 850 if (cpu >= MAX_CPUS || cpu < 0) 851 return 0; 852 853 timestamp0 = sched->cpu_last_switched[cpu]; 854 if (timestamp0) 855 delta = timestamp - timestamp0; 856 else 857 delta = 0; 858 859 if (delta < 0) { 860 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 861 return -1; 862 } 863 864 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 865 prev_comm, prev_pid, next_comm, next_pid, delta); 866 867 prev = register_pid(sched, prev_pid, prev_comm); 868 next = register_pid(sched, next_pid, next_comm); 869 870 sched->cpu_last_switched[cpu] = timestamp; 871 872 add_sched_event_run(sched, prev, timestamp, delta); 873 add_sched_event_sleep(sched, prev, timestamp, prev_state); 874 875 return 0; 876} 877 878static int replay_fork_event(struct perf_sched *sched, 879 union perf_event *event, 880 struct machine *machine) 881{ 882 struct thread *child, *parent; 883 884 child = machine__findnew_thread(machine, event->fork.pid, 885 event->fork.tid); 886 parent = machine__findnew_thread(machine, event->fork.ppid, 887 event->fork.ptid); 888 889 if (child == NULL || parent == NULL) { 890 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 891 child, parent); 892 goto out_put; 893 } 894 895 if (verbose > 0) { 896 printf("fork event\n"); 897 printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); 898 printf("... child: %s/%d\n", thread__comm_str(child), child->tid); 899 } 900 901 register_pid(sched, parent->tid, thread__comm_str(parent)); 902 register_pid(sched, child->tid, thread__comm_str(child)); 903out_put: 904 thread__put(child); 905 thread__put(parent); 906 return 0; 907} 908 909struct sort_dimension { 910 const char *name; 911 sort_fn_t cmp; 912 struct list_head list; 913}; 914 915/* 916 * handle runtime stats saved per thread 917 */ 918static struct thread_runtime *thread__init_runtime(struct thread *thread) 919{ 920 struct thread_runtime *r; 921 922 r = zalloc(sizeof(struct thread_runtime)); 923 if (!r) 924 return NULL; 925 926 init_stats(&r->run_stats); 927 thread__set_priv(thread, r); 928 929 return r; 930} 931 932static struct thread_runtime *thread__get_runtime(struct thread *thread) 933{ 934 struct thread_runtime *tr; 935 936 tr = thread__priv(thread); 937 if (tr == NULL) { 938 tr = thread__init_runtime(thread); 939 if (tr == NULL) 940 pr_debug("Failed to malloc memory for runtime data.\n"); 941 } 942 943 return tr; 944} 945 946static int 947thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 948{ 949 struct sort_dimension *sort; 950 int ret = 0; 951 952 BUG_ON(list_empty(list)); 953 954 list_for_each_entry(sort, list, list) { 955 ret = sort->cmp(l, r); 956 if (ret) 957 return ret; 958 } 959 960 return ret; 961} 962 963static struct work_atoms * 964thread_atoms_search(struct rb_root_cached *root, struct thread *thread, 965 struct list_head *sort_list) 966{ 967 struct rb_node *node = root->rb_root.rb_node; 968 struct work_atoms key = { .thread = thread }; 969 970 while (node) { 971 struct work_atoms *atoms; 972 int cmp; 973 974 atoms = container_of(node, struct work_atoms, node); 975 976 cmp = thread_lat_cmp(sort_list, &key, atoms); 977 if (cmp > 0) 978 node = node->rb_left; 979 else if (cmp < 0) 980 node = node->rb_right; 981 else { 982 BUG_ON(thread != atoms->thread); 983 return atoms; 984 } 985 } 986 return NULL; 987} 988 989static void 990__thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data, 991 struct list_head *sort_list) 992{ 993 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 994 bool leftmost = true; 995 996 while (*new) { 997 struct work_atoms *this; 998 int cmp; 999 1000 this = container_of(*new, struct work_atoms, node); 1001 parent = *new; 1002 1003 cmp = thread_lat_cmp(sort_list, data, this); 1004 1005 if (cmp > 0) 1006 new = &((*new)->rb_left); 1007 else { 1008 new = &((*new)->rb_right); 1009 leftmost = false; 1010 } 1011 } 1012 1013 rb_link_node(&data->node, parent, new); 1014 rb_insert_color_cached(&data->node, root, leftmost); 1015} 1016 1017static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 1018{ 1019 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 1020 if (!atoms) { 1021 pr_err("No memory at %s\n", __func__); 1022 return -1; 1023 } 1024 1025 atoms->thread = thread__get(thread); 1026 INIT_LIST_HEAD(&atoms->work_list); 1027 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 1028 return 0; 1029} 1030 1031static char sched_out_state(u64 prev_state) 1032{ 1033 const char *str = TASK_STATE_TO_CHAR_STR; 1034 1035 return str[prev_state]; 1036} 1037 1038static int 1039add_sched_out_event(struct work_atoms *atoms, 1040 char run_state, 1041 u64 timestamp) 1042{ 1043 struct work_atom *atom = zalloc(sizeof(*atom)); 1044 if (!atom) { 1045 pr_err("Non memory at %s", __func__); 1046 return -1; 1047 } 1048 1049 atom->sched_out_time = timestamp; 1050 1051 if (run_state == 'R') { 1052 atom->state = THREAD_WAIT_CPU; 1053 atom->wake_up_time = atom->sched_out_time; 1054 } 1055 1056 list_add_tail(&atom->list, &atoms->work_list); 1057 return 0; 1058} 1059 1060static void 1061add_runtime_event(struct work_atoms *atoms, u64 delta, 1062 u64 timestamp __maybe_unused) 1063{ 1064 struct work_atom *atom; 1065 1066 BUG_ON(list_empty(&atoms->work_list)); 1067 1068 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1069 1070 atom->runtime += delta; 1071 atoms->total_runtime += delta; 1072} 1073 1074static void 1075add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 1076{ 1077 struct work_atom *atom; 1078 u64 delta; 1079 1080 if (list_empty(&atoms->work_list)) 1081 return; 1082 1083 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1084 1085 if (atom->state != THREAD_WAIT_CPU) 1086 return; 1087 1088 if (timestamp < atom->wake_up_time) { 1089 atom->state = THREAD_IGNORE; 1090 return; 1091 } 1092 1093 atom->state = THREAD_SCHED_IN; 1094 atom->sched_in_time = timestamp; 1095 1096 delta = atom->sched_in_time - atom->wake_up_time; 1097 atoms->total_lat += delta; 1098 if (delta > atoms->max_lat) { 1099 atoms->max_lat = delta; 1100 atoms->max_lat_start = atom->wake_up_time; 1101 atoms->max_lat_end = timestamp; 1102 } 1103 atoms->nb_atoms++; 1104} 1105 1106static int latency_switch_event(struct perf_sched *sched, 1107 struct evsel *evsel, 1108 struct perf_sample *sample, 1109 struct machine *machine) 1110{ 1111 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1112 next_pid = evsel__intval(evsel, sample, "next_pid"); 1113 const u64 prev_state = evsel__intval(evsel, sample, "prev_state"); 1114 struct work_atoms *out_events, *in_events; 1115 struct thread *sched_out, *sched_in; 1116 u64 timestamp0, timestamp = sample->time; 1117 int cpu = sample->cpu, err = -1; 1118 s64 delta; 1119 1120 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1121 1122 timestamp0 = sched->cpu_last_switched[cpu]; 1123 sched->cpu_last_switched[cpu] = timestamp; 1124 if (timestamp0) 1125 delta = timestamp - timestamp0; 1126 else 1127 delta = 0; 1128 1129 if (delta < 0) { 1130 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1131 return -1; 1132 } 1133 1134 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1135 sched_in = machine__findnew_thread(machine, -1, next_pid); 1136 if (sched_out == NULL || sched_in == NULL) 1137 goto out_put; 1138 1139 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1140 if (!out_events) { 1141 if (thread_atoms_insert(sched, sched_out)) 1142 goto out_put; 1143 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1144 if (!out_events) { 1145 pr_err("out-event: Internal tree error"); 1146 goto out_put; 1147 } 1148 } 1149 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) 1150 return -1; 1151 1152 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1153 if (!in_events) { 1154 if (thread_atoms_insert(sched, sched_in)) 1155 goto out_put; 1156 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1157 if (!in_events) { 1158 pr_err("in-event: Internal tree error"); 1159 goto out_put; 1160 } 1161 /* 1162 * Take came in we have not heard about yet, 1163 * add in an initial atom in runnable state: 1164 */ 1165 if (add_sched_out_event(in_events, 'R', timestamp)) 1166 goto out_put; 1167 } 1168 add_sched_in_event(in_events, timestamp); 1169 err = 0; 1170out_put: 1171 thread__put(sched_out); 1172 thread__put(sched_in); 1173 return err; 1174} 1175 1176static int latency_runtime_event(struct perf_sched *sched, 1177 struct evsel *evsel, 1178 struct perf_sample *sample, 1179 struct machine *machine) 1180{ 1181 const u32 pid = evsel__intval(evsel, sample, "pid"); 1182 const u64 runtime = evsel__intval(evsel, sample, "runtime"); 1183 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1184 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1185 u64 timestamp = sample->time; 1186 int cpu = sample->cpu, err = -1; 1187 1188 if (thread == NULL) 1189 return -1; 1190 1191 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1192 if (!atoms) { 1193 if (thread_atoms_insert(sched, thread)) 1194 goto out_put; 1195 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1196 if (!atoms) { 1197 pr_err("in-event: Internal tree error"); 1198 goto out_put; 1199 } 1200 if (add_sched_out_event(atoms, 'R', timestamp)) 1201 goto out_put; 1202 } 1203 1204 add_runtime_event(atoms, runtime, timestamp); 1205 err = 0; 1206out_put: 1207 thread__put(thread); 1208 return err; 1209} 1210 1211static int latency_wakeup_event(struct perf_sched *sched, 1212 struct evsel *evsel, 1213 struct perf_sample *sample, 1214 struct machine *machine) 1215{ 1216 const u32 pid = evsel__intval(evsel, sample, "pid"); 1217 struct work_atoms *atoms; 1218 struct work_atom *atom; 1219 struct thread *wakee; 1220 u64 timestamp = sample->time; 1221 int err = -1; 1222 1223 wakee = machine__findnew_thread(machine, -1, pid); 1224 if (wakee == NULL) 1225 return -1; 1226 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1227 if (!atoms) { 1228 if (thread_atoms_insert(sched, wakee)) 1229 goto out_put; 1230 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1231 if (!atoms) { 1232 pr_err("wakeup-event: Internal tree error"); 1233 goto out_put; 1234 } 1235 if (add_sched_out_event(atoms, 'S', timestamp)) 1236 goto out_put; 1237 } 1238 1239 BUG_ON(list_empty(&atoms->work_list)); 1240 1241 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1242 1243 /* 1244 * As we do not guarantee the wakeup event happens when 1245 * task is out of run queue, also may happen when task is 1246 * on run queue and wakeup only change ->state to TASK_RUNNING, 1247 * then we should not set the ->wake_up_time when wake up a 1248 * task which is on run queue. 1249 * 1250 * You WILL be missing events if you've recorded only 1251 * one CPU, or are only looking at only one, so don't 1252 * skip in this case. 1253 */ 1254 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1255 goto out_ok; 1256 1257 sched->nr_timestamps++; 1258 if (atom->sched_out_time > timestamp) { 1259 sched->nr_unordered_timestamps++; 1260 goto out_ok; 1261 } 1262 1263 atom->state = THREAD_WAIT_CPU; 1264 atom->wake_up_time = timestamp; 1265out_ok: 1266 err = 0; 1267out_put: 1268 thread__put(wakee); 1269 return err; 1270} 1271 1272static int latency_migrate_task_event(struct perf_sched *sched, 1273 struct evsel *evsel, 1274 struct perf_sample *sample, 1275 struct machine *machine) 1276{ 1277 const u32 pid = evsel__intval(evsel, sample, "pid"); 1278 u64 timestamp = sample->time; 1279 struct work_atoms *atoms; 1280 struct work_atom *atom; 1281 struct thread *migrant; 1282 int err = -1; 1283 1284 /* 1285 * Only need to worry about migration when profiling one CPU. 1286 */ 1287 if (sched->profile_cpu == -1) 1288 return 0; 1289 1290 migrant = machine__findnew_thread(machine, -1, pid); 1291 if (migrant == NULL) 1292 return -1; 1293 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1294 if (!atoms) { 1295 if (thread_atoms_insert(sched, migrant)) 1296 goto out_put; 1297 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1298 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1299 if (!atoms) { 1300 pr_err("migration-event: Internal tree error"); 1301 goto out_put; 1302 } 1303 if (add_sched_out_event(atoms, 'R', timestamp)) 1304 goto out_put; 1305 } 1306 1307 BUG_ON(list_empty(&atoms->work_list)); 1308 1309 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1310 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1311 1312 sched->nr_timestamps++; 1313 1314 if (atom->sched_out_time > timestamp) 1315 sched->nr_unordered_timestamps++; 1316 err = 0; 1317out_put: 1318 thread__put(migrant); 1319 return err; 1320} 1321 1322static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1323{ 1324 int i; 1325 int ret; 1326 u64 avg; 1327 char max_lat_start[32], max_lat_end[32]; 1328 1329 if (!work_list->nb_atoms) 1330 return; 1331 /* 1332 * Ignore idle threads: 1333 */ 1334 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1335 return; 1336 1337 sched->all_runtime += work_list->total_runtime; 1338 sched->all_count += work_list->nb_atoms; 1339 1340 if (work_list->num_merged > 1) 1341 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged); 1342 else 1343 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); 1344 1345 for (i = 0; i < 24 - ret; i++) 1346 printf(" "); 1347 1348 avg = work_list->total_lat / work_list->nb_atoms; 1349 timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start)); 1350 timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end)); 1351 1352 printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n", 1353 (double)work_list->total_runtime / NSEC_PER_MSEC, 1354 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1355 (double)work_list->max_lat / NSEC_PER_MSEC, 1356 max_lat_start, max_lat_end); 1357} 1358 1359static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1360{ 1361 if (l->thread == r->thread) 1362 return 0; 1363 if (l->thread->tid < r->thread->tid) 1364 return -1; 1365 if (l->thread->tid > r->thread->tid) 1366 return 1; 1367 return (int)(l->thread - r->thread); 1368} 1369 1370static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1371{ 1372 u64 avgl, avgr; 1373 1374 if (!l->nb_atoms) 1375 return -1; 1376 1377 if (!r->nb_atoms) 1378 return 1; 1379 1380 avgl = l->total_lat / l->nb_atoms; 1381 avgr = r->total_lat / r->nb_atoms; 1382 1383 if (avgl < avgr) 1384 return -1; 1385 if (avgl > avgr) 1386 return 1; 1387 1388 return 0; 1389} 1390 1391static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1392{ 1393 if (l->max_lat < r->max_lat) 1394 return -1; 1395 if (l->max_lat > r->max_lat) 1396 return 1; 1397 1398 return 0; 1399} 1400 1401static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1402{ 1403 if (l->nb_atoms < r->nb_atoms) 1404 return -1; 1405 if (l->nb_atoms > r->nb_atoms) 1406 return 1; 1407 1408 return 0; 1409} 1410 1411static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1412{ 1413 if (l->total_runtime < r->total_runtime) 1414 return -1; 1415 if (l->total_runtime > r->total_runtime) 1416 return 1; 1417 1418 return 0; 1419} 1420 1421static int sort_dimension__add(const char *tok, struct list_head *list) 1422{ 1423 size_t i; 1424 static struct sort_dimension avg_sort_dimension = { 1425 .name = "avg", 1426 .cmp = avg_cmp, 1427 }; 1428 static struct sort_dimension max_sort_dimension = { 1429 .name = "max", 1430 .cmp = max_cmp, 1431 }; 1432 static struct sort_dimension pid_sort_dimension = { 1433 .name = "pid", 1434 .cmp = pid_cmp, 1435 }; 1436 static struct sort_dimension runtime_sort_dimension = { 1437 .name = "runtime", 1438 .cmp = runtime_cmp, 1439 }; 1440 static struct sort_dimension switch_sort_dimension = { 1441 .name = "switch", 1442 .cmp = switch_cmp, 1443 }; 1444 struct sort_dimension *available_sorts[] = { 1445 &pid_sort_dimension, 1446 &avg_sort_dimension, 1447 &max_sort_dimension, 1448 &switch_sort_dimension, 1449 &runtime_sort_dimension, 1450 }; 1451 1452 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1453 if (!strcmp(available_sorts[i]->name, tok)) { 1454 list_add_tail(&available_sorts[i]->list, list); 1455 1456 return 0; 1457 } 1458 } 1459 1460 return -1; 1461} 1462 1463static void perf_sched__sort_lat(struct perf_sched *sched) 1464{ 1465 struct rb_node *node; 1466 struct rb_root_cached *root = &sched->atom_root; 1467again: 1468 for (;;) { 1469 struct work_atoms *data; 1470 node = rb_first_cached(root); 1471 if (!node) 1472 break; 1473 1474 rb_erase_cached(node, root); 1475 data = rb_entry(node, struct work_atoms, node); 1476 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1477 } 1478 if (root == &sched->atom_root) { 1479 root = &sched->merged_atom_root; 1480 goto again; 1481 } 1482} 1483 1484static int process_sched_wakeup_event(struct perf_tool *tool, 1485 struct evsel *evsel, 1486 struct perf_sample *sample, 1487 struct machine *machine) 1488{ 1489 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1490 1491 if (sched->tp_handler->wakeup_event) 1492 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1493 1494 return 0; 1495} 1496 1497union map_priv { 1498 void *ptr; 1499 bool color; 1500}; 1501 1502static bool thread__has_color(struct thread *thread) 1503{ 1504 union map_priv priv = { 1505 .ptr = thread__priv(thread), 1506 }; 1507 1508 return priv.color; 1509} 1510 1511static struct thread* 1512map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1513{ 1514 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1515 union map_priv priv = { 1516 .color = false, 1517 }; 1518 1519 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1520 return thread; 1521 1522 if (thread_map__has(sched->map.color_pids, tid)) 1523 priv.color = true; 1524 1525 thread__set_priv(thread, priv.ptr); 1526 return thread; 1527} 1528 1529static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, 1530 struct perf_sample *sample, struct machine *machine) 1531{ 1532 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 1533 struct thread *sched_in; 1534 struct thread_runtime *tr; 1535 int new_shortname; 1536 u64 timestamp0, timestamp = sample->time; 1537 s64 delta; 1538 int i; 1539 struct perf_cpu this_cpu = { 1540 .cpu = sample->cpu, 1541 }; 1542 int cpus_nr; 1543 bool new_cpu = false; 1544 const char *color = PERF_COLOR_NORMAL; 1545 char stimestamp[32]; 1546 1547 BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); 1548 1549 if (this_cpu.cpu > sched->max_cpu.cpu) 1550 sched->max_cpu = this_cpu; 1551 1552 if (sched->map.comp) { 1553 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1554 if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { 1555 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1556 new_cpu = true; 1557 } 1558 } else 1559 cpus_nr = sched->max_cpu.cpu; 1560 1561 timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; 1562 sched->cpu_last_switched[this_cpu.cpu] = timestamp; 1563 if (timestamp0) 1564 delta = timestamp - timestamp0; 1565 else 1566 delta = 0; 1567 1568 if (delta < 0) { 1569 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1570 return -1; 1571 } 1572 1573 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1574 if (sched_in == NULL) 1575 return -1; 1576 1577 tr = thread__get_runtime(sched_in); 1578 if (tr == NULL) { 1579 thread__put(sched_in); 1580 return -1; 1581 } 1582 1583 sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); 1584 1585 printf(" "); 1586 1587 new_shortname = 0; 1588 if (!tr->shortname[0]) { 1589 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1590 /* 1591 * Don't allocate a letter-number for swapper:0 1592 * as a shortname. Instead, we use '.' for it. 1593 */ 1594 tr->shortname[0] = '.'; 1595 tr->shortname[1] = ' '; 1596 } else { 1597 tr->shortname[0] = sched->next_shortname1; 1598 tr->shortname[1] = sched->next_shortname2; 1599 1600 if (sched->next_shortname1 < 'Z') { 1601 sched->next_shortname1++; 1602 } else { 1603 sched->next_shortname1 = 'A'; 1604 if (sched->next_shortname2 < '9') 1605 sched->next_shortname2++; 1606 else 1607 sched->next_shortname2 = '0'; 1608 } 1609 } 1610 new_shortname = 1; 1611 } 1612 1613 for (i = 0; i < cpus_nr; i++) { 1614 struct perf_cpu cpu = { 1615 .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, 1616 }; 1617 struct thread *curr_thread = sched->curr_thread[cpu.cpu]; 1618 struct thread_runtime *curr_tr; 1619 const char *pid_color = color; 1620 const char *cpu_color = color; 1621 1622 if (curr_thread && thread__has_color(curr_thread)) 1623 pid_color = COLOR_PIDS; 1624 1625 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu)) 1626 continue; 1627 1628 if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) 1629 cpu_color = COLOR_CPUS; 1630 1631 if (cpu.cpu != this_cpu.cpu) 1632 color_fprintf(stdout, color, " "); 1633 else 1634 color_fprintf(stdout, cpu_color, "*"); 1635 1636 if (sched->curr_thread[cpu.cpu]) { 1637 curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); 1638 if (curr_tr == NULL) { 1639 thread__put(sched_in); 1640 return -1; 1641 } 1642 color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname); 1643 } else 1644 color_fprintf(stdout, color, " "); 1645 } 1646 1647 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) 1648 goto out; 1649 1650 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1651 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1652 if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) { 1653 const char *pid_color = color; 1654 1655 if (thread__has_color(sched_in)) 1656 pid_color = COLOR_PIDS; 1657 1658 color_fprintf(stdout, pid_color, "%s => %s:%d", 1659 tr->shortname, thread__comm_str(sched_in), sched_in->tid); 1660 tr->comm_changed = false; 1661 } 1662 1663 if (sched->map.comp && new_cpu) 1664 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1665 1666out: 1667 color_fprintf(stdout, color, "\n"); 1668 1669 thread__put(sched_in); 1670 1671 return 0; 1672} 1673 1674static int process_sched_switch_event(struct perf_tool *tool, 1675 struct evsel *evsel, 1676 struct perf_sample *sample, 1677 struct machine *machine) 1678{ 1679 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1680 int this_cpu = sample->cpu, err = 0; 1681 u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1682 next_pid = evsel__intval(evsel, sample, "next_pid"); 1683 1684 if (sched->curr_pid[this_cpu] != (u32)-1) { 1685 /* 1686 * Are we trying to switch away a PID that is 1687 * not current? 1688 */ 1689 if (sched->curr_pid[this_cpu] != prev_pid) 1690 sched->nr_context_switch_bugs++; 1691 } 1692 1693 if (sched->tp_handler->switch_event) 1694 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1695 1696 sched->curr_pid[this_cpu] = next_pid; 1697 return err; 1698} 1699 1700static int process_sched_runtime_event(struct perf_tool *tool, 1701 struct evsel *evsel, 1702 struct perf_sample *sample, 1703 struct machine *machine) 1704{ 1705 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1706 1707 if (sched->tp_handler->runtime_event) 1708 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1709 1710 return 0; 1711} 1712 1713static int perf_sched__process_fork_event(struct perf_tool *tool, 1714 union perf_event *event, 1715 struct perf_sample *sample, 1716 struct machine *machine) 1717{ 1718 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1719 1720 /* run the fork event through the perf machinery */ 1721 perf_event__process_fork(tool, event, sample, machine); 1722 1723 /* and then run additional processing needed for this command */ 1724 if (sched->tp_handler->fork_event) 1725 return sched->tp_handler->fork_event(sched, event, machine); 1726 1727 return 0; 1728} 1729 1730static int process_sched_migrate_task_event(struct perf_tool *tool, 1731 struct evsel *evsel, 1732 struct perf_sample *sample, 1733 struct machine *machine) 1734{ 1735 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1736 1737 if (sched->tp_handler->migrate_task_event) 1738 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1739 1740 return 0; 1741} 1742 1743typedef int (*tracepoint_handler)(struct perf_tool *tool, 1744 struct evsel *evsel, 1745 struct perf_sample *sample, 1746 struct machine *machine); 1747 1748static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, 1749 union perf_event *event __maybe_unused, 1750 struct perf_sample *sample, 1751 struct evsel *evsel, 1752 struct machine *machine) 1753{ 1754 int err = 0; 1755 1756 if (evsel->handler != NULL) { 1757 tracepoint_handler f = evsel->handler; 1758 err = f(tool, evsel, sample, machine); 1759 } 1760 1761 return err; 1762} 1763 1764static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused, 1765 union perf_event *event, 1766 struct perf_sample *sample, 1767 struct machine *machine) 1768{ 1769 struct thread *thread; 1770 struct thread_runtime *tr; 1771 int err; 1772 1773 err = perf_event__process_comm(tool, event, sample, machine); 1774 if (err) 1775 return err; 1776 1777 thread = machine__find_thread(machine, sample->pid, sample->tid); 1778 if (!thread) { 1779 pr_err("Internal error: can't find thread\n"); 1780 return -1; 1781 } 1782 1783 tr = thread__get_runtime(thread); 1784 if (tr == NULL) { 1785 thread__put(thread); 1786 return -1; 1787 } 1788 1789 tr->comm_changed = true; 1790 thread__put(thread); 1791 1792 return 0; 1793} 1794 1795static int perf_sched__read_events(struct perf_sched *sched) 1796{ 1797 const struct evsel_str_handler handlers[] = { 1798 { "sched:sched_switch", process_sched_switch_event, }, 1799 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1800 { "sched:sched_wakeup", process_sched_wakeup_event, }, 1801 { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1802 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1803 }; 1804 struct perf_session *session; 1805 struct perf_data data = { 1806 .path = input_name, 1807 .mode = PERF_DATA_MODE_READ, 1808 .force = sched->force, 1809 }; 1810 int rc = -1; 1811 1812 session = perf_session__new(&data, &sched->tool); 1813 if (IS_ERR(session)) { 1814 pr_debug("Error creating perf session"); 1815 return PTR_ERR(session); 1816 } 1817 1818 symbol__init(&session->header.env); 1819 1820 if (perf_session__set_tracepoints_handlers(session, handlers)) 1821 goto out_delete; 1822 1823 if (perf_session__has_traces(session, "record -R")) { 1824 int err = perf_session__process_events(session); 1825 if (err) { 1826 pr_err("Failed to process events, error %d", err); 1827 goto out_delete; 1828 } 1829 1830 sched->nr_events = session->evlist->stats.nr_events[0]; 1831 sched->nr_lost_events = session->evlist->stats.total_lost; 1832 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1833 } 1834 1835 rc = 0; 1836out_delete: 1837 perf_session__delete(session); 1838 return rc; 1839} 1840 1841/* 1842 * scheduling times are printed as msec.usec 1843 */ 1844static inline void print_sched_time(unsigned long long nsecs, int width) 1845{ 1846 unsigned long msecs; 1847 unsigned long usecs; 1848 1849 msecs = nsecs / NSEC_PER_MSEC; 1850 nsecs -= msecs * NSEC_PER_MSEC; 1851 usecs = nsecs / NSEC_PER_USEC; 1852 printf("%*lu.%03lu ", width, msecs, usecs); 1853} 1854 1855/* 1856 * returns runtime data for event, allocating memory for it the 1857 * first time it is used. 1858 */ 1859static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel) 1860{ 1861 struct evsel_runtime *r = evsel->priv; 1862 1863 if (r == NULL) { 1864 r = zalloc(sizeof(struct evsel_runtime)); 1865 evsel->priv = r; 1866 } 1867 1868 return r; 1869} 1870 1871/* 1872 * save last time event was seen per cpu 1873 */ 1874static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu) 1875{ 1876 struct evsel_runtime *r = evsel__get_runtime(evsel); 1877 1878 if (r == NULL) 1879 return; 1880 1881 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1882 int i, n = __roundup_pow_of_two(cpu+1); 1883 void *p = r->last_time; 1884 1885 p = realloc(r->last_time, n * sizeof(u64)); 1886 if (!p) 1887 return; 1888 1889 r->last_time = p; 1890 for (i = r->ncpu; i < n; ++i) 1891 r->last_time[i] = (u64) 0; 1892 1893 r->ncpu = n; 1894 } 1895 1896 r->last_time[cpu] = timestamp; 1897} 1898 1899/* returns last time this event was seen on the given cpu */ 1900static u64 evsel__get_time(struct evsel *evsel, u32 cpu) 1901{ 1902 struct evsel_runtime *r = evsel__get_runtime(evsel); 1903 1904 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 1905 return 0; 1906 1907 return r->last_time[cpu]; 1908} 1909 1910static int comm_width = 30; 1911 1912static char *timehist_get_commstr(struct thread *thread) 1913{ 1914 static char str[32]; 1915 const char *comm = thread__comm_str(thread); 1916 pid_t tid = thread->tid; 1917 pid_t pid = thread->pid_; 1918 int n; 1919 1920 if (pid == 0) 1921 n = scnprintf(str, sizeof(str), "%s", comm); 1922 1923 else if (tid != pid) 1924 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 1925 1926 else 1927 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 1928 1929 if (n > comm_width) 1930 comm_width = n; 1931 1932 return str; 1933} 1934 1935static void timehist_header(struct perf_sched *sched) 1936{ 1937 u32 ncpus = sched->max_cpu.cpu + 1; 1938 u32 i, j; 1939 1940 printf("%15s %6s ", "time", "cpu"); 1941 1942 if (sched->show_cpu_visual) { 1943 printf(" "); 1944 for (i = 0, j = 0; i < ncpus; ++i) { 1945 printf("%x", j++); 1946 if (j > 15) 1947 j = 0; 1948 } 1949 printf(" "); 1950 } 1951 1952 printf(" %-*s %9s %9s %9s", comm_width, 1953 "task name", "wait time", "sch delay", "run time"); 1954 1955 if (sched->show_state) 1956 printf(" %s", "state"); 1957 1958 printf("\n"); 1959 1960 /* 1961 * units row 1962 */ 1963 printf("%15s %-6s ", "", ""); 1964 1965 if (sched->show_cpu_visual) 1966 printf(" %*s ", ncpus, ""); 1967 1968 printf(" %-*s %9s %9s %9s", comm_width, 1969 "[tid/pid]", "(msec)", "(msec)", "(msec)"); 1970 1971 if (sched->show_state) 1972 printf(" %5s", ""); 1973 1974 printf("\n"); 1975 1976 /* 1977 * separator 1978 */ 1979 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 1980 1981 if (sched->show_cpu_visual) 1982 printf(" %.*s ", ncpus, graph_dotted_line); 1983 1984 printf(" %.*s %.9s %.9s %.9s", comm_width, 1985 graph_dotted_line, graph_dotted_line, graph_dotted_line, 1986 graph_dotted_line); 1987 1988 if (sched->show_state) 1989 printf(" %.5s", graph_dotted_line); 1990 1991 printf("\n"); 1992} 1993 1994static char task_state_char(struct thread *thread, int state) 1995{ 1996 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1997 unsigned bit = state ? ffs(state) : 0; 1998 1999 /* 'I' for idle */ 2000 if (thread->tid == 0) 2001 return 'I'; 2002 2003 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; 2004} 2005 2006static void timehist_print_sample(struct perf_sched *sched, 2007 struct evsel *evsel, 2008 struct perf_sample *sample, 2009 struct addr_location *al, 2010 struct thread *thread, 2011 u64 t, int state) 2012{ 2013 struct thread_runtime *tr = thread__priv(thread); 2014 const char *next_comm = evsel__strval(evsel, sample, "next_comm"); 2015 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 2016 u32 max_cpus = sched->max_cpu.cpu + 1; 2017 char tstr[64]; 2018 char nstr[30]; 2019 u64 wait_time; 2020 2021 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 2022 return; 2023 2024 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 2025 printf("%15s [%04d] ", tstr, sample->cpu); 2026 2027 if (sched->show_cpu_visual) { 2028 u32 i; 2029 char c; 2030 2031 printf(" "); 2032 for (i = 0; i < max_cpus; ++i) { 2033 /* flag idle times with 'i'; others are sched events */ 2034 if (i == sample->cpu) 2035 c = (thread->tid == 0) ? 'i' : 's'; 2036 else 2037 c = ' '; 2038 printf("%c", c); 2039 } 2040 printf(" "); 2041 } 2042 2043 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2044 2045 wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt; 2046 print_sched_time(wait_time, 6); 2047 2048 print_sched_time(tr->dt_delay, 6); 2049 print_sched_time(tr->dt_run, 6); 2050 2051 if (sched->show_state) 2052 printf(" %5c ", task_state_char(thread, state)); 2053 2054 if (sched->show_next) { 2055 snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid); 2056 printf(" %-*s", comm_width, nstr); 2057 } 2058 2059 if (sched->show_wakeups && !sched->show_next) 2060 printf(" %-*s", comm_width, ""); 2061 2062 if (thread->tid == 0) 2063 goto out; 2064 2065 if (sched->show_callchain) 2066 printf(" "); 2067 2068 sample__fprintf_sym(sample, al, 0, 2069 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 2070 EVSEL__PRINT_CALLCHAIN_ARROW | 2071 EVSEL__PRINT_SKIP_IGNORED, 2072 &callchain_cursor, symbol_conf.bt_stop_list, stdout); 2073 2074out: 2075 printf("\n"); 2076} 2077 2078/* 2079 * Explanation of delta-time stats: 2080 * 2081 * t = time of current schedule out event 2082 * tprev = time of previous sched out event 2083 * also time of schedule-in event for current task 2084 * last_time = time of last sched change event for current task 2085 * (i.e, time process was last scheduled out) 2086 * ready_to_run = time of wakeup for current task 2087 * 2088 * -----|------------|------------|------------|------ 2089 * last ready tprev t 2090 * time to run 2091 * 2092 * |-------- dt_wait --------| 2093 * |- dt_delay -|-- dt_run --| 2094 * 2095 * dt_run = run time of current task 2096 * dt_wait = time between last schedule out event for task and tprev 2097 * represents time spent off the cpu 2098 * dt_delay = time between wakeup and schedule-in of task 2099 */ 2100 2101static void timehist_update_runtime_stats(struct thread_runtime *r, 2102 u64 t, u64 tprev) 2103{ 2104 r->dt_delay = 0; 2105 r->dt_sleep = 0; 2106 r->dt_iowait = 0; 2107 r->dt_preempt = 0; 2108 r->dt_run = 0; 2109 2110 if (tprev) { 2111 r->dt_run = t - tprev; 2112 if (r->ready_to_run) { 2113 if (r->ready_to_run > tprev) 2114 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 2115 else 2116 r->dt_delay = tprev - r->ready_to_run; 2117 } 2118 2119 if (r->last_time > tprev) 2120 pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); 2121 else if (r->last_time) { 2122 u64 dt_wait = tprev - r->last_time; 2123 2124 if (r->last_state == TASK_RUNNING) 2125 r->dt_preempt = dt_wait; 2126 else if (r->last_state == TASK_UNINTERRUPTIBLE) 2127 r->dt_iowait = dt_wait; 2128 else 2129 r->dt_sleep = dt_wait; 2130 } 2131 } 2132 2133 update_stats(&r->run_stats, r->dt_run); 2134 2135 r->total_run_time += r->dt_run; 2136 r->total_delay_time += r->dt_delay; 2137 r->total_sleep_time += r->dt_sleep; 2138 r->total_iowait_time += r->dt_iowait; 2139 r->total_preempt_time += r->dt_preempt; 2140} 2141 2142static bool is_idle_sample(struct perf_sample *sample, 2143 struct evsel *evsel) 2144{ 2145 /* pid 0 == swapper == idle task */ 2146 if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0) 2147 return evsel__intval(evsel, sample, "prev_pid") == 0; 2148 2149 return sample->pid == 0; 2150} 2151 2152static void save_task_callchain(struct perf_sched *sched, 2153 struct perf_sample *sample, 2154 struct evsel *evsel, 2155 struct machine *machine) 2156{ 2157 struct callchain_cursor *cursor = &callchain_cursor; 2158 struct thread *thread; 2159 2160 /* want main thread for process - has maps */ 2161 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 2162 if (thread == NULL) { 2163 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 2164 return; 2165 } 2166 2167 if (!sched->show_callchain || sample->callchain == NULL) 2168 return; 2169 2170 if (thread__resolve_callchain(thread, cursor, evsel, sample, 2171 NULL, NULL, sched->max_stack + 2) != 0) { 2172 if (verbose > 0) 2173 pr_err("Failed to resolve callchain. Skipping\n"); 2174 2175 return; 2176 } 2177 2178 callchain_cursor_commit(cursor); 2179 2180 while (true) { 2181 struct callchain_cursor_node *node; 2182 struct symbol *sym; 2183 2184 node = callchain_cursor_current(cursor); 2185 if (node == NULL) 2186 break; 2187 2188 sym = node->ms.sym; 2189 if (sym) { 2190 if (!strcmp(sym->name, "schedule") || 2191 !strcmp(sym->name, "__schedule") || 2192 !strcmp(sym->name, "preempt_schedule")) 2193 sym->ignore = 1; 2194 } 2195 2196 callchain_cursor_advance(cursor); 2197 } 2198} 2199 2200static int init_idle_thread(struct thread *thread) 2201{ 2202 struct idle_thread_runtime *itr; 2203 2204 thread__set_comm(thread, idle_comm, 0); 2205 2206 itr = zalloc(sizeof(*itr)); 2207 if (itr == NULL) 2208 return -ENOMEM; 2209 2210 init_stats(&itr->tr.run_stats); 2211 callchain_init(&itr->callchain); 2212 callchain_cursor_reset(&itr->cursor); 2213 thread__set_priv(thread, itr); 2214 2215 return 0; 2216} 2217 2218/* 2219 * Track idle stats per cpu by maintaining a local thread 2220 * struct for the idle task on each cpu. 2221 */ 2222static int init_idle_threads(int ncpu) 2223{ 2224 int i, ret; 2225 2226 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2227 if (!idle_threads) 2228 return -ENOMEM; 2229 2230 idle_max_cpu = ncpu; 2231 2232 /* allocate the actual thread struct if needed */ 2233 for (i = 0; i < ncpu; ++i) { 2234 idle_threads[i] = thread__new(0, 0); 2235 if (idle_threads[i] == NULL) 2236 return -ENOMEM; 2237 2238 ret = init_idle_thread(idle_threads[i]); 2239 if (ret < 0) 2240 return ret; 2241 } 2242 2243 return 0; 2244} 2245 2246static void free_idle_threads(void) 2247{ 2248 int i; 2249 2250 if (idle_threads == NULL) 2251 return; 2252 2253 for (i = 0; i < idle_max_cpu; ++i) { 2254 if ((idle_threads[i])) 2255 thread__delete(idle_threads[i]); 2256 } 2257 2258 free(idle_threads); 2259} 2260 2261static struct thread *get_idle_thread(int cpu) 2262{ 2263 /* 2264 * expand/allocate array of pointers to local thread 2265 * structs if needed 2266 */ 2267 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2268 int i, j = __roundup_pow_of_two(cpu+1); 2269 void *p; 2270 2271 p = realloc(idle_threads, j * sizeof(struct thread *)); 2272 if (!p) 2273 return NULL; 2274 2275 idle_threads = (struct thread **) p; 2276 for (i = idle_max_cpu; i < j; ++i) 2277 idle_threads[i] = NULL; 2278 2279 idle_max_cpu = j; 2280 } 2281 2282 /* allocate a new thread struct if needed */ 2283 if (idle_threads[cpu] == NULL) { 2284 idle_threads[cpu] = thread__new(0, 0); 2285 2286 if (idle_threads[cpu]) { 2287 if (init_idle_thread(idle_threads[cpu]) < 0) 2288 return NULL; 2289 } 2290 } 2291 2292 return idle_threads[cpu]; 2293} 2294 2295static void save_idle_callchain(struct perf_sched *sched, 2296 struct idle_thread_runtime *itr, 2297 struct perf_sample *sample) 2298{ 2299 if (!sched->show_callchain || sample->callchain == NULL) 2300 return; 2301 2302 callchain_cursor__copy(&itr->cursor, &callchain_cursor); 2303} 2304 2305static struct thread *timehist_get_thread(struct perf_sched *sched, 2306 struct perf_sample *sample, 2307 struct machine *machine, 2308 struct evsel *evsel) 2309{ 2310 struct thread *thread; 2311 2312 if (is_idle_sample(sample, evsel)) { 2313 thread = get_idle_thread(sample->cpu); 2314 if (thread == NULL) 2315 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2316 2317 } else { 2318 /* there were samples with tid 0 but non-zero pid */ 2319 thread = machine__findnew_thread(machine, sample->pid, 2320 sample->tid ?: sample->pid); 2321 if (thread == NULL) { 2322 pr_debug("Failed to get thread for tid %d. skipping sample.\n", 2323 sample->tid); 2324 } 2325 2326 save_task_callchain(sched, sample, evsel, machine); 2327 if (sched->idle_hist) { 2328 struct thread *idle; 2329 struct idle_thread_runtime *itr; 2330 2331 idle = get_idle_thread(sample->cpu); 2332 if (idle == NULL) { 2333 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2334 return NULL; 2335 } 2336 2337 itr = thread__priv(idle); 2338 if (itr == NULL) 2339 return NULL; 2340 2341 itr->last_thread = thread; 2342 2343 /* copy task callchain when entering to idle */ 2344 if (evsel__intval(evsel, sample, "next_pid") == 0) 2345 save_idle_callchain(sched, itr, sample); 2346 } 2347 } 2348 2349 return thread; 2350} 2351 2352static bool timehist_skip_sample(struct perf_sched *sched, 2353 struct thread *thread, 2354 struct evsel *evsel, 2355 struct perf_sample *sample) 2356{ 2357 bool rc = false; 2358 2359 if (thread__is_filtered(thread)) { 2360 rc = true; 2361 sched->skipped_samples++; 2362 } 2363 2364 if (sched->idle_hist) { 2365 if (strcmp(evsel__name(evsel), "sched:sched_switch")) 2366 rc = true; 2367 else if (evsel__intval(evsel, sample, "prev_pid") != 0 && 2368 evsel__intval(evsel, sample, "next_pid") != 0) 2369 rc = true; 2370 } 2371 2372 return rc; 2373} 2374 2375static void timehist_print_wakeup_event(struct perf_sched *sched, 2376 struct evsel *evsel, 2377 struct perf_sample *sample, 2378 struct machine *machine, 2379 struct thread *awakened) 2380{ 2381 struct thread *thread; 2382 char tstr[64]; 2383 2384 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2385 if (thread == NULL) 2386 return; 2387 2388 /* show wakeup unless both awakee and awaker are filtered */ 2389 if (timehist_skip_sample(sched, thread, evsel, sample) && 2390 timehist_skip_sample(sched, awakened, evsel, sample)) { 2391 return; 2392 } 2393 2394 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2395 printf("%15s [%04d] ", tstr, sample->cpu); 2396 if (sched->show_cpu_visual) 2397 printf(" %*s ", sched->max_cpu.cpu + 1, ""); 2398 2399 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2400 2401 /* dt spacer */ 2402 printf(" %9s %9s %9s ", "", "", ""); 2403 2404 printf("awakened: %s", timehist_get_commstr(awakened)); 2405 2406 printf("\n"); 2407} 2408 2409static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused, 2410 union perf_event *event __maybe_unused, 2411 struct evsel *evsel __maybe_unused, 2412 struct perf_sample *sample __maybe_unused, 2413 struct machine *machine __maybe_unused) 2414{ 2415 return 0; 2416} 2417 2418static int timehist_sched_wakeup_event(struct perf_tool *tool, 2419 union perf_event *event __maybe_unused, 2420 struct evsel *evsel, 2421 struct perf_sample *sample, 2422 struct machine *machine) 2423{ 2424 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2425 struct thread *thread; 2426 struct thread_runtime *tr = NULL; 2427 /* want pid of awakened task not pid in sample */ 2428 const u32 pid = evsel__intval(evsel, sample, "pid"); 2429 2430 thread = machine__findnew_thread(machine, 0, pid); 2431 if (thread == NULL) 2432 return -1; 2433 2434 tr = thread__get_runtime(thread); 2435 if (tr == NULL) 2436 return -1; 2437 2438 if (tr->ready_to_run == 0) 2439 tr->ready_to_run = sample->time; 2440 2441 /* show wakeups if requested */ 2442 if (sched->show_wakeups && 2443 !perf_time__skip_sample(&sched->ptime, sample->time)) 2444 timehist_print_wakeup_event(sched, evsel, sample, machine, thread); 2445 2446 return 0; 2447} 2448 2449static void timehist_print_migration_event(struct perf_sched *sched, 2450 struct evsel *evsel, 2451 struct perf_sample *sample, 2452 struct machine *machine, 2453 struct thread *migrated) 2454{ 2455 struct thread *thread; 2456 char tstr[64]; 2457 u32 max_cpus; 2458 u32 ocpu, dcpu; 2459 2460 if (sched->summary_only) 2461 return; 2462 2463 max_cpus = sched->max_cpu.cpu + 1; 2464 ocpu = evsel__intval(evsel, sample, "orig_cpu"); 2465 dcpu = evsel__intval(evsel, sample, "dest_cpu"); 2466 2467 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2468 if (thread == NULL) 2469 return; 2470 2471 if (timehist_skip_sample(sched, thread, evsel, sample) && 2472 timehist_skip_sample(sched, migrated, evsel, sample)) { 2473 return; 2474 } 2475 2476 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2477 printf("%15s [%04d] ", tstr, sample->cpu); 2478 2479 if (sched->show_cpu_visual) { 2480 u32 i; 2481 char c; 2482 2483 printf(" "); 2484 for (i = 0; i < max_cpus; ++i) { 2485 c = (i == sample->cpu) ? 'm' : ' '; 2486 printf("%c", c); 2487 } 2488 printf(" "); 2489 } 2490 2491 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2492 2493 /* dt spacer */ 2494 printf(" %9s %9s %9s ", "", "", ""); 2495 2496 printf("migrated: %s", timehist_get_commstr(migrated)); 2497 printf(" cpu %d => %d", ocpu, dcpu); 2498 2499 printf("\n"); 2500} 2501 2502static int timehist_migrate_task_event(struct perf_tool *tool, 2503 union perf_event *event __maybe_unused, 2504 struct evsel *evsel, 2505 struct perf_sample *sample, 2506 struct machine *machine) 2507{ 2508 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2509 struct thread *thread; 2510 struct thread_runtime *tr = NULL; 2511 /* want pid of migrated task not pid in sample */ 2512 const u32 pid = evsel__intval(evsel, sample, "pid"); 2513 2514 thread = machine__findnew_thread(machine, 0, pid); 2515 if (thread == NULL) 2516 return -1; 2517 2518 tr = thread__get_runtime(thread); 2519 if (tr == NULL) 2520 return -1; 2521 2522 tr->migrations++; 2523 2524 /* show migrations if requested */ 2525 timehist_print_migration_event(sched, evsel, sample, machine, thread); 2526 2527 return 0; 2528} 2529 2530static int timehist_sched_change_event(struct perf_tool *tool, 2531 union perf_event *event, 2532 struct evsel *evsel, 2533 struct perf_sample *sample, 2534 struct machine *machine) 2535{ 2536 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2537 struct perf_time_interval *ptime = &sched->ptime; 2538 struct addr_location al; 2539 struct thread *thread; 2540 struct thread_runtime *tr = NULL; 2541 u64 tprev, t = sample->time; 2542 int rc = 0; 2543 int state = evsel__intval(evsel, sample, "prev_state"); 2544 2545 if (machine__resolve(machine, &al, sample) < 0) { 2546 pr_err("problem processing %d event. skipping it\n", 2547 event->header.type); 2548 rc = -1; 2549 goto out; 2550 } 2551 2552 thread = timehist_get_thread(sched, sample, machine, evsel); 2553 if (thread == NULL) { 2554 rc = -1; 2555 goto out; 2556 } 2557 2558 if (timehist_skip_sample(sched, thread, evsel, sample)) 2559 goto out; 2560 2561 tr = thread__get_runtime(thread); 2562 if (tr == NULL) { 2563 rc = -1; 2564 goto out; 2565 } 2566 2567 tprev = evsel__get_time(evsel, sample->cpu); 2568 2569 /* 2570 * If start time given: 2571 * - sample time is under window user cares about - skip sample 2572 * - tprev is under window user cares about - reset to start of window 2573 */ 2574 if (ptime->start && ptime->start > t) 2575 goto out; 2576 2577 if (tprev && ptime->start > tprev) 2578 tprev = ptime->start; 2579 2580 /* 2581 * If end time given: 2582 * - previous sched event is out of window - we are done 2583 * - sample time is beyond window user cares about - reset it 2584 * to close out stats for time window interest 2585 */ 2586 if (ptime->end) { 2587 if (tprev > ptime->end) 2588 goto out; 2589 2590 if (t > ptime->end) 2591 t = ptime->end; 2592 } 2593 2594 if (!sched->idle_hist || thread->tid == 0) { 2595 if (!cpu_list || test_bit(sample->cpu, cpu_bitmap)) 2596 timehist_update_runtime_stats(tr, t, tprev); 2597 2598 if (sched->idle_hist) { 2599 struct idle_thread_runtime *itr = (void *)tr; 2600 struct thread_runtime *last_tr; 2601 2602 BUG_ON(thread->tid != 0); 2603 2604 if (itr->last_thread == NULL) 2605 goto out; 2606 2607 /* add current idle time as last thread's runtime */ 2608 last_tr = thread__get_runtime(itr->last_thread); 2609 if (last_tr == NULL) 2610 goto out; 2611 2612 timehist_update_runtime_stats(last_tr, t, tprev); 2613 /* 2614 * remove delta time of last thread as it's not updated 2615 * and otherwise it will show an invalid value next 2616 * time. we only care total run time and run stat. 2617 */ 2618 last_tr->dt_run = 0; 2619 last_tr->dt_delay = 0; 2620 last_tr->dt_sleep = 0; 2621 last_tr->dt_iowait = 0; 2622 last_tr->dt_preempt = 0; 2623 2624 if (itr->cursor.nr) 2625 callchain_append(&itr->callchain, &itr->cursor, t - tprev); 2626 2627 itr->last_thread = NULL; 2628 } 2629 } 2630 2631 if (!sched->summary_only) 2632 timehist_print_sample(sched, evsel, sample, &al, thread, t, state); 2633 2634out: 2635 if (sched->hist_time.start == 0 && t >= ptime->start) 2636 sched->hist_time.start = t; 2637 if (ptime->end == 0 || t <= ptime->end) 2638 sched->hist_time.end = t; 2639 2640 if (tr) { 2641 /* time of this sched_switch event becomes last time task seen */ 2642 tr->last_time = sample->time; 2643 2644 /* last state is used to determine where to account wait time */ 2645 tr->last_state = state; 2646 2647 /* sched out event for task so reset ready to run time */ 2648 tr->ready_to_run = 0; 2649 } 2650 2651 evsel__save_time(evsel, sample->time, sample->cpu); 2652 2653 return rc; 2654} 2655 2656static int timehist_sched_switch_event(struct perf_tool *tool, 2657 union perf_event *event, 2658 struct evsel *evsel, 2659 struct perf_sample *sample, 2660 struct machine *machine __maybe_unused) 2661{ 2662 return timehist_sched_change_event(tool, event, evsel, sample, machine); 2663} 2664 2665static int process_lost(struct perf_tool *tool __maybe_unused, 2666 union perf_event *event, 2667 struct perf_sample *sample, 2668 struct machine *machine __maybe_unused) 2669{ 2670 char tstr[64]; 2671 2672 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2673 printf("%15s ", tstr); 2674 printf("lost %" PRI_lu64 " events on cpu %d\n", event->lost.lost, sample->cpu); 2675 2676 return 0; 2677} 2678 2679 2680static void print_thread_runtime(struct thread *t, 2681 struct thread_runtime *r) 2682{ 2683 double mean = avg_stats(&r->run_stats); 2684 float stddev; 2685 2686 printf("%*s %5d %9" PRIu64 " ", 2687 comm_width, timehist_get_commstr(t), t->ppid, 2688 (u64) r->run_stats.n); 2689 2690 print_sched_time(r->total_run_time, 8); 2691 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); 2692 print_sched_time(r->run_stats.min, 6); 2693 printf(" "); 2694 print_sched_time((u64) mean, 6); 2695 printf(" "); 2696 print_sched_time(r->run_stats.max, 6); 2697 printf(" "); 2698 printf("%5.2f", stddev); 2699 printf(" %5" PRIu64, r->migrations); 2700 printf("\n"); 2701} 2702 2703static void print_thread_waittime(struct thread *t, 2704 struct thread_runtime *r) 2705{ 2706 printf("%*s %5d %9" PRIu64 " ", 2707 comm_width, timehist_get_commstr(t), t->ppid, 2708 (u64) r->run_stats.n); 2709 2710 print_sched_time(r->total_run_time, 8); 2711 print_sched_time(r->total_sleep_time, 6); 2712 printf(" "); 2713 print_sched_time(r->total_iowait_time, 6); 2714 printf(" "); 2715 print_sched_time(r->total_preempt_time, 6); 2716 printf(" "); 2717 print_sched_time(r->total_delay_time, 6); 2718 printf("\n"); 2719} 2720 2721struct total_run_stats { 2722 struct perf_sched *sched; 2723 u64 sched_count; 2724 u64 task_count; 2725 u64 total_run_time; 2726}; 2727 2728static int __show_thread_runtime(struct thread *t, void *priv) 2729{ 2730 struct total_run_stats *stats = priv; 2731 struct thread_runtime *r; 2732 2733 if (thread__is_filtered(t)) 2734 return 0; 2735 2736 r = thread__priv(t); 2737 if (r && r->run_stats.n) { 2738 stats->task_count++; 2739 stats->sched_count += r->run_stats.n; 2740 stats->total_run_time += r->total_run_time; 2741 2742 if (stats->sched->show_state) 2743 print_thread_waittime(t, r); 2744 else 2745 print_thread_runtime(t, r); 2746 } 2747 2748 return 0; 2749} 2750 2751static int show_thread_runtime(struct thread *t, void *priv) 2752{ 2753 if (t->dead) 2754 return 0; 2755 2756 return __show_thread_runtime(t, priv); 2757} 2758 2759static int show_deadthread_runtime(struct thread *t, void *priv) 2760{ 2761 if (!t->dead) 2762 return 0; 2763 2764 return __show_thread_runtime(t, priv); 2765} 2766 2767static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) 2768{ 2769 const char *sep = " <- "; 2770 struct callchain_list *chain; 2771 size_t ret = 0; 2772 char bf[1024]; 2773 bool first; 2774 2775 if (node == NULL) 2776 return 0; 2777 2778 ret = callchain__fprintf_folded(fp, node->parent); 2779 first = (ret == 0); 2780 2781 list_for_each_entry(chain, &node->val, list) { 2782 if (chain->ip >= PERF_CONTEXT_MAX) 2783 continue; 2784 if (chain->ms.sym && chain->ms.sym->ignore) 2785 continue; 2786 ret += fprintf(fp, "%s%s", first ? "" : sep, 2787 callchain_list__sym_name(chain, bf, sizeof(bf), 2788 false)); 2789 first = false; 2790 } 2791 2792 return ret; 2793} 2794 2795static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root) 2796{ 2797 size_t ret = 0; 2798 FILE *fp = stdout; 2799 struct callchain_node *chain; 2800 struct rb_node *rb_node = rb_first_cached(root); 2801 2802 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); 2803 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, 2804 graph_dotted_line); 2805 2806 while (rb_node) { 2807 chain = rb_entry(rb_node, struct callchain_node, rb_node); 2808 rb_node = rb_next(rb_node); 2809 2810 ret += fprintf(fp, " "); 2811 print_sched_time(chain->hit, 12); 2812 ret += 16; /* print_sched_time returns 2nd arg + 4 */ 2813 ret += fprintf(fp, " %8d ", chain->count); 2814 ret += callchain__fprintf_folded(fp, chain); 2815 ret += fprintf(fp, "\n"); 2816 } 2817 2818 return ret; 2819} 2820 2821static void timehist_print_summary(struct perf_sched *sched, 2822 struct perf_session *session) 2823{ 2824 struct machine *m = &session->machines.host; 2825 struct total_run_stats totals; 2826 u64 task_count; 2827 struct thread *t; 2828 struct thread_runtime *r; 2829 int i; 2830 u64 hist_time = sched->hist_time.end - sched->hist_time.start; 2831 2832 memset(&totals, 0, sizeof(totals)); 2833 totals.sched = sched; 2834 2835 if (sched->idle_hist) { 2836 printf("\nIdle-time summary\n"); 2837 printf("%*s parent sched-out ", comm_width, "comm"); 2838 printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); 2839 } else if (sched->show_state) { 2840 printf("\nWait-time summary\n"); 2841 printf("%*s parent sched-in ", comm_width, "comm"); 2842 printf(" run-time sleep iowait preempt delay\n"); 2843 } else { 2844 printf("\nRuntime summary\n"); 2845 printf("%*s parent sched-in ", comm_width, "comm"); 2846 printf(" run-time min-run avg-run max-run stddev migrations\n"); 2847 } 2848 printf("%*s (count) ", comm_width, ""); 2849 printf(" (msec) (msec) (msec) (msec) %s\n", 2850 sched->show_state ? "(msec)" : "%"); 2851 printf("%.117s\n", graph_dotted_line); 2852 2853 machine__for_each_thread(m, show_thread_runtime, &totals); 2854 task_count = totals.task_count; 2855 if (!task_count) 2856 printf("<no still running tasks>\n"); 2857 2858 printf("\nTerminated tasks:\n"); 2859 machine__for_each_thread(m, show_deadthread_runtime, &totals); 2860 if (task_count == totals.task_count) 2861 printf("<no terminated tasks>\n"); 2862 2863 /* CPU idle stats not tracked when samples were skipped */ 2864 if (sched->skipped_samples && !sched->idle_hist) 2865 return; 2866 2867 printf("\nIdle stats:\n"); 2868 for (i = 0; i < idle_max_cpu; ++i) { 2869 if (cpu_list && !test_bit(i, cpu_bitmap)) 2870 continue; 2871 2872 t = idle_threads[i]; 2873 if (!t) 2874 continue; 2875 2876 r = thread__priv(t); 2877 if (r && r->run_stats.n) { 2878 totals.sched_count += r->run_stats.n; 2879 printf(" CPU %2d idle for ", i); 2880 print_sched_time(r->total_run_time, 6); 2881 printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); 2882 } else 2883 printf(" CPU %2d idle entire time window\n", i); 2884 } 2885 2886 if (sched->idle_hist && sched->show_callchain) { 2887 callchain_param.mode = CHAIN_FOLDED; 2888 callchain_param.value = CCVAL_PERIOD; 2889 2890 callchain_register_param(&callchain_param); 2891 2892 printf("\nIdle stats by callchain:\n"); 2893 for (i = 0; i < idle_max_cpu; ++i) { 2894 struct idle_thread_runtime *itr; 2895 2896 t = idle_threads[i]; 2897 if (!t) 2898 continue; 2899 2900 itr = thread__priv(t); 2901 if (itr == NULL) 2902 continue; 2903 2904 callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain, 2905 0, &callchain_param); 2906 2907 printf(" CPU %2d:", i); 2908 print_sched_time(itr->tr.total_run_time, 6); 2909 printf(" msec\n"); 2910 timehist_print_idlehist_callchain(&itr->sorted_root); 2911 printf("\n"); 2912 } 2913 } 2914 2915 printf("\n" 2916 " Total number of unique tasks: %" PRIu64 "\n" 2917 "Total number of context switches: %" PRIu64 "\n", 2918 totals.task_count, totals.sched_count); 2919 2920 printf(" Total run time (msec): "); 2921 print_sched_time(totals.total_run_time, 2); 2922 printf("\n"); 2923 2924 printf(" Total scheduling time (msec): "); 2925 print_sched_time(hist_time, 2); 2926 printf(" (x %d)\n", sched->max_cpu.cpu); 2927} 2928 2929typedef int (*sched_handler)(struct perf_tool *tool, 2930 union perf_event *event, 2931 struct evsel *evsel, 2932 struct perf_sample *sample, 2933 struct machine *machine); 2934 2935static int perf_timehist__process_sample(struct perf_tool *tool, 2936 union perf_event *event, 2937 struct perf_sample *sample, 2938 struct evsel *evsel, 2939 struct machine *machine) 2940{ 2941 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2942 int err = 0; 2943 struct perf_cpu this_cpu = { 2944 .cpu = sample->cpu, 2945 }; 2946 2947 if (this_cpu.cpu > sched->max_cpu.cpu) 2948 sched->max_cpu = this_cpu; 2949 2950 if (evsel->handler != NULL) { 2951 sched_handler f = evsel->handler; 2952 2953 err = f(tool, event, evsel, sample, machine); 2954 } 2955 2956 return err; 2957} 2958 2959static int timehist_check_attr(struct perf_sched *sched, 2960 struct evlist *evlist) 2961{ 2962 struct evsel *evsel; 2963 struct evsel_runtime *er; 2964 2965 list_for_each_entry(evsel, &evlist->core.entries, core.node) { 2966 er = evsel__get_runtime(evsel); 2967 if (er == NULL) { 2968 pr_err("Failed to allocate memory for evsel runtime data\n"); 2969 return -1; 2970 } 2971 2972 if (sched->show_callchain && !evsel__has_callchain(evsel)) { 2973 pr_info("Samples do not have callchains.\n"); 2974 sched->show_callchain = 0; 2975 symbol_conf.use_callchain = 0; 2976 } 2977 } 2978 2979 return 0; 2980} 2981 2982static int perf_sched__timehist(struct perf_sched *sched) 2983{ 2984 struct evsel_str_handler handlers[] = { 2985 { "sched:sched_switch", timehist_sched_switch_event, }, 2986 { "sched:sched_wakeup", timehist_sched_wakeup_event, }, 2987 { "sched:sched_waking", timehist_sched_wakeup_event, }, 2988 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, 2989 }; 2990 const struct evsel_str_handler migrate_handlers[] = { 2991 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2992 }; 2993 struct perf_data data = { 2994 .path = input_name, 2995 .mode = PERF_DATA_MODE_READ, 2996 .force = sched->force, 2997 }; 2998 2999 struct perf_session *session; 3000 struct evlist *evlist; 3001 int err = -1; 3002 3003 /* 3004 * event handlers for timehist option 3005 */ 3006 sched->tool.sample = perf_timehist__process_sample; 3007 sched->tool.mmap = perf_event__process_mmap; 3008 sched->tool.comm = perf_event__process_comm; 3009 sched->tool.exit = perf_event__process_exit; 3010 sched->tool.fork = perf_event__process_fork; 3011 sched->tool.lost = process_lost; 3012 sched->tool.attr = perf_event__process_attr; 3013 sched->tool.tracing_data = perf_event__process_tracing_data; 3014 sched->tool.build_id = perf_event__process_build_id; 3015 3016 sched->tool.ordered_events = true; 3017 sched->tool.ordering_requires_timestamps = true; 3018 3019 symbol_conf.use_callchain = sched->show_callchain; 3020 3021 session = perf_session__new(&data, &sched->tool); 3022 if (IS_ERR(session)) 3023 return PTR_ERR(session); 3024 3025 if (cpu_list) { 3026 err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 3027 if (err < 0) 3028 goto out; 3029 } 3030 3031 evlist = session->evlist; 3032 3033 symbol__init(&session->header.env); 3034 3035 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { 3036 pr_err("Invalid time string\n"); 3037 return -EINVAL; 3038 } 3039 3040 if (timehist_check_attr(sched, evlist) != 0) 3041 goto out; 3042 3043 setup_pager(); 3044 3045 /* prefer sched_waking if it is captured */ 3046 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) 3047 handlers[1].handler = timehist_sched_wakeup_ignore; 3048 3049 /* setup per-evsel handlers */ 3050 if (perf_session__set_tracepoints_handlers(session, handlers)) 3051 goto out; 3052 3053 /* sched_switch event at a minimum needs to exist */ 3054 if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) { 3055 pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); 3056 goto out; 3057 } 3058 3059 if (sched->show_migrations && 3060 perf_session__set_tracepoints_handlers(session, migrate_handlers)) 3061 goto out; 3062 3063 /* pre-allocate struct for per-CPU idle stats */ 3064 sched->max_cpu.cpu = session->header.env.nr_cpus_online; 3065 if (sched->max_cpu.cpu == 0) 3066 sched->max_cpu.cpu = 4; 3067 if (init_idle_threads(sched->max_cpu.cpu)) 3068 goto out; 3069 3070 /* summary_only implies summary option, but don't overwrite summary if set */ 3071 if (sched->summary_only) 3072 sched->summary = sched->summary_only; 3073 3074 if (!sched->summary_only) 3075 timehist_header(sched); 3076 3077 err = perf_session__process_events(session); 3078 if (err) { 3079 pr_err("Failed to process events, error %d", err); 3080 goto out; 3081 } 3082 3083 sched->nr_events = evlist->stats.nr_events[0]; 3084 sched->nr_lost_events = evlist->stats.total_lost; 3085 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; 3086 3087 if (sched->summary) 3088 timehist_print_summary(sched, session); 3089 3090out: 3091 free_idle_threads(); 3092 perf_session__delete(session); 3093 3094 return err; 3095} 3096 3097 3098static void print_bad_events(struct perf_sched *sched) 3099{ 3100 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 3101 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 3102 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, 3103 sched->nr_unordered_timestamps, sched->nr_timestamps); 3104 } 3105 if (sched->nr_lost_events && sched->nr_events) { 3106 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 3107 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 3108 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 3109 } 3110 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 3111 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 3112 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 3113 sched->nr_context_switch_bugs, sched->nr_timestamps); 3114 if (sched->nr_lost_events) 3115 printf(" (due to lost events?)"); 3116 printf("\n"); 3117 } 3118} 3119 3120static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data) 3121{ 3122 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 3123 struct work_atoms *this; 3124 const char *comm = thread__comm_str(data->thread), *this_comm; 3125 bool leftmost = true; 3126 3127 while (*new) { 3128 int cmp; 3129 3130 this = container_of(*new, struct work_atoms, node); 3131 parent = *new; 3132 3133 this_comm = thread__comm_str(this->thread); 3134 cmp = strcmp(comm, this_comm); 3135 if (cmp > 0) { 3136 new = &((*new)->rb_left); 3137 } else if (cmp < 0) { 3138 new = &((*new)->rb_right); 3139 leftmost = false; 3140 } else { 3141 this->num_merged++; 3142 this->total_runtime += data->total_runtime; 3143 this->nb_atoms += data->nb_atoms; 3144 this->total_lat += data->total_lat; 3145 list_splice(&data->work_list, &this->work_list); 3146 if (this->max_lat < data->max_lat) { 3147 this->max_lat = data->max_lat; 3148 this->max_lat_start = data->max_lat_start; 3149 this->max_lat_end = data->max_lat_end; 3150 } 3151 zfree(&data); 3152 return; 3153 } 3154 } 3155 3156 data->num_merged++; 3157 rb_link_node(&data->node, parent, new); 3158 rb_insert_color_cached(&data->node, root, leftmost); 3159} 3160 3161static void perf_sched__merge_lat(struct perf_sched *sched) 3162{ 3163 struct work_atoms *data; 3164 struct rb_node *node; 3165 3166 if (sched->skip_merge) 3167 return; 3168 3169 while ((node = rb_first_cached(&sched->atom_root))) { 3170 rb_erase_cached(node, &sched->atom_root); 3171 data = rb_entry(node, struct work_atoms, node); 3172 __merge_work_atoms(&sched->merged_atom_root, data); 3173 } 3174} 3175 3176static int perf_sched__lat(struct perf_sched *sched) 3177{ 3178 struct rb_node *next; 3179 3180 setup_pager(); 3181 3182 if (perf_sched__read_events(sched)) 3183 return -1; 3184 3185 perf_sched__merge_lat(sched); 3186 perf_sched__sort_lat(sched); 3187 3188 printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n"); 3189 printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n"); 3190 printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n"); 3191 3192 next = rb_first_cached(&sched->sorted_atom_root); 3193 3194 while (next) { 3195 struct work_atoms *work_list; 3196 3197 work_list = rb_entry(next, struct work_atoms, node); 3198 output_lat_thread(sched, work_list); 3199 next = rb_next(next); 3200 thread__zput(work_list->thread); 3201 } 3202 3203 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 3204 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 3205 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); 3206 3207 printf(" ---------------------------------------------------\n"); 3208 3209 print_bad_events(sched); 3210 printf("\n"); 3211 3212 return 0; 3213} 3214 3215static int setup_map_cpus(struct perf_sched *sched) 3216{ 3217 struct perf_cpu_map *map; 3218 3219 sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); 3220 3221 if (sched->map.comp) { 3222 sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); 3223 if (!sched->map.comp_cpus) 3224 return -1; 3225 } 3226 3227 if (!sched->map.cpus_str) 3228 return 0; 3229 3230 map = perf_cpu_map__new(sched->map.cpus_str); 3231 if (!map) { 3232 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); 3233 return -1; 3234 } 3235 3236 sched->map.cpus = map; 3237 return 0; 3238} 3239 3240static int setup_color_pids(struct perf_sched *sched) 3241{ 3242 struct perf_thread_map *map; 3243 3244 if (!sched->map.color_pids_str) 3245 return 0; 3246 3247 map = thread_map__new_by_tid_str(sched->map.color_pids_str); 3248 if (!map) { 3249 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); 3250 return -1; 3251 } 3252 3253 sched->map.color_pids = map; 3254 return 0; 3255} 3256 3257static int setup_color_cpus(struct perf_sched *sched) 3258{ 3259 struct perf_cpu_map *map; 3260 3261 if (!sched->map.color_cpus_str) 3262 return 0; 3263 3264 map = perf_cpu_map__new(sched->map.color_cpus_str); 3265 if (!map) { 3266 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); 3267 return -1; 3268 } 3269 3270 sched->map.color_cpus = map; 3271 return 0; 3272} 3273 3274static int perf_sched__map(struct perf_sched *sched) 3275{ 3276 if (setup_map_cpus(sched)) 3277 return -1; 3278 3279 if (setup_color_pids(sched)) 3280 return -1; 3281 3282 if (setup_color_cpus(sched)) 3283 return -1; 3284 3285 setup_pager(); 3286 if (perf_sched__read_events(sched)) 3287 return -1; 3288 print_bad_events(sched); 3289 return 0; 3290} 3291 3292static int perf_sched__replay(struct perf_sched *sched) 3293{ 3294 unsigned long i; 3295 3296 calibrate_run_measurement_overhead(sched); 3297 calibrate_sleep_measurement_overhead(sched); 3298 3299 test_calibrations(sched); 3300 3301 if (perf_sched__read_events(sched)) 3302 return -1; 3303 3304 printf("nr_run_events: %ld\n", sched->nr_run_events); 3305 printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); 3306 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); 3307 3308 if (sched->targetless_wakeups) 3309 printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3310 if (sched->multitarget_wakeups) 3311 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3312 if (sched->nr_run_events_optimized) 3313 printf("run atoms optimized: %ld\n", 3314 sched->nr_run_events_optimized); 3315 3316 print_task_traces(sched); 3317 add_cross_task_wakeups(sched); 3318 3319 create_tasks(sched); 3320 printf("------------------------------------------------------------\n"); 3321 for (i = 0; i < sched->replay_repeat; i++) 3322 run_one_test(sched); 3323 3324 return 0; 3325} 3326 3327static void setup_sorting(struct perf_sched *sched, const struct option *options, 3328 const char * const usage_msg[]) 3329{ 3330 char *tmp, *tok, *str = strdup(sched->sort_order); 3331 3332 for (tok = strtok_r(str, ", ", &tmp); 3333 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3334 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3335 usage_with_options_msg(usage_msg, options, 3336 "Unknown --sort key: `%s'", tok); 3337 } 3338 } 3339 3340 free(str); 3341 3342 sort_dimension__add("pid", &sched->cmp_pid); 3343} 3344 3345static bool schedstat_events_exposed(void) 3346{ 3347 /* 3348 * Select "sched:sched_stat_wait" event to check 3349 * whether schedstat tracepoints are exposed. 3350 */ 3351 return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ? 3352 false : true; 3353} 3354 3355static int __cmd_record(int argc, const char **argv) 3356{ 3357 unsigned int rec_argc, i, j; 3358 const char **rec_argv; 3359 const char * const record_args[] = { 3360 "record", 3361 "-a", 3362 "-R", 3363 "-m", "1024", 3364 "-c", "1", 3365 "-e", "sched:sched_switch", 3366 "-e", "sched:sched_stat_runtime", 3367 "-e", "sched:sched_process_fork", 3368 "-e", "sched:sched_wakeup_new", 3369 "-e", "sched:sched_migrate_task", 3370 }; 3371 3372 /* 3373 * The tracepoints trace_sched_stat_{wait, sleep, iowait} 3374 * are not exposed to user if CONFIG_SCHEDSTATS is not set, 3375 * to prevent "perf sched record" execution failure, determine 3376 * whether to record schedstat events according to actual situation. 3377 */ 3378 const char * const schedstat_args[] = { 3379 "-e", "sched:sched_stat_wait", 3380 "-e", "sched:sched_stat_sleep", 3381 "-e", "sched:sched_stat_iowait", 3382 }; 3383 unsigned int schedstat_argc = schedstat_events_exposed() ? 3384 ARRAY_SIZE(schedstat_args) : 0; 3385 3386 struct tep_event *waking_event; 3387 3388 /* 3389 * +2 for either "-e", "sched:sched_wakeup" or 3390 * "-e", "sched:sched_waking" 3391 */ 3392 rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; 3393 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 3394 3395 if (rec_argv == NULL) 3396 return -ENOMEM; 3397 3398 for (i = 0; i < ARRAY_SIZE(record_args); i++) 3399 rec_argv[i] = strdup(record_args[i]); 3400 3401 rec_argv[i++] = "-e"; 3402 waking_event = trace_event__tp_format("sched", "sched_waking"); 3403 if (!IS_ERR(waking_event)) 3404 rec_argv[i++] = strdup("sched:sched_waking"); 3405 else 3406 rec_argv[i++] = strdup("sched:sched_wakeup"); 3407 3408 for (j = 0; j < schedstat_argc; j++) 3409 rec_argv[i++] = strdup(schedstat_args[j]); 3410 3411 for (j = 1; j < (unsigned int)argc; j++, i++) 3412 rec_argv[i] = argv[j]; 3413 3414 BUG_ON(i != rec_argc); 3415 3416 return cmd_record(i, rec_argv); 3417} 3418 3419int cmd_sched(int argc, const char **argv) 3420{ 3421 static const char default_sort_order[] = "avg, max, switch, runtime"; 3422 struct perf_sched sched = { 3423 .tool = { 3424 .sample = perf_sched__process_tracepoint_sample, 3425 .comm = perf_sched__process_comm, 3426 .namespaces = perf_event__process_namespaces, 3427 .lost = perf_event__process_lost, 3428 .fork = perf_sched__process_fork_event, 3429 .ordered_events = true, 3430 }, 3431 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), 3432 .sort_list = LIST_HEAD_INIT(sched.sort_list), 3433 .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, 3434 .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, 3435 .sort_order = default_sort_order, 3436 .replay_repeat = 10, 3437 .profile_cpu = -1, 3438 .next_shortname1 = 'A', 3439 .next_shortname2 = '0', 3440 .skip_merge = 0, 3441 .show_callchain = 1, 3442 .max_stack = 5, 3443 }; 3444 const struct option sched_options[] = { 3445 OPT_STRING('i', "input", &input_name, "file", 3446 "input file name"), 3447 OPT_INCR('v', "verbose", &verbose, 3448 "be more verbose (show symbol address, etc)"), 3449 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 3450 "dump raw trace in ASCII"), 3451 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 3452 OPT_END() 3453 }; 3454 const struct option latency_options[] = { 3455 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 3456 "sort by key(s): runtime, switch, avg, max"), 3457 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 3458 "CPU to profile on"), 3459 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 3460 "latency stats per pid instead of per comm"), 3461 OPT_PARENT(sched_options) 3462 }; 3463 const struct option replay_options[] = { 3464 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 3465 "repeat the workload replay N times (-1: infinite)"), 3466 OPT_PARENT(sched_options) 3467 }; 3468 const struct option map_options[] = { 3469 OPT_BOOLEAN(0, "compact", &sched.map.comp, 3470 "map output in compact mode"), 3471 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 3472 "highlight given pids in map"), 3473 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 3474 "highlight given CPUs in map"), 3475 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 3476 "display given CPUs in map"), 3477 OPT_PARENT(sched_options) 3478 }; 3479 const struct option timehist_options[] = { 3480 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 3481 "file", "vmlinux pathname"), 3482 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 3483 "file", "kallsyms pathname"), 3484 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 3485 "Display call chains if present (default on)"), 3486 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 3487 "Maximum number of functions to display backtrace."), 3488 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 3489 "Look for files with symbols relative to this directory"), 3490 OPT_BOOLEAN('s', "summary", &sched.summary_only, 3491 "Show only syscall summary with statistics"), 3492 OPT_BOOLEAN('S', "with-summary", &sched.summary, 3493 "Show all syscalls and summary with statistics"), 3494 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 3495 OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), 3496 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 3497 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 3498 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 3499 OPT_STRING(0, "time", &sched.time_str, "str", 3500 "Time span for analysis (start,stop)"), 3501 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 3502 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", 3503 "analyze events only for given process id(s)"), 3504 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", 3505 "analyze events only for given thread id(s)"), 3506 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 3507 OPT_PARENT(sched_options) 3508 }; 3509 3510 const char * const latency_usage[] = { 3511 "perf sched latency [<options>]", 3512 NULL 3513 }; 3514 const char * const replay_usage[] = { 3515 "perf sched replay [<options>]", 3516 NULL 3517 }; 3518 const char * const map_usage[] = { 3519 "perf sched map [<options>]", 3520 NULL 3521 }; 3522 const char * const timehist_usage[] = { 3523 "perf sched timehist [<options>]", 3524 NULL 3525 }; 3526 const char *const sched_subcommands[] = { "record", "latency", "map", 3527 "replay", "script", 3528 "timehist", NULL }; 3529 const char *sched_usage[] = { 3530 NULL, 3531 NULL 3532 }; 3533 struct trace_sched_handler lat_ops = { 3534 .wakeup_event = latency_wakeup_event, 3535 .switch_event = latency_switch_event, 3536 .runtime_event = latency_runtime_event, 3537 .migrate_task_event = latency_migrate_task_event, 3538 }; 3539 struct trace_sched_handler map_ops = { 3540 .switch_event = map_switch_event, 3541 }; 3542 struct trace_sched_handler replay_ops = { 3543 .wakeup_event = replay_wakeup_event, 3544 .switch_event = replay_switch_event, 3545 .fork_event = replay_fork_event, 3546 }; 3547 unsigned int i; 3548 int ret; 3549 3550 for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) 3551 sched.curr_pid[i] = -1; 3552 3553 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, 3554 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3555 if (!argc) 3556 usage_with_options(sched_usage, sched_options); 3557 3558 /* 3559 * Aliased to 'perf script' for now: 3560 */ 3561 if (!strcmp(argv[0], "script")) 3562 return cmd_script(argc, argv); 3563 3564 if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 3565 return __cmd_record(argc, argv); 3566 } else if (!strncmp(argv[0], "lat", 3)) { 3567 sched.tp_handler = &lat_ops; 3568 if (argc > 1) { 3569 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 3570 if (argc) 3571 usage_with_options(latency_usage, latency_options); 3572 } 3573 setup_sorting(&sched, latency_options, latency_usage); 3574 return perf_sched__lat(&sched); 3575 } else if (!strcmp(argv[0], "map")) { 3576 if (argc) { 3577 argc = parse_options(argc, argv, map_options, map_usage, 0); 3578 if (argc) 3579 usage_with_options(map_usage, map_options); 3580 } 3581 sched.tp_handler = &map_ops; 3582 setup_sorting(&sched, latency_options, latency_usage); 3583 return perf_sched__map(&sched); 3584 } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) { 3585 sched.tp_handler = &replay_ops; 3586 if (argc) { 3587 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 3588 if (argc) 3589 usage_with_options(replay_usage, replay_options); 3590 } 3591 return perf_sched__replay(&sched); 3592 } else if (!strcmp(argv[0], "timehist")) { 3593 if (argc) { 3594 argc = parse_options(argc, argv, timehist_options, 3595 timehist_usage, 0); 3596 if (argc) 3597 usage_with_options(timehist_usage, timehist_options); 3598 } 3599 if ((sched.show_wakeups || sched.show_next) && 3600 sched.summary_only) { 3601 pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); 3602 parse_options_usage(timehist_usage, timehist_options, "s", true); 3603 if (sched.show_wakeups) 3604 parse_options_usage(NULL, timehist_options, "w", true); 3605 if (sched.show_next) 3606 parse_options_usage(NULL, timehist_options, "n", true); 3607 return -EINVAL; 3608 } 3609 ret = symbol__validate_sym_arguments(); 3610 if (ret) 3611 return ret; 3612 3613 return perf_sched__timehist(&sched); 3614 } else { 3615 usage_with_options(sched_usage, sched_options); 3616 } 3617 3618 return 0; 3619}