cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

builtin-ftrace.c (29242B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * builtin-ftrace.c
      4 *
      5 * Copyright (c) 2013  LG Electronics,  Namhyung Kim <namhyung@kernel.org>
      6 * Copyright (c) 2020  Changbin Du <changbin.du@gmail.com>, significant enhancement.
      7 */
      8
      9#include "builtin.h"
     10
     11#include <errno.h>
     12#include <unistd.h>
     13#include <signal.h>
     14#include <stdlib.h>
     15#include <fcntl.h>
     16#include <math.h>
     17#include <poll.h>
     18#include <ctype.h>
     19#include <linux/capability.h>
     20#include <linux/string.h>
     21
     22#include "debug.h"
     23#include <subcmd/pager.h>
     24#include <subcmd/parse-options.h>
     25#include <api/fs/tracing_path.h>
     26#include "evlist.h"
     27#include "target.h"
     28#include "cpumap.h"
     29#include "thread_map.h"
     30#include "strfilter.h"
     31#include "util/cap.h"
     32#include "util/config.h"
     33#include "util/ftrace.h"
     34#include "util/units.h"
     35#include "util/parse-sublevel-options.h"
     36
     37#define DEFAULT_TRACER  "function_graph"
     38
     39static volatile int workload_exec_errno;
     40static bool done;
     41
     42static void sig_handler(int sig __maybe_unused)
     43{
     44	done = true;
     45}
     46
     47/*
     48 * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
     49 * we asked by setting its exec_error to the function below,
     50 * ftrace__workload_exec_failed_signal.
     51 *
     52 * XXX We need to handle this more appropriately, emitting an error, etc.
     53 */
     54static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
     55						siginfo_t *info __maybe_unused,
     56						void *ucontext __maybe_unused)
     57{
     58	workload_exec_errno = info->si_value.sival_int;
     59	done = true;
     60}
     61
     62static int __write_tracing_file(const char *name, const char *val, bool append)
     63{
     64	char *file;
     65	int fd, ret = -1;
     66	ssize_t size = strlen(val);
     67	int flags = O_WRONLY;
     68	char errbuf[512];
     69	char *val_copy;
     70
     71	file = get_tracing_file(name);
     72	if (!file) {
     73		pr_debug("cannot get tracing file: %s\n", name);
     74		return -1;
     75	}
     76
     77	if (append)
     78		flags |= O_APPEND;
     79	else
     80		flags |= O_TRUNC;
     81
     82	fd = open(file, flags);
     83	if (fd < 0) {
     84		pr_debug("cannot open tracing file: %s: %s\n",
     85			 name, str_error_r(errno, errbuf, sizeof(errbuf)));
     86		goto out;
     87	}
     88
     89	/*
     90	 * Copy the original value and append a '\n'. Without this,
     91	 * the kernel can hide possible errors.
     92	 */
     93	val_copy = strdup(val);
     94	if (!val_copy)
     95		goto out_close;
     96	val_copy[size] = '\n';
     97
     98	if (write(fd, val_copy, size + 1) == size + 1)
     99		ret = 0;
    100	else
    101		pr_debug("write '%s' to tracing/%s failed: %s\n",
    102			 val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
    103
    104	free(val_copy);
    105out_close:
    106	close(fd);
    107out:
    108	put_tracing_file(file);
    109	return ret;
    110}
    111
    112static int write_tracing_file(const char *name, const char *val)
    113{
    114	return __write_tracing_file(name, val, false);
    115}
    116
    117static int append_tracing_file(const char *name, const char *val)
    118{
    119	return __write_tracing_file(name, val, true);
    120}
    121
    122static int read_tracing_file_to_stdout(const char *name)
    123{
    124	char buf[4096];
    125	char *file;
    126	int fd;
    127	int ret = -1;
    128
    129	file = get_tracing_file(name);
    130	if (!file) {
    131		pr_debug("cannot get tracing file: %s\n", name);
    132		return -1;
    133	}
    134
    135	fd = open(file, O_RDONLY);
    136	if (fd < 0) {
    137		pr_debug("cannot open tracing file: %s: %s\n",
    138			 name, str_error_r(errno, buf, sizeof(buf)));
    139		goto out;
    140	}
    141
    142	/* read contents to stdout */
    143	while (true) {
    144		int n = read(fd, buf, sizeof(buf));
    145		if (n == 0)
    146			break;
    147		else if (n < 0)
    148			goto out_close;
    149
    150		if (fwrite(buf, n, 1, stdout) != 1)
    151			goto out_close;
    152	}
    153	ret = 0;
    154
    155out_close:
    156	close(fd);
    157out:
    158	put_tracing_file(file);
    159	return ret;
    160}
    161
    162static int read_tracing_file_by_line(const char *name,
    163				     void (*cb)(char *str, void *arg),
    164				     void *cb_arg)
    165{
    166	char *line = NULL;
    167	size_t len = 0;
    168	char *file;
    169	FILE *fp;
    170
    171	file = get_tracing_file(name);
    172	if (!file) {
    173		pr_debug("cannot get tracing file: %s\n", name);
    174		return -1;
    175	}
    176
    177	fp = fopen(file, "r");
    178	if (fp == NULL) {
    179		pr_debug("cannot open tracing file: %s\n", name);
    180		put_tracing_file(file);
    181		return -1;
    182	}
    183
    184	while (getline(&line, &len, fp) != -1) {
    185		cb(line, cb_arg);
    186	}
    187
    188	if (line)
    189		free(line);
    190
    191	fclose(fp);
    192	put_tracing_file(file);
    193	return 0;
    194}
    195
    196static int write_tracing_file_int(const char *name, int value)
    197{
    198	char buf[16];
    199
    200	snprintf(buf, sizeof(buf), "%d", value);
    201	if (write_tracing_file(name, buf) < 0)
    202		return -1;
    203
    204	return 0;
    205}
    206
    207static int write_tracing_option_file(const char *name, const char *val)
    208{
    209	char *file;
    210	int ret;
    211
    212	if (asprintf(&file, "options/%s", name) < 0)
    213		return -1;
    214
    215	ret = __write_tracing_file(file, val, false);
    216	free(file);
    217	return ret;
    218}
    219
    220static int reset_tracing_cpu(void);
    221static void reset_tracing_filters(void);
    222
    223static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
    224{
    225	write_tracing_option_file("function-fork", "0");
    226	write_tracing_option_file("func_stack_trace", "0");
    227	write_tracing_option_file("sleep-time", "1");
    228	write_tracing_option_file("funcgraph-irqs", "1");
    229	write_tracing_option_file("funcgraph-proc", "0");
    230	write_tracing_option_file("funcgraph-abstime", "0");
    231	write_tracing_option_file("latency-format", "0");
    232	write_tracing_option_file("irq-info", "0");
    233}
    234
    235static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
    236{
    237	if (write_tracing_file("tracing_on", "0") < 0)
    238		return -1;
    239
    240	if (write_tracing_file("current_tracer", "nop") < 0)
    241		return -1;
    242
    243	if (write_tracing_file("set_ftrace_pid", " ") < 0)
    244		return -1;
    245
    246	if (reset_tracing_cpu() < 0)
    247		return -1;
    248
    249	if (write_tracing_file("max_graph_depth", "0") < 0)
    250		return -1;
    251
    252	if (write_tracing_file("tracing_thresh", "0") < 0)
    253		return -1;
    254
    255	reset_tracing_filters();
    256	reset_tracing_options(ftrace);
    257	return 0;
    258}
    259
    260static int set_tracing_pid(struct perf_ftrace *ftrace)
    261{
    262	int i;
    263	char buf[16];
    264
    265	if (target__has_cpu(&ftrace->target))
    266		return 0;
    267
    268	for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) {
    269		scnprintf(buf, sizeof(buf), "%d",
    270			  perf_thread_map__pid(ftrace->evlist->core.threads, i));
    271		if (append_tracing_file("set_ftrace_pid", buf) < 0)
    272			return -1;
    273	}
    274	return 0;
    275}
    276
    277static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
    278{
    279	char *cpumask;
    280	size_t mask_size;
    281	int ret;
    282	int last_cpu;
    283
    284	last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
    285	mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
    286	mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
    287
    288	cpumask = malloc(mask_size);
    289	if (cpumask == NULL) {
    290		pr_debug("failed to allocate cpu mask\n");
    291		return -1;
    292	}
    293
    294	cpu_map__snprint_mask(cpumap, cpumask, mask_size);
    295
    296	ret = write_tracing_file("tracing_cpumask", cpumask);
    297
    298	free(cpumask);
    299	return ret;
    300}
    301
    302static int set_tracing_cpu(struct perf_ftrace *ftrace)
    303{
    304	struct perf_cpu_map *cpumap = ftrace->evlist->core.user_requested_cpus;
    305
    306	if (!target__has_cpu(&ftrace->target))
    307		return 0;
    308
    309	return set_tracing_cpumask(cpumap);
    310}
    311
    312static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace)
    313{
    314	if (!ftrace->func_stack_trace)
    315		return 0;
    316
    317	if (write_tracing_option_file("func_stack_trace", "1") < 0)
    318		return -1;
    319
    320	return 0;
    321}
    322
    323static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
    324{
    325	if (!ftrace->func_irq_info)
    326		return 0;
    327
    328	if (write_tracing_option_file("irq-info", "1") < 0)
    329		return -1;
    330
    331	return 0;
    332}
    333
    334static int reset_tracing_cpu(void)
    335{
    336	struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
    337	int ret;
    338
    339	ret = set_tracing_cpumask(cpumap);
    340	perf_cpu_map__put(cpumap);
    341	return ret;
    342}
    343
    344static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
    345{
    346	struct filter_entry *pos;
    347
    348	list_for_each_entry(pos, funcs, list) {
    349		if (append_tracing_file(filter_file, pos->name) < 0)
    350			return -1;
    351	}
    352
    353	return 0;
    354}
    355
    356static int set_tracing_filters(struct perf_ftrace *ftrace)
    357{
    358	int ret;
    359
    360	ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
    361	if (ret < 0)
    362		return ret;
    363
    364	ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
    365	if (ret < 0)
    366		return ret;
    367
    368	ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
    369	if (ret < 0)
    370		return ret;
    371
    372	/* old kernels do not have this filter */
    373	__set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
    374
    375	return ret;
    376}
    377
    378static void reset_tracing_filters(void)
    379{
    380	write_tracing_file("set_ftrace_filter", " ");
    381	write_tracing_file("set_ftrace_notrace", " ");
    382	write_tracing_file("set_graph_function", " ");
    383	write_tracing_file("set_graph_notrace", " ");
    384}
    385
    386static int set_tracing_depth(struct perf_ftrace *ftrace)
    387{
    388	if (ftrace->graph_depth == 0)
    389		return 0;
    390
    391	if (ftrace->graph_depth < 0) {
    392		pr_err("invalid graph depth: %d\n", ftrace->graph_depth);
    393		return -1;
    394	}
    395
    396	if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0)
    397		return -1;
    398
    399	return 0;
    400}
    401
    402static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace)
    403{
    404	int ret;
    405
    406	if (ftrace->percpu_buffer_size == 0)
    407		return 0;
    408
    409	ret = write_tracing_file_int("buffer_size_kb",
    410				     ftrace->percpu_buffer_size / 1024);
    411	if (ret < 0)
    412		return ret;
    413
    414	return 0;
    415}
    416
    417static int set_tracing_trace_inherit(struct perf_ftrace *ftrace)
    418{
    419	if (!ftrace->inherit)
    420		return 0;
    421
    422	if (write_tracing_option_file("function-fork", "1") < 0)
    423		return -1;
    424
    425	return 0;
    426}
    427
    428static int set_tracing_sleep_time(struct perf_ftrace *ftrace)
    429{
    430	if (!ftrace->graph_nosleep_time)
    431		return 0;
    432
    433	if (write_tracing_option_file("sleep-time", "0") < 0)
    434		return -1;
    435
    436	return 0;
    437}
    438
    439static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace)
    440{
    441	if (!ftrace->graph_noirqs)
    442		return 0;
    443
    444	if (write_tracing_option_file("funcgraph-irqs", "0") < 0)
    445		return -1;
    446
    447	return 0;
    448}
    449
    450static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
    451{
    452	if (!ftrace->graph_verbose)
    453		return 0;
    454
    455	if (write_tracing_option_file("funcgraph-proc", "1") < 0)
    456		return -1;
    457
    458	if (write_tracing_option_file("funcgraph-abstime", "1") < 0)
    459		return -1;
    460
    461	if (write_tracing_option_file("latency-format", "1") < 0)
    462		return -1;
    463
    464	return 0;
    465}
    466
    467static int set_tracing_thresh(struct perf_ftrace *ftrace)
    468{
    469	int ret;
    470
    471	if (ftrace->graph_thresh == 0)
    472		return 0;
    473
    474	ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh);
    475	if (ret < 0)
    476		return ret;
    477
    478	return 0;
    479}
    480
    481static int set_tracing_options(struct perf_ftrace *ftrace)
    482{
    483	if (set_tracing_pid(ftrace) < 0) {
    484		pr_err("failed to set ftrace pid\n");
    485		return -1;
    486	}
    487
    488	if (set_tracing_cpu(ftrace) < 0) {
    489		pr_err("failed to set tracing cpumask\n");
    490		return -1;
    491	}
    492
    493	if (set_tracing_func_stack_trace(ftrace) < 0) {
    494		pr_err("failed to set tracing option func_stack_trace\n");
    495		return -1;
    496	}
    497
    498	if (set_tracing_func_irqinfo(ftrace) < 0) {
    499		pr_err("failed to set tracing option irq-info\n");
    500		return -1;
    501	}
    502
    503	if (set_tracing_filters(ftrace) < 0) {
    504		pr_err("failed to set tracing filters\n");
    505		return -1;
    506	}
    507
    508	if (set_tracing_depth(ftrace) < 0) {
    509		pr_err("failed to set graph depth\n");
    510		return -1;
    511	}
    512
    513	if (set_tracing_percpu_buffer_size(ftrace) < 0) {
    514		pr_err("failed to set tracing per-cpu buffer size\n");
    515		return -1;
    516	}
    517
    518	if (set_tracing_trace_inherit(ftrace) < 0) {
    519		pr_err("failed to set tracing option function-fork\n");
    520		return -1;
    521	}
    522
    523	if (set_tracing_sleep_time(ftrace) < 0) {
    524		pr_err("failed to set tracing option sleep-time\n");
    525		return -1;
    526	}
    527
    528	if (set_tracing_funcgraph_irqs(ftrace) < 0) {
    529		pr_err("failed to set tracing option funcgraph-irqs\n");
    530		return -1;
    531	}
    532
    533	if (set_tracing_funcgraph_verbose(ftrace) < 0) {
    534		pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n");
    535		return -1;
    536	}
    537
    538	if (set_tracing_thresh(ftrace) < 0) {
    539		pr_err("failed to set tracing thresh\n");
    540		return -1;
    541	}
    542
    543	return 0;
    544}
    545
    546static void select_tracer(struct perf_ftrace *ftrace)
    547{
    548	bool graph = !list_empty(&ftrace->graph_funcs) ||
    549		     !list_empty(&ftrace->nograph_funcs);
    550	bool func = !list_empty(&ftrace->filters) ||
    551		    !list_empty(&ftrace->notrace);
    552
    553	/* The function_graph has priority over function tracer. */
    554	if (graph)
    555		ftrace->tracer = "function_graph";
    556	else if (func)
    557		ftrace->tracer = "function";
    558	/* Otherwise, the default tracer is used. */
    559
    560	pr_debug("%s tracer is used\n", ftrace->tracer);
    561}
    562
    563static int __cmd_ftrace(struct perf_ftrace *ftrace)
    564{
    565	char *trace_file;
    566	int trace_fd;
    567	char buf[4096];
    568	struct pollfd pollfd = {
    569		.events = POLLIN,
    570	};
    571
    572	if (!(perf_cap__capable(CAP_PERFMON) ||
    573	      perf_cap__capable(CAP_SYS_ADMIN))) {
    574		pr_err("ftrace only works for %s!\n",
    575#ifdef HAVE_LIBCAP_SUPPORT
    576		"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
    577#else
    578		"root"
    579#endif
    580		);
    581		return -1;
    582	}
    583
    584	select_tracer(ftrace);
    585
    586	if (reset_tracing_files(ftrace) < 0) {
    587		pr_err("failed to reset ftrace\n");
    588		goto out;
    589	}
    590
    591	/* reset ftrace buffer */
    592	if (write_tracing_file("trace", "0") < 0)
    593		goto out;
    594
    595	if (set_tracing_options(ftrace) < 0)
    596		goto out_reset;
    597
    598	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
    599		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
    600		goto out_reset;
    601	}
    602
    603	setup_pager();
    604
    605	trace_file = get_tracing_file("trace_pipe");
    606	if (!trace_file) {
    607		pr_err("failed to open trace_pipe\n");
    608		goto out_reset;
    609	}
    610
    611	trace_fd = open(trace_file, O_RDONLY);
    612
    613	put_tracing_file(trace_file);
    614
    615	if (trace_fd < 0) {
    616		pr_err("failed to open trace_pipe\n");
    617		goto out_reset;
    618	}
    619
    620	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
    621	pollfd.fd = trace_fd;
    622
    623	/* display column headers */
    624	read_tracing_file_to_stdout("trace");
    625
    626	if (!ftrace->initial_delay) {
    627		if (write_tracing_file("tracing_on", "1") < 0) {
    628			pr_err("can't enable tracing\n");
    629			goto out_close_fd;
    630		}
    631	}
    632
    633	evlist__start_workload(ftrace->evlist);
    634
    635	if (ftrace->initial_delay) {
    636		usleep(ftrace->initial_delay * 1000);
    637		if (write_tracing_file("tracing_on", "1") < 0) {
    638			pr_err("can't enable tracing\n");
    639			goto out_close_fd;
    640		}
    641	}
    642
    643	while (!done) {
    644		if (poll(&pollfd, 1, -1) < 0)
    645			break;
    646
    647		if (pollfd.revents & POLLIN) {
    648			int n = read(trace_fd, buf, sizeof(buf));
    649			if (n < 0)
    650				break;
    651			if (fwrite(buf, n, 1, stdout) != 1)
    652				break;
    653		}
    654	}
    655
    656	write_tracing_file("tracing_on", "0");
    657
    658	if (workload_exec_errno) {
    659		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
    660		/* flush stdout first so below error msg appears at the end. */
    661		fflush(stdout);
    662		pr_err("workload failed: %s\n", emsg);
    663		goto out_close_fd;
    664	}
    665
    666	/* read remaining buffer contents */
    667	while (true) {
    668		int n = read(trace_fd, buf, sizeof(buf));
    669		if (n <= 0)
    670			break;
    671		if (fwrite(buf, n, 1, stdout) != 1)
    672			break;
    673	}
    674
    675out_close_fd:
    676	close(trace_fd);
    677out_reset:
    678	reset_tracing_files(ftrace);
    679out:
    680	return (done && !workload_exec_errno) ? 0 : -1;
    681}
    682
    683static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
    684			   bool use_nsec)
    685{
    686	char *p, *q;
    687	char *unit;
    688	double num;
    689	int i;
    690
    691	/* ensure NUL termination */
    692	buf[len] = '\0';
    693
    694	/* handle data line by line */
    695	for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
    696		*q = '\0';
    697		/* move it to the line buffer */
    698		strcat(linebuf, p);
    699
    700		/*
    701		 * parse trace output to get function duration like in
    702		 *
    703		 * # tracer: function_graph
    704		 * #
    705		 * # CPU  DURATION                  FUNCTION CALLS
    706		 * # |     |   |                     |   |   |   |
    707		 *  1) + 10.291 us   |  do_filp_open();
    708		 *  1)   4.889 us    |  do_filp_open();
    709		 *  1)   6.086 us    |  do_filp_open();
    710		 *
    711		 */
    712		if (linebuf[0] == '#')
    713			goto next;
    714
    715		/* ignore CPU */
    716		p = strchr(linebuf, ')');
    717		if (p == NULL)
    718			p = linebuf;
    719
    720		while (*p && !isdigit(*p) && (*p != '|'))
    721			p++;
    722
    723		/* no duration */
    724		if (*p == '\0' || *p == '|')
    725			goto next;
    726
    727		num = strtod(p, &unit);
    728		if (!unit || strncmp(unit, " us", 3))
    729			goto next;
    730
    731		if (use_nsec)
    732			num *= 1000;
    733
    734		i = log2(num);
    735		if (i < 0)
    736			i = 0;
    737		if (i >= NUM_BUCKET)
    738			i = NUM_BUCKET - 1;
    739
    740		buckets[i]++;
    741
    742next:
    743		/* empty the line buffer for the next output  */
    744		linebuf[0] = '\0';
    745	}
    746
    747	/* preserve any remaining output (before newline) */
    748	strcat(linebuf, p);
    749}
    750
    751static void display_histogram(int buckets[], bool use_nsec)
    752{
    753	int i;
    754	int total = 0;
    755	int bar_total = 46;  /* to fit in 80 column */
    756	char bar[] = "###############################################";
    757	int bar_len;
    758
    759	for (i = 0; i < NUM_BUCKET; i++)
    760		total += buckets[i];
    761
    762	if (total == 0) {
    763		printf("No data found\n");
    764		return;
    765	}
    766
    767	printf("# %14s | %10s | %-*s |\n",
    768	       "  DURATION    ", "COUNT", bar_total, "GRAPH");
    769
    770	bar_len = buckets[0] * bar_total / total;
    771	printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
    772	       0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
    773
    774	for (i = 1; i < NUM_BUCKET - 1; i++) {
    775		int start = (1 << (i - 1));
    776		int stop = 1 << i;
    777		const char *unit = use_nsec ? "ns" : "us";
    778
    779		if (start >= 1024) {
    780			start >>= 10;
    781			stop >>= 10;
    782			unit = use_nsec ? "us" : "ms";
    783		}
    784		bar_len = buckets[i] * bar_total / total;
    785		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
    786		       start, stop, unit, buckets[i], bar_len, bar,
    787		       bar_total - bar_len, "");
    788	}
    789
    790	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
    791	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
    792	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
    793	       bar_len, bar, bar_total - bar_len, "");
    794
    795}
    796
    797static int prepare_func_latency(struct perf_ftrace *ftrace)
    798{
    799	char *trace_file;
    800	int fd;
    801
    802	if (ftrace->target.use_bpf)
    803		return perf_ftrace__latency_prepare_bpf(ftrace);
    804
    805	if (reset_tracing_files(ftrace) < 0) {
    806		pr_err("failed to reset ftrace\n");
    807		return -1;
    808	}
    809
    810	/* reset ftrace buffer */
    811	if (write_tracing_file("trace", "0") < 0)
    812		return -1;
    813
    814	if (set_tracing_options(ftrace) < 0)
    815		return -1;
    816
    817	/* force to use the function_graph tracer to track duration */
    818	if (write_tracing_file("current_tracer", "function_graph") < 0) {
    819		pr_err("failed to set current_tracer to function_graph\n");
    820		return -1;
    821	}
    822
    823	trace_file = get_tracing_file("trace_pipe");
    824	if (!trace_file) {
    825		pr_err("failed to open trace_pipe\n");
    826		return -1;
    827	}
    828
    829	fd = open(trace_file, O_RDONLY);
    830	if (fd < 0)
    831		pr_err("failed to open trace_pipe\n");
    832
    833	put_tracing_file(trace_file);
    834	return fd;
    835}
    836
    837static int start_func_latency(struct perf_ftrace *ftrace)
    838{
    839	if (ftrace->target.use_bpf)
    840		return perf_ftrace__latency_start_bpf(ftrace);
    841
    842	if (write_tracing_file("tracing_on", "1") < 0) {
    843		pr_err("can't enable tracing\n");
    844		return -1;
    845	}
    846
    847	return 0;
    848}
    849
    850static int stop_func_latency(struct perf_ftrace *ftrace)
    851{
    852	if (ftrace->target.use_bpf)
    853		return perf_ftrace__latency_stop_bpf(ftrace);
    854
    855	write_tracing_file("tracing_on", "0");
    856	return 0;
    857}
    858
    859static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
    860{
    861	if (ftrace->target.use_bpf)
    862		return perf_ftrace__latency_read_bpf(ftrace, buckets);
    863
    864	return 0;
    865}
    866
    867static int cleanup_func_latency(struct perf_ftrace *ftrace)
    868{
    869	if (ftrace->target.use_bpf)
    870		return perf_ftrace__latency_cleanup_bpf(ftrace);
    871
    872	reset_tracing_files(ftrace);
    873	return 0;
    874}
    875
    876static int __cmd_latency(struct perf_ftrace *ftrace)
    877{
    878	int trace_fd;
    879	char buf[4096];
    880	char line[256];
    881	struct pollfd pollfd = {
    882		.events = POLLIN,
    883	};
    884	int buckets[NUM_BUCKET] = { };
    885
    886	if (!(perf_cap__capable(CAP_PERFMON) ||
    887	      perf_cap__capable(CAP_SYS_ADMIN))) {
    888		pr_err("ftrace only works for %s!\n",
    889#ifdef HAVE_LIBCAP_SUPPORT
    890		"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
    891#else
    892		"root"
    893#endif
    894		);
    895		return -1;
    896	}
    897
    898	trace_fd = prepare_func_latency(ftrace);
    899	if (trace_fd < 0)
    900		goto out;
    901
    902	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
    903	pollfd.fd = trace_fd;
    904
    905	if (start_func_latency(ftrace) < 0)
    906		goto out;
    907
    908	evlist__start_workload(ftrace->evlist);
    909
    910	line[0] = '\0';
    911	while (!done) {
    912		if (poll(&pollfd, 1, -1) < 0)
    913			break;
    914
    915		if (pollfd.revents & POLLIN) {
    916			int n = read(trace_fd, buf, sizeof(buf) - 1);
    917			if (n < 0)
    918				break;
    919
    920			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
    921		}
    922	}
    923
    924	stop_func_latency(ftrace);
    925
    926	if (workload_exec_errno) {
    927		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
    928		pr_err("workload failed: %s\n", emsg);
    929		goto out;
    930	}
    931
    932	/* read remaining buffer contents */
    933	while (!ftrace->target.use_bpf) {
    934		int n = read(trace_fd, buf, sizeof(buf) - 1);
    935		if (n <= 0)
    936			break;
    937		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
    938	}
    939
    940	read_func_latency(ftrace, buckets);
    941
    942	display_histogram(buckets, ftrace->use_nsec);
    943
    944out:
    945	close(trace_fd);
    946	cleanup_func_latency(ftrace);
    947
    948	return (done && !workload_exec_errno) ? 0 : -1;
    949}
    950
    951static int perf_ftrace_config(const char *var, const char *value, void *cb)
    952{
    953	struct perf_ftrace *ftrace = cb;
    954
    955	if (!strstarts(var, "ftrace."))
    956		return 0;
    957
    958	if (strcmp(var, "ftrace.tracer"))
    959		return -1;
    960
    961	if (!strcmp(value, "function_graph") ||
    962	    !strcmp(value, "function")) {
    963		ftrace->tracer = value;
    964		return 0;
    965	}
    966
    967	pr_err("Please select \"function_graph\" (default) or \"function\"\n");
    968	return -1;
    969}
    970
    971static void list_function_cb(char *str, void *arg)
    972{
    973	struct strfilter *filter = (struct strfilter *)arg;
    974
    975	if (strfilter__compare(filter, str))
    976		printf("%s", str);
    977}
    978
    979static int opt_list_avail_functions(const struct option *opt __maybe_unused,
    980				    const char *str, int unset)
    981{
    982	struct strfilter *filter;
    983	const char *err = NULL;
    984	int ret;
    985
    986	if (unset || !str)
    987		return -1;
    988
    989	filter = strfilter__new(str, &err);
    990	if (!filter)
    991		return err ? -EINVAL : -ENOMEM;
    992
    993	ret = strfilter__or(filter, str, &err);
    994	if (ret == -EINVAL) {
    995		pr_err("Filter parse error at %td.\n", err - str + 1);
    996		pr_err("Source: \"%s\"\n", str);
    997		pr_err("         %*c\n", (int)(err - str + 1), '^');
    998		strfilter__delete(filter);
    999		return ret;
   1000	}
   1001
   1002	ret = read_tracing_file_by_line("available_filter_functions",
   1003					list_function_cb, filter);
   1004	strfilter__delete(filter);
   1005	if (ret < 0)
   1006		return ret;
   1007
   1008	exit(0);
   1009}
   1010
   1011static int parse_filter_func(const struct option *opt, const char *str,
   1012			     int unset __maybe_unused)
   1013{
   1014	struct list_head *head = opt->value;
   1015	struct filter_entry *entry;
   1016
   1017	entry = malloc(sizeof(*entry) + strlen(str) + 1);
   1018	if (entry == NULL)
   1019		return -ENOMEM;
   1020
   1021	strcpy(entry->name, str);
   1022	list_add_tail(&entry->list, head);
   1023
   1024	return 0;
   1025}
   1026
   1027static void delete_filter_func(struct list_head *head)
   1028{
   1029	struct filter_entry *pos, *tmp;
   1030
   1031	list_for_each_entry_safe(pos, tmp, head, list) {
   1032		list_del_init(&pos->list);
   1033		free(pos);
   1034	}
   1035}
   1036
   1037static int parse_buffer_size(const struct option *opt,
   1038			     const char *str, int unset)
   1039{
   1040	unsigned long *s = (unsigned long *)opt->value;
   1041	static struct parse_tag tags_size[] = {
   1042		{ .tag  = 'B', .mult = 1       },
   1043		{ .tag  = 'K', .mult = 1 << 10 },
   1044		{ .tag  = 'M', .mult = 1 << 20 },
   1045		{ .tag  = 'G', .mult = 1 << 30 },
   1046		{ .tag  = 0 },
   1047	};
   1048	unsigned long val;
   1049
   1050	if (unset) {
   1051		*s = 0;
   1052		return 0;
   1053	}
   1054
   1055	val = parse_tag_value(str, tags_size);
   1056	if (val != (unsigned long) -1) {
   1057		if (val < 1024) {
   1058			pr_err("buffer size too small, must larger than 1KB.");
   1059			return -1;
   1060		}
   1061		*s = val;
   1062		return 0;
   1063	}
   1064
   1065	return -1;
   1066}
   1067
   1068static int parse_func_tracer_opts(const struct option *opt,
   1069				  const char *str, int unset)
   1070{
   1071	int ret;
   1072	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
   1073	struct sublevel_option func_tracer_opts[] = {
   1074		{ .name = "call-graph",	.value_ptr = &ftrace->func_stack_trace },
   1075		{ .name = "irq-info",	.value_ptr = &ftrace->func_irq_info },
   1076		{ .name = NULL, }
   1077	};
   1078
   1079	if (unset)
   1080		return 0;
   1081
   1082	ret = perf_parse_sublevel_options(str, func_tracer_opts);
   1083	if (ret)
   1084		return ret;
   1085
   1086	return 0;
   1087}
   1088
   1089static int parse_graph_tracer_opts(const struct option *opt,
   1090				  const char *str, int unset)
   1091{
   1092	int ret;
   1093	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
   1094	struct sublevel_option graph_tracer_opts[] = {
   1095		{ .name = "nosleep-time",	.value_ptr = &ftrace->graph_nosleep_time },
   1096		{ .name = "noirqs",		.value_ptr = &ftrace->graph_noirqs },
   1097		{ .name = "verbose",		.value_ptr = &ftrace->graph_verbose },
   1098		{ .name = "thresh",		.value_ptr = &ftrace->graph_thresh },
   1099		{ .name = "depth",		.value_ptr = &ftrace->graph_depth },
   1100		{ .name = NULL, }
   1101	};
   1102
   1103	if (unset)
   1104		return 0;
   1105
   1106	ret = perf_parse_sublevel_options(str, graph_tracer_opts);
   1107	if (ret)
   1108		return ret;
   1109
   1110	return 0;
   1111}
   1112
   1113enum perf_ftrace_subcommand {
   1114	PERF_FTRACE_NONE,
   1115	PERF_FTRACE_TRACE,
   1116	PERF_FTRACE_LATENCY,
   1117};
   1118
   1119int cmd_ftrace(int argc, const char **argv)
   1120{
   1121	int ret;
   1122	int (*cmd_func)(struct perf_ftrace *) = NULL;
   1123	struct perf_ftrace ftrace = {
   1124		.tracer = DEFAULT_TRACER,
   1125		.target = { .uid = UINT_MAX, },
   1126	};
   1127	const struct option common_options[] = {
   1128	OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
   1129		   "Trace on existing process id"),
   1130	/* TODO: Add short option -t after -t/--tracer can be removed. */
   1131	OPT_STRING(0, "tid", &ftrace.target.tid, "tid",
   1132		   "Trace on existing thread id (exclusive to --pid)"),
   1133	OPT_INCR('v', "verbose", &verbose,
   1134		 "Be more verbose"),
   1135	OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
   1136		    "System-wide collection from all CPUs"),
   1137	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
   1138		    "List of cpus to monitor"),
   1139	OPT_END()
   1140	};
   1141	const struct option ftrace_options[] = {
   1142	OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
   1143		   "Tracer to use: function_graph(default) or function"),
   1144	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
   1145			     "Show available functions to filter",
   1146			     opt_list_avail_functions, "*"),
   1147	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
   1148		     "Trace given functions using function tracer",
   1149		     parse_filter_func),
   1150	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
   1151		     "Do not trace given functions", parse_filter_func),
   1152	OPT_CALLBACK(0, "func-opts", &ftrace, "options",
   1153		     "Function tracer options, available options: call-graph,irq-info",
   1154		     parse_func_tracer_opts),
   1155	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
   1156		     "Trace given functions using function_graph tracer",
   1157		     parse_filter_func),
   1158	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
   1159		     "Set nograph filter on given functions", parse_filter_func),
   1160	OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
   1161		     "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>",
   1162		     parse_graph_tracer_opts),
   1163	OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
   1164		     "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
   1165	OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
   1166		    "Trace children processes"),
   1167	OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
   1168		     "Number of milliseconds to wait before starting tracing after program start"),
   1169	OPT_PARENT(common_options),
   1170	};
   1171	const struct option latency_options[] = {
   1172	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
   1173		     "Show latency of given function", parse_filter_func),
   1174#ifdef HAVE_BPF_SKEL
   1175	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
   1176		    "Use BPF to measure function latency"),
   1177#endif
   1178	OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
   1179		    "Use nano-second histogram"),
   1180	OPT_PARENT(common_options),
   1181	};
   1182	const struct option *options = ftrace_options;
   1183
   1184	const char * const ftrace_usage[] = {
   1185		"perf ftrace [<options>] [<command>]",
   1186		"perf ftrace [<options>] -- [<command>] [<options>]",
   1187		"perf ftrace {trace|latency} [<options>] [<command>]",
   1188		"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
   1189		NULL
   1190	};
   1191	enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
   1192
   1193	INIT_LIST_HEAD(&ftrace.filters);
   1194	INIT_LIST_HEAD(&ftrace.notrace);
   1195	INIT_LIST_HEAD(&ftrace.graph_funcs);
   1196	INIT_LIST_HEAD(&ftrace.nograph_funcs);
   1197
   1198	signal(SIGINT, sig_handler);
   1199	signal(SIGUSR1, sig_handler);
   1200	signal(SIGCHLD, sig_handler);
   1201	signal(SIGPIPE, sig_handler);
   1202
   1203	ret = perf_config(perf_ftrace_config, &ftrace);
   1204	if (ret < 0)
   1205		return -1;
   1206
   1207	if (argc > 1) {
   1208		if (!strcmp(argv[1], "trace")) {
   1209			subcmd = PERF_FTRACE_TRACE;
   1210		} else if (!strcmp(argv[1], "latency")) {
   1211			subcmd = PERF_FTRACE_LATENCY;
   1212			options = latency_options;
   1213		}
   1214
   1215		if (subcmd != PERF_FTRACE_NONE) {
   1216			argc--;
   1217			argv++;
   1218		}
   1219	}
   1220	/* for backward compatibility */
   1221	if (subcmd == PERF_FTRACE_NONE)
   1222		subcmd = PERF_FTRACE_TRACE;
   1223
   1224	argc = parse_options(argc, argv, options, ftrace_usage,
   1225			    PARSE_OPT_STOP_AT_NON_OPTION);
   1226	if (argc < 0) {
   1227		ret = -EINVAL;
   1228		goto out_delete_filters;
   1229	}
   1230
   1231	switch (subcmd) {
   1232	case PERF_FTRACE_TRACE:
   1233		if (!argc && target__none(&ftrace.target))
   1234			ftrace.target.system_wide = true;
   1235		cmd_func = __cmd_ftrace;
   1236		break;
   1237	case PERF_FTRACE_LATENCY:
   1238		if (list_empty(&ftrace.filters)) {
   1239			pr_err("Should provide a function to measure\n");
   1240			parse_options_usage(ftrace_usage, options, "T", 1);
   1241			ret = -EINVAL;
   1242			goto out_delete_filters;
   1243		}
   1244		cmd_func = __cmd_latency;
   1245		break;
   1246	case PERF_FTRACE_NONE:
   1247	default:
   1248		pr_err("Invalid subcommand\n");
   1249		ret = -EINVAL;
   1250		goto out_delete_filters;
   1251	}
   1252
   1253	ret = target__validate(&ftrace.target);
   1254	if (ret) {
   1255		char errbuf[512];
   1256
   1257		target__strerror(&ftrace.target, ret, errbuf, 512);
   1258		pr_err("%s\n", errbuf);
   1259		goto out_delete_filters;
   1260	}
   1261
   1262	ftrace.evlist = evlist__new();
   1263	if (ftrace.evlist == NULL) {
   1264		ret = -ENOMEM;
   1265		goto out_delete_filters;
   1266	}
   1267
   1268	ret = evlist__create_maps(ftrace.evlist, &ftrace.target);
   1269	if (ret < 0)
   1270		goto out_delete_evlist;
   1271
   1272	if (argc) {
   1273		ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
   1274					       argv, false,
   1275					       ftrace__workload_exec_failed_signal);
   1276		if (ret < 0)
   1277			goto out_delete_evlist;
   1278	}
   1279
   1280	ret = cmd_func(&ftrace);
   1281
   1282out_delete_evlist:
   1283	evlist__delete(ftrace.evlist);
   1284
   1285out_delete_filters:
   1286	delete_filter_func(&ftrace.filters);
   1287	delete_filter_func(&ftrace.notrace);
   1288	delete_filter_func(&ftrace.graph_funcs);
   1289	delete_filter_func(&ftrace.nograph_funcs);
   1290
   1291	return ret;
   1292}