summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-07-04 11:00:38 +0200
committer Ingo Molnar <mingo@elte.hu> 2009-07-04 11:00:42 +0200
commit d7e57676e3ed7ab9b2c7c4bcb7873e51eacbdb84 (patch)
tree f7433f38cd407a0c35a8cbf2b7e3fd756087bce7 /tools
parent feaa0457ec8351cae855edc9a3052ac49322538e (diff)
parent 746a99a5af60ee676afa2ba469ccd1373493c7e7 (diff)
download cachepc-linux-d7e57676e3ed7ab9b2c7c4bcb7873e51eacbdb84.tar.gz
cachepc-linux-d7e57676e3ed7ab9b2c7c4bcb7873e51eacbdb84.zip
Merge branch 'linus' into x86/cleanups
Merge reason: We were on an older pre-rc1 base, move to almost-rc2.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/CREDITS 30
-rw-r--r-- tools/perf/Documentation/perf-report.txt 14
-rw-r--r-- tools/perf/Documentation/perf-stat.txt 6
-rw-r--r-- tools/perf/Makefile 14
-rw-r--r-- tools/perf/builtin-annotate.c 270
-rw-r--r-- tools/perf/builtin-record.c 246
-rw-r--r-- tools/perf/builtin-report.c 629
-rw-r--r-- tools/perf/builtin-stat.c 415
-rw-r--r-- tools/perf/builtin-top.c 33
-rw-r--r-- tools/perf/perf.h 22
-rw-r--r-- tools/perf/util/callchain.c 174
-rw-r--r-- tools/perf/util/callchain.h 33
-rw-r--r-- tools/perf/util/ctype.c 17
-rw-r--r-- tools/perf/util/header.c 242
-rw-r--r-- tools/perf/util/header.h 37
-rw-r--r-- tools/perf/util/help.c 15
-rw-r--r-- tools/perf/util/pager.c 5
-rw-r--r-- tools/perf/util/parse-events.c 167
-rw-r--r-- tools/perf/util/run-command.c 95
-rw-r--r-- tools/perf/util/run-command.h 5
-rw-r--r-- tools/perf/util/strbuf.c 2
-rw-r--r-- tools/perf/util/string.c 2
-rw-r--r-- tools/perf/util/string.h 4
-rw-r--r-- tools/perf/util/strlist.c 184
-rw-r--r-- tools/perf/util/strlist.h 32
-rw-r--r-- tools/perf/util/symbol.c 36
-rw-r--r-- tools/perf/util/symbol.h 17
-rw-r--r-- tools/perf/util/types.h 17
-rw-r--r-- tools/perf/util/util.h 33
29 files changed, 2200 insertions, 596 deletions
diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS
new file mode 100644
index 000000000000..c2ddcb3acbd0
--- /dev/null
+++ b/tools/perf/CREDITS
@@ -0,0 +1,30 @@
+Most of the infrastructure that 'perf' uses here has been reused
+from the Git project, as of version:
+
+ 66996ec: Sync with 1.6.2.4
+
+Here is an (incomplete!) list of main contributors to those files
+in util/* and elsewhere:
+
+ Alex Riesen
+ Christian Couder
+ Dmitry Potapov
+ Jeff King
+ Johannes Schindelin
+ Johannes Sixt
+ Junio C Hamano
+ Linus Torvalds
+ Matthias Kestenholz
+ Michal Ostrowski
+ Miklos Vajna
+ Petr Baudis
+ Pierre Habouzit
+ René Scharfe
+ Samuel Tardieu
+ Shawn O. Pearce
+ Steffen Prohaska
+ Steve Haslam
+
+Thanks guys!
+
+The full history of the files can be found in the upstream Git commits.
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 52d3fc6846a9..8aa3f8c88707 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -13,13 +13,25 @@ SYNOPSIS
DESCRIPTION
-----------
This command displays the performance counter profile information recorded
-via perf report.
+via perf record.
OPTIONS
-------
-i::
--input=::
Input file name. (default: perf.data)
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries.
+-C::
+--comms=::
+ Only consider symbols in these comms. CSV that understands
+ file://filename entries.
+-S::
+--symbols=::
+ Only consider these symbols. CSV that understands
+ file://filename entries.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c368a72721d7..0d74346d21ab 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
SYNOPSIS
--------
[verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
DESCRIPTION
-----------
@@ -40,7 +40,7 @@ OPTIONS
-a::
system-wide collection
--l::
+-S::
scale counter values
EXAMPLES
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0cbd5d6874ec..9c6d0ae3708e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -157,10 +157,15 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
+# If we're on a 64-bit kernel, use -m64
+ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
+ M64 := -m64
+endif
+
# CFLAGS and LDFLAGS are for the users to override from the command line.
-CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
-LDFLAGS = -lpthread -lrt -lelf
+CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6
+LDFLAGS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS)
ALL_LDFLAGS = $(LDFLAGS)
STRIP ?= strip
@@ -285,6 +290,7 @@ LIB_FILE=libperf.a
LIB_H += ../../include/linux/perf_counter.h
LIB_H += perf.h
+LIB_H += util/types.h
LIB_H += util/list.h
LIB_H += util/rbtree.h
LIB_H += util/levenshtein.h
@@ -295,6 +301,7 @@ LIB_H += util/util.h
LIB_H += util/help.h
LIB_H += util/strbuf.h
LIB_H += util/string.h
+LIB_H += util/strlist.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
LIB_H += util/symbol.h
@@ -316,12 +323,15 @@ LIB_OBJS += util/run-command.o
LIB_OBJS += util/quote.o
LIB_OBJS += util/strbuf.o
LIB_OBJS += util/string.o
+LIB_OBJS += util/strlist.o
LIB_OBJS += util/usage.o
LIB_OBJS += util/wrapper.o
LIB_OBJS += util/sigchain.o
LIB_OBJS += util/symbol.o
LIB_OBJS += util/color.o
LIB_OBJS += util/pager.o
+LIB_OBJS += util/header.o
+LIB_OBJS += util/callchain.o
BUILTIN_OBJS += builtin-annotate.o
BUILTIN_OBJS += builtin-help.o
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b1ed5f766cb3..722c0f54e549 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -25,6 +25,10 @@
#define SHOW_USER 2
#define SHOW_HV 4
+#define MIN_GREEN 0.5
+#define MIN_RED 5.0
+
+
static char const *input_name = "perf.data";
static char *vmlinux = "vmlinux";
@@ -39,40 +43,42 @@ static int dump_trace = 0;
static int verbose;
+static int print_line;
+
static unsigned long page_size;
static unsigned long mmap_window = 32;
struct ip_event {
struct perf_event_header header;
- __u64 ip;
- __u32 pid, tid;
+ u64 ip;
+ u32 pid, tid;
};
struct mmap_event {
struct perf_event_header header;
- __u32 pid, tid;
- __u64 start;
- __u64 len;
- __u64 pgoff;
+ u32 pid, tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
char filename[PATH_MAX];
};
struct comm_event {
struct perf_event_header header;
- __u32 pid, tid;
+ u32 pid, tid;
char comm[16];
};
struct fork_event {
struct perf_event_header header;
- __u32 pid, ppid;
+ u32 pid, ppid;
};
struct period_event {
struct perf_event_header header;
- __u64 time;
- __u64 id;
- __u64 sample_period;
+ u64 time;
+ u64 id;
+ u64 sample_period;
};
typedef union event_union {
@@ -84,6 +90,13 @@ typedef union event_union {
struct period_event period;
} event_t;
+
+struct sym_ext {
+ struct rb_node node;
+ double percent;
+ char *path;
+};
+
static LIST_HEAD(dsos);
static struct dso *kernel_dso;
static struct dso *vdso;
@@ -145,7 +158,7 @@ static void dsos__fprintf(FILE *fp)
dso__fprintf(pos, fp);
}
-static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
+static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip)
{
return dso__find_symbol(kernel_dso, ip);
}
@@ -178,19 +191,19 @@ static int load_kernel(void)
struct map {
struct list_head node;
- __u64 start;
- __u64 end;
- __u64 pgoff;
- __u64 (*map_ip)(struct map *, __u64);
+ u64 start;
+ u64 end;
+ u64 pgoff;
+ u64 (*map_ip)(struct map *, u64);
struct dso *dso;
};
-static __u64 map__map_ip(struct map *map, __u64 ip)
+static u64 map__map_ip(struct map *map, u64 ip)
{
return ip - map->start + map->pgoff;
}
-static __u64 vdso__map_ip(struct map *map, __u64 ip)
+static u64 vdso__map_ip(struct map *map, u64 ip)
{
return ip;
}
@@ -373,7 +386,7 @@ static int thread__fork(struct thread *self, struct thread *parent)
return 0;
}
-static struct map *thread__find_map(struct thread *self, __u64 ip)
+static struct map *thread__find_map(struct thread *self, u64 ip)
{
struct map *pos;
@@ -414,7 +427,7 @@ struct hist_entry {
struct map *map;
struct dso *dso;
struct symbol *sym;
- __u64 ip;
+ u64 ip;
char level;
uint32_t count;
@@ -519,7 +532,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self)
if (self->dso)
return fprintf(fp, "%-25s", self->dso->name);
- return fprintf(fp, "%016llx ", (__u64)self->ip);
+ return fprintf(fp, "%016llx ", (u64)self->ip);
}
static struct sort_entry sort_dso = {
@@ -533,7 +546,7 @@ static struct sort_entry sort_dso = {
static int64_t
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
{
- __u64 ip_l, ip_r;
+ u64 ip_l, ip_r;
if (left->sym == right->sym)
return 0;
@@ -550,13 +563,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
size_t ret = 0;
if (verbose)
- ret += fprintf(fp, "%#018llx ", (__u64)self->ip);
+ ret += fprintf(fp, "%#018llx ", (u64)self->ip);
if (self->sym) {
ret += fprintf(fp, "[%c] %s",
self->dso == kernel_dso ? 'k' : '.', self->sym->name);
} else {
- ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+ ret += fprintf(fp, "%#016llx", (u64)self->ip);
}
return ret;
@@ -647,7 +660,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
/*
* collect histogram counts
*/
-static void hist_hit(struct hist_entry *he, __u64 ip)
+static void hist_hit(struct hist_entry *he, u64 ip)
{
unsigned int sym_size, offset;
struct symbol *sym = he->sym;
@@ -676,7 +689,7 @@ static void hist_hit(struct hist_entry *he, __u64 ip)
static int
hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
- struct symbol *sym, __u64 ip, char level)
+ struct symbol *sym, u64 ip, char level)
{
struct rb_node **p = &hist.rb_node;
struct rb_node *parent = NULL;
@@ -842,13 +855,13 @@ static unsigned long total = 0,
total_unknown = 0;
static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
char level;
int show = 0;
struct dso *dso = NULL;
struct thread *thread = threads__findnew(event->ip.pid);
- __u64 ip = event->ip.ip;
+ u64 ip = event->ip.ip;
struct map *map = NULL;
dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
@@ -1000,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head)
static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
- if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
- return process_overflow_event(event, offset, head);
-
switch (event->header.type) {
+ case PERF_EVENT_SAMPLE:
+ return process_sample_event(event, offset, head);
+
case PERF_EVENT_MMAP:
return process_mmap_event(event, offset, head);
@@ -1030,13 +1043,33 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
+static char *get_color(double percent)
+{
+ char *color = PERF_COLOR_NORMAL;
+
+ /*
+ * We color high-overhead entries in red, mid-overhead
+ * entries in green - and keep the low overhead places
+ * normal:
+ */
+ if (percent >= MIN_RED)
+ color = PERF_COLOR_RED;
+ else {
+ if (percent > MIN_GREEN)
+ color = PERF_COLOR_GREEN;
+ }
+ return color;
+}
+
static int
-parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
+parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
{
char *line = NULL, *tmp, *tmp2;
+ static const char *prev_line;
+ static const char *prev_color;
unsigned int offset;
size_t line_len;
- __u64 line_ip;
+ u64 line_ip;
int ret;
char *c;
@@ -1073,27 +1106,36 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
}
if (line_ip != -1) {
+ const char *path = NULL;
unsigned int hits = 0;
double percent = 0.0;
- char *color = PERF_COLOR_NORMAL;
+ char *color;
+ struct sym_ext *sym_ext = sym->priv;
offset = line_ip - start;
if (offset < len)
hits = sym->hist[offset];
- if (sym->hist_sum)
+ if (offset < len && sym_ext) {
+ path = sym_ext[offset].path;
+ percent = sym_ext[offset].percent;
+ } else if (sym->hist_sum)
percent = 100.0 * hits / sym->hist_sum;
+ color = get_color(percent);
+
/*
- * We color high-overhead entries in red, mid-overhead
- * entries in green - and keep the low overhead places
- * normal:
+ * Also color the filename and line if needed, with
+ * the same color as the percentage. Don't print it twice
+ * for adjacent same-colored ips with the same filename:line
*/
- if (percent >= 5.0)
- color = PERF_COLOR_RED;
- else {
- if (percent > 0.5)
- color = PERF_COLOR_GREEN;
+ if (path) {
+ if (!prev_line || strcmp(prev_line, path)
+ || color != prev_color) {
+ color_fprintf(stdout, color, " %s", path);
+ prev_line = path;
+ prev_color = color;
+ }
}
color_fprintf(stdout, color, " %7.2f", percent);
@@ -1109,10 +1151,125 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
return 0;
}
+static struct rb_root root_sym_ext;
+
+static void insert_source_line(struct sym_ext *sym_ext)
+{
+ struct sym_ext *iter;
+ struct rb_node **p = &root_sym_ext.rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct sym_ext, node);
+
+ if (sym_ext->percent > iter->percent)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&sym_ext->node, parent, p);
+ rb_insert_color(&sym_ext->node, &root_sym_ext);
+}
+
+static void free_source_line(struct symbol *sym, int len)
+{
+ struct sym_ext *sym_ext = sym->priv;
+ int i;
+
+ if (!sym_ext)
+ return;
+
+ for (i = 0; i < len; i++)
+ free(sym_ext[i].path);
+ free(sym_ext);
+
+ sym->priv = NULL;
+ root_sym_ext = RB_ROOT;
+}
+
+/* Get the filename:line for the colored entries */
+static void
+get_source_line(struct symbol *sym, u64 start, int len, char *filename)
+{
+ int i;
+ char cmd[PATH_MAX * 2];
+ struct sym_ext *sym_ext;
+
+ if (!sym->hist_sum)
+ return;
+
+ sym->priv = calloc(len, sizeof(struct sym_ext));
+ if (!sym->priv)
+ return;
+
+ sym_ext = sym->priv;
+
+ for (i = 0; i < len; i++) {
+ char *path = NULL;
+ size_t line_len;
+ u64 offset;
+ FILE *fp;
+
+ sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum;
+ if (sym_ext[i].percent <= 0.5)
+ continue;
+
+ offset = start + i;
+ sprintf(cmd, "addr2line -e %s %016llx", filename, offset);
+ fp = popen(cmd, "r");
+ if (!fp)
+ continue;
+
+ if (getline(&path, &line_len, fp) < 0 || !line_len)
+ goto next;
+
+ sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
+ if (!sym_ext[i].path)
+ goto next;
+
+ strcpy(sym_ext[i].path, path);
+ insert_source_line(&sym_ext[i]);
+
+ next:
+ pclose(fp);
+ }
+}
+
+static void print_summary(char *filename)
+{
+ struct sym_ext *sym_ext;
+ struct rb_node *node;
+
+ printf("\nSorted summary for file %s\n", filename);
+ printf("----------------------------------------------\n\n");
+
+ if (RB_EMPTY_ROOT(&root_sym_ext)) {
+ printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+ return;
+ }
+
+ node = rb_first(&root_sym_ext);
+ while (node) {
+ double percent;
+ char *color;
+ char *path;
+
+ sym_ext = rb_entry(node, struct sym_ext, node);
+ percent = sym_ext->percent;
+ color = get_color(percent);
+ path = sym_ext->path;
+
+ color_fprintf(stdout, color, " %7.2f %s", percent, path);
+ node = rb_next(node);
+ }
+}
+
static void annotate_sym(struct dso *dso, struct symbol *sym)
{
char *filename = dso->name;
- __u64 start, end, len;
+ u64 start, end, len;
char command[PATH_MAX*2];
FILE *file;
@@ -1121,13 +1278,6 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
if (dso == kernel_dso)
filename = vmlinux;
- printf("\n------------------------------------------------\n");
- printf(" Percent | Source code & Disassembly of %s\n", filename);
- printf("------------------------------------------------\n");
-
- if (verbose >= 2)
- printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
-
start = sym->obj_start;
if (!start)
start = sym->start;
@@ -1135,7 +1285,19 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
end = start + sym->end - sym->start + 1;
len = sym->end - sym->start;
- sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename);
+ if (print_line) {
+ get_source_line(sym, start, len, filename);
+ print_summary(filename);
+ }
+
+ printf("\n\n------------------------------------------------\n");
+ printf(" Percent | Source code & Disassembly of %s\n", filename);
+ printf("------------------------------------------------\n");
+
+ if (verbose >= 2)
+ printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+
+ sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (u64)start, (u64)end, filename);
if (verbose >= 3)
printf("doing: %s\n", command);
@@ -1150,6 +1312,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
}
pclose(file);
+ if (print_line)
+ free_source_line(sym, len);
}
static void find_annotations(void)
@@ -1308,6 +1472,8 @@ static const struct option options[] = {
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+ OPT_BOOLEAN('l', "print-line", &print_line,
+ "print matching source lines (may be slow)"),
OPT_END()
};
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0f5771f615da..d18546f37d7c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -14,6 +14,8 @@
#include "util/parse-events.h"
#include "util/string.h"
+#include "util/header.h"
+
#include <unistd.h>
#include <sched.h>
@@ -37,33 +39,40 @@ static pid_t target_pid = -1;
static int inherit = 1;
static int force = 0;
static int append_file = 0;
+static int call_graph = 0;
static int verbose = 0;
+static int inherit_stat = 0;
+static int no_samples = 0;
static long samples;
static struct timeval last_read;
static struct timeval this_read;
-static __u64 bytes_written;
+static u64 bytes_written;
static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
static int nr_poll;
static int nr_cpu;
+static int file_new = 1;
+
+struct perf_header *header;
+
struct mmap_event {
struct perf_event_header header;
- __u32 pid;
- __u32 tid;
- __u64 start;
- __u64 len;
- __u64 pgoff;
+ u32 pid;
+ u32 tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
char filename[PATH_MAX];
};
struct comm_event {
struct perf_event_header header;
- __u32 pid;
- __u32 tid;
+ u32 pid;
+ u32 tid;
char comm[16];
};
@@ -77,10 +86,10 @@ struct mmap_data {
static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-static unsigned int mmap_read_head(struct mmap_data *md)
+static unsigned long mmap_read_head(struct mmap_data *md)
{
struct perf_counter_mmap_page *pc = md->base;
- int head;
+ long head;
head = pc->data_head;
rmb();
@@ -88,6 +97,32 @@ static unsigned int mmap_read_head(struct mmap_data *md)
return head;
}
+static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
+{
+ struct perf_counter_mmap_page *pc = md->base;
+
+ /*
+ * ensure all reads are done before we write the tail out.
+ */
+ /* mb(); */
+ pc->data_tail = tail;
+}
+
+static void write_output(void *buf, size_t size)
+{
+ while (size) {
+ int ret = write(output, buf, size);
+
+ if (ret < 0)
+ die("failed to write");
+
+ size -= ret;
+ buf += ret;
+
+ bytes_written += ret;
+ }
+}
+
static void mmap_read(struct mmap_data *md)
{
unsigned int head = mmap_read_head(md);
@@ -108,7 +143,7 @@ static void mmap_read(struct mmap_data *md)
* In either case, truncate and restart at head.
*/
diff = head - old;
- if (diff > md->mask / 2 || diff < 0) {
+ if (diff < 0) {
struct timeval iv;
unsigned long msecs;
@@ -136,36 +171,17 @@ static void mmap_read(struct mmap_data *md)
size = md->mask + 1 - (old & md->mask);
old += size;
- while (size) {
- int ret = write(output, buf, size);
-
- if (ret < 0)
- die("failed to write");
-
- size -= ret;
- buf += ret;
-
- bytes_written += ret;
- }
+ write_output(buf, size);
}
buf = &data[old & md->mask];
size = head - old;
old += size;
- while (size) {
- int ret = write(output, buf, size);
-
- if (ret < 0)
- die("failed to write");
-
- size -= ret;
- buf += ret;
-
- bytes_written += ret;
- }
+ write_output(buf, size);
md->prev = old;
+ mmap_write_tail(md, old);
}
static volatile int done = 0;
@@ -191,7 +207,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
struct comm_event comm_ev;
char filename[PATH_MAX];
char bf[BUFSIZ];
- int fd, ret;
+ int fd;
size_t size;
char *field, *sep;
DIR *tasks;
@@ -201,8 +217,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
fd = open(filename, O_RDONLY);
if (fd < 0) {
- fprintf(stderr, "couldn't open %s\n", filename);
- exit(EXIT_FAILURE);
+ /*
+ * We raced with a task exiting - just return:
+ */
+ if (verbose)
+ fprintf(stderr, "couldn't open %s\n", filename);
+ return;
}
if (read(fd, bf, sizeof(bf)) < 0) {
fprintf(stderr, "couldn't read %s\n", filename);
@@ -223,17 +243,13 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.pid = pid;
comm_ev.header.type = PERF_EVENT_COMM;
- size = ALIGN(size, sizeof(__u64));
+ size = ALIGN(size, sizeof(u64));
comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
if (!full) {
comm_ev.tid = pid;
- ret = write(output, &comm_ev, comm_ev.header.size);
- if (ret < 0) {
- perror("failed to write");
- exit(-1);
- }
+ write_output(&comm_ev, comm_ev.header.size);
return;
}
@@ -248,11 +264,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
comm_ev.tid = pid;
- ret = write(output, &comm_ev, comm_ev.header.size);
- if (ret < 0) {
- perror("failed to write");
- exit(-1);
- }
+ write_output(&comm_ev, comm_ev.header.size);
}
closedir(tasks);
return;
@@ -272,8 +284,12 @@ static void pid_synthesize_mmap_samples(pid_t pid)
fp = fopen(filename, "r");
if (fp == NULL) {
- fprintf(stderr, "couldn't open %s\n", filename);
- exit(EXIT_FAILURE);
+ /*
+ * We raced with a task exiting - just return:
+ */
+ if (verbose)
+ fprintf(stderr, "couldn't open %s\n", filename);
+ return;
}
while (1) {
char bf[BUFSIZ], *pbf = bf;
@@ -295,33 +311,29 @@ static void pid_synthesize_mmap_samples(pid_t pid)
continue;
pbf += n + 3;
if (*pbf == 'x') { /* vm_exec */
- char *execname = strrchr(bf, ' ');
+ char *execname = strchr(bf, '/');
- if (execname == NULL || execname[1] != '/')
+ if (execname == NULL)
continue;
- execname += 1;
size = strlen(execname);
execname[size - 1] = '\0'; /* Remove \n */
memcpy(mmap_ev.filename, execname, size);
- size = ALIGN(size, sizeof(__u64));
+ size = ALIGN(size, sizeof(u64));
mmap_ev.len -= mmap_ev.start;
mmap_ev.header.size = (sizeof(mmap_ev) -
(sizeof(mmap_ev.filename) - size));
mmap_ev.pid = pid;
mmap_ev.tid = pid;
- if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
- perror("failed to write");
- exit(-1);
- }
+ write_output(&mmap_ev, mmap_ev.header.size);
}
}
fclose(fp);
}
-static void synthesize_samples(void)
+static void synthesize_all(void)
{
DIR *proc;
struct dirent dirent, *next;
@@ -345,24 +357,58 @@ static void synthesize_samples(void)
static int group_fd;
+static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr)
+{
+ struct perf_header_attr *h_attr;
+
+ if (nr < header->attrs) {
+ h_attr = header->attr[nr];
+ } else {
+ h_attr = perf_header_attr__new(a);
+ perf_header__add_attr(header, h_attr);
+ }
+
+ return h_attr;
+}
+
static void create_counter(int counter, int cpu, pid_t pid)
{
struct perf_counter_attr *attr = attrs + counter;
- int track = 1;
+ struct perf_header_attr *h_attr;
+ int track = !counter; /* only the first counter needs these */
+ struct {
+ u64 count;
+ u64 time_enabled;
+ u64 time_running;
+ u64 id;
+ } read_data;
+
+ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING |
+ PERF_FORMAT_ID;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+
if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1;
attr->sample_freq = freq;
}
+
+ if (no_samples)
+ attr->sample_freq = 0;
+
+ if (inherit_stat)
+ attr->inherit_stat = 1;
+
+ if (call_graph)
+ attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+
attr->mmap = track;
attr->comm = track;
attr->inherit = (cpu < 0) && inherit;
attr->disabled = 1;
- track = 0; /* only the first counter needs these */
-
try_again:
fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
@@ -393,6 +439,22 @@ try_again:
exit(-1);
}
+ h_attr = get_header_attr(attr, counter);
+
+ if (!file_new) {
+ if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
+ fprintf(stderr, "incompatible append\n");
+ exit(-1);
+ }
+ }
+
+ if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) {
+ perror("Unable to read perf file descriptor\n");
+ exit(-1);
+ }
+
+ perf_header_attr__add_id(h_attr, read_data.id);
+
assert(fd[nr_cpu][counter] >= 0);
fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
@@ -410,7 +472,7 @@ try_again:
mmap_array[nr_cpu][counter].prev = 0;
mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
+ PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
error("failed to mmap with %d (%s)\n", errno, strerror(errno));
exit(-1);
@@ -423,11 +485,6 @@ static void open_counters(int cpu, pid_t pid)
{
int counter;
- if (pid > 0) {
- pid_synthesize_comm_event(pid, 0);
- pid_synthesize_mmap_samples(pid);
- }
-
group_fd = -1;
for (counter = 0; counter < nr_counters; counter++)
create_counter(counter, cpu, pid);
@@ -435,11 +492,18 @@ static void open_counters(int cpu, pid_t pid)
nr_cpu++;
}
+static void atexit_header(void)
+{
+ header->data_size += bytes_written;
+
+ perf_header__write(header, output);
+}
+
static int __cmd_record(int argc, const char **argv)
{
int i, counter;
struct stat st;
- pid_t pid;
+ pid_t pid = 0;
int flags;
int ret;
@@ -448,6 +512,10 @@ static int __cmd_record(int argc, const char **argv)
assert(nr_cpus <= MAX_NR_CPUS);
assert(nr_cpus >= 0);
+ atexit(sig_atexit);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGINT, sig_handler);
+
if (!stat(output_name, &st) && !force && !append_file) {
fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
output_name);
@@ -456,7 +524,7 @@ static int __cmd_record(int argc, const char **argv)
flags = O_CREAT|O_RDWR;
if (append_file)
- flags |= O_APPEND;
+ file_new = 0;
else
flags |= O_TRUNC;
@@ -466,14 +534,30 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
+ if (!file_new)
+ header = perf_header__read(output);
+ else
+ header = perf_header__new();
+
+ atexit(atexit_header);
+
if (!system_wide) {
- open_counters(-1, target_pid != -1 ? target_pid : getpid());
+ pid = target_pid;
+ if (pid == -1)
+ pid = getpid();
+
+ open_counters(-1, pid);
} else for (i = 0; i < nr_cpus; i++)
open_counters(i, target_pid);
- atexit(sig_atexit);
- signal(SIGCHLD, sig_handler);
- signal(SIGINT, sig_handler);
+ if (file_new)
+ perf_header__write(header, output);
+
+ if (!system_wide) {
+ pid_synthesize_comm_event(pid, 0);
+ pid_synthesize_mmap_samples(pid);
+ } else
+ synthesize_all();
if (target_pid == -1 && argc) {
pid = fork();
@@ -498,10 +582,7 @@ static int __cmd_record(int argc, const char **argv)
}
}
- if (system_wide)
- synthesize_samples();
-
- while (!done) {
+ for (;;) {
int hits = samples;
for (i = 0; i < nr_cpu; i++) {
@@ -509,8 +590,11 @@ static int __cmd_record(int argc, const char **argv)
mmap_read(&mmap_array[i][counter]);
}
- if (hits == samples)
+ if (hits == samples) {
+ if (done)
+ break;
ret = poll(event_array, nr_poll, 100);
+ }
}
/*
@@ -555,8 +639,14 @@ static const struct option options[] = {
"profile at this frequency"),
OPT_INTEGER('m', "mmap-pages", &mmap_pages,
"number of mmap data pages"),
+ OPT_BOOLEAN('g', "call-graph", &call_graph,
+ "do call-graph (stack chain/backtrace) recording"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('s', "stat", &inherit_stat,
+ "per thread counts"),
+ OPT_BOOLEAN('n', "no-samples", &no_samples,
+ "don't sample"),
OPT_END()
};
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 82fa93b4db99..135b7837e6bf 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -15,8 +15,11 @@
#include "util/rbtree.h"
#include "util/symbol.h"
#include "util/string.h"
+#include "util/callchain.h"
+#include "util/strlist.h"
#include "perf.h"
+#include "util/header.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
@@ -30,51 +33,78 @@ static char *vmlinux = NULL;
static char default_sort_order[] = "comm,dso";
static char *sort_order = default_sort_order;
+static char *dso_list_str, *comm_list_str, *sym_list_str;
+static struct strlist *dso_list, *comm_list, *sym_list;
static int input;
static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
static int dump_trace = 0;
#define dprintf(x...) do { if (dump_trace) printf(x); } while (0)
+#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0)
static int verbose;
+#define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0)
+
static int full_paths;
static unsigned long page_size;
static unsigned long mmap_window = 32;
+static char default_parent_pattern[] = "^sys_|^do_page_fault";
+static char *parent_pattern = default_parent_pattern;
+static regex_t parent_regex;
+
+static int exclude_other = 1;
+static int callchain;
+
+static u64 sample_type;
+
struct ip_event {
struct perf_event_header header;
- __u64 ip;
- __u32 pid, tid;
- __u64 period;
+ u64 ip;
+ u32 pid, tid;
+ unsigned char __more_data[];
};
struct mmap_event {
struct perf_event_header header;
- __u32 pid, tid;
- __u64 start;
- __u64 len;
- __u64 pgoff;
+ u32 pid, tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
char filename[PATH_MAX];
};
struct comm_event {
struct perf_event_header header;
- __u32 pid, tid;
+ u32 pid, tid;
char comm[16];
};
struct fork_event {
struct perf_event_header header;
- __u32 pid, ppid;
+ u32 pid, ppid;
};
struct period_event {
struct perf_event_header header;
- __u64 time;
- __u64 id;
- __u64 sample_period;
+ u64 time;
+ u64 id;
+ u64 sample_period;
+};
+
+struct lost_event {
+ struct perf_event_header header;
+ u64 id;
+ u64 lost;
+};
+
+struct read_event {
+ struct perf_event_header header;
+ u32 pid,tid;
+ u64 value;
+ u64 format[3];
};
typedef union event_union {
@@ -84,6 +114,8 @@ typedef union event_union {
struct comm_event comm;
struct fork_event fork;
struct period_event period;
+ struct lost_event lost;
+ struct read_event read;
} event_t;
static LIST_HEAD(dsos);
@@ -119,15 +151,11 @@ static struct dso *dsos__findnew(const char *name)
nr = dso__load(dso, NULL, verbose);
if (nr < 0) {
- if (verbose)
- fprintf(stderr, "Failed to open: %s\n", name);
+ eprintf("Failed to open: %s\n", name);
goto out_delete_dso;
}
- if (!nr && verbose) {
- fprintf(stderr,
- "No symbols found in: %s, maybe install a debug package?\n",
- name);
- }
+ if (!nr)
+ eprintf("No symbols found in: %s, maybe install a debug package?\n", name);
dsos__add(dso);
@@ -146,7 +174,7 @@ static void dsos__fprintf(FILE *fp)
dso__fprintf(pos, fp);
}
-static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
+static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip)
{
return dso__find_symbol(kernel_dso, ip);
}
@@ -193,26 +221,26 @@ static int strcommon(const char *pathname)
struct map {
struct list_head node;
- __u64 start;
- __u64 end;
- __u64 pgoff;
- __u64 (*map_ip)(struct map *, __u64);
+ u64 start;
+ u64 end;
+ u64 pgoff;
+ u64 (*map_ip)(struct map *, u64);
struct dso *dso;
};
-static __u64 map__map_ip(struct map *map, __u64 ip)
+static u64 map__map_ip(struct map *map, u64 ip)
{
return ip - map->start + map->pgoff;
}
-static __u64 vdso__map_ip(struct map *map, __u64 ip)
+static u64 vdso__map_ip(struct map *map, u64 ip)
{
return ip;
}
static inline int is_anon_memory(const char *filename)
{
- return strcmp(filename, "//anon") == 0;
+ return strcmp(filename, "//anon") == 0;
}
static struct map *map__new(struct mmap_event *event)
@@ -383,9 +411,27 @@ static void thread__insert_map(struct thread *self, struct map *map)
list_for_each_entry_safe(pos, tmp, &self->maps, node) {
if (map__overlap(pos, map)) {
- list_del_init(&pos->node);
- /* XXX leaks dsos */
- free(pos);
+ if (verbose >= 2) {
+ printf("overlapping maps:\n");
+ map__fprintf(map, stdout);
+ map__fprintf(pos, stdout);
+ }
+
+ if (map->start <= pos->start && map->end > pos->start)
+ pos->start = map->end;
+
+ if (map->end >= pos->end && map->start < pos->end)
+ pos->end = map->start;
+
+ if (verbose >= 2) {
+ printf("after collision:\n");
+ map__fprintf(pos, stdout);
+ }
+
+ if (pos->start >= pos->end) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
}
}
@@ -412,7 +458,7 @@ static int thread__fork(struct thread *self, struct thread *parent)
return 0;
}
-static struct map *thread__find_map(struct thread *self, __u64 ip)
+static struct map *thread__find_map(struct thread *self, u64 ip)
{
struct map *pos;
@@ -447,16 +493,19 @@ static size_t threads__fprintf(FILE *fp)
static struct rb_root hist;
struct hist_entry {
- struct rb_node rb_node;
-
- struct thread *thread;
- struct map *map;
- struct dso *dso;
- struct symbol *sym;
- __u64 ip;
- char level;
-
- __u64 count;
+ struct rb_node rb_node;
+
+ struct thread *thread;
+ struct map *map;
+ struct dso *dso;
+ struct symbol *sym;
+ struct symbol *parent;
+ u64 ip;
+ char level;
+ struct callchain_node callchain;
+ struct rb_root sorted_chain;
+
+ u64 count;
};
/*
@@ -473,6 +522,16 @@ struct sort_entry {
size_t (*print)(FILE *fp, struct hist_entry *);
};
+static int64_t cmp_null(void *l, void *r)
+{
+ if (!l && !r)
+ return 0;
+ else if (!l)
+ return -1;
+ else
+ return 1;
+}
+
/* --sort pid */
static int64_t
@@ -507,14 +566,8 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
char *comm_l = left->thread->comm;
char *comm_r = right->thread->comm;
- if (!comm_l || !comm_r) {
- if (!comm_l && !comm_r)
- return 0;
- else if (!comm_l)
- return -1;
- else
- return 1;
- }
+ if (!comm_l || !comm_r)
+ return cmp_null(comm_l, comm_r);
return strcmp(comm_l, comm_r);
}
@@ -540,14 +593,8 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
struct dso *dso_l = left->dso;
struct dso *dso_r = right->dso;
- if (!dso_l || !dso_r) {
- if (!dso_l && !dso_r)
- return 0;
- else if (!dso_l)
- return -1;
- else
- return 1;
- }
+ if (!dso_l || !dso_r)
+ return cmp_null(dso_l, dso_r);
return strcmp(dso_l->name, dso_r->name);
}
@@ -558,7 +605,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self)
if (self->dso)
return fprintf(fp, "%-25s", self->dso->name);
- return fprintf(fp, "%016llx ", (__u64)self->ip);
+ return fprintf(fp, "%016llx ", (u64)self->ip);
}
static struct sort_entry sort_dso = {
@@ -572,7 +619,7 @@ static struct sort_entry sort_dso = {
static int64_t
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
{
- __u64 ip_l, ip_r;
+ u64 ip_l, ip_r;
if (left->sym == right->sym)
return 0;
@@ -589,13 +636,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
size_t ret = 0;
if (verbose)
- ret += fprintf(fp, "%#018llx ", (__u64)self->ip);
+ ret += fprintf(fp, "%#018llx ", (u64)self->ip);
if (self->sym) {
ret += fprintf(fp, "[%c] %s",
self->dso == kernel_dso ? 'k' : '.', self->sym->name);
} else {
- ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+ ret += fprintf(fp, "%#016llx", (u64)self->ip);
}
return ret;
@@ -607,7 +654,38 @@ static struct sort_entry sort_sym = {
.print = sort__sym_print,
};
+/* --sort parent */
+
+static int64_t
+sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct symbol *sym_l = left->parent;
+ struct symbol *sym_r = right->parent;
+
+ if (!sym_l || !sym_r)
+ return cmp_null(sym_l, sym_r);
+
+ return strcmp(sym_l->name, sym_r->name);
+}
+
+static size_t
+sort__parent_print(FILE *fp, struct hist_entry *self)
+{
+ size_t ret = 0;
+
+ ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]");
+
+ return ret;
+}
+
+static struct sort_entry sort_parent = {
+ .header = "Parent symbol ",
+ .cmp = sort__parent_cmp,
+ .print = sort__parent_print,
+};
+
static int sort__need_collapse = 0;
+static int sort__has_parent = 0;
struct sort_dimension {
char *name;
@@ -620,6 +698,7 @@ static struct sort_dimension sort_dimensions[] = {
{ .name = "comm", .entry = &sort_comm, },
{ .name = "dso", .entry = &sort_dso, },
{ .name = "symbol", .entry = &sort_sym, },
+ { .name = "parent", .entry = &sort_parent, },
};
static LIST_HEAD(hist_entry__sort_list);
@@ -640,6 +719,19 @@ static int sort_dimension__add(char *tok)
if (sd->entry->collapse)
sort__need_collapse = 1;
+ if (sd->entry == &sort_parent) {
+ int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+ if (ret) {
+ char err[BUFSIZ];
+
+ regerror(ret, &parent_regex, err, sizeof(err));
+ fprintf(stderr, "Invalid regex: %s\n%s",
+ parent_pattern, err);
+ exit(-1);
+ }
+ sort__has_parent = 1;
+ }
+
list_add_tail(&sd->entry->list, &hist_entry__sort_list);
sd->taken = 1;
@@ -684,11 +776,56 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
}
static size_t
-hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples)
+callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples)
+{
+ struct callchain_list *chain;
+ size_t ret = 0;
+
+ if (!self)
+ return 0;
+
+ ret += callchain__fprintf(fp, self->parent, total_samples);
+
+
+ list_for_each_entry(chain, &self->val, list)
+ ret += fprintf(fp, " %p\n", (void *)chain->ip);
+
+ return ret;
+}
+
+static size_t
+hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
+ u64 total_samples)
+{
+ struct rb_node *rb_node;
+ struct callchain_node *chain;
+ size_t ret = 0;
+
+ rb_node = rb_first(&self->sorted_chain);
+ while (rb_node) {
+ double percent;
+
+ chain = rb_entry(rb_node, struct callchain_node, rb_node);
+ percent = chain->hit * 100.0 / total_samples;
+ ret += fprintf(fp, " %6.2f%%\n", percent);
+ ret += callchain__fprintf(fp, chain, total_samples);
+ ret += fprintf(fp, "\n");
+ rb_node = rb_next(rb_node);
+ }
+
+ return ret;
+}
+
+
+static size_t
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
{
struct sort_entry *se;
size_t ret;
+ if (exclude_other && !self->parent)
+ return 0;
+
if (total_samples) {
double percent = self->count * 100.0 / total_samples;
char *color = PERF_COLOR_NORMAL;
@@ -711,22 +848,89 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples)
ret = fprintf(fp, "%12Ld ", self->count);
list_for_each_entry(se, &hist_entry__sort_list, list) {
+ if (exclude_other && (se == &sort_parent))
+ continue;
+
fprintf(fp, " ");
ret += se->print(fp, self);
}
ret += fprintf(fp, "\n");
+ if (callchain)
+ hist_entry_callchain__fprintf(fp, self, total_samples);
+
return ret;
}
/*
+ *
+ */
+
+static struct symbol *
+resolve_symbol(struct thread *thread, struct map **mapp,
+ struct dso **dsop, u64 *ipp)
+{
+ struct dso *dso = dsop ? *dsop : NULL;
+ struct map *map = mapp ? *mapp : NULL;
+ u64 ip = *ipp;
+
+ if (!thread)
+ return NULL;
+
+ if (dso)
+ goto got_dso;
+
+ if (map)
+ goto got_map;
+
+ map = thread__find_map(thread, ip);
+ if (map != NULL) {
+ if (mapp)
+ *mapp = map;
+got_map:
+ ip = map->map_ip(map, ip);
+
+ dso = map->dso;
+ } else {
+ /*
+ * If this is outside of all known maps,
+ * and is a negative address, try to look it
+ * up in the kernel dso, as it might be a
+ * vsyscall (which executes in user-mode):
+ */
+ if ((long long)ip < 0)
+ dso = kernel_dso;
+ }
+ dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+ dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
+ *ipp = ip;
+
+ if (dsop)
+ *dsop = dso;
+
+ if (!dso)
+ return NULL;
+got_dso:
+ return dso->find_symbol(dso, ip);
+}
+
+static int call__match(struct symbol *sym)
+{
+ if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+ return 1;
+
+ return 0;
+}
+
+/*
* collect histogram counts
*/
static int
hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
- struct symbol *sym, __u64 ip, char level, __u64 count)
+ struct symbol *sym, u64 ip, struct ip_callchain *chain,
+ char level, u64 count)
{
struct rb_node **p = &hist.rb_node;
struct rb_node *parent = NULL;
@@ -739,9 +943,42 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
.ip = ip,
.level = level,
.count = count,
+ .parent = NULL,
+ .sorted_chain = RB_ROOT
};
int cmp;
+ if (sort__has_parent && chain) {
+ u64 context = PERF_CONTEXT_MAX;
+ int i;
+
+ for (i = 0; i < chain->nr; i++) {
+ u64 ip = chain->ips[i];
+ struct dso *dso = NULL;
+ struct symbol *sym;
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ context = ip;
+ continue;
+ }
+
+ switch (context) {
+ case PERF_CONTEXT_KERNEL:
+ dso = kernel_dso;
+ break;
+ default:
+ break;
+ }
+
+ sym = resolve_symbol(thread, NULL, &dso, &ip);
+
+ if (sym && call__match(sym)) {
+ entry.parent = sym;
+ break;
+ }
+ }
+ }
+
while (*p != NULL) {
parent = *p;
he = rb_entry(parent, struct hist_entry, rb_node);
@@ -750,6 +987,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
if (!cmp) {
he->count += count;
+ if (callchain)
+ append_chain(&he->callchain, chain);
return 0;
}
@@ -763,6 +1002,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
if (!he)
return -ENOMEM;
*he = entry;
+ if (callchain) {
+ callchain_init(&he->callchain);
+ append_chain(&he->callchain, chain);
+ }
rb_link_node(&he->rb_node, parent, p);
rb_insert_color(&he->rb_node, &hist);
@@ -839,6 +1082,9 @@ static void output__insert_entry(struct hist_entry *he)
struct rb_node *parent = NULL;
struct hist_entry *iter;
+ if (callchain)
+ sort_chain_to_rbtree(&he->sorted_chain, &he->callchain);
+
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node);
@@ -873,7 +1119,7 @@ static void output__resort(void)
}
}
-static size_t output__fprintf(FILE *fp, __u64 total_samples)
+static size_t output__fprintf(FILE *fp, u64 total_samples)
{
struct hist_entry *pos;
struct sort_entry *se;
@@ -882,18 +1128,24 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples)
fprintf(fp, "\n");
fprintf(fp, "#\n");
- fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
+ fprintf(fp, "# (%Ld samples)\n", (u64)total_samples);
fprintf(fp, "#\n");
fprintf(fp, "# Overhead");
- list_for_each_entry(se, &hist_entry__sort_list, list)
+ list_for_each_entry(se, &hist_entry__sort_list, list) {
+ if (exclude_other && (se == &sort_parent))
+ continue;
fprintf(fp, " %s", se->header);
+ }
fprintf(fp, "\n");
fprintf(fp, "# ........");
list_for_each_entry(se, &hist_entry__sort_list, list) {
int i;
+ if (exclude_other && (se == &sort_parent))
+ continue;
+
fprintf(fp, " ");
for (i = 0; i < strlen(se->header); i++)
fprintf(fp, ".");
@@ -907,7 +1159,8 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples)
ret += hist_entry__fprintf(fp, pos, total_samples);
}
- if (!strcmp(sort_order, default_sort_order)) {
+ if (sort_order == default_sort_order &&
+ parent_pattern == default_parent_pattern) {
fprintf(fp, "#\n");
fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
fprintf(fp, "#\n");
@@ -932,23 +1185,41 @@ static unsigned long total = 0,
total_mmap = 0,
total_comm = 0,
total_fork = 0,
- total_unknown = 0;
+ total_unknown = 0,
+ total_lost = 0;
+
+static int validate_chain(struct ip_callchain *chain, event_t *event)
+{
+ unsigned int chain_size;
+
+ chain_size = event->header.size;
+ chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
+
+ if (chain->nr*sizeof(u64) > chain_size)
+ return -1;
+
+ return 0;
+}
static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+process_sample_event(event_t *event, unsigned long offset, unsigned long head)
{
char level;
int show = 0;
struct dso *dso = NULL;
struct thread *thread = threads__findnew(event->ip.pid);
- __u64 ip = event->ip.ip;
- __u64 period = 1;
+ u64 ip = event->ip.ip;
+ u64 period = 1;
struct map *map = NULL;
+ void *more_data = event->ip.__more_data;
+ struct ip_callchain *chain = NULL;
- if (event->header.type & PERF_SAMPLE_PERIOD)
- period = event->ip.period;
+ if (sample_type & PERF_SAMPLE_PERIOD) {
+ period = *(u64 *)more_data;
+ more_data += sizeof(u64);
+ }
- dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
+ dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
(void *)(offset + head),
(void *)(long)(event->header.size),
event->header.misc,
@@ -956,14 +1227,35 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
(void *)(long)ip,
(long long)period);
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ int i;
+
+ chain = (void *)more_data;
+
+ dprintf("... chain: nr:%Lu\n", chain->nr);
+
+ if (validate_chain(chain, event) < 0) {
+ eprintf("call-chain problem with event, skipping it.\n");
+ return 0;
+ }
+
+ if (dump_trace) {
+ for (i = 0; i < chain->nr; i++)
+ dprintf("..... %2d: %016Lx\n", i, chain->ips[i]);
+ }
+ }
+
dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
if (thread == NULL) {
- fprintf(stderr, "problem processing %d event, skipping it.\n",
+ eprintf("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
+ if (comm_list && !strlist__has_entry(comm_list, thread->comm))
+ return 0;
+
if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
show = SHOW_KERNEL;
level = 'k';
@@ -977,22 +1269,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
show = SHOW_USER;
level = '.';
- map = thread__find_map(thread, ip);
- if (map != NULL) {
- ip = map->map_ip(map, ip);
- dso = map->dso;
- } else {
- /*
- * If this is outside of all known maps,
- * and is a negative address, try to look it
- * up in the kernel dso, as it might be a
- * vsyscall (which executes in user-mode):
- */
- if ((long long)ip < 0)
- dso = kernel_dso;
- }
- dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
-
} else {
show = SHOW_HV;
level = 'H';
@@ -1000,14 +1276,16 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
}
if (show & show_mask) {
- struct symbol *sym = NULL;
+ struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
- if (dso)
- sym = dso->find_symbol(dso, ip);
+ if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
+ return 0;
+
+ if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+ return 0;
- if (hist_entry__add(thread, map, dso, sym, ip, level, period)) {
- fprintf(stderr,
- "problem incrementing symbol count, skipping event\n");
+ if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
+ eprintf("problem incrementing symbol count, skipping event\n");
return -1;
}
}
@@ -1096,12 +1374,77 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head)
}
static int
+process_lost_event(event_t *event, unsigned long offset, unsigned long head)
+{
+ dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n",
+ (void *)(offset + head),
+ (void *)(long)(event->header.size),
+ event->lost.id,
+ event->lost.lost);
+
+ total_lost += event->lost.lost;
+
+ return 0;
+}
+
+static void trace_event(event_t *event)
+{
+ unsigned char *raw_event = (void *)event;
+ char *color = PERF_COLOR_BLUE;
+ int i, j;
+
+ if (!dump_trace)
+ return;
+
+ dprintf(".");
+ cdprintf("\n. ... raw event: size %d bytes\n", event->header.size);
+
+ for (i = 0; i < event->header.size; i++) {
+ if ((i & 15) == 0) {
+ dprintf(".");
+ cdprintf(" %04x: ", i);
+ }
+
+ cdprintf(" %02x", raw_event[i]);
+
+ if (((i & 15) == 15) || i == event->header.size-1) {
+ cdprintf(" ");
+ for (j = 0; j < 15-(i & 15); j++)
+ cdprintf(" ");
+ for (j = 0; j < (i & 15); j++) {
+ if (isprint(raw_event[i-15+j]))
+ cdprintf("%c", raw_event[i-15+j]);
+ else
+ cdprintf(".");
+ }
+ cdprintf("\n");
+ }
+ }
+ dprintf(".\n");
+}
+
+static int
+process_read_event(event_t *event, unsigned long offset, unsigned long head)
+{
+ dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
+ (void *)(offset + head),
+ (void *)(long)(event->header.size),
+ event->read.pid,
+ event->read.tid,
+ event->read.value);
+
+ return 0;
+}
+
+static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
- if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
- return process_overflow_event(event, offset, head);
+ trace_event(event);
switch (event->header.type) {
+ case PERF_EVENT_SAMPLE:
+ return process_sample_event(event, offset, head);
+
case PERF_EVENT_MMAP:
return process_mmap_event(event, offset, head);
@@ -1113,6 +1456,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
case PERF_EVENT_PERIOD:
return process_period_event(event, offset, head);
+
+ case PERF_EVENT_LOST:
+ return process_lost_event(event, offset, head);
+
+ case PERF_EVENT_READ:
+ return process_read_event(event, offset, head);
+
/*
* We dont process them right now but they are fine:
*/
@@ -1128,11 +1478,30 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
+static struct perf_header *header;
+
+static u64 perf_header__sample_type(void)
+{
+ u64 sample_type = 0;
+ int i;
+
+ for (i = 0; i < header->attrs; i++) {
+ struct perf_header_attr *attr = header->attr[i];
+
+ if (!sample_type)
+ sample_type = attr->attr.sample_type;
+ else if (sample_type != attr->attr.sample_type)
+ die("non matching sample_type");
+ }
+
+ return sample_type;
+}
+
static int __cmd_report(void)
{
int ret, rc = EXIT_FAILURE;
unsigned long offset = 0;
- unsigned long head = 0;
+ unsigned long head, shift;
struct stat stat;
event_t *event;
uint32_t size;
@@ -1160,6 +1529,16 @@ static int __cmd_report(void)
exit(0);
}
+ header = perf_header__read(input);
+ head = header->data_offset;
+
+ sample_type = perf_header__sample_type();
+
+ if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ fprintf(stderr, "selected --sort parent, but no callchain data\n");
+ exit(-1);
+ }
+
if (load_kernel() < 0) {
perror("failed to load kernel symbols");
return EXIT_FAILURE;
@@ -1175,6 +1554,11 @@ static int __cmd_report(void)
cwd = NULL;
cwdlen = 0;
}
+
+ shift = page_size * (head / page_size);
+ offset += shift;
+ head -= shift;
+
remap:
buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
MAP_SHARED, input, offset);
@@ -1191,9 +1575,10 @@ more:
size = 8;
if (head + event->header.size >= page_size * mmap_window) {
- unsigned long shift = page_size * (head / page_size);
int ret;
+ shift = page_size * (head / page_size);
+
ret = munmap(buf, page_size * mmap_window);
assert(ret == 0);
@@ -1204,7 +1589,7 @@ more:
size = event->header.size;
- dprintf("%p [%p]: event: %d\n",
+ dprintf("\n%p [%p]: event: %d\n",
(void *)(offset + head),
(void *)(long)event->header.size,
event->header.type);
@@ -1231,9 +1616,13 @@ more:
head += size;
+ if (offset + head >= header->data_offset + header->data_size)
+ goto done;
+
if (offset + head < stat.st_size)
goto more;
+done:
rc = EXIT_SUCCESS;
close(input);
@@ -1241,6 +1630,7 @@ more:
dprintf(" mmap events: %10ld\n", total_mmap);
dprintf(" comm events: %10ld\n", total_comm);
dprintf(" fork events: %10ld\n", total_fork);
+ dprintf(" lost events: %10ld\n", total_lost);
dprintf(" unknown events: %10ld\n", total_unknown);
if (dump_trace)
@@ -1273,9 +1663,20 @@ static const struct option options[] = {
"dump raw trace in ASCII"),
OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
+ "sort by key(s): pid, comm, dso, symbol, parent"),
OPT_BOOLEAN('P', "full-paths", &full_paths,
"Don't shorten the pathnames taking into account the cwd"),
+ OPT_STRING('p', "parent", &parent_pattern, "regex",
+ "regex filter to identify parent, see: '--sort parent'"),
+ OPT_BOOLEAN('x', "exclude-other", &exclude_other,
+ "Only display entries with parent-match"),
+ OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
+ OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
+ "only consider symbols in these dsos"),
+ OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
+ "only consider symbols in these comms"),
+ OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]",
+ "only consider these symbols"),
OPT_END()
};
@@ -1294,6 +1695,19 @@ static void setup_sorting(void)
free(str);
}
+static void setup_list(struct strlist **list, const char *list_str,
+ const char *list_name)
+{
+ if (list_str) {
+ *list = strlist__new(true, list_str);
+ if (!*list) {
+ fprintf(stderr, "problems parsing %s list\n",
+ list_name);
+ exit(129);
+ }
+ }
+}
+
int cmd_report(int argc, const char **argv, const char *prefix)
{
symbol__init();
@@ -1304,12 +1718,21 @@ int cmd_report(int argc, const char **argv, const char *prefix)
setup_sorting();
+ if (parent_pattern != default_parent_pattern)
+ sort_dimension__add("parent");
+ else
+ exclude_other = 0;
+
/*
* Any (unrecognized) arguments left?
*/
if (argc)
usage_with_options(report_usage, options);
+ setup_list(&dso_list, dso_list_str, "dso");
+ setup_list(&comm_list, comm_list_str, "comm");
+ setup_list(&sym_list, sym_list_str, "symbol");
+
setup_pager();
return __cmd_report();
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c43e4a97dc42..2e03524a1de0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
+ * Jaswinder Singh Rajput <jaswinder@kernel.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -43,8 +44,9 @@
#include "util/parse-events.h"
#include <sys/prctl.h>
+#include <math.h>
-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
+static struct perf_counter_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
@@ -58,35 +60,46 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
};
+#define MAX_RUN 100
+
static int system_wide = 0;
-static int inherit = 1;
static int verbose = 0;
+static int nr_cpus = 0;
+static int run_idx = 0;
+
+static int run_count = 1;
+static int inherit = 1;
+static int scale = 1;
+static int target_pid = -1;
+static int null_run = 0;
static int fd[MAX_NR_CPUS][MAX_COUNTERS];
-static int target_pid = -1;
-static int nr_cpus = 0;
-static unsigned int page_size;
+static u64 runtime_nsecs[MAX_RUN];
+static u64 walltime_nsecs[MAX_RUN];
+static u64 runtime_cycles[MAX_RUN];
-static int scale = 1;
+static u64 event_res[MAX_RUN][MAX_COUNTERS][3];
+static u64 event_scaled[MAX_RUN][MAX_COUNTERS];
-static const unsigned int default_count[] = {
- 1000000,
- 1000000,
- 10000,
- 10000,
- 1000000,
- 10000,
-};
+static u64 event_res_avg[MAX_COUNTERS][3];
+static u64 event_res_noise[MAX_COUNTERS][3];
+
+static u64 event_scaled_avg[MAX_COUNTERS];
-static __u64 event_res[MAX_COUNTERS][3];
-static __u64 event_scaled[MAX_COUNTERS];
+static u64 runtime_nsecs_avg;
+static u64 runtime_nsecs_noise;
-static __u64 runtime_nsecs;
-static __u64 walltime_nsecs;
-static __u64 runtime_cycles;
+static u64 walltime_nsecs_avg;
+static u64 walltime_nsecs_noise;
-static void create_perf_stat_counter(int counter)
+static u64 runtime_cycles_avg;
+static u64 runtime_cycles_noise;
+
+#define ERR_PERF_OPEN \
+"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
+
+static void create_perf_stat_counter(int counter, int pid)
{
struct perf_counter_attr *attr = attrs + counter;
@@ -96,20 +109,21 @@ static void create_perf_stat_counter(int counter)
if (system_wide) {
int cpu;
- for (cpu = 0; cpu < nr_cpus; cpu ++) {
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
- if (fd[cpu][counter] < 0 && verbose) {
- printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno));
- }
+ if (fd[cpu][counter] < 0 && verbose)
+ fprintf(stderr, ERR_PERF_OPEN, counter,
+ fd[cpu][counter], strerror(errno));
}
} else {
- attr->inherit = inherit;
- attr->disabled = 1;
-
- fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
- if (fd[0][counter] < 0 && verbose) {
- printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno));
- }
+ attr->inherit = inherit;
+ attr->disabled = 1;
+ attr->enable_on_exec = 1;
+
+ fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
+ if (fd[0][counter] < 0 && verbose)
+ fprintf(stderr, ERR_PERF_OPEN, counter,
+ fd[0][counter], strerror(errno));
}
}
@@ -135,22 +149,24 @@ static inline int nsec_counter(int counter)
*/
static void read_counter(int counter)
{
- __u64 *count, single_count[3];
+ u64 *count, single_count[3];
ssize_t res;
int cpu, nv;
int scaled;
- count = event_res[counter];
+ count = event_res[run_idx][counter];
count[0] = count[1] = count[2] = 0;
nv = scale ? 3 : 1;
- for (cpu = 0; cpu < nr_cpus; cpu ++) {
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
if (fd[cpu][counter] < 0)
continue;
- res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
- assert(res == nv * sizeof(__u64));
+ res = read(fd[cpu][counter], single_count, nv * sizeof(u64));
+ assert(res == nv * sizeof(u64));
+ close(fd[cpu][counter]);
+ fd[cpu][counter] = -1;
count[0] += single_count[0];
if (scale) {
@@ -162,13 +178,13 @@ static void read_counter(int counter)
scaled = 0;
if (scale) {
if (count[2] == 0) {
- event_scaled[counter] = -1;
+ event_scaled[run_idx][counter] = -1;
count[0] = 0;
return;
}
if (count[2] < count[1]) {
- event_scaled[counter] = 1;
+ event_scaled[run_idx][counter] = 1;
count[0] = (unsigned long long)
((double)count[0] * count[1] / count[2] + 0.5);
}
@@ -178,10 +194,129 @@ static void read_counter(int counter)
*/
if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
- runtime_nsecs = count[0];
+ runtime_nsecs[run_idx] = count[0];
if (attrs[counter].type == PERF_TYPE_HARDWARE &&
attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
- runtime_cycles = count[0];
+ runtime_cycles[run_idx] = count[0];
+}
+
+static int run_perf_stat(int argc, const char **argv)
+{
+ unsigned long long t0, t1;
+ int status = 0;
+ int counter;
+ int pid;
+ int child_ready_pipe[2], go_pipe[2];
+ char buf;
+
+ if (!system_wide)
+ nr_cpus = 1;
+
+ if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
+ perror("failed to create pipes");
+ exit(1);
+ }
+
+ if ((pid = fork()) < 0)
+ perror("failed to fork");
+
+ if (!pid) {
+ close(child_ready_pipe[0]);
+ close(go_pipe[1]);
+ fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+ /*
+ * Do a dummy execvp to get the PLT entry resolved,
+ * so we avoid the resolver overhead on the real
+ * execvp call.
+ */
+ execvp("", (char **)argv);
+
+ /*
+ * Tell the parent we're ready to go
+ */
+ close(child_ready_pipe[1]);
+
+ /*
+ * Wait until the parent tells us to go.
+ */
+ read(go_pipe[0], &buf, 1);
+
+ execvp(argv[0], (char **)argv);
+
+ perror(argv[0]);
+ exit(-1);
+ }
+
+ /*
+ * Wait for the child to be ready to exec.
+ */
+ close(child_ready_pipe[1]);
+ close(go_pipe[0]);
+ read(child_ready_pipe[0], &buf, 1);
+ close(child_ready_pipe[0]);
+
+ for (counter = 0; counter < nr_counters; counter++)
+ create_perf_stat_counter(counter, pid);
+
+ /*
+ * Enable counters and exec the command:
+ */
+ t0 = rdclock();
+
+ close(go_pipe[1]);
+ wait(&status);
+
+ t1 = rdclock();
+
+ walltime_nsecs[run_idx] = t1 - t0;
+
+ for (counter = 0; counter < nr_counters; counter++)
+ read_counter(counter);
+
+ return WEXITSTATUS(status);
+}
+
+static void print_noise(u64 *count, u64 *noise)
+{
+ if (run_count > 1)
+ fprintf(stderr, " ( +- %7.3f%% )",
+ (double)noise[0]/(count[0]+1)*100.0);
+}
+
+static void nsec_printout(int counter, u64 *count, u64 *noise)
+{
+ double msecs = (double)count[0] / 1000000;
+
+ fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));
+
+ if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+ attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
+
+ if (walltime_nsecs_avg)
+ fprintf(stderr, " # %10.3f CPUs ",
+ (double)count[0] / (double)walltime_nsecs_avg);
+ }
+ print_noise(count, noise);
+}
+
+static void abs_printout(int counter, u64 *count, u64 *noise)
+{
+ fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter));
+
+ if (runtime_cycles_avg &&
+ attrs[counter].type == PERF_TYPE_HARDWARE &&
+ attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
+
+ fprintf(stderr, " # %10.3f IPC ",
+ (double)count[0] / (double)runtime_cycles_avg);
+ } else {
+ if (runtime_nsecs_avg) {
+ fprintf(stderr, " # %10.3f M/sec",
+ (double)count[0]/runtime_nsecs_avg*1000.0);
+ }
+ }
+ print_noise(count, noise);
}
/*
@@ -189,87 +324,123 @@ static void read_counter(int counter)
*/
static void print_counter(int counter)
{
- __u64 *count;
+ u64 *count, *noise;
int scaled;
- count = event_res[counter];
- scaled = event_scaled[counter];
+ count = event_res_avg[counter];
+ noise = event_res_noise[counter];
+ scaled = event_scaled_avg[counter];
if (scaled == -1) {
- fprintf(stderr, " %14s %-20s\n",
+ fprintf(stderr, " %14s %-24s\n",
"<not counted>", event_name(counter));
return;
}
- if (nsec_counter(counter)) {
- double msecs = (double)count[0] / 1000000;
-
- fprintf(stderr, " %14.6f %-20s",
- msecs, event_name(counter));
- if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
- attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
+ if (nsec_counter(counter))
+ nsec_printout(counter, count, noise);
+ else
+ abs_printout(counter, count, noise);
- if (walltime_nsecs)
- fprintf(stderr, " # %11.3f CPU utilization factor",
- (double)count[0] / (double)walltime_nsecs);
- }
- } else {
- fprintf(stderr, " %14Ld %-20s",
- count[0], event_name(counter));
- if (runtime_nsecs)
- fprintf(stderr, " # %11.3f M/sec",
- (double)count[0]/runtime_nsecs*1000.0);
- if (runtime_cycles &&
- attrs[counter].type == PERF_TYPE_HARDWARE &&
- attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
-
- fprintf(stderr, " # %1.3f per cycle",
- (double)count[0] / (double)runtime_cycles);
- }
- }
if (scaled)
fprintf(stderr, " (scaled from %.2f%%)",
(double) count[2] / count[1] * 100);
+
fprintf(stderr, "\n");
}
-static int do_perf_stat(int argc, const char **argv)
+/*
+ * normalize_noise noise values down to stddev:
+ */
+static void normalize_noise(u64 *val)
{
- unsigned long long t0, t1;
- int counter;
- int status;
- int pid;
- int i;
+ double res;
- if (!system_wide)
- nr_cpus = 1;
+ res = (double)*val / (run_count * sqrt((double)run_count));
- for (counter = 0; counter < nr_counters; counter++)
- create_perf_stat_counter(counter);
+ *val = (u64)res;
+}
- /*
- * Enable counters and exec the command:
- */
- t0 = rdclock();
- prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+static void update_avg(const char *name, int idx, u64 *avg, u64 *val)
+{
+ *avg += *val;
- if ((pid = fork()) < 0)
- perror("failed to fork");
+ if (verbose > 1)
+ fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val);
+}
+/*
+ * Calculate the averages and noises:
+ */
+static void calc_avg(void)
+{
+ int i, j;
+
+ if (verbose > 1)
+ fprintf(stderr, "\n");
+
+ for (i = 0; i < run_count; i++) {
+ update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i);
+ update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i);
+ update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i);
+
+ for (j = 0; j < nr_counters; j++) {
+ update_avg("counter/0", j,
+ event_res_avg[j]+0, event_res[i][j]+0);
+ update_avg("counter/1", j,
+ event_res_avg[j]+1, event_res[i][j]+1);
+ update_avg("counter/2", j,
+ event_res_avg[j]+2, event_res[i][j]+2);
+ if (event_scaled[i][j] != -1)
+ update_avg("scaled", j,
+ event_scaled_avg + j, event_scaled[i]+j);
+ else
+ event_scaled_avg[j] = -1;
+ }
+ }
+ runtime_nsecs_avg /= run_count;
+ walltime_nsecs_avg /= run_count;
+ runtime_cycles_avg /= run_count;
+
+ for (j = 0; j < nr_counters; j++) {
+ event_res_avg[j][0] /= run_count;
+ event_res_avg[j][1] /= run_count;
+ event_res_avg[j][2] /= run_count;
+ }
- if (!pid) {
- if (execvp(argv[0], (char **)argv)) {
- perror(argv[0]);
- exit(-1);
+ for (i = 0; i < run_count; i++) {
+ runtime_nsecs_noise +=
+ abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg));
+ walltime_nsecs_noise +=
+ abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg));
+ runtime_cycles_noise +=
+ abs((s64)(runtime_cycles[i] - runtime_cycles_avg));
+
+ for (j = 0; j < nr_counters; j++) {
+ event_res_noise[j][0] +=
+ abs((s64)(event_res[i][j][0] - event_res_avg[j][0]));
+ event_res_noise[j][1] +=
+ abs((s64)(event_res[i][j][1] - event_res_avg[j][1]));
+ event_res_noise[j][2] +=
+ abs((s64)(event_res[i][j][2] - event_res_avg[j][2]));
}
}
- while (wait(&status) >= 0)
- ;
+ normalize_noise(&runtime_nsecs_noise);
+ normalize_noise(&walltime_nsecs_noise);
+ normalize_noise(&runtime_cycles_noise);
- prctl(PR_TASK_PERF_COUNTERS_DISABLE);
- t1 = rdclock();
+ for (j = 0; j < nr_counters; j++) {
+ normalize_noise(&event_res_noise[j][0]);
+ normalize_noise(&event_res_noise[j][1]);
+ normalize_noise(&event_res_noise[j][2]);
+ }
+}
- walltime_nsecs = t1 - t0;
+static void print_stat(int argc, const char **argv)
+{
+ int i, counter;
+
+ calc_avg();
fflush(stdout);
@@ -279,22 +450,22 @@ static int do_perf_stat(int argc, const char **argv)
for (i = 1; i < argc; i++)
fprintf(stderr, " %s", argv[i]);
- fprintf(stderr, "\':\n");
- fprintf(stderr, "\n");
-
- for (counter = 0; counter < nr_counters; counter++)
- read_counter(counter);
+ fprintf(stderr, "\'");
+ if (run_count > 1)
+ fprintf(stderr, " (%d runs)", run_count);
+ fprintf(stderr, ":\n\n");
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);
-
fprintf(stderr, "\n");
- fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
- (double)(t1-t0)/1e6);
- fprintf(stderr, "\n");
-
- return 0;
+ fprintf(stderr, " %14.9f seconds time elapsed",
+ (double)walltime_nsecs_avg/1e9);
+ if (run_count > 1) {
+ fprintf(stderr, " ( +- %7.3f%% )",
+ 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg);
+ }
+ fprintf(stderr, "\n\n");
}
static volatile int signr = -1;
@@ -327,26 +498,33 @@ static const struct option options[] = {
OPT_INTEGER('p', "pid", &target_pid,
"stat events on existing pid"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
- "system-wide collection from all CPUs"),
+ "system-wide collection from all CPUs"),
OPT_BOOLEAN('S', "scale", &scale,
- "scale/normalize counters"),
+ "scale/normalize counters"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
+ OPT_INTEGER('r', "repeat", &run_count,
+ "repeat command and print average + stddev (max: 100)"),
+ OPT_BOOLEAN('n', "null", &null_run,
+ "null run - dont start any counters"),
OPT_END()
};
int cmd_stat(int argc, const char **argv, const char *prefix)
{
- page_size = sysconf(_SC_PAGE_SIZE);
-
- memcpy(attrs, default_attrs, sizeof(attrs));
+ int status;
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
+ if (run_count <= 0 || run_count > MAX_RUN)
+ usage_with_options(stat_usage, options);
- if (!nr_counters)
- nr_counters = 8;
+ /* Set attrs and nr_counters if no event is selected and !null_run */
+ if (!null_run && !nr_counters) {
+ memcpy(attrs, default_attrs, sizeof(default_attrs));
+ nr_counters = ARRAY_SIZE(default_attrs);
+ }
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
@@ -363,5 +541,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
- return do_perf_stat(argc, argv);
+ status = 0;
+ for (run_idx = 0; run_idx < run_count; run_idx++) {
+ if (run_count != 1 && verbose)
+ fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
+ status = run_perf_stat(argc, argv);
+ }
+
+ print_stat(argc, argv);
+
+ return status;
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index fe338d3c5d7e..cf0d21f1ae10 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -54,7 +54,7 @@ static int system_wide = 0;
static int default_interval = 100000;
-static __u64 count_filter = 5;
+static u64 count_filter = 5;
static int print_entries = 15;
static int target_pid = -1;
@@ -79,8 +79,8 @@ static int dump_symtab;
* Symbols
*/
-static __u64 min_ip;
-static __u64 max_ip = -1ll;
+static u64 min_ip;
+static u64 max_ip = -1ll;
struct sym_entry {
struct rb_node rb_node;
@@ -194,7 +194,7 @@ static void print_sym_table(void)
100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
if (nr_counters == 1) {
- printf("%Ld", attrs[0].sample_period);
+ printf("%Ld", (u64)attrs[0].sample_period);
if (freq)
printf("Hz ");
else
@@ -372,7 +372,7 @@ out_delete_dso:
/*
* Binary search in the histogram table and record the hit:
*/
-static void record_ip(__u64 ip, int counter)
+static void record_ip(u64 ip, int counter)
{
struct symbol *sym = dso__find_symbol(kernel_dso, ip);
@@ -392,11 +392,11 @@ static void record_ip(__u64 ip, int counter)
samples--;
}
-static void process_event(__u64 ip, int counter)
+static void process_event(u64 ip, int counter, int user)
{
samples++;
- if (ip < min_ip || ip > max_ip) {
+ if (user) {
userspace_samples++;
return;
}
@@ -463,15 +463,15 @@ static void mmap_read_counter(struct mmap_data *md)
for (; old != head;) {
struct ip_event {
struct perf_event_header header;
- __u64 ip;
- __u32 pid, target_pid;
+ u64 ip;
+ u32 pid, target_pid;
};
struct mmap_event {
struct perf_event_header header;
- __u32 pid, target_pid;
- __u64 start;
- __u64 len;
- __u64 pgoff;
+ u32 pid, target_pid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
char filename[PATH_MAX];
};
@@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md)
old += size;
- if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
- if (event->header.type & PERF_SAMPLE_IP)
- process_event(event->ip.ip, md->counter);
+ if (event->header.type == PERF_EVENT_SAMPLE) {
+ int user =
+ (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER;
+ process_event(event->ip.ip, md->counter, user);
}
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 87a1aca4a424..8f729aedc1a3 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -13,12 +13,29 @@
#define cpu_relax() asm volatile ("" ::: "memory");
#endif
+#ifdef __s390__
+#include "../../arch/s390/include/asm/unistd.h"
+#define rmb()		asm volatile("bcr 15,0" ::: "memory")
+/* No trailing ';' here, so that "cpu_relax();" expands to one statement. */
+#define cpu_relax()	asm volatile("" ::: "memory")
+#endif
+
+#ifdef __sh__
+#include "../../arch/sh/include/asm/unistd.h"
+#if defined(__SH4A__) || defined(__SH5__)
+# define rmb() asm volatile("synco" ::: "memory")
+#else
+# define rmb() asm volatile("" ::: "memory")
+#endif
+#define cpu_relax() asm volatile("" ::: "memory")
+#endif
+
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include "../../include/linux/perf_counter.h"
+#include "util/types.h"
/*
* prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
@@ -65,4 +82,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr,
#define MAX_COUNTERS 256
#define MAX_NR_CPUS 256
+/* A callchain as read from the event stream: a count followed by the ips. */
+struct ip_callchain {
+ u64 nr; /* number of entries in ips[] */
+ u64 ips[0]; /* the instruction pointers, nr of them */
+};
+
#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
new file mode 100644
index 000000000000..ad3c28578961
--- /dev/null
+++ b/tools/perf/util/callchain.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Handle the callchains from the stream in an ad-hoc radix tree and then
+ * sort them in an rbtree.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include "callchain.h"
+
+
+/*
+ * Link @chain into the rbtree at @root, keyed on the hit count: a chain
+ * with more hits than the node being visited goes to the left, anything
+ * else goes to the right.
+ */
+static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct callchain_node *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct callchain_node, rb_node);
+
+		link = (cur->hit < chain->hit) ? &parent->rb_left
+					       : &parent->rb_right;
+	}
+
+	rb_link_node(&chain->rb_node, parent, link);
+	rb_insert_color(&chain->rb_node, root);
+}
+
+/*
+ * Once all the callchains have been read from the stream, sort them by
+ * hit: visit the children of @node first, then insert @node itself into
+ * @rb_root if it has a non-zero hit count.
+ */
+void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
+{
+	struct callchain_node *kid;
+
+	list_for_each_entry(kid, &node->children, brothers) {
+		sort_chain_to_rbtree(rb_root, kid);
+	}
+
+	if (!node->hit)
+		return;
+
+	rb_insert_callchain(rb_root, node);
+}
+
+/*
+ * Allocate a node and hook it at the tail of @parent's children list.
+ *
+ * The node is zero-filled, so val_nr and hit start out as 0 rather than
+ * holding indeterminate values until a caller assigns them.
+ *
+ * Returns the new node, or NULL (after reporting through perror()) when
+ * the allocation fails.
+ */
+static struct callchain_node *create_child(struct callchain_node *parent)
+{
+	struct callchain_node *new;
+
+	new = calloc(1, sizeof(*new));
+	if (!new) {
+		perror("not enough memory to create child for code path tree");
+		return NULL;
+	}
+	new->parent = parent;
+	INIT_LIST_HEAD(&new->children);
+	INIT_LIST_HEAD(&new->val);
+	list_add_tail(&new->brothers, &parent->children);
+
+	return new;
+}
+
+/*
+ * Append the entries chain->ips[start .. chain->nr - 1] to @node's val
+ * list and record in node->val_nr how many were added.
+ *
+ * If an allocation fails the copy stops early; val_nr then counts only
+ * the entries that actually made it onto the list.
+ */
+static void
+fill_node(struct callchain_node *node, struct ip_callchain *chain, int start)
+{
+	int i;
+
+	for (i = start; i < chain->nr; i++) {
+		struct callchain_list *call;
+
+		/* size of the list entry, not of the ip_callchain header */
+		call = malloc(sizeof(*call));
+		if (!call) {
+			perror("not enough memory for the code path tree");
+			break;
+		}
+		call->ip = chain->ips[i];
+		list_add_tail(&call->list, &node->val);
+	}
+	node->val_nr = i - start;
+}
+
+/*
+ * Create a child of @parent holding the part of @chain that starts at
+ * index parent->val_nr, and give it a hit count of 1.
+ *
+ * Does nothing when the child cannot be allocated; create_child() has
+ * already reported the failure in that case.
+ */
+static void add_child(struct callchain_node *parent, struct ip_callchain *chain)
+{
+	struct callchain_node *new;
+
+	new = create_child(parent);
+	if (!new)
+		return;
+
+	fill_node(new, chain, parent->val_nr);
+	new->hit = 1;
+}
+
+/*
+ * Split @parent at @to_split and add @chain next to the split-off part:
+ * a first new child receives @to_split and takes over @parent's hit
+ * count, @parent is left with hit 0 and val_nr set to @idx, and a second
+ * child is then created for @chain through add_child().
+ *
+ * Does nothing when the first child cannot be allocated; create_child()
+ * has already reported the failure in that case.
+ */
+static void
+split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
+		struct callchain_list *to_split, int idx)
+{
+	struct callchain_node *new;
+
+	/* split */
+	new = create_child(parent);
+	if (!new)
+		return;
+
+	/*
+	 * NOTE(review): only the single entry @to_split is moved; entries
+	 * that follow it on parent->val appear to stay with the parent, and
+	 * new->val_nr is not assigned here -- confirm this is intended.
+	 */
+	list_move_tail(&to_split->list, &new->val);
+	new->hit = parent->hit;
+	parent->hit = 0;
+	parent->val_nr = idx;
+
+	/* create the new one */
+	add_child(parent, chain);
+}
+
+static int
+__append_chain(struct callchain_node *root, struct ip_callchain *chain,
+ int start);
+
+/*
+ * Offer @chain to each child of @root in list order, comparing from index
+ * root->val_nr. Returns 0 as soon as one child accepts the chain, -1 if
+ * none does.
+ */
+static int
+__append_chain_children(struct callchain_node *root, struct ip_callchain *chain)
+{
+	struct callchain_node *child;
+
+	/* look the chain up among the children */
+	list_for_each_entry(child, &root->children, brothers) {
+		if (__append_chain(child, chain, root->val_nr) == 0)
+			return 0;
+	}
+	return -1;
+}
+
+/*
+ * Try to account @chain in the subtree rooted at @root, comparing the
+ * entries on root->val against chain->ips[] starting at index @start.
+ *
+ * Returns -1 when not even the first entry compared matches (or root->val
+ * is empty). Returns 0 when the node was split or its hit count was
+ * incremented. Otherwise returns the result of __append_chain_children().
+ */
+static int
+__append_chain(struct callchain_node *root, struct ip_callchain *chain,
+ int start)
+{
+ struct callchain_list *cnode;
+ int i = start;
+ bool found = false;
+
+ /*
+ * Look up in the current node.
+ * NOTE(review): i is advanced even when the comparison fails, and
+ * chain->ips[i] is read before i is checked against chain->nr --
+ * confirm callers guarantee start < chain->nr.
+ */
+ list_for_each_entry(cnode, &root->val, list) {
+ if (cnode->ip != chain->ips[i++])
+ break;
+ if (!found)
+ found = true;
+ if (i == chain->nr)
+ break;
+ }
+
+ /* nothing matched: leave it to the caller */
+ if (!found)
+ return -1;
+
+ /* we match only a part of the node: split it and add the new chain */
+ if (i < root->val_nr) {
+ split_add_child(root, chain, cnode, i);
+ return 0;
+ }
+
+ /* we match 100% of the path: increment the hit */
+ if (i == root->val_nr) {
+ root->hit++;
+ return 0;
+ }
+
+ /* i is past root->val_nr: try the children */
+ return __append_chain_children(root, chain);
+}
+
+/*
+ * Account @chain in the tree at @root: offer it to the existing children
+ * first and add a new child of @root when none of them takes it.
+ */
+void append_chain(struct callchain_node *root, struct ip_callchain *chain)
+{
+	int ret = __append_chain_children(root, chain);
+
+	if (ret == -1)
+		add_child(root, chain);
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
new file mode 100644
index 000000000000..fa1cd2f71fd3
--- /dev/null
+++ b/tools/perf/util/callchain.h
@@ -0,0 +1,33 @@
+#ifndef __PERF_CALLCHAIN_H
+#define __PERF_CALLCHAIN_H
+
+#include "../perf.h"
+#include "list.h"
+#include "rbtree.h"
+
+
+/* One node of the callchain tree. */
+struct callchain_node {
+ struct callchain_node *parent; /* node this one was created under */
+ struct list_head brothers; /* link in parent->children */
+ struct list_head children; /* child nodes, linked through ->brothers */
+ struct list_head val; /* list of struct callchain_list entries */
+ struct rb_node rb_node; /* link in the rbtree built by sort_chain_to_rbtree() */
+ int val_nr; /* entry count, assigned by fill_node() and split_add_child() */
+ int hit; /* hit count; also the rbtree sort key */
+};
+
+/* One instruction pointer stored on a callchain_node's val list. */
+struct callchain_list {
+ unsigned long ip; /* NOTE(review): filled from the u64 ip_callchain ips[]; narrower where long is 32 bits */
+ struct list_head list; /* link in callchain_node val */
+};
+
+/*
+ * Initialize the three list heads of @node. The remaining fields
+ * (parent, rb_node, val_nr, hit) are not touched here.
+ */
+static inline void callchain_init(struct callchain_node *node)
+{
+	INIT_LIST_HEAD(&node->val);
+	INIT_LIST_HEAD(&node->children);
+	INIT_LIST_HEAD(&node->brothers);
+}
+
+void append_chain(struct callchain_node *root, struct ip_callchain *chain);
+void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node);
+#endif
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index b90ec004f29c..0b791bd346bc 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -11,16 +11,21 @@ enum {
D = GIT_DIGIT,
G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */
+ P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */
+
+ PS = GIT_SPACE | GIT_PRINT_EXTRA,
};
unsigned char sane_ctype[256] = {
+/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
+
0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
- S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */
- D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
- 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
- A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */
- 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
- A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */
+ PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
+ D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
+ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */
+ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
+ A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */
/* Nothing in the 128.. range */
};
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
new file mode 100644
index 000000000000..450384b3bbe5
--- /dev/null
+++ b/tools/perf/util/header.c
@@ -0,0 +1,242 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util.h"
+#include "header.h"
+
+/*
+ *
+ */
+
+/*
+ * Allocate a perf_header_attr holding a copy of @attr, with room for one
+ * id and no ids recorded yet. Calls die() if either allocation fails.
+ */
+struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr)
+{
+	struct perf_header_attr *hattr = malloc(sizeof(*hattr));
+
+	if (hattr == NULL)
+		die("nomem");
+
+	hattr->attr = *attr;
+	hattr->size = 1;
+	hattr->ids = 0;
+
+	hattr->id = malloc(sizeof(u64));
+	if (hattr->id == NULL)
+		die("nomem");
+
+	return hattr;
+}
+
+/*
+ * Append @id to self->id[], doubling the array when it is full.
+ * Calls die() if the array cannot be grown.
+ */
+void perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
+{
+	int slot = self->ids++;
+
+	if (self->ids > self->size) {
+		self->size *= 2;
+		self->id = realloc(self->id, self->size * sizeof(u64));
+		if (self->id == NULL)
+			die("nomem");
+	}
+	self->id[slot] = id;
+}
+
+/*
+ *
+ */
+
+/*
+ * Allocate an empty, unfrozen perf_header with room for one attr pointer
+ * and a zero data offset/size. Calls die() if either allocation fails.
+ */
+struct perf_header *perf_header__new(void)
+{
+	struct perf_header *hdr = malloc(sizeof(*hdr));
+
+	if (hdr == NULL)
+		die("nomem");
+
+	hdr->frozen = 0;
+	hdr->attrs = 0;
+	hdr->size = 1;
+
+	hdr->attr = malloc(sizeof(void *));
+	if (hdr->attr == NULL)
+		die("nomem");
+
+	hdr->data_offset = 0;
+	hdr->data_size = 0;
+
+	return hdr;
+}
+
+/*
+ * Append @attr to self->attr[], doubling the array when it is full.
+ * Calls die() if the header is frozen or the array cannot be grown.
+ */
+void perf_header__add_attr(struct perf_header *self,
+			   struct perf_header_attr *attr)
+{
+	int slot;
+
+	if (self->frozen)
+		die("frozen");
+
+	slot = self->attrs++;
+	if (self->attrs > self->size) {
+		self->size *= 2;
+		self->attr = realloc(self->attr, self->size * sizeof(void *));
+		if (self->attr == NULL)
+			die("nomem");
+	}
+	self->attr[slot] = attr;
+}
+
+/* File magic: the bytes of this string, read back as a single u64. */
+static const char *__perf_magic = "PERFFILE";
+
+/*
+ * NOTE(review): the cast reads char data through a u64 lvalue, so the
+ * value depends on host byte order -- confirm that is acceptable.
+ */
+#define PERF_MAGIC (*(u64 *)__perf_magic)
+
+/* A region of the file, given as an offset and a size. */
+struct perf_file_section {
+ u64 offset;
+ u64 size;
+};
+
+/* Per-attr record in the file: the attr itself plus where its ids live. */
+struct perf_file_attr {
+ struct perf_counter_attr attr;
+ struct perf_file_section ids; /* array of u64 ids */
+};
+
+/* Header written at offset 0 of the file by perf_header__write(). */
+struct perf_file_header {
+ u64 magic; /* PERF_MAGIC */
+ u64 size; /* sizeof(struct perf_file_header) */
+ u64 attr_size; /* sizeof(struct perf_file_attr) */
+ struct perf_file_section attrs; /* array of struct perf_file_attr */
+ struct perf_file_section data; /* filled from perf_header data_offset/data_size */
+};
+
+/*
+ * Write all @size bytes of @buf to @fd, retrying after short writes.
+ * Calls die() if write() reports an error.
+ */
+static void do_write(int fd, void *buf, size_t size)
+{
+	size_t left;
+
+	for (left = size; left != 0; ) {
+		int written = write(fd, buf, left);
+
+		if (written < 0)
+			die("failed to write");
+
+		left -= written;
+		buf += written;
+	}
+}
+
+/*
+ * Write the header described by @self to @fd and mark @self frozen.
+ *
+ * Layout produced: space for struct perf_file_header at offset 0, then
+ * the id array of every attr, then the table of struct perf_file_attr.
+ * The data offset is recorded as the position right after that table.
+ * On return the file position is at data_offset + data_size.
+ *
+ * NOTE(review): the lseek() return values used as offsets are not checked
+ * for failure.
+ */
+void perf_header__write(struct perf_header *self, int fd)
+{
+ struct perf_file_header f_header;
+ struct perf_file_attr f_attr;
+ struct perf_header_attr *attr;
+ int i;
+
+ /* skip the room for the file header; it is written last, below */
+ lseek(fd, sizeof(f_header), SEEK_SET);
+
+
+ /* the id arrays, remembering where each one starts */
+ for (i = 0; i < self->attrs; i++) {
+ attr = self->attr[i];
+
+ attr->id_offset = lseek(fd, 0, SEEK_CUR);
+ do_write(fd, attr->id, attr->ids * sizeof(u64));
+ }
+
+
+ self->attr_offset = lseek(fd, 0, SEEK_CUR);
+
+ /* the attr table, each entry pointing back at its id array */
+ for (i = 0; i < self->attrs; i++) {
+ attr = self->attr[i];
+
+ f_attr = (struct perf_file_attr){
+ .attr = attr->attr,
+ .ids = {
+ .offset = attr->id_offset,
+ .size = attr->ids * sizeof(u64),
+ }
+ };
+ do_write(fd, &f_attr, sizeof(f_attr));
+ }
+
+
+ /* the data region starts right after the attr table */
+ self->data_offset = lseek(fd, 0, SEEK_CUR);
+
+ f_header = (struct perf_file_header){
+ .magic = PERF_MAGIC,
+ .size = sizeof(f_header),
+ .attr_size = sizeof(f_attr),
+ .attrs = {
+ .offset = self->attr_offset,
+ .size = self->attrs * sizeof(f_attr),
+ },
+ .data = {
+ .offset = self->data_offset,
+ .size = self->data_size,
+ },
+ };
+
+ /* go back and fill in the file header, then seek to the end of the data */
+ lseek(fd, 0, SEEK_SET);
+ do_write(fd, &f_header, sizeof(f_header));
+ lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+
+ self->frozen = 1;
+}
+
+/*
+ * Read exactly @size bytes from @fd into @buf, retrying after short
+ * reads. Calls die() if read() reports an error or if end-of-file is
+ * reached before @size bytes were read.
+ */
+static void do_read(int fd, void *buf, size_t size)
+{
+	while (size) {
+		int ret = read(fd, buf, size);
+
+		if (ret < 0)
+			die("failed to read");
+		/*
+		 * read() returns 0 at end-of-file; without this check a
+		 * truncated file would make the loop spin forever.
+		 */
+		if (ret == 0)
+			die("failed to read: missing data");
+
+		size -= ret;
+		buf += ret;
+	}
+}
+
+/*
+ * Read the header from @fd into a newly allocated perf_header.
+ *
+ * Calls die() unless magic, header size and attr size in the file match
+ * this build. The returned header is frozen and the file position is left
+ * at data_offset + data_size.
+ *
+ * NOTE(review): the attr and id counts are derived from sizes stored in
+ * the file and are not range-checked here.
+ */
+struct perf_header *perf_header__read(int fd)
+{
+ struct perf_header *self = perf_header__new();
+ struct perf_file_header f_header;
+ struct perf_file_attr f_attr;
+ u64 f_id;
+
+ int nr_attrs, nr_ids, i, j;
+
+ lseek(fd, 0, SEEK_SET);
+ do_read(fd, &f_header, sizeof(f_header));
+
+ if (f_header.magic != PERF_MAGIC ||
+ f_header.size != sizeof(f_header) ||
+ f_header.attr_size != sizeof(f_attr))
+ die("incompatible file format");
+
+ nr_attrs = f_header.attrs.size / sizeof(f_attr);
+ lseek(fd, f_header.attrs.offset, SEEK_SET);
+
+ for (i = 0; i < nr_attrs; i++) {
+ struct perf_header_attr *attr;
+ /* remember the current attr record's position in the table */
+ off_t tmp = lseek(fd, 0, SEEK_CUR);
+
+ do_read(fd, &f_attr, sizeof(f_attr));
+
+ attr = perf_header_attr__new(&f_attr.attr);
+
+ /* jump to this attr's id array and read it one id at a time */
+ nr_ids = f_attr.ids.size / sizeof(u64);
+ lseek(fd, f_attr.ids.offset, SEEK_SET);
+
+ for (j = 0; j < nr_ids; j++) {
+ do_read(fd, &f_id, sizeof(f_id));
+
+ perf_header_attr__add_id(attr, f_id);
+ }
+ perf_header__add_attr(self, attr);
+ /*
+ * NOTE(review): this seeks back to the start of the record just
+ * read rather than past it, so the next iteration appears to
+ * re-read the same record when nr_attrs > 1 -- confirm.
+ */
+ lseek(fd, tmp, SEEK_SET);
+ }
+
+ self->data_offset = f_header.data.offset;
+ self->data_size = f_header.data.size;
+
+ lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+
+ self->frozen = 1;
+
+ return self;
+}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
new file mode 100644
index 000000000000..b5ef53ad4c7a
--- /dev/null
+++ b/tools/perf/util/header.h
@@ -0,0 +1,37 @@
+#ifndef _PERF_HEADER_H
+#define _PERF_HEADER_H
+
+#include "../../../include/linux/perf_counter.h"
+#include <sys/types.h>
+#include "types.h"
+
+/* One counter attr together with the ids recorded for it. */
+struct perf_header_attr {
+ struct perf_counter_attr attr;
+ int ids, size; /* ids: entries used in id[]; size: entries allocated */
+ u64 *id;
+ off_t id_offset; /* file offset of the id array, set by perf_header__write() */
+};
+
+/* In-memory form of the file header. */
+struct perf_header {
+ int frozen; /* set by perf_header__write()/__read(); add_attr calls die() once set */
+ int attrs, size; /* attrs: entries used in attr[]; size: entries allocated */
+ struct perf_header_attr **attr;
+ off_t attr_offset; /* file offset of the attr table, set by perf_header__write() */
+ u64 data_offset;
+ u64 data_size;
+};
+
+struct perf_header *perf_header__read(int fd);
+void perf_header__write(struct perf_header *self, int fd);
+
+void perf_header__add_attr(struct perf_header *self,
+ struct perf_header_attr *attr);
+
+struct perf_header_attr *
+perf_header_attr__new(struct perf_counter_attr *attr);
+void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
+
+
+struct perf_header *perf_header__new(void);
+
+#endif /* _PERF_HEADER_H */
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c
index 6653f7dd1d78..17a00e0df2c4 100644
--- a/tools/perf/util/help.c
+++ b/tools/perf/util/help.c
@@ -126,21 +126,6 @@ static int is_executable(const char *name)
!S_ISREG(st.st_mode))
return 0;
-#ifdef __MINGW32__
- /* cannot trust the executable bit, peek into the file instead */
- char buf[3] = { 0 };
- int n;
- int fd = open(name, O_RDONLY);
- st.st_mode &= ~S_IXUSR;
- if (fd >= 0) {
- n = read(fd, buf, 2);
- if (n == 2)
- /* DOS executables start with "MZ" */
- if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
- st.st_mode |= S_IXUSR;
- close(fd);
- }
-#endif
return st.st_mode & S_IXUSR;
}
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index a28bccae5458..1915de20dcac 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -9,7 +9,6 @@
static int spawned_pager;
-#ifndef __MINGW32__
static void pager_preexec(void)
{
/*
@@ -24,7 +23,6 @@ static void pager_preexec(void)
setenv("LESS", "FRSX", 0);
}
-#endif
static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
static struct child_process pager_process;
@@ -70,9 +68,8 @@ void setup_pager(void)
pager_argv[2] = pager;
pager_process.argv = pager_argv;
pager_process.in = -1;
-#ifndef __MINGW32__
pager_process.preexec_cb = pager_preexec;
-#endif
+
if (start_command(&pager_process))
return;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5a72586e1df0..4d042f104cdc 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -13,35 +13,31 @@ int nr_counters;
struct perf_counter_attr attrs[MAX_COUNTERS];
struct event_symbol {
- __u8 type;
- __u64 config;
+ u8 type;
+ u64 config;
char *symbol;
+ char *alias;
};
-#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
-#define CR(x, y) .type = PERF_TYPE_##x, .config = y
+#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
+#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
static struct event_symbol event_symbols[] = {
- { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", },
- { C(HARDWARE, HW_CPU_CYCLES), "cycles", },
- { C(HARDWARE, HW_INSTRUCTIONS), "instructions", },
- { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", },
- { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", },
- { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", },
- { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", },
- { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", },
- { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", },
-
- { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", },
- { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", },
- { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", },
- { C(SOFTWARE, SW_PAGE_FAULTS), "faults", },
- { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", },
- { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", },
- { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", },
- { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", },
- { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", },
- { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", },
+ { CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
+ { CHW(INSTRUCTIONS), "instructions", "" },
+ { CHW(CACHE_REFERENCES), "cache-references", "" },
+ { CHW(CACHE_MISSES), "cache-misses", "" },
+ { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
+ { CHW(BRANCH_MISSES), "branch-misses", "" },
+ { CHW(BUS_CYCLES), "bus-cycles", "" },
+
+ { CSW(CPU_CLOCK), "cpu-clock", "" },
+ { CSW(TASK_CLOCK), "task-clock", "" },
+ { CSW(PAGE_FAULTS), "page-faults", "faults" },
+ { CSW(PAGE_FAULTS_MIN), "minor-faults", "" },
+ { CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
+ { CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
+ { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};
#define __PERF_COUNTER_FIELD(config, name) \
@@ -63,8 +59,8 @@ static char *hw_event_names[] = {
};
static char *sw_event_names[] = {
- "cpu-clock-ticks",
- "task-clock-ticks",
+ "cpu-clock-msecs",
+ "task-clock-msecs",
"page-faults",
"context-switches",
"CPU-migrations",
@@ -74,29 +70,73 @@ static char *sw_event_names[] = {
#define MAX_ALIASES 8
-static char *hw_cache [][MAX_ALIASES] = {
- { "L1-data" , "l1-d", "l1d" },
- { "L1-instruction" , "l1-i", "l1i" },
- { "L2" , "l2" },
- { "Data-TLB" , "dtlb", "d-tlb" },
- { "Instruction-TLB" , "itlb", "i-tlb" },
- { "Branch" , "bpu" , "btb", "bpc" },
+static char *hw_cache[][MAX_ALIASES] = {
+ { "L1-d$", "l1-d", "l1d", "L1-data", },
+ { "L1-i$", "l1-i", "l1i", "L1-instruction", },
+ { "LLC", "L2" },
+ { "dTLB", "d-tlb", "Data-TLB", },
+ { "iTLB", "i-tlb", "Instruction-TLB", },
+ { "branch", "branches", "bpu", "btb", "bpc", },
};
-static char *hw_cache_op [][MAX_ALIASES] = {
- { "Load" , "read" },
- { "Store" , "write" },
- { "Prefetch" , "speculative-read", "speculative-load" },
+static char *hw_cache_op[][MAX_ALIASES] = {
+ { "load", "loads", "read", },
+ { "store", "stores", "write", },
+ { "prefetch", "prefetches", "speculative-read", "speculative-load", },
};
-static char *hw_cache_result [][MAX_ALIASES] = {
- { "Reference" , "ops", "access" },
- { "Miss" },
+static char *hw_cache_result[][MAX_ALIASES] = {
+ { "refs", "Reference", "ops", "access", },
+ { "misses", "miss", },
};
+#define C(x)		PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ	(1 << C(OP_READ))
+#define CACHE_WRITE	(1 << C(OP_WRITE))
+#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
+/* Bit for cache operation x; the argument is parenthesized so that expressions expand safely. */
+#define COP(x)		(1 << (x))
+
+/*
+ * Cache operation table: for each cache type, the set of operations
+ * (one CACHE_* bit each) that is_cache_op_valid() accepts.
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static unsigned long hw_cache_stat[C(MAX)] = {
+ [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)] = (CACHE_READ),
+ [C(BPU)] = (CACHE_READ),
+};
+
+/*
+ * Returns 1 if @cache_op is listed for @cache_type in hw_cache_stat[],
+ * 0 otherwise.
+ *
+ * Values outside the known ranges are reported as invalid instead of
+ * indexing past the end of hw_cache_stat[] or shifting by an arbitrary
+ * amount.
+ */
+static int is_cache_op_valid(u8 cache_type, u8 cache_op)
+{
+	if (cache_type >= C(MAX) || cache_op >= C(OP_MAX))
+		return 0;	/* invalid */
+
+	if (hw_cache_stat[cache_type] & COP(cache_op))
+		return 1;	/* valid */
+	else
+		return 0;	/* invalid */
+}
+
+/*
+ * Build the display name of a cache event in a static buffer, which is
+ * overwritten by the next call.
+ *
+ * With a non-zero @cache_result the name is "<cache>-<op>-<result>" built
+ * from the first alias of each table; with @cache_result 0 it is
+ * "<cache>-<op>" using the second alias of the operation.
+ */
+static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
+{
+	static char name[50];
+	const char *cache = hw_cache[cache_type][0];
+
+	if (!cache_result) {
+		sprintf(name, "%s-%s", cache, hw_cache_op[cache_op][1]);
+		return name;
+	}
+
+	sprintf(name, "%s-%s-%s", cache,
+		hw_cache_op[cache_op][0],
+		hw_cache_result[cache_result][0]);
+
+	return name;
+}
+
char *event_name(int counter)
{
- __u64 config = attrs[counter].config;
+ u64 config = attrs[counter].config;
int type = attrs[counter].type;
static char buf[32];
@@ -112,8 +152,7 @@ char *event_name(int counter)
return "unknown-hardware";
case PERF_TYPE_HW_CACHE: {
- __u8 cache_type, cache_op, cache_result;
- static char name[100];
+ u8 cache_type, cache_op, cache_result;
cache_type = (config >> 0) & 0xff;
if (cache_type > PERF_COUNT_HW_CACHE_MAX)
@@ -127,12 +166,10 @@ char *event_name(int counter)
if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
return "unknown-ext-hardware-cache-result";
- sprintf(name, "%s-Cache-%s-%ses",
- hw_cache[cache_type][0],
- hw_cache_op[cache_op][0],
- hw_cache_result[cache_result][0]);
+ if (!is_cache_op_valid(cache_type, cache_op))
+ return "invalid-cache";
- return name;
+ return event_cache_name(cache_type, cache_op, cache_result);
}
case PERF_TYPE_SOFTWARE:
@@ -163,7 +200,8 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
return -1;
}
-static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
+static int
+parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
{
int cache_type = -1, cache_op = 0, cache_result = 0;
@@ -182,6 +220,9 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a
if (cache_op == -1)
cache_op = PERF_COUNT_HW_CACHE_OP_READ;
+ if (!is_cache_op_valid(cache_type, cache_op))
+ return -EINVAL;
+
cache_result = parse_aliases(str, hw_cache_result,
PERF_COUNT_HW_CACHE_RESULT_MAX);
/*
@@ -196,13 +237,26 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a
return 0;
}
+/*
+ * Returns 1 if @str starts with the symbol of event_symbols[i], or with
+ * its alias when the alias is not the empty string; 0 otherwise.
+ */
+static int check_events(const char *str, unsigned int i)
+{
+	const char *symbol = event_symbols[i].symbol;
+	const char *alias = event_symbols[i].alias;
+
+	if (strncmp(str, symbol, strlen(symbol)) == 0)
+		return 1;
+
+	if (strlen(alias) != 0 && strncmp(str, alias, strlen(alias)) == 0)
+		return 1;
+
+	return 0;
+}
+
/*
* Each event can have multiple symbolic names.
* Symbolic names are (almost) exactly matched.
*/
static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
{
- __u64 config, id;
+ u64 config, id;
int type;
unsigned int i;
const char *sep, *pstr;
@@ -235,9 +289,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
}
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
- if (!strncmp(str, event_symbols[i].symbol,
- strlen(event_symbols[i].symbol))) {
-
+ if (check_events(str, i)) {
attr->type = event_symbols[i].type;
attr->config = event_symbols[i].config;
@@ -289,6 +341,7 @@ void print_events(void)
{
struct event_symbol *syms = event_symbols;
unsigned int i, type, prev_type = -1;
+ char name[40];
fprintf(stderr, "\n");
fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
@@ -301,14 +354,18 @@ void print_events(void)
if (type != prev_type)
fprintf(stderr, "\n");
- fprintf(stderr, " %-30s [%s]\n", syms->symbol,
+ if (strlen(syms->alias))
+ sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+ else
+ strcpy(name, syms->symbol);
+ fprintf(stderr, " %-40s [%s]\n", name,
event_type_descriptors[type]);
prev_type = type;
}
fprintf(stderr, "\n");
- fprintf(stderr, " %-30s [raw hardware event descriptor]\n",
+ fprintf(stderr, " %-40s [raw hardware event descriptor]\n",
"rNNN");
fprintf(stderr, "\n");
diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c
index b2f5e854f40a..a3935343091a 100644
--- a/tools/perf/util/run-command.c
+++ b/tools/perf/util/run-command.c
@@ -65,7 +65,6 @@ int start_command(struct child_process *cmd)
cmd->err = fderr[0];
}
-#ifndef __MINGW32__
fflush(NULL);
cmd->pid = fork();
if (!cmd->pid) {
@@ -118,71 +117,6 @@ int start_command(struct child_process *cmd)
}
exit(127);
}
-#else
- int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */
- const char **sargv = cmd->argv;
- char **env = environ;
-
- if (cmd->no_stdin) {
- s0 = dup(0);
- dup_devnull(0);
- } else if (need_in) {
- s0 = dup(0);
- dup2(fdin[0], 0);
- } else if (cmd->in) {
- s0 = dup(0);
- dup2(cmd->in, 0);
- }
-
- if (cmd->no_stderr) {
- s2 = dup(2);
- dup_devnull(2);
- } else if (need_err) {
- s2 = dup(2);
- dup2(fderr[1], 2);
- }
-
- if (cmd->no_stdout) {
- s1 = dup(1);
- dup_devnull(1);
- } else if (cmd->stdout_to_stderr) {
- s1 = dup(1);
- dup2(2, 1);
- } else if (need_out) {
- s1 = dup(1);
- dup2(fdout[1], 1);
- } else if (cmd->out > 1) {
- s1 = dup(1);
- dup2(cmd->out, 1);
- }
-
- if (cmd->dir)
- die("chdir in start_command() not implemented");
- if (cmd->env) {
- env = copy_environ();
- for (; *cmd->env; cmd->env++)
- env = env_setenv(env, *cmd->env);
- }
-
- if (cmd->perf_cmd) {
- cmd->argv = prepare_perf_cmd(cmd->argv);
- }
-
- cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
-
- if (cmd->env)
- free_environ(env);
- if (cmd->perf_cmd)
- free(cmd->argv);
-
- cmd->argv = sargv;
- if (s0 >= 0)
- dup2(s0, 0), close(s0);
- if (s1 >= 0)
- dup2(s1, 1), close(s1);
- if (s2 >= 0)
- dup2(s2, 2), close(s2);
-#endif
if (cmd->pid < 0) {
int err = errno;
@@ -288,14 +222,6 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const
return run_command(&cmd);
}
-#ifdef __MINGW32__
-static __stdcall unsigned run_thread(void *data)
-{
- struct async *async = data;
- return async->proc(async->fd_for_proc, async->data);
-}
-#endif
-
int start_async(struct async *async)
{
int pipe_out[2];
@@ -304,7 +230,6 @@ int start_async(struct async *async)
return error("cannot create pipe: %s", strerror(errno));
async->out = pipe_out[0];
-#ifndef __MINGW32__
/* Flush stdio before fork() to avoid cloning buffers */
fflush(NULL);
@@ -319,33 +244,17 @@ int start_async(struct async *async)
exit(!!async->proc(pipe_out[1], async->data));
}
close(pipe_out[1]);
-#else
- async->fd_for_proc = pipe_out[1];
- async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
- if (!async->tid) {
- error("cannot create thread: %s", strerror(errno));
- close_pair(pipe_out);
- return -1;
- }
-#endif
+
return 0;
}
int finish_async(struct async *async)
{
-#ifndef __MINGW32__
int ret = 0;
if (wait_or_whine(async->pid))
ret = error("waitpid (async) failed");
-#else
- DWORD ret = 0;
- if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
- ret = error("waiting for thread failed: %lu", GetLastError());
- else if (!GetExitCodeThread(async->tid, &ret))
- ret = error("cannot get thread exit code: %lu", GetLastError());
- CloseHandle(async->tid);
-#endif
+
return ret;
}
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h
index 328289f23669..cc1837deba88 100644
--- a/tools/perf/util/run-command.h
+++ b/tools/perf/util/run-command.h
@@ -79,12 +79,7 @@ struct async {
int (*proc)(int fd, void *data);
void *data;
int out; /* caller reads from here and closes it */
-#ifndef __MINGW32__
pid_t pid;
-#else
- HANDLE tid;
- int fd_for_proc;
-#endif
};
int start_async(struct async *async);
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index eaba09306802..464e7ca898cf 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -259,7 +259,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
res = fread(sb->buf + sb->len, 1, size, f);
if (res > 0)
strbuf_setlen(sb, sb->len + res);
- else if (res < 0 && oldalloc == 0)
+ else if (oldalloc == 0)
strbuf_release(sb);
return res;
}
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index ec33c0c7f4e2..c93eca9a7be3 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -15,7 +15,7 @@ static int hex(char ch)
* While we find nice hex chars, build a long_val.
* Return number of chars processed.
*/
-int hex2u64(const char *ptr, __u64 *long_val)
+int hex2u64(const char *ptr, u64 *long_val)
{
const char *p = ptr;
*long_val = 0;
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
index 72812c1c9a7a..3dca2f654cd0 100644
--- a/tools/perf/util/string.h
+++ b/tools/perf/util/string.h
@@ -1,8 +1,8 @@
#ifndef _PERF_STRING_H_
#define _PERF_STRING_H_
-#include <linux/types.h>
+#include "types.h"
-int hex2u64(const char *ptr, __u64 *val);
+int hex2u64(const char *ptr, u64 *val);
#endif
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
new file mode 100644
index 000000000000..025a78edfffe
--- /dev/null
+++ b/tools/perf/util/strlist.c
@@ -0,0 +1,184 @@
+/*
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include "strlist.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Allocate a list node for the string @s.
+ *
+ * With @dupstr set the node stores its own strdup() copy of @s; otherwise
+ * the caller's pointer is stored as-is.  The embedded rb_node is not
+ * initialised here.
+ *
+ * Returns the new node, or NULL when either allocation fails.
+ */
+static struct str_node *str_node__new(const char *s, bool dupstr)
+{
+	struct str_node *node = malloc(sizeof(*node));
+
+	if (node == NULL)
+		return NULL;
+
+	if (dupstr) {
+		s = strdup(s);
+		if (s == NULL) {
+			free(node);
+			return NULL;
+		}
+	}
+
+	node->s = s;
+	return node;
+}
+
+/*
+ * Free the node @self.  The string it points to is released as well, but
+ * only when @dupstr says the node holds its own copy.
+ */
+static void str_node__delete(struct str_node *self, bool dupstr)
+{
+	if (dupstr) {
+		void *owned = (void *)self->s;
+
+		free(owned);
+	}
+
+	free(self);
+}
+
+/*
+ * Insert @new_entry into the list, keeping the tree ordered by strcmp().
+ *
+ * Returns 0 on success, -EEXIST if an equal string is already present and
+ * -ENOMEM if the node could not be allocated.
+ */
+int strlist__add(struct strlist *self, const char *new_entry)
+{
+	struct rb_node **link = &self->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct str_node *node;
+
+	/* Walk down to the empty slot where the new string belongs. */
+	while (*link != NULL) {
+		int cmp;
+
+		parent = *link;
+		node = rb_entry(parent, struct str_node, rb_node);
+		cmp = strcmp(node->s, new_entry);
+
+		if (cmp == 0)
+			return -EEXIST;
+
+		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	node = str_node__new(new_entry, self->dupstr);
+	if (node == NULL)
+		return -ENOMEM;
+
+	rb_link_node(&node->rb_node, parent, link);
+	rb_insert_color(&node->rb_node, &self->entries);
+
+	return 0;
+}
+
+/*
+ * Add every line of the text file @filename to the list.
+ *
+ * A trailing newline is stripped from each line; lines are read in chunks
+ * of at most sizeof(entry) - 1 bytes.  Loading stops at the first entry
+ * that strlist__add() rejects (including -EEXIST for a duplicate).
+ *
+ * Returns 0 on success, -errno if the file cannot be opened, or the
+ * negative error code reported by strlist__add().
+ *
+ * NOTE(review): lines are read into a stack buffer, so a list created
+ * with dupstr == false would keep pointers into that buffer -- confirm
+ * that callers only load files into duplicating lists.
+ */
+int strlist__load(struct strlist *self, const char *filename)
+{
+	char entry[1024];
+	int err = 0;
+	FILE *fp = fopen(filename, "r");
+
+	/* Negative, to match the -EEXIST/-ENOMEM style of strlist__add(). */
+	if (fp == NULL)
+		return -errno;
+
+	while (fgets(entry, sizeof(entry), fp) != NULL) {
+		const size_t len = strlen(entry);
+
+		if (len == 0)
+			continue;
+
+		/*
+		 * Only drop a real newline: the last line of a file may lack
+		 * one, and an over-long line is returned without one too.
+		 */
+		if (entry[len - 1] == '\n')
+			entry[len - 1] = '\0';
+
+		err = strlist__add(self, entry);
+		if (err != 0)
+			break;
+	}
+
+	fclose(fp);
+	return err;
+}
+
+/*
+ * Unlink @sn from the list's tree and free it; the string is freed too
+ * when the list duplicates its strings.  @sn must not be used afterwards.
+ */
+void strlist__remove(struct strlist *self, struct str_node *sn)
+{
+	struct rb_root *root = &self->entries;
+	const bool owns_string = self->dupstr;
+
+	rb_erase(&sn->rb_node, root);
+	str_node__delete(sn, owns_string);
+}
+
+/*
+ * Look @entry up in the list.
+ *
+ * Returns true when a node whose string compares equal (strcmp) exists,
+ * false otherwise.
+ */
+bool strlist__has_entry(struct strlist *self, const char *entry)
+{
+	struct rb_node *cur = self->entries.rb_node;
+
+	while (cur != NULL) {
+		const struct str_node *sn = rb_entry(cur, struct str_node, rb_node);
+		const int cmp = strcmp(sn->s, entry);
+
+		if (cmp == 0)
+			return true;
+
+		/* Same ordering as strlist__add(): larger node => go left. */
+		cur = cmp > 0 ? cur->rb_left : cur->rb_right;
+	}
+
+	return false;
+}
+
+/*
+ * Handle one element of a comma separated list: "file://<path>" loads
+ * the entries from <path>, anything else is added as a literal string.
+ *
+ * Returns whatever strlist__load() or strlist__add() returns.
+ */
+static int strlist__parse_list_entry(struct strlist *self, const char *s)
+{
+	static const char file_prefix[] = "file://";
+	const size_t prefix_len = sizeof(file_prefix) - 1;
+
+	if (strncmp(s, file_prefix, prefix_len) != 0)
+		return strlist__add(self, s);
+
+	return strlist__load(self, s + prefix_len);
+}
+
+/*
+ * Add every element of the comma separated list @s to the list.  Each
+ * element is handed to strlist__parse_list_entry(), so "file://<path>"
+ * elements pull their entries from a file.  An empty element after the
+ * last comma is ignored; empty elements elsewhere are passed on as "".
+ *
+ * @s itself is never modified: the list is split on a private copy, so
+ * string literals and other read-only buffers are safe to pass.
+ *
+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the
+ * first non-zero value returned for an element (parsing stops there).
+ */
+int strlist__parse_list(struct strlist *self, const char *s)
+{
+	char *copy = strdup(s);
+	char *tok = copy;
+	int err = 0;
+
+	if (copy == NULL)
+		return -ENOMEM;
+
+	while (err == 0) {
+		char *sep = strchr(tok, ',');
+
+		if (sep == NULL) {
+			/* Last element; skip it when it is empty. */
+			if (*tok != '\0')
+				err = strlist__parse_list_entry(self, tok);
+			break;
+		}
+
+		*sep = '\0';
+		err = strlist__parse_list_entry(self, tok);
+		tok = sep + 1;
+	}
+
+	/*
+	 * A non-duplicating list stores pointers into the copy, so the copy
+	 * has to stay alive in that case; only a duplicating list lets us
+	 * release it here.
+	 */
+	if (self->dupstr)
+		free(copy);
+
+	return err;
+}
+
+/*
+ * Create a string list.
+ *
+ * @dupstr: when true, every added string is copied with strdup() and
+ *          freed again together with its node.
+ * @slist:  optional comma separated list of initial entries (see
+ *          strlist__parse_list()); may be NULL for an empty list.
+ *
+ * Returns the new list, or NULL if allocation or parsing of @slist fails.
+ */
+struct strlist *strlist__new(bool dupstr, const char *slist)
+{
+	struct strlist *self = malloc(sizeof(*self));
+
+	if (self == NULL)
+		return NULL;
+
+	self->entries = RB_ROOT;
+	self->dupstr = dupstr;
+
+	if (slist != NULL && strlist__parse_list(self, slist) != 0) {
+		/*
+		 * Entries parsed before the failure are already in the tree;
+		 * strlist__delete() frees them along with the list itself.
+		 */
+		strlist__delete(self);
+		return NULL;
+	}
+
+	return self;
+}
+
+/*
+ * Remove and free every entry of the list, then free the list itself.
+ * Passing NULL is allowed and does nothing.
+ */
+void strlist__delete(struct strlist *self)
+{
+	struct rb_node *cur;
+
+	if (self == NULL)
+		return;
+
+	cur = rb_first(&self->entries);
+	while (cur != NULL) {
+		struct str_node *victim = rb_entry(cur, struct str_node, rb_node);
+
+		/* Step to the successor first: strlist__remove() frees victim. */
+		cur = rb_next(cur);
+		strlist__remove(self, victim);
+	}
+
+	self->entries = RB_ROOT;
+	free(self);
+}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
new file mode 100644
index 000000000000..2fb117fb4b67
--- /dev/null
+++ b/tools/perf/util/strlist.h
@@ -0,0 +1,32 @@
+#ifndef STRLIST_H_
+#define STRLIST_H_
+
+#include "rbtree.h"
+#include <stdbool.h>
+
+/*
+ * One string stored in a strlist.
+ *
+ * rb_node links the entry into the owning list's red-black tree.
+ * s is the string itself; strlist.c frees it together with the node only
+ * when the owning list was created with dupstr set.
+ */
+struct str_node {
+ struct rb_node rb_node;
+ const char *s;
+};
+
+/*
+ * A set of unique strings.
+ *
+ * entries is the root of a red-black tree of struct str_node, ordered by
+ * strcmp() (see strlist__add()).
+ * dupstr, when true, makes the list strdup() each added string and free
+ * it when the node is deleted; when false only the caller's pointer is
+ * stored.
+ */
+struct strlist {
+ struct rb_root entries;
+ bool dupstr;
+};
+
+struct strlist *strlist__new(bool dupstr, const char *slist);
+void strlist__delete(struct strlist *self);
+
+void strlist__remove(struct strlist *self, struct str_node *sn);
+int strlist__load(struct strlist *self, const char *filename);
+int strlist__add(struct strlist *self, const char *str);
+
+bool strlist__has_entry(struct strlist *self, const char *entry);
+
+/* Returns true when the list has no entries, i.e. its tree has no first node. */
+static inline bool strlist__empty(const struct strlist *self)
+{
+	const struct rb_node *first = rb_first(&self->entries);
+
+	return first == NULL;
+}
+
+int strlist__parse_list(struct strlist *self, const char *s);
+#endif /* STRLIST_H_ */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 49a55f813712..78c2efde01b7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -9,9 +9,9 @@
const char *sym_hist_filter;
-static struct symbol *symbol__new(__u64 start, __u64 len,
+static struct symbol *symbol__new(u64 start, u64 len,
const char *name, unsigned int priv_size,
- __u64 obj_start, int verbose)
+ u64 obj_start, int verbose)
{
size_t namelen = strlen(name) + 1;
struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
@@ -21,14 +21,14 @@ static struct symbol *symbol__new(__u64 start, __u64 len,
if (verbose >= 2)
printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
- (__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start);
+ (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start);
self->obj_start= obj_start;
self->hist = NULL;
self->hist_sum = 0;
if (sym_hist_filter && !strcmp(name, sym_hist_filter))
- self->hist = calloc(sizeof(__u64), len);
+ self->hist = calloc(sizeof(u64), len);
if (priv_size) {
memset(self, 0, priv_size);
@@ -89,7 +89,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
{
struct rb_node **p = &self->syms.rb_node;
struct rb_node *parent = NULL;
- const __u64 ip = sym->start;
+ const u64 ip = sym->start;
struct symbol *s;
while (*p != NULL) {
@@ -104,7 +104,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
rb_insert_color(&sym->rb_node, &self->syms);
}
-struct symbol *dso__find_symbol(struct dso *self, __u64 ip)
+struct symbol *dso__find_symbol(struct dso *self, u64 ip)
{
struct rb_node *n;
@@ -151,7 +151,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb
goto out_failure;
while (!feof(file)) {
- __u64 start;
+ u64 start;
struct symbol *sym;
int line_len, len;
char symbol_type;
@@ -232,7 +232,7 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verb
goto out_failure;
while (!feof(file)) {
- __u64 start, size;
+ u64 start, size;
struct symbol *sym;
int line_len, len;
@@ -353,7 +353,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
{
uint32_t nr_rel_entries, idx;
GElf_Sym sym;
- __u64 plt_offset;
+ u64 plt_offset;
GElf_Shdr shdr_plt;
struct symbol *f;
GElf_Shdr shdr_rel_plt;
@@ -520,10 +520,12 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
nr_syms = shdr.sh_size / shdr.sh_entsize;
memset(&sym, 0, sizeof(sym));
-
+ self->prelinked = elf_section_by_name(elf, &ehdr, &shdr,
+ ".gnu.prelink_undo",
+ NULL) != NULL;
elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
struct symbol *f;
- __u64 obj_start;
+ u64 obj_start;
if (!elf_sym__is_function(&sym))
continue;
@@ -535,7 +537,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
gelf_getshdr(sec, &shdr);
obj_start = sym.st_value;
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ if (self->prelinked) {
+ if (verbose >= 2)
+ printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n",
+ (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset);
+
+ sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ }
f = symbol__new(sym.st_value, sym.st_size,
elf_sym__name(&sym, symstrs),
@@ -569,6 +577,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
if (!name)
return -1;
+ self->prelinked = 0;
+
if (strncmp(self->name, "/tmp/perf-", 10) == 0)
return dso__load_perf_map(self, filter, verbose);
@@ -629,7 +639,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
if (vmlinux)
err = dso__load_vmlinux(self, vmlinux, filter, verbose);
- if (err)
+ if (err < 0)
err = dso__load_kallsyms(self, filter, verbose);
return err;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0d1292bd8270..2c48ace8203b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -2,24 +2,27 @@
#define _PERF_SYMBOL_ 1
#include <linux/types.h>
+#include "types.h"
#include "list.h"
#include "rbtree.h"
struct symbol {
struct rb_node rb_node;
- __u64 start;
- __u64 end;
- __u64 obj_start;
- __u64 hist_sum;
- __u64 *hist;
+ u64 start;
+ u64 end;
+ u64 obj_start;
+ u64 hist_sum;
+ u64 *hist;
+ void *priv;
char name[0];
};
struct dso {
struct list_head node;
struct rb_root syms;
+ struct symbol *(*find_symbol)(struct dso *, u64 ip);
unsigned int sym_priv_size;
- struct symbol *(*find_symbol)(struct dso *, __u64 ip);
+ unsigned char prelinked;
char name[0];
};
@@ -35,7 +38,7 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
return ((void *)sym) - self->sym_priv_size;
}
-struct symbol *dso__find_symbol(struct dso *self, __u64 ip);
+struct symbol *dso__find_symbol(struct dso *self, u64 ip);
int dso__load_kernel(struct dso *self, const char *vmlinux,
symbol_filter_t filter, int verbose);
diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h
new file mode 100644
index 000000000000..5e75f9005940
--- /dev/null
+++ b/tools/perf/util/types.h
@@ -0,0 +1,17 @@
+#ifndef _PERF_TYPES_H
+#define _PERF_TYPES_H
+
+/*
+ * Short integer type names used throughout perf.
+ *
+ * We define u64 as unsigned long long for every architecture
+ * so that we can print it with %Lx without getting warnings.
+ *
+ * NOTE(review): the names suggest fixed 8/16/32/64-bit widths, but the
+ * typedefs map onto plain C integer types, so the actual widths depend
+ * on the target ABI -- confirm for any new architecture.
+ */
+typedef unsigned long long u64;
+typedef signed long long s64;
+typedef unsigned int u32;
+typedef signed int s32;
+typedef unsigned short u16;
+typedef signed short s16;
+typedef unsigned char u8;
+typedef signed char s8;
+
+#endif /* _PERF_TYPES_H */
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 76590a16c271..b4be6071c105 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -67,7 +67,6 @@
#include <assert.h>
#include <regex.h>
#include <utime.h>
-#ifndef __MINGW32__
#include <sys/wait.h>
#include <sys/poll.h>
#include <sys/socket.h>
@@ -81,30 +80,11 @@
#include <netdb.h>
#include <pwd.h>
#include <inttypes.h>
-#if defined(__CYGWIN__)
-#undef _XOPEN_SOURCE
-#include <grp.h>
-#define _XOPEN_SOURCE 600
-#include "compat/cygwin.h"
-#else
-#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
-#include <grp.h>
-#define _ALL_SOURCE 1
-#endif
-#else /* __MINGW32__ */
-/* pull in Windows compatibility stuff */
-#include "compat/mingw.h"
-#endif /* __MINGW32__ */
#ifndef NO_ICONV
#include <iconv.h>
#endif
-#ifndef NO_OPENSSL
-#include <openssl/ssl.h>
-#include <openssl/err.h>
-#endif
-
/* On most systems <limits.h> would have given us this, but
* not on some systems (e.g. GNU/Hurd).
*/
@@ -332,17 +312,20 @@ static inline int has_extension(const char *filename, const char *ext)
#undef tolower
#undef toupper
extern unsigned char sane_ctype[256];
-#define GIT_SPACE 0x01
-#define GIT_DIGIT 0x02
-#define GIT_ALPHA 0x04
-#define GIT_GLOB_SPECIAL 0x08
-#define GIT_REGEX_SPECIAL 0x10
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define GIT_PRINT_EXTRA 0x20
+#define GIT_PRINT 0x3E
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define isprint(x) sane_istest(x,GIT_PRINT)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
#define tolower(x) sane_case((unsigned char)(x), 0x20)