cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

builtin-kmem.c (46718B)
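
builtin-kmem.c implements the `perf kmem` subcommand: `perf kmem record` captures the kernel's slab tracepoints (kmem:kmalloc, kmem:kfree, ...) and/or page allocator tracepoints (kmem:mm_page_alloc, kmem:mm_page_free), and `perf kmem stat` replays a recording into per-callsite and per-allocation statistics, selected with --slab and --page.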


      1// SPDX-License-Identifier: GPL-2.0
      2#include "builtin.h"
      3#include "perf.h"
      4
      5#include "util/dso.h"
      6#include "util/evlist.h"
      7#include "util/evsel.h"
      8#include "util/config.h"
      9#include "util/map.h"
     10#include "util/symbol.h"
     11#include "util/thread.h"
     12#include "util/header.h"
     13#include "util/session.h"
     14#include "util/tool.h"
     15#include "util/callchain.h"
     16#include "util/time-utils.h"
     17#include <linux/err.h>
     18
     19#include <subcmd/pager.h>
     20#include <subcmd/parse-options.h>
     21#include "util/trace-event.h"
     22#include "util/data.h"
     23#include "util/cpumap.h"
     24
     25#include "util/debug.h"
     26#include "util/string2.h"
     27
     28#include <linux/kernel.h>
     29#include <linux/rbtree.h>
     30#include <linux/string.h>
     31#include <linux/zalloc.h>
     32#include <errno.h>
     33#include <inttypes.h>
     34#include <locale.h>
     35#include <regex.h>
     36
     37#include <linux/ctype.h>
     38
     39static int	kmem_slab;
     40static int	kmem_page;
     41
     42static long	kmem_page_size;
     43static enum {
     44	KMEM_SLAB,
     45	KMEM_PAGE,
     46} kmem_default = KMEM_SLAB;  /* for backward compatibility */
     47
     48struct alloc_stat;
     49typedef int (*sort_fn_t)(void *, void *);
     50
     51static int			alloc_flag;
     52static int			caller_flag;
     53
     54static int			alloc_lines = -1;
     55static int			caller_lines = -1;
     56
     57static bool			raw_ip;
     58
     59struct alloc_stat {
     60	u64	call_site;
     61	u64	ptr;
     62	u64	bytes_req;
     63	u64	bytes_alloc;
     64	u64	last_alloc;
     65	u32	hit;
     66	u32	pingpong;
     67
     68	short	alloc_cpu;
     69
     70	struct rb_node node;
     71};
     72
     73static struct rb_root root_alloc_stat;
     74static struct rb_root root_alloc_sorted;
     75static struct rb_root root_caller_stat;
     76static struct rb_root root_caller_sorted;
     77
     78static unsigned long total_requested, total_allocated, total_freed;
     79static unsigned long nr_allocs, nr_cross_allocs;
     80
      81/* filters for controlling the start and end time of analysis */
     82static struct perf_time_interval ptime;
     83const char *time_str;
     84
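/*
 * Slab events are aggregated into two rbtrees: root_alloc_stat keyed by
 * the returned pointer and root_caller_stat keyed by the call site.  A
 * matching node accumulates the hit and byte counters; otherwise a new
 * node is inserted.  call_site, alloc_cpu and last_alloc always reflect
 * the most recent allocation at a pointer so the free handler can match it.
 */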
     85static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
     86			     int bytes_req, int bytes_alloc, int cpu)
     87{
     88	struct rb_node **node = &root_alloc_stat.rb_node;
     89	struct rb_node *parent = NULL;
     90	struct alloc_stat *data = NULL;
     91
     92	while (*node) {
     93		parent = *node;
     94		data = rb_entry(*node, struct alloc_stat, node);
     95
     96		if (ptr > data->ptr)
     97			node = &(*node)->rb_right;
     98		else if (ptr < data->ptr)
     99			node = &(*node)->rb_left;
    100		else
    101			break;
    102	}
    103
    104	if (data && data->ptr == ptr) {
    105		data->hit++;
    106		data->bytes_req += bytes_req;
    107		data->bytes_alloc += bytes_alloc;
    108	} else {
    109		data = malloc(sizeof(*data));
    110		if (!data) {
    111			pr_err("%s: malloc failed\n", __func__);
    112			return -1;
    113		}
    114		data->ptr = ptr;
    115		data->pingpong = 0;
    116		data->hit = 1;
    117		data->bytes_req = bytes_req;
    118		data->bytes_alloc = bytes_alloc;
    119
    120		rb_link_node(&data->node, parent, node);
    121		rb_insert_color(&data->node, &root_alloc_stat);
    122	}
    123	data->call_site = call_site;
    124	data->alloc_cpu = cpu;
    125	data->last_alloc = bytes_alloc;
    126
    127	return 0;
    128}
    129
    130static int insert_caller_stat(unsigned long call_site,
    131			      int bytes_req, int bytes_alloc)
    132{
    133	struct rb_node **node = &root_caller_stat.rb_node;
    134	struct rb_node *parent = NULL;
    135	struct alloc_stat *data = NULL;
    136
    137	while (*node) {
    138		parent = *node;
    139		data = rb_entry(*node, struct alloc_stat, node);
    140
    141		if (call_site > data->call_site)
    142			node = &(*node)->rb_right;
    143		else if (call_site < data->call_site)
    144			node = &(*node)->rb_left;
    145		else
    146			break;
    147	}
    148
    149	if (data && data->call_site == call_site) {
    150		data->hit++;
    151		data->bytes_req += bytes_req;
    152		data->bytes_alloc += bytes_alloc;
    153	} else {
    154		data = malloc(sizeof(*data));
    155		if (!data) {
    156			pr_err("%s: malloc failed\n", __func__);
    157			return -1;
    158		}
    159		data->call_site = call_site;
    160		data->pingpong = 0;
    161		data->hit = 1;
    162		data->bytes_req = bytes_req;
    163		data->bytes_alloc = bytes_alloc;
    164
    165		rb_link_node(&data->node, parent, node);
    166		rb_insert_color(&data->node, &root_caller_stat);
    167	}
    168
    169	return 0;
    170}
    171
    172static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample)
    173{
    174	unsigned long ptr = evsel__intval(evsel, sample, "ptr"),
    175		      call_site = evsel__intval(evsel, sample, "call_site");
    176	int bytes_req = evsel__intval(evsel, sample, "bytes_req"),
    177	    bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc");
    178
    179	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
    180	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
    181		return -1;
    182
    183	total_requested += bytes_req;
    184	total_allocated += bytes_alloc;
    185
    186	nr_allocs++;
    187	return 0;
    188}
    189
    190static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
    191{
    192	int ret = evsel__process_alloc_event(evsel, sample);
    193
    194	if (!ret) {
    195		int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
    196		    node2 = evsel__intval(evsel, sample, "node");
    197
    198		if (node1 != node2)
    199			nr_cross_allocs++;
    200	}
    201
    202	return ret;
    203}
    204
    205static int ptr_cmp(void *, void *);
    206static int slab_callsite_cmp(void *, void *);
    207
    208static struct alloc_stat *search_alloc_stat(unsigned long ptr,
    209					    unsigned long call_site,
    210					    struct rb_root *root,
    211					    sort_fn_t sort_fn)
    212{
    213	struct rb_node *node = root->rb_node;
    214	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
    215
    216	while (node) {
    217		struct alloc_stat *data;
    218		int cmp;
    219
    220		data = rb_entry(node, struct alloc_stat, node);
    221
    222		cmp = sort_fn(&key, data);
    223		if (cmp < 0)
    224			node = node->rb_left;
    225		else if (cmp > 0)
    226			node = node->rb_right;
    227		else
    228			return data;
    229	}
    230	return NULL;
    231}
    232
    233static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample)
    234{
    235	unsigned long ptr = evsel__intval(evsel, sample, "ptr");
    236	struct alloc_stat *s_alloc, *s_caller;
    237
    238	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
    239	if (!s_alloc)
    240		return 0;
    241
    242	total_freed += s_alloc->last_alloc;
    243
    244	if ((short)sample->cpu != s_alloc->alloc_cpu) {
    245		s_alloc->pingpong++;
    246
    247		s_caller = search_alloc_stat(0, s_alloc->call_site,
    248					     &root_caller_stat,
    249					     slab_callsite_cmp);
    250		if (!s_caller)
    251			return -1;
    252		s_caller->pingpong++;
    253	}
    254	s_alloc->alloc_cpu = -1;
    255
    256	return 0;
    257}
    258
    259static u64 total_page_alloc_bytes;
    260static u64 total_page_free_bytes;
    261static u64 total_page_nomatch_bytes;
    262static u64 total_page_fail_bytes;
    263static unsigned long nr_page_allocs;
    264static unsigned long nr_page_frees;
    265static unsigned long nr_page_fails;
    266static unsigned long nr_page_nomatch;
    267
    268static bool use_pfn;
    269static bool live_page;
    270static struct perf_session *kmem_session;
    271
    272#define MAX_MIGRATE_TYPES  6
    273#define MAX_PAGE_ORDER     11
    274
    275static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
    276
    277struct page_stat {
     278	struct rb_node	node;
     279	u64		page;
     280	u64		callsite;
     281	int		order;
     282	unsigned	gfp_flags;
     283	unsigned	migrate_type;
     284	u64		alloc_bytes;
     285	u64		free_bytes;
     286	int		nr_alloc;
     287	int		nr_free;
    288};
    289
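/*
 * Page events use separate rbtrees: page_live_tree tracks currently-live
 * pages keyed by page/pfn, page_alloc_tree aggregates allocations by the
 * page_alloc_sort_input keys, and page_caller_tree aggregates by call
 * site.  The *_sorted trees hold the output order built by sort_result().
 */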
    290static struct rb_root page_live_tree;
    291static struct rb_root page_alloc_tree;
    292static struct rb_root page_alloc_sorted;
    293static struct rb_root page_caller_tree;
    294static struct rb_root page_caller_sorted;
    295
    296struct alloc_func {
    297	u64 start;
    298	u64 end;
    299	char *name;
    300};
    301
    302static int nr_alloc_funcs;
    303static struct alloc_func *alloc_func_list;
    304
    305static int funcmp(const void *a, const void *b)
    306{
    307	const struct alloc_func *fa = a;
    308	const struct alloc_func *fb = b;
    309
    310	if (fa->start > fb->start)
    311		return 1;
    312	else
    313		return -1;
    314}
    315
    316static int callcmp(const void *a, const void *b)
    317{
    318	const struct alloc_func *fa = a;
    319	const struct alloc_func *fb = b;
    320
    321	if (fb->start <= fa->start && fa->end < fb->end)
    322		return 0;
    323
    324	if (fa->start > fb->start)
    325		return 1;
    326	else
    327		return -1;
    328}
    329
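/*
 * Collect every kernel symbol matching the allocator-entry regex
 * (alloc_pages, __get_free_pages, get_zeroed_page, ...) into a table
 * sorted by start address.  find_callsite() bsearch()es this table to
 * skip allocator-internal frames when walking a sample's callchain.
 */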
    330static int build_alloc_func_list(void)
    331{
    332	int ret;
    333	struct map *kernel_map;
    334	struct symbol *sym;
    335	struct rb_node *node;
    336	struct alloc_func *func;
    337	struct machine *machine = &kmem_session->machines.host;
    338	regex_t alloc_func_regex;
    339	static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
    340
    341	ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
    342	if (ret) {
    343		char err[BUFSIZ];
    344
    345		regerror(ret, &alloc_func_regex, err, sizeof(err));
    346		pr_err("Invalid regex: %s\n%s", pattern, err);
    347		return -EINVAL;
    348	}
    349
    350	kernel_map = machine__kernel_map(machine);
    351	if (map__load(kernel_map) < 0) {
    352		pr_err("cannot load kernel map\n");
    353		return -ENOENT;
    354	}
    355
    356	map__for_each_symbol(kernel_map, sym, node) {
    357		if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
    358			continue;
    359
    360		func = realloc(alloc_func_list,
    361			       (nr_alloc_funcs + 1) * sizeof(*func));
    362		if (func == NULL)
    363			return -ENOMEM;
    364
    365		pr_debug("alloc func: %s\n", sym->name);
    366		func[nr_alloc_funcs].start = sym->start;
    367		func[nr_alloc_funcs].end   = sym->end;
    368		func[nr_alloc_funcs].name  = sym->name;
    369
    370		alloc_func_list = func;
    371		nr_alloc_funcs++;
    372	}
    373
    374	qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
    375
    376	regfree(&alloc_func_regex);
    377	return 0;
    378}
    379
    380/*
    381 * Find first non-memory allocation function from callchain.
    382 * The allocation functions are in the 'alloc_func_list'.
    383 */
    384static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
    385{
    386	struct addr_location al;
    387	struct machine *machine = &kmem_session->machines.host;
    388	struct callchain_cursor_node *node;
    389
    390	if (alloc_func_list == NULL) {
    391		if (build_alloc_func_list() < 0)
    392			goto out;
    393	}
    394
    395	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
    396	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
    397
    398	callchain_cursor_commit(&callchain_cursor);
    399	while (true) {
    400		struct alloc_func key, *caller;
    401		u64 addr;
    402
    403		node = callchain_cursor_current(&callchain_cursor);
    404		if (node == NULL)
    405			break;
    406
    407		key.start = key.end = node->ip;
    408		caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
    409				 sizeof(key), callcmp);
    410		if (!caller) {
     411			/* not an allocator function: this is the callsite */
    412			if (node->ms.map)
    413				addr = map__unmap_ip(node->ms.map, node->ip);
    414			else
    415				addr = node->ip;
    416
    417			return addr;
    418		} else
    419			pr_debug3("skipping alloc function: %s\n", caller->name);
    420
    421		callchain_cursor_advance(&callchain_cursor);
    422	}
    423
    424out:
    425	pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
    426	return sample->ip;
    427}
    428
    429struct sort_dimension {
    430	const char		name[20];
    431	sort_fn_t		cmp;
    432	struct list_head	list;
    433};
    434
    435static LIST_HEAD(page_alloc_sort_input);
    436static LIST_HEAD(page_caller_sort_input);
    437
    438static struct page_stat *
    439__page_stat__findnew_page(struct page_stat *pstat, bool create)
    440{
    441	struct rb_node **node = &page_live_tree.rb_node;
    442	struct rb_node *parent = NULL;
    443	struct page_stat *data;
    444
    445	while (*node) {
    446		s64 cmp;
    447
    448		parent = *node;
    449		data = rb_entry(*node, struct page_stat, node);
    450
    451		cmp = data->page - pstat->page;
    452		if (cmp < 0)
    453			node = &parent->rb_left;
    454		else if (cmp > 0)
    455			node = &parent->rb_right;
    456		else
    457			return data;
    458	}
    459
    460	if (!create)
    461		return NULL;
    462
    463	data = zalloc(sizeof(*data));
    464	if (data != NULL) {
    465		data->page = pstat->page;
    466		data->order = pstat->order;
    467		data->gfp_flags = pstat->gfp_flags;
    468		data->migrate_type = pstat->migrate_type;
    469
    470		rb_link_node(&data->node, parent, node);
    471		rb_insert_color(&data->node, &page_live_tree);
    472	}
    473
    474	return data;
    475}
    476
    477static struct page_stat *page_stat__find_page(struct page_stat *pstat)
    478{
    479	return __page_stat__findnew_page(pstat, false);
    480}
    481
    482static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
    483{
    484	return __page_stat__findnew_page(pstat, true);
    485}
    486
    487static struct page_stat *
    488__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
    489{
    490	struct rb_node **node = &page_alloc_tree.rb_node;
    491	struct rb_node *parent = NULL;
    492	struct page_stat *data;
    493	struct sort_dimension *sort;
    494
    495	while (*node) {
    496		int cmp = 0;
    497
    498		parent = *node;
    499		data = rb_entry(*node, struct page_stat, node);
    500
    501		list_for_each_entry(sort, &page_alloc_sort_input, list) {
    502			cmp = sort->cmp(pstat, data);
    503			if (cmp)
    504				break;
    505		}
    506
    507		if (cmp < 0)
    508			node = &parent->rb_left;
    509		else if (cmp > 0)
    510			node = &parent->rb_right;
    511		else
    512			return data;
    513	}
    514
    515	if (!create)
    516		return NULL;
    517
    518	data = zalloc(sizeof(*data));
    519	if (data != NULL) {
    520		data->page = pstat->page;
    521		data->order = pstat->order;
    522		data->gfp_flags = pstat->gfp_flags;
    523		data->migrate_type = pstat->migrate_type;
    524
    525		rb_link_node(&data->node, parent, node);
    526		rb_insert_color(&data->node, &page_alloc_tree);
    527	}
    528
    529	return data;
    530}
    531
    532static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
    533{
    534	return __page_stat__findnew_alloc(pstat, false);
    535}
    536
    537static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
    538{
    539	return __page_stat__findnew_alloc(pstat, true);
    540}
    541
    542static struct page_stat *
    543__page_stat__findnew_caller(struct page_stat *pstat, bool create)
    544{
    545	struct rb_node **node = &page_caller_tree.rb_node;
    546	struct rb_node *parent = NULL;
    547	struct page_stat *data;
    548	struct sort_dimension *sort;
    549
    550	while (*node) {
    551		int cmp = 0;
    552
    553		parent = *node;
    554		data = rb_entry(*node, struct page_stat, node);
    555
    556		list_for_each_entry(sort, &page_caller_sort_input, list) {
    557			cmp = sort->cmp(pstat, data);
    558			if (cmp)
    559				break;
    560		}
    561
    562		if (cmp < 0)
    563			node = &parent->rb_left;
    564		else if (cmp > 0)
    565			node = &parent->rb_right;
    566		else
    567			return data;
    568	}
    569
    570	if (!create)
    571		return NULL;
    572
    573	data = zalloc(sizeof(*data));
    574	if (data != NULL) {
    575		data->callsite = pstat->callsite;
    576		data->order = pstat->order;
    577		data->gfp_flags = pstat->gfp_flags;
    578		data->migrate_type = pstat->migrate_type;
    579
    580		rb_link_node(&data->node, parent, node);
    581		rb_insert_color(&data->node, &page_caller_tree);
    582	}
    583
    584	return data;
    585}
    586
    587static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
    588{
    589	return __page_stat__findnew_caller(pstat, false);
    590}
    591
    592static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
    593{
    594	return __page_stat__findnew_caller(pstat, true);
    595}
    596
    597static bool valid_page(u64 pfn_or_page)
    598{
    599	if (use_pfn && pfn_or_page == -1UL)
    600		return false;
    601	if (!use_pfn && pfn_or_page == 0)
    602		return false;
    603	return true;
    604}
    605
    606struct gfp_flag {
    607	unsigned int flags;
    608	char *compact_str;
    609	char *human_readable;
    610};
    611
    612static struct gfp_flag *gfps;
    613static int nr_gfps;
    614
    615static int gfpcmp(const void *a, const void *b)
    616{
    617	const struct gfp_flag *fa = a;
    618	const struct gfp_flag *fb = b;
    619
    620	return fa->flags - fb->flags;
    621}
    622
    623/* see include/trace/events/mmflags.h */
    624static const struct {
    625	const char *original;
    626	const char *compact;
    627} gfp_compact_table[] = {
    628	{ "GFP_TRANSHUGE",		"THP" },
    629	{ "GFP_TRANSHUGE_LIGHT",	"THL" },
    630	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
    631	{ "GFP_HIGHUSER",		"HU" },
    632	{ "GFP_USER",			"U" },
    633	{ "GFP_KERNEL_ACCOUNT",		"KAC" },
    634	{ "GFP_KERNEL",			"K" },
    635	{ "GFP_NOFS",			"NF" },
    636	{ "GFP_ATOMIC",			"A" },
    637	{ "GFP_NOIO",			"NI" },
    638	{ "GFP_NOWAIT",			"NW" },
    639	{ "GFP_DMA",			"D" },
    640	{ "__GFP_HIGHMEM",		"HM" },
    641	{ "GFP_DMA32",			"D32" },
    642	{ "__GFP_HIGH",			"H" },
    643	{ "__GFP_ATOMIC",		"_A" },
    644	{ "__GFP_IO",			"I" },
    645	{ "__GFP_FS",			"F" },
    646	{ "__GFP_NOWARN",		"NWR" },
    647	{ "__GFP_RETRY_MAYFAIL",	"R" },
    648	{ "__GFP_NOFAIL",		"NF" },
    649	{ "__GFP_NORETRY",		"NR" },
    650	{ "__GFP_COMP",			"C" },
    651	{ "__GFP_ZERO",			"Z" },
    652	{ "__GFP_NOMEMALLOC",		"NMA" },
    653	{ "__GFP_MEMALLOC",		"MA" },
    654	{ "__GFP_HARDWALL",		"HW" },
    655	{ "__GFP_THISNODE",		"TN" },
    656	{ "__GFP_RECLAIMABLE",		"RC" },
    657	{ "__GFP_MOVABLE",		"M" },
    658	{ "__GFP_ACCOUNT",		"AC" },
    659	{ "__GFP_WRITE",		"WR" },
    660	{ "__GFP_RECLAIM",		"R" },
    661	{ "__GFP_DIRECT_RECLAIM",	"DR" },
    662	{ "__GFP_KSWAPD_RECLAIM",	"KR" },
    663};
    664
    665static size_t max_gfp_len;
    666
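/*
 * Rewrite a printed gfp string into its compact form using the table
 * above, e.g. "GFP_KERNEL|__GFP_ZERO" becomes "K|Z".  Flags missing
 * from the table are silently dropped.  max_gfp_len tracks the widest
 * compact string seen, for column alignment in the reports.
 */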
    667static char *compact_gfp_flags(char *gfp_flags)
    668{
    669	char *orig_flags = strdup(gfp_flags);
    670	char *new_flags = NULL;
    671	char *str, *pos = NULL;
    672	size_t len = 0;
    673
    674	if (orig_flags == NULL)
    675		return NULL;
    676
    677	str = strtok_r(orig_flags, "|", &pos);
    678	while (str) {
    679		size_t i;
    680		char *new;
    681		const char *cpt;
    682
    683		for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
    684			if (strcmp(gfp_compact_table[i].original, str))
    685				continue;
    686
    687			cpt = gfp_compact_table[i].compact;
    688			new = realloc(new_flags, len + strlen(cpt) + 2);
    689			if (new == NULL) {
    690				free(new_flags);
    691				free(orig_flags);
    692				return NULL;
    693			}
    694
    695			new_flags = new;
    696
    697			if (!len) {
    698				strcpy(new_flags, cpt);
    699			} else {
    700				strcat(new_flags, "|");
    701				strcat(new_flags, cpt);
    702				len++;
    703			}
    704
    705			len += strlen(cpt);
    706		}
    707
    708		str = strtok_r(NULL, "|", &pos);
    709	}
    710
    711	if (max_gfp_len < len)
    712		max_gfp_len = len;
    713
    714	free(orig_flags);
    715	return new_flags;
    716}
    717
    718static char *compact_gfp_string(unsigned long gfp_flags)
    719{
    720	struct gfp_flag key = {
    721		.flags = gfp_flags,
    722	};
    723	struct gfp_flag *gfp;
    724
    725	gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
    726	if (gfp)
    727		return gfp->compact_str;
    728
    729	return NULL;
    730}
    731
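/*
 * A raw gfp_flags value is decoded by pretty-printing the event once
 * via tep_print_event(), parsing out the "gfp_flags=..." token, and
 * caching both the human-readable and compacted strings in the sorted
 * 'gfps' array for later bsearch() lookups.
 */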
    732static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
    733			   unsigned int gfp_flags)
    734{
    735	struct tep_record record = {
    736		.cpu = sample->cpu,
    737		.data = sample->raw_data,
    738		.size = sample->raw_size,
    739	};
    740	struct trace_seq seq;
    741	char *str, *pos = NULL;
    742
    743	if (nr_gfps) {
    744		struct gfp_flag key = {
    745			.flags = gfp_flags,
    746		};
    747
    748		if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
    749			return 0;
    750	}
    751
    752	trace_seq_init(&seq);
    753	tep_print_event(evsel->tp_format->tep,
    754			&seq, &record, "%s", TEP_PRINT_INFO);
    755
    756	str = strtok_r(seq.buffer, " ", &pos);
    757	while (str) {
    758		if (!strncmp(str, "gfp_flags=", 10)) {
    759			struct gfp_flag *new;
    760
    761			new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
    762			if (new == NULL)
    763				return -ENOMEM;
    764
    765			gfps = new;
    766			new += nr_gfps++;
    767
    768			new->flags = gfp_flags;
    769			new->human_readable = strdup(str + 10);
    770			new->compact_str = compact_gfp_flags(str + 10);
    771			if (!new->human_readable || !new->compact_str)
    772				return -ENOMEM;
    773
    774			qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
    775		}
    776
    777		str = strtok_r(NULL, " ", &pos);
    778	}
    779
    780	trace_seq_destroy(&seq);
    781	return 0;
    782}
    783
    784static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample)
    785{
    786	u64 page;
    787	unsigned int order = evsel__intval(evsel, sample, "order");
    788	unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags");
    789	unsigned int migrate_type = evsel__intval(evsel, sample,
    790						       "migratetype");
    791	u64 bytes = kmem_page_size << order;
    792	u64 callsite;
    793	struct page_stat *pstat;
    794	struct page_stat this = {
    795		.order = order,
    796		.gfp_flags = gfp_flags,
    797		.migrate_type = migrate_type,
    798	};
    799
    800	if (use_pfn)
    801		page = evsel__intval(evsel, sample, "pfn");
    802	else
    803		page = evsel__intval(evsel, sample, "page");
    804
    805	nr_page_allocs++;
    806	total_page_alloc_bytes += bytes;
    807
    808	if (!valid_page(page)) {
    809		nr_page_fails++;
    810		total_page_fail_bytes += bytes;
    811
    812		return 0;
    813	}
    814
    815	if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
    816		return -1;
    817
    818	callsite = find_callsite(evsel, sample);
    819
     820	/*
     821	 * Record the page in the live tree so that the free event can
     822	 * find it again with the correct gfp flags and migrate type.
     823	 */
    824	this.page = page;
    825	pstat = page_stat__findnew_page(&this);
    826	if (pstat == NULL)
    827		return -ENOMEM;
    828
    829	pstat->nr_alloc++;
    830	pstat->alloc_bytes += bytes;
    831	pstat->callsite = callsite;
    832
    833	if (!live_page) {
    834		pstat = page_stat__findnew_alloc(&this);
    835		if (pstat == NULL)
    836			return -ENOMEM;
    837
    838		pstat->nr_alloc++;
    839		pstat->alloc_bytes += bytes;
    840		pstat->callsite = callsite;
    841	}
    842
    843	this.callsite = callsite;
    844	pstat = page_stat__findnew_caller(&this);
    845	if (pstat == NULL)
    846		return -ENOMEM;
    847
    848	pstat->nr_alloc++;
    849	pstat->alloc_bytes += bytes;
    850
    851	order_stats[order][migrate_type]++;
    852
    853	return 0;
    854}
    855
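/*
 * A free event only carries the page and the order, so the matching
 * entry in page_live_tree supplies the gfp flags, migrate type and call
 * site recorded at allocation time.  Frees with no matching allocation
 * (e.g. pages allocated before tracing started) are counted as nomatch.
 */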
    856static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample)
    857{
    858	u64 page;
    859	unsigned int order = evsel__intval(evsel, sample, "order");
    860	u64 bytes = kmem_page_size << order;
    861	struct page_stat *pstat;
    862	struct page_stat this = {
    863		.order = order,
    864	};
    865
    866	if (use_pfn)
    867		page = evsel__intval(evsel, sample, "pfn");
    868	else
    869		page = evsel__intval(evsel, sample, "page");
    870
    871	nr_page_frees++;
    872	total_page_free_bytes += bytes;
    873
    874	this.page = page;
    875	pstat = page_stat__find_page(&this);
    876	if (pstat == NULL) {
     877		pr_debug2("free without matching alloc at page %"PRIx64" (order: %d)\n",
     878			  page, order);
    879
    880		nr_page_nomatch++;
    881		total_page_nomatch_bytes += bytes;
    882
    883		return 0;
    884	}
    885
    886	this.gfp_flags = pstat->gfp_flags;
    887	this.migrate_type = pstat->migrate_type;
    888	this.callsite = pstat->callsite;
    889
    890	rb_erase(&pstat->node, &page_live_tree);
    891	free(pstat);
    892
    893	if (live_page) {
    894		order_stats[this.order][this.migrate_type]--;
    895	} else {
    896		pstat = page_stat__find_alloc(&this);
    897		if (pstat == NULL)
    898			return -ENOMEM;
    899
    900		pstat->nr_free++;
    901		pstat->free_bytes += bytes;
    902	}
    903
    904	pstat = page_stat__find_caller(&this);
    905	if (pstat == NULL)
    906		return -ENOENT;
    907
    908	pstat->nr_free++;
    909	pstat->free_bytes += bytes;
    910
    911	if (live_page) {
    912		pstat->nr_alloc--;
    913		pstat->alloc_bytes -= bytes;
    914
    915		if (pstat->nr_alloc == 0) {
    916			rb_erase(&pstat->node, &page_caller_tree);
    917			free(pstat);
    918		}
    919	}
    920
    921	return 0;
    922}
    923
    924static bool perf_kmem__skip_sample(struct perf_sample *sample)
    925{
     926	/* skip samples outside the requested time window */
    927	if (perf_time__skip_sample(&ptime, sample->time))
    928		return true;
    929
    930	return false;
    931}
    932
    933typedef int (*tracepoint_handler)(struct evsel *evsel,
    934				  struct perf_sample *sample);
    935
    936static int process_sample_event(struct perf_tool *tool __maybe_unused,
    937				union perf_event *event,
    938				struct perf_sample *sample,
    939				struct evsel *evsel,
    940				struct machine *machine)
    941{
    942	int err = 0;
    943	struct thread *thread = machine__findnew_thread(machine, sample->pid,
    944							sample->tid);
    945
    946	if (thread == NULL) {
    947		pr_debug("problem processing %d event, skipping it.\n",
    948			 event->header.type);
    949		return -1;
    950	}
    951
    952	if (perf_kmem__skip_sample(sample))
    953		return 0;
    954
    955	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
    956
    957	if (evsel->handler != NULL) {
    958		tracepoint_handler f = evsel->handler;
    959		err = f(evsel, sample);
    960	}
    961
    962	thread__put(thread);
    963
    964	return err;
    965}
    966
    967static struct perf_tool perf_kmem = {
    968	.sample		 = process_sample_event,
    969	.comm		 = perf_event__process_comm,
    970	.mmap		 = perf_event__process_mmap,
    971	.mmap2		 = perf_event__process_mmap2,
    972	.namespaces	 = perf_event__process_namespaces,
    973	.ordered_events	 = true,
    974};
    975
    976static double fragmentation(unsigned long n_req, unsigned long n_alloc)
    977{
    978	if (n_alloc == 0)
    979		return 0.0;
    980	else
    981		return 100.0 - (100.0 * n_req / n_alloc);
    982}
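/*
 * Example: a request for 100 bytes served from a 128-byte slab object
 * yields 100.0 - (100.0 * 100 / 128) = 21.875% internal fragmentation.
 */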
    983
    984static void __print_slab_result(struct rb_root *root,
    985				struct perf_session *session,
    986				int n_lines, int is_caller)
    987{
    988	struct rb_node *next;
    989	struct machine *machine = &session->machines.host;
    990
    991	printf("%.105s\n", graph_dotted_line);
    992	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
    993	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
    994	printf("%.105s\n", graph_dotted_line);
    995
    996	next = rb_first(root);
    997
    998	while (next && n_lines--) {
    999		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
   1000						   node);
   1001		struct symbol *sym = NULL;
   1002		struct map *map;
   1003		char buf[BUFSIZ];
   1004		u64 addr;
   1005
   1006		if (is_caller) {
   1007			addr = data->call_site;
   1008			if (!raw_ip)
   1009				sym = machine__find_kernel_symbol(machine, addr, &map);
   1010		} else
   1011			addr = data->ptr;
   1012
   1013		if (sym != NULL)
   1014			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
   1015				 addr - map->unmap_ip(map, sym->start));
   1016		else
   1017			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
   1018		printf(" %-34s |", buf);
   1019
   1020		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
   1021		       (unsigned long long)data->bytes_alloc,
   1022		       (unsigned long)data->bytes_alloc / data->hit,
   1023		       (unsigned long long)data->bytes_req,
   1024		       (unsigned long)data->bytes_req / data->hit,
   1025		       (unsigned long)data->hit,
   1026		       (unsigned long)data->pingpong,
   1027		       fragmentation(data->bytes_req, data->bytes_alloc));
   1028
   1029		next = rb_next(next);
   1030	}
   1031
   1032	if (n_lines == -1)
   1033		printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");
   1034
   1035	printf("%.105s\n", graph_dotted_line);
   1036}
   1037
   1038static const char * const migrate_type_str[] = {
   1039	"UNMOVABL",
   1040	"RECLAIM",
   1041	"MOVABLE",
   1042	"RESERVED",
   1043	"CMA/ISLT",
   1044	"UNKNOWN",
   1045};
   1046
   1047static void __print_page_alloc_result(struct perf_session *session, int n_lines)
   1048{
   1049	struct rb_node *next = rb_first(&page_alloc_sorted);
   1050	struct machine *machine = &session->machines.host;
   1051	const char *format;
   1052	int gfp_len = max(strlen("GFP flags"), max_gfp_len);
   1053
   1054	printf("\n%.105s\n", graph_dotted_line);
   1055	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
   1056	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
   1057	       gfp_len, "GFP flags");
   1058	printf("%.105s\n", graph_dotted_line);
   1059
   1060	if (use_pfn)
   1061		format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
   1062	else
   1063		format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
   1064
   1065	while (next && n_lines--) {
   1066		struct page_stat *data;
   1067		struct symbol *sym;
   1068		struct map *map;
   1069		char buf[32];
   1070		char *caller = buf;
   1071
   1072		data = rb_entry(next, struct page_stat, node);
   1073		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
   1074		if (sym)
   1075			caller = sym->name;
   1076		else
   1077			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
   1078
   1079		printf(format, (unsigned long long)data->page,
   1080		       (unsigned long long)data->alloc_bytes / 1024,
   1081		       data->nr_alloc, data->order,
   1082		       migrate_type_str[data->migrate_type],
   1083		       gfp_len, compact_gfp_string(data->gfp_flags), caller);
   1084
   1085		next = rb_next(next);
   1086	}
   1087
   1088	if (n_lines == -1) {
   1089		printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
   1090		       gfp_len, "...");
   1091	}
   1092
   1093	printf("%.105s\n", graph_dotted_line);
   1094}
   1095
   1096static void __print_page_caller_result(struct perf_session *session, int n_lines)
   1097{
   1098	struct rb_node *next = rb_first(&page_caller_sorted);
   1099	struct machine *machine = &session->machines.host;
   1100	int gfp_len = max(strlen("GFP flags"), max_gfp_len);
   1101
   1102	printf("\n%.105s\n", graph_dotted_line);
   1103	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
   1104	       live_page ? "Live" : "Total", gfp_len, "GFP flags");
   1105	printf("%.105s\n", graph_dotted_line);
   1106
   1107	while (next && n_lines--) {
   1108		struct page_stat *data;
   1109		struct symbol *sym;
   1110		struct map *map;
   1111		char buf[32];
   1112		char *caller = buf;
   1113
   1114		data = rb_entry(next, struct page_stat, node);
   1115		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
   1116		if (sym)
   1117			caller = sym->name;
   1118		else
   1119			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
   1120
   1121		printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
   1122		       (unsigned long long)data->alloc_bytes / 1024,
   1123		       data->nr_alloc, data->order,
   1124		       migrate_type_str[data->migrate_type],
   1125		       gfp_len, compact_gfp_string(data->gfp_flags), caller);
   1126
   1127		next = rb_next(next);
   1128	}
   1129
   1130	if (n_lines == -1) {
   1131		printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
   1132		       gfp_len, "...");
   1133	}
   1134
   1135	printf("%.105s\n", graph_dotted_line);
   1136}
   1137
   1138static void print_gfp_flags(void)
   1139{
   1140	int i;
   1141
   1142	printf("#\n");
   1143	printf("# GFP flags\n");
   1144	printf("# ---------\n");
   1145	for (i = 0; i < nr_gfps; i++) {
   1146		printf("# %08x: %*s: %s\n", gfps[i].flags,
   1147		       (int) max_gfp_len, gfps[i].compact_str,
   1148		       gfps[i].human_readable);
   1149	}
   1150}
   1151
   1152static void print_slab_summary(void)
   1153{
   1154	printf("\nSUMMARY (SLAB allocator)");
   1155	printf("\n========================\n");
   1156	printf("Total bytes requested: %'lu\n", total_requested);
   1157	printf("Total bytes allocated: %'lu\n", total_allocated);
   1158	printf("Total bytes freed:     %'lu\n", total_freed);
   1159	if (total_allocated > total_freed) {
   1160		printf("Net total bytes allocated: %'lu\n",
   1161		total_allocated - total_freed);
   1162	}
   1163	printf("Total bytes wasted on internal fragmentation: %'lu\n",
   1164	       total_allocated - total_requested);
   1165	printf("Internal fragmentation: %f%%\n",
   1166	       fragmentation(total_requested, total_allocated));
   1167	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
   1168}
   1169
   1170static void print_page_summary(void)
   1171{
   1172	int o, m;
   1173	u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
   1174	u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
   1175
   1176	printf("\nSUMMARY (page allocator)");
   1177	printf("\n========================\n");
   1178	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
   1179	       nr_page_allocs, total_page_alloc_bytes / 1024);
   1180	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
   1181	       nr_page_frees, total_page_free_bytes / 1024);
   1182	printf("\n");
   1183
   1184	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
   1185	       nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
   1186	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
   1187	       nr_page_allocs - nr_alloc_freed,
   1188	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
   1189	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
   1190	       nr_page_nomatch, total_page_nomatch_bytes / 1024);
   1191	printf("\n");
   1192
   1193	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
   1194	       nr_page_fails, total_page_fail_bytes / 1024);
   1195	printf("\n");
   1196
   1197	printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
   1198	       "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
   1199	printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
   1200	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
   1201	       graph_dotted_line, graph_dotted_line);
   1202
   1203	for (o = 0; o < MAX_PAGE_ORDER; o++) {
   1204		printf("%5d", o);
   1205		for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
   1206			if (order_stats[o][m])
   1207				printf("  %'12d", order_stats[o][m]);
   1208			else
   1209				printf("  %12c", '.');
   1210		}
   1211		printf("\n");
   1212	}
   1213}
   1214
   1215static void print_slab_result(struct perf_session *session)
   1216{
   1217	if (caller_flag)
   1218		__print_slab_result(&root_caller_sorted, session, caller_lines, 1);
   1219	if (alloc_flag)
   1220		__print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
   1221	print_slab_summary();
   1222}
   1223
   1224static void print_page_result(struct perf_session *session)
   1225{
   1226	if (caller_flag || alloc_flag)
   1227		print_gfp_flags();
   1228	if (caller_flag)
   1229		__print_page_caller_result(session, caller_lines);
   1230	if (alloc_flag)
   1231		__print_page_alloc_result(session, alloc_lines);
   1232	print_page_summary();
   1233}
   1234
   1235static void print_result(struct perf_session *session)
   1236{
   1237	if (kmem_slab)
   1238		print_slab_result(session);
   1239	if (kmem_page)
   1240		print_page_result(session);
   1241}
   1242
   1243static LIST_HEAD(slab_caller_sort);
   1244static LIST_HEAD(slab_alloc_sort);
   1245static LIST_HEAD(page_caller_sort);
   1246static LIST_HEAD(page_alloc_sort);
   1247
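/*
 * Re-insert entries into an output tree ordered by the chained sort
 * keys; the first key that differs decides.  Note the inverted test
 * (cmp > 0 descends left), so an in-order walk from rb_first() visits
 * entries in descending order and the biggest entries print first.
 */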
   1248static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
   1249			     struct list_head *sort_list)
   1250{
   1251	struct rb_node **new = &(root->rb_node);
   1252	struct rb_node *parent = NULL;
   1253	struct sort_dimension *sort;
   1254
   1255	while (*new) {
   1256		struct alloc_stat *this;
   1257		int cmp = 0;
   1258
   1259		this = rb_entry(*new, struct alloc_stat, node);
   1260		parent = *new;
   1261
   1262		list_for_each_entry(sort, sort_list, list) {
   1263			cmp = sort->cmp(data, this);
   1264			if (cmp)
   1265				break;
   1266		}
   1267
   1268		if (cmp > 0)
   1269			new = &((*new)->rb_left);
   1270		else
   1271			new = &((*new)->rb_right);
   1272	}
   1273
   1274	rb_link_node(&data->node, parent, new);
   1275	rb_insert_color(&data->node, root);
   1276}
   1277
   1278static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
   1279			       struct list_head *sort_list)
   1280{
   1281	struct rb_node *node;
   1282	struct alloc_stat *data;
   1283
   1284	for (;;) {
   1285		node = rb_first(root);
   1286		if (!node)
   1287			break;
   1288
   1289		rb_erase(node, root);
   1290		data = rb_entry(node, struct alloc_stat, node);
   1291		sort_slab_insert(root_sorted, data, sort_list);
   1292	}
   1293}
   1294
   1295static void sort_page_insert(struct rb_root *root, struct page_stat *data,
   1296			     struct list_head *sort_list)
   1297{
   1298	struct rb_node **new = &root->rb_node;
   1299	struct rb_node *parent = NULL;
   1300	struct sort_dimension *sort;
   1301
   1302	while (*new) {
   1303		struct page_stat *this;
   1304		int cmp = 0;
   1305
   1306		this = rb_entry(*new, struct page_stat, node);
   1307		parent = *new;
   1308
   1309		list_for_each_entry(sort, sort_list, list) {
   1310			cmp = sort->cmp(data, this);
   1311			if (cmp)
   1312				break;
   1313		}
   1314
   1315		if (cmp > 0)
   1316			new = &parent->rb_left;
   1317		else
   1318			new = &parent->rb_right;
   1319	}
   1320
   1321	rb_link_node(&data->node, parent, new);
   1322	rb_insert_color(&data->node, root);
   1323}
   1324
   1325static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
   1326			       struct list_head *sort_list)
   1327{
   1328	struct rb_node *node;
   1329	struct page_stat *data;
   1330
   1331	for (;;) {
   1332		node = rb_first(root);
   1333		if (!node)
   1334			break;
   1335
   1336		rb_erase(node, root);
   1337		data = rb_entry(node, struct page_stat, node);
   1338		sort_page_insert(root_sorted, data, sort_list);
   1339	}
   1340}
   1341
   1342static void sort_result(void)
   1343{
   1344	if (kmem_slab) {
   1345		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
   1346				   &slab_alloc_sort);
   1347		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
   1348				   &slab_caller_sort);
   1349	}
   1350	if (kmem_page) {
   1351		if (live_page)
   1352			__sort_page_result(&page_live_tree, &page_alloc_sorted,
   1353					   &page_alloc_sort);
   1354		else
   1355			__sort_page_result(&page_alloc_tree, &page_alloc_sorted,
   1356					   &page_alloc_sort);
   1357
   1358		__sort_page_result(&page_caller_tree, &page_caller_sorted,
   1359				   &page_caller_sort);
   1360	}
   1361}
   1362
   1363static int __cmd_kmem(struct perf_session *session)
   1364{
   1365	int err = -EINVAL;
   1366	struct evsel *evsel;
   1367	const struct evsel_str_handler kmem_tracepoints[] = {
   1368		/* slab allocator */
   1369		{ "kmem:kmalloc",		evsel__process_alloc_event, },
   1370		{ "kmem:kmem_cache_alloc",	evsel__process_alloc_event, },
   1371		{ "kmem:kmalloc_node",		evsel__process_alloc_node_event, },
   1372		{ "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
   1373		{ "kmem:kfree",			evsel__process_free_event, },
   1374		{ "kmem:kmem_cache_free",	evsel__process_free_event, },
   1375		/* page allocator */
   1376		{ "kmem:mm_page_alloc",		evsel__process_page_alloc_event, },
   1377		{ "kmem:mm_page_free",		evsel__process_page_free_event, },
   1378	};
   1379
   1380	if (!perf_session__has_traces(session, "kmem record"))
   1381		goto out;
   1382
   1383	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
   1384		pr_err("Initializing perf session tracepoint handlers failed\n");
   1385		goto out;
   1386	}
   1387
   1388	evlist__for_each_entry(session->evlist, evsel) {
   1389		if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
   1390		    evsel__field(evsel, "pfn")) {
   1391			use_pfn = true;
   1392			break;
   1393		}
   1394	}
   1395
   1396	setup_pager();
   1397	err = perf_session__process_events(session);
   1398	if (err != 0) {
   1399		pr_err("error during process events: %d\n", err);
   1400		goto out;
   1401	}
   1402	sort_result();
   1403	print_result(session);
   1404out:
   1405	return err;
   1406}
   1407
   1408/* slab sort keys */
   1409static int ptr_cmp(void *a, void *b)
   1410{
   1411	struct alloc_stat *l = a;
   1412	struct alloc_stat *r = b;
   1413
   1414	if (l->ptr < r->ptr)
   1415		return -1;
   1416	else if (l->ptr > r->ptr)
   1417		return 1;
   1418	return 0;
   1419}
   1420
   1421static struct sort_dimension ptr_sort_dimension = {
   1422	.name	= "ptr",
   1423	.cmp	= ptr_cmp,
   1424};
   1425
   1426static int slab_callsite_cmp(void *a, void *b)
   1427{
   1428	struct alloc_stat *l = a;
   1429	struct alloc_stat *r = b;
   1430
   1431	if (l->call_site < r->call_site)
   1432		return -1;
   1433	else if (l->call_site > r->call_site)
   1434		return 1;
   1435	return 0;
   1436}
   1437
   1438static struct sort_dimension callsite_sort_dimension = {
   1439	.name	= "callsite",
   1440	.cmp	= slab_callsite_cmp,
   1441};
   1442
   1443static int hit_cmp(void *a, void *b)
   1444{
   1445	struct alloc_stat *l = a;
   1446	struct alloc_stat *r = b;
   1447
   1448	if (l->hit < r->hit)
   1449		return -1;
   1450	else if (l->hit > r->hit)
   1451		return 1;
   1452	return 0;
   1453}
   1454
   1455static struct sort_dimension hit_sort_dimension = {
   1456	.name	= "hit",
   1457	.cmp	= hit_cmp,
   1458};
   1459
   1460static int bytes_cmp(void *a, void *b)
   1461{
   1462	struct alloc_stat *l = a;
   1463	struct alloc_stat *r = b;
   1464
   1465	if (l->bytes_alloc < r->bytes_alloc)
   1466		return -1;
   1467	else if (l->bytes_alloc > r->bytes_alloc)
   1468		return 1;
   1469	return 0;
   1470}
   1471
   1472static struct sort_dimension bytes_sort_dimension = {
   1473	.name	= "bytes",
   1474	.cmp	= bytes_cmp,
   1475};
   1476
   1477static int frag_cmp(void *a, void *b)
   1478{
   1479	double x, y;
   1480	struct alloc_stat *l = a;
   1481	struct alloc_stat *r = b;
   1482
   1483	x = fragmentation(l->bytes_req, l->bytes_alloc);
   1484	y = fragmentation(r->bytes_req, r->bytes_alloc);
   1485
   1486	if (x < y)
   1487		return -1;
   1488	else if (x > y)
   1489		return 1;
   1490	return 0;
   1491}
   1492
   1493static struct sort_dimension frag_sort_dimension = {
   1494	.name	= "frag",
   1495	.cmp	= frag_cmp,
   1496};
   1497
   1498static int pingpong_cmp(void *a, void *b)
   1499{
   1500	struct alloc_stat *l = a;
   1501	struct alloc_stat *r = b;
   1502
   1503	if (l->pingpong < r->pingpong)
   1504		return -1;
   1505	else if (l->pingpong > r->pingpong)
   1506		return 1;
   1507	return 0;
   1508}
   1509
   1510static struct sort_dimension pingpong_sort_dimension = {
   1511	.name	= "pingpong",
   1512	.cmp	= pingpong_cmp,
   1513};
   1514
   1515/* page sort keys */
   1516static int page_cmp(void *a, void *b)
   1517{
   1518	struct page_stat *l = a;
   1519	struct page_stat *r = b;
   1520
   1521	if (l->page < r->page)
   1522		return -1;
   1523	else if (l->page > r->page)
   1524		return 1;
   1525	return 0;
   1526}
   1527
   1528static struct sort_dimension page_sort_dimension = {
   1529	.name	= "page",
   1530	.cmp	= page_cmp,
   1531};
   1532
   1533static int page_callsite_cmp(void *a, void *b)
   1534{
   1535	struct page_stat *l = a;
   1536	struct page_stat *r = b;
   1537
   1538	if (l->callsite < r->callsite)
   1539		return -1;
   1540	else if (l->callsite > r->callsite)
   1541		return 1;
   1542	return 0;
   1543}
   1544
   1545static struct sort_dimension page_callsite_sort_dimension = {
   1546	.name	= "callsite",
   1547	.cmp	= page_callsite_cmp,
   1548};
   1549
   1550static int page_hit_cmp(void *a, void *b)
   1551{
   1552	struct page_stat *l = a;
   1553	struct page_stat *r = b;
   1554
   1555	if (l->nr_alloc < r->nr_alloc)
   1556		return -1;
   1557	else if (l->nr_alloc > r->nr_alloc)
   1558		return 1;
   1559	return 0;
   1560}
   1561
   1562static struct sort_dimension page_hit_sort_dimension = {
   1563	.name	= "hit",
   1564	.cmp	= page_hit_cmp,
   1565};
   1566
   1567static int page_bytes_cmp(void *a, void *b)
   1568{
   1569	struct page_stat *l = a;
   1570	struct page_stat *r = b;
   1571
   1572	if (l->alloc_bytes < r->alloc_bytes)
   1573		return -1;
   1574	else if (l->alloc_bytes > r->alloc_bytes)
   1575		return 1;
   1576	return 0;
   1577}
   1578
   1579static struct sort_dimension page_bytes_sort_dimension = {
   1580	.name	= "bytes",
   1581	.cmp	= page_bytes_cmp,
   1582};
   1583
   1584static int page_order_cmp(void *a, void *b)
   1585{
   1586	struct page_stat *l = a;
   1587	struct page_stat *r = b;
   1588
   1589	if (l->order < r->order)
   1590		return -1;
   1591	else if (l->order > r->order)
   1592		return 1;
   1593	return 0;
   1594}
   1595
   1596static struct sort_dimension page_order_sort_dimension = {
   1597	.name	= "order",
   1598	.cmp	= page_order_cmp,
   1599};
   1600
   1601static int migrate_type_cmp(void *a, void *b)
   1602{
   1603	struct page_stat *l = a;
   1604	struct page_stat *r = b;
   1605
    1606	/* for internal use to find freed pages */
   1607	if (l->migrate_type == -1U)
   1608		return 0;
   1609
   1610	if (l->migrate_type < r->migrate_type)
   1611		return -1;
   1612	else if (l->migrate_type > r->migrate_type)
   1613		return 1;
   1614	return 0;
   1615}
   1616
   1617static struct sort_dimension migrate_type_sort_dimension = {
   1618	.name	= "migtype",
   1619	.cmp	= migrate_type_cmp,
   1620};
   1621
   1622static int gfp_flags_cmp(void *a, void *b)
   1623{
   1624	struct page_stat *l = a;
   1625	struct page_stat *r = b;
   1626
    1627	/* for internal use to find freed pages */
   1628	if (l->gfp_flags == -1U)
   1629		return 0;
   1630
   1631	if (l->gfp_flags < r->gfp_flags)
   1632		return -1;
   1633	else if (l->gfp_flags > r->gfp_flags)
   1634		return 1;
   1635	return 0;
   1636}
   1637
   1638static struct sort_dimension gfp_flags_sort_dimension = {
   1639	.name	= "gfp",
   1640	.cmp	= gfp_flags_cmp,
   1641};
   1642
   1643static struct sort_dimension *slab_sorts[] = {
   1644	&ptr_sort_dimension,
   1645	&callsite_sort_dimension,
   1646	&hit_sort_dimension,
   1647	&bytes_sort_dimension,
   1648	&frag_sort_dimension,
   1649	&pingpong_sort_dimension,
   1650};
   1651
   1652static struct sort_dimension *page_sorts[] = {
   1653	&page_sort_dimension,
   1654	&page_callsite_sort_dimension,
   1655	&page_hit_sort_dimension,
   1656	&page_bytes_sort_dimension,
   1657	&page_order_sort_dimension,
   1658	&migrate_type_sort_dimension,
   1659	&gfp_flags_sort_dimension,
   1660};
   1661
   1662static int slab_sort_dimension__add(const char *tok, struct list_head *list)
   1663{
   1664	struct sort_dimension *sort;
   1665	int i;
   1666
   1667	for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
   1668		if (!strcmp(slab_sorts[i]->name, tok)) {
   1669			sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
   1670			if (!sort) {
   1671				pr_err("%s: memdup failed\n", __func__);
   1672				return -1;
   1673			}
   1674			list_add_tail(&sort->list, list);
   1675			return 0;
   1676		}
   1677	}
   1678
   1679	return -1;
   1680}
   1681
   1682static int page_sort_dimension__add(const char *tok, struct list_head *list)
   1683{
   1684	struct sort_dimension *sort;
   1685	int i;
   1686
   1687	for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
   1688		if (!strcmp(page_sorts[i]->name, tok)) {
   1689			sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
   1690			if (!sort) {
   1691				pr_err("%s: memdup failed\n", __func__);
   1692				return -1;
   1693			}
   1694			list_add_tail(&sort->list, list);
   1695			return 0;
   1696		}
   1697	}
   1698
   1699	return -1;
   1700}
   1701
   1702static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
   1703{
   1704	char *tok;
   1705	char *str = strdup(arg);
   1706	char *pos = str;
   1707
   1708	if (!str) {
   1709		pr_err("%s: strdup failed\n", __func__);
   1710		return -1;
   1711	}
   1712
   1713	while (true) {
   1714		tok = strsep(&pos, ",");
   1715		if (!tok)
   1716			break;
   1717		if (slab_sort_dimension__add(tok, sort_list) < 0) {
   1718			pr_err("Unknown slab --sort key: '%s'", tok);
   1719			free(str);
   1720			return -1;
   1721		}
   1722	}
   1723
   1724	free(str);
   1725	return 0;
   1726}
   1727
   1728static int setup_page_sorting(struct list_head *sort_list, const char *arg)
   1729{
   1730	char *tok;
   1731	char *str = strdup(arg);
   1732	char *pos = str;
   1733
   1734	if (!str) {
   1735		pr_err("%s: strdup failed\n", __func__);
   1736		return -1;
   1737	}
   1738
   1739	while (true) {
   1740		tok = strsep(&pos, ",");
   1741		if (!tok)
   1742			break;
   1743		if (page_sort_dimension__add(tok, sort_list) < 0) {
   1744			pr_err("Unknown page --sort key: '%s'", tok);
   1745			free(str);
   1746			return -1;
   1747		}
   1748	}
   1749
   1750	free(str);
   1751	return 0;
   1752}
   1753
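/*
 * --slab/--page and --caller/--alloc each set their flag to the other
 * flag + 1, so whichever option appeared last holds the larger value.
 * The comparisons below therefore route --sort (and --line) to the mode
 * and view most recently named on the command line.
 */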
   1754static int parse_sort_opt(const struct option *opt __maybe_unused,
   1755			  const char *arg, int unset __maybe_unused)
   1756{
   1757	if (!arg)
   1758		return -1;
   1759
   1760	if (kmem_page > kmem_slab ||
   1761	    (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
   1762		if (caller_flag > alloc_flag)
   1763			return setup_page_sorting(&page_caller_sort, arg);
   1764		else
   1765			return setup_page_sorting(&page_alloc_sort, arg);
   1766	} else {
   1767		if (caller_flag > alloc_flag)
   1768			return setup_slab_sorting(&slab_caller_sort, arg);
   1769		else
   1770			return setup_slab_sorting(&slab_alloc_sort, arg);
   1771	}
   1772
   1773	return 0;
   1774}
   1775
   1776static int parse_caller_opt(const struct option *opt __maybe_unused,
   1777			    const char *arg __maybe_unused,
   1778			    int unset __maybe_unused)
   1779{
   1780	caller_flag = (alloc_flag + 1);
   1781	return 0;
   1782}
   1783
   1784static int parse_alloc_opt(const struct option *opt __maybe_unused,
   1785			   const char *arg __maybe_unused,
   1786			   int unset __maybe_unused)
   1787{
   1788	alloc_flag = (caller_flag + 1);
   1789	return 0;
   1790}
   1791
   1792static int parse_slab_opt(const struct option *opt __maybe_unused,
   1793			  const char *arg __maybe_unused,
   1794			  int unset __maybe_unused)
   1795{
   1796	kmem_slab = (kmem_page + 1);
   1797	return 0;
   1798}
   1799
   1800static int parse_page_opt(const struct option *opt __maybe_unused,
   1801			  const char *arg __maybe_unused,
   1802			  int unset __maybe_unused)
   1803{
   1804	kmem_page = (kmem_slab + 1);
   1805	return 0;
   1806}
   1807
   1808static int parse_line_opt(const struct option *opt __maybe_unused,
   1809			  const char *arg, int unset __maybe_unused)
   1810{
   1811	int lines;
   1812
   1813	if (!arg)
   1814		return -1;
   1815
   1816	lines = strtoul(arg, NULL, 10);
   1817
   1818	if (caller_flag > alloc_flag)
   1819		caller_lines = lines;
   1820	else
   1821		alloc_lines = lines;
   1822
   1823	return 0;
   1824}
   1825
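/*
 * 'perf kmem record' is a thin wrapper: it rebuilds an argv of the form
 * "record -a -R -c 1 -e kmem:..." (plus -g in page mode, because the
 * page reports need callchains to resolve call sites) and hands it to
 * cmd_record().
 */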
   1826static int __cmd_record(int argc, const char **argv)
   1827{
   1828	const char * const record_args[] = {
   1829	"record", "-a", "-R", "-c", "1",
   1830	};
   1831	const char * const slab_events[] = {
   1832	"-e", "kmem:kmalloc",
   1833	"-e", "kmem:kmalloc_node",
   1834	"-e", "kmem:kfree",
   1835	"-e", "kmem:kmem_cache_alloc",
   1836	"-e", "kmem:kmem_cache_alloc_node",
   1837	"-e", "kmem:kmem_cache_free",
   1838	};
   1839	const char * const page_events[] = {
   1840	"-e", "kmem:mm_page_alloc",
   1841	"-e", "kmem:mm_page_free",
   1842	};
   1843	unsigned int rec_argc, i, j;
   1844	const char **rec_argv;
   1845
   1846	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
   1847	if (kmem_slab)
   1848		rec_argc += ARRAY_SIZE(slab_events);
   1849	if (kmem_page)
   1850		rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
   1851
   1852	rec_argv = calloc(rec_argc + 1, sizeof(char *));
   1853
   1854	if (rec_argv == NULL)
   1855		return -ENOMEM;
   1856
   1857	for (i = 0; i < ARRAY_SIZE(record_args); i++)
   1858		rec_argv[i] = strdup(record_args[i]);
   1859
   1860	if (kmem_slab) {
   1861		for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
   1862			rec_argv[i] = strdup(slab_events[j]);
   1863	}
   1864	if (kmem_page) {
   1865		rec_argv[i++] = strdup("-g");
   1866
   1867		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
   1868			rec_argv[i] = strdup(page_events[j]);
   1869	}
   1870
   1871	for (j = 1; j < (unsigned int)argc; j++, i++)
   1872		rec_argv[i] = argv[j];
   1873
   1874	return cmd_record(i, rec_argv);
   1875}
   1876
   1877static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
   1878{
   1879	if (!strcmp(var, "kmem.default")) {
   1880		if (!strcmp(value, "slab"))
   1881			kmem_default = KMEM_SLAB;
   1882		else if (!strcmp(value, "page"))
   1883			kmem_default = KMEM_PAGE;
   1884		else
   1885			pr_err("invalid default value ('slab' or 'page' required): %s\n",
   1886			       value);
   1887		return 0;
   1888	}
   1889
   1890	return 0;
   1891}
   1892
   1893int cmd_kmem(int argc, const char **argv)
   1894{
   1895	const char * const default_slab_sort = "frag,hit,bytes";
   1896	const char * const default_page_sort = "bytes,hit";
   1897	struct perf_data data = {
   1898		.mode = PERF_DATA_MODE_READ,
   1899	};
   1900	const struct option kmem_options[] = {
   1901	OPT_STRING('i', "input", &input_name, "file", "input file name"),
   1902	OPT_INCR('v', "verbose", &verbose,
   1903		    "be more verbose (show symbol address, etc)"),
   1904	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
   1905			   "show per-callsite statistics", parse_caller_opt),
   1906	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
   1907			   "show per-allocation statistics", parse_alloc_opt),
   1908	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
   1909		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
   1910		     "page, order, migtype, gfp", parse_sort_opt),
   1911	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
   1912	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
   1913	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
   1914	OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
   1915			   parse_slab_opt),
   1916	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
   1917			   parse_page_opt),
   1918	OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
   1919	OPT_STRING(0, "time", &time_str, "str",
   1920		   "Time span of interest (start,stop)"),
   1921	OPT_END()
   1922	};
   1923	const char *const kmem_subcommands[] = { "record", "stat", NULL };
   1924	const char *kmem_usage[] = {
   1925		NULL,
   1926		NULL
   1927	};
   1928	struct perf_session *session;
   1929	static const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
   1930	int ret = perf_config(kmem_config, NULL);
   1931
   1932	if (ret)
   1933		return ret;
   1934
   1935	argc = parse_options_subcommand(argc, argv, kmem_options,
   1936					kmem_subcommands, kmem_usage,
   1937					PARSE_OPT_STOP_AT_NON_OPTION);
   1938
   1939	if (!argc)
   1940		usage_with_options(kmem_usage, kmem_options);
   1941
   1942	if (kmem_slab == 0 && kmem_page == 0) {
   1943		if (kmem_default == KMEM_SLAB)
   1944			kmem_slab = 1;
   1945		else
   1946			kmem_page = 1;
   1947	}
   1948
   1949	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
   1950		symbol__init(NULL);
   1951		return __cmd_record(argc, argv);
   1952	}
   1953
   1954	data.path = input_name;
   1955
   1956	kmem_session = session = perf_session__new(&data, &perf_kmem);
   1957	if (IS_ERR(session))
   1958		return PTR_ERR(session);
   1959
   1960	ret = -1;
   1961
   1962	if (kmem_slab) {
   1963		if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) {
   1964			pr_err(errmsg, "slab", "slab");
   1965			goto out_delete;
   1966		}
   1967	}
   1968
   1969	if (kmem_page) {
   1970		struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");
   1971
   1972		if (evsel == NULL) {
   1973			pr_err(errmsg, "page", "page");
   1974			goto out_delete;
   1975		}
   1976
   1977		kmem_page_size = tep_get_page_size(evsel->tp_format->tep);
   1978		symbol_conf.use_callchain = true;
   1979	}
   1980
   1981	symbol__init(&session->header.env);
   1982
   1983	if (perf_time__parse_str(&ptime, time_str) != 0) {
   1984		pr_err("Invalid time string\n");
   1985		ret = -EINVAL;
   1986		goto out_delete;
   1987	}
   1988
   1989	if (!strcmp(argv[0], "stat")) {
   1990		setlocale(LC_ALL, "");
   1991
   1992		if (cpu__setup_cpunode_map())
   1993			goto out_delete;
   1994
   1995		if (list_empty(&slab_caller_sort))
   1996			setup_slab_sorting(&slab_caller_sort, default_slab_sort);
   1997		if (list_empty(&slab_alloc_sort))
   1998			setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
   1999		if (list_empty(&page_caller_sort))
   2000			setup_page_sorting(&page_caller_sort, default_page_sort);
   2001		if (list_empty(&page_alloc_sort))
   2002			setup_page_sorting(&page_alloc_sort, default_page_sort);
   2003
   2004		if (kmem_page) {
   2005			setup_page_sorting(&page_alloc_sort_input,
   2006					   "page,order,migtype,gfp");
   2007			setup_page_sorting(&page_caller_sort_input,
   2008					   "callsite,order,migtype,gfp");
   2009		}
   2010		ret = __cmd_kmem(session);
   2011	} else
   2012		usage_with_options(kmem_usage, kmem_options);
   2013
   2014out_delete:
   2015	perf_session__delete(session);
   2016
   2017	return ret;
   2018}
   2019