cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

profile.c (13640B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/profile.c
 *  Simple profiling. Manages a direct-mapped profile hit count buffer,
 *  with configurable resolution, support for restricting the cpus on
 *  which profiling is done, and switching between cpu time and
 *  schedule() calls via kernel command line parameters passed at boot.
 *
 *  Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
 *	Red Hat, July 2004
 *  Consolidation of architecture support code for profiling,
 *	Nadia Yvette Chambers, Oracle, July 2004
 *  Amortized hit count accounting via per-cpu open-addressed hashtables
 *	to resolve timer interrupt livelocks, Nadia Yvette Chambers,
 *	Oracle, 2004
 */

#include <linux/export.h>
#include <linux/profile.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched/stat.h>

#include <asm/sections.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

struct profile_hit {
	u32 pc, hits;
};
#define PROFILE_GRPSHIFT	3
#define PROFILE_GRPSZ		(1 << PROFILE_GRPSHIFT)
#define NR_PROFILE_HIT		(PAGE_SIZE/sizeof(struct profile_hit))
#define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
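/*
 * Sizing example (assuming the common 4 KiB PAGE_SIZE; other page
 * sizes scale accordingly): struct profile_hit is 8 bytes, so each
 * per-cpu page holds NR_PROFILE_HIT = 4096 / 8 = 512 slots, organised
 * as NR_PROFILE_GRP = 512 / 8 = 64 groups of PROFILE_GRPSZ = 8 slots.
 */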

static atomic_t *prof_buffer;
static unsigned long prof_len;
static unsigned short int prof_shift;

int prof_on __read_mostly;
EXPORT_SYMBOL_GPL(prof_on);

static cpumask_var_t prof_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
static DEFINE_PER_CPU(int, cpu_profile_flip);
static DEFINE_MUTEX(profile_flip_mutex);
#endif /* CONFIG_SMP */

int profile_setup(char *str)
{
	static const char schedstr[] = "schedule";
	static const char sleepstr[] = "sleep";
	static const char kvmstr[] = "kvm";
	int par;

	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
#ifdef CONFIG_SCHEDSTATS
		force_schedstat_enabled();
		prof_on = SLEEP_PROFILING;
		if (str[strlen(sleepstr)] == ',')
			str += strlen(sleepstr) + 1;
		if (get_option(&str, &par))
			prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
		pr_info("kernel sleep profiling enabled (shift: %u)\n",
			prof_shift);
#else
		pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
#endif /* CONFIG_SCHEDSTATS */
	} else if (!strncmp(str, schedstr, strlen(schedstr))) {
		prof_on = SCHED_PROFILING;
		if (str[strlen(schedstr)] == ',')
			str += strlen(schedstr) + 1;
		if (get_option(&str, &par))
			prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
		pr_info("kernel schedule profiling enabled (shift: %u)\n",
			prof_shift);
	} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
		prof_on = KVM_PROFILING;
		if (str[strlen(kvmstr)] == ',')
			str += strlen(kvmstr) + 1;
		if (get_option(&str, &par))
			prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
		pr_info("kernel KVM profiling enabled (shift: %u)\n",
			prof_shift);
	} else if (get_option(&str, &par)) {
		prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
		prof_on = CPU_PROFILING;
		pr_info("kernel profiling enabled (shift: %u)\n",
			prof_shift);
	}
	return 1;
}
__setup("profile=", profile_setup);
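/*
 * Example boot-parameter forms accepted by profile_setup() above; the
 * shift values are arbitrary and only illustrate the parsing:
 *
 *	profile=2		prof_on = CPU_PROFILING,   prof_shift = 2
 *	profile=schedule,3	prof_on = SCHED_PROFILING, prof_shift = 3
 *	profile=sleep,4		prof_on = SLEEP_PROFILING, prof_shift = 4
 *				(requires CONFIG_SCHEDSTATS)
 *	profile=kvm,2		prof_on = KVM_PROFILING,   prof_shift = 2
 */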


int __ref profile_init(void)
{
	int buffer_bytes;
	if (!prof_on)
		return 0;

	/* only text is profiled */
	prof_len = (_etext - _stext) >> prof_shift;
	buffer_bytes = prof_len*sizeof(atomic_t);

	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(prof_cpu_mask, cpu_possible_mask);

	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
	if (prof_buffer)
		return 0;

	prof_buffer = alloc_pages_exact(buffer_bytes,
					GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
	if (prof_buffer)
		return 0;

	prof_buffer = vzalloc(buffer_bytes);
	if (prof_buffer)
		return 0;

	free_cpumask_var(prof_cpu_mask);
	return -ENOMEM;
}
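/*
 * Rough sizing example (illustrative figures, not measurements): with
 * 16 MiB of kernel text and profile=2, prof_len = 0x1000000 >> 2 =
 * 4M slots, so buffer_bytes is about 16 MiB for a 4-byte atomic_t.
 * That is why kzalloc() is only the first attempt, with
 * alloc_pages_exact() and finally vzalloc() as fallbacks for
 * allocations too large for the lower-level allocators.
 */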

#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
/*
 * Each cpu has a pair of open-addressed hashtables for pending
 * profile hits. read_profile() IPI's all cpus to request them
 * to flip buffers and flushes their contents to prof_buffer itself.
 * Flip requests are serialized by the profile_flip_mutex. The sole
 * use of having a second hashtable is for avoiding cacheline
 * contention that would otherwise happen during flushes of pending
 * profile hits required for the accuracy of reported profile hits
 * and so resurrect the interrupt livelock issue.
 *
 * The open-addressed hashtables are indexed by profile buffer slot
 * and hold the number of pending hits to that profile buffer slot on
 * a cpu in an entry. When the hashtable overflows, all pending hits
 * are accounted to their corresponding profile buffer slots with
 * atomic_add() and the hashtable emptied. As numerous pending hits
 * may be accounted to a profile buffer slot in a hashtable entry,
 * this amortizes a number of atomic profile buffer increments likely
 * to be far larger than the number of entries in the hashtable,
 * particularly given that the number of distinct profile buffer
 * positions to which hits are accounted during short intervals (e.g.
 * several seconds) is usually very small. Exclusion from buffer
 * flipping is provided by interrupt disablement (note that for
 * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from
 * process context).
 * The hash function is meant to be lightweight as opposed to strong,
 * and was vaguely inspired by ppc64 firmware-supported inverted
 * pagetable hash functions, but uses a full hashtable full of finite
 * collision chains, not just pairs of them.
 *
 * -- nyc
 */
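/*
 * Probe-order note (derived from do_profile_hits() below): the primary
 * index picks one of the NR_PROFILE_GRP groups, and the secondary
 * stride is an odd multiple of PROFILE_GRPSZ, so as long as the group
 * count is a power of two the probe sequence visits every group once
 * before wrapping back to the primary group and falling back to a
 * flush.
 */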
static void __profile_flip_buffers(void *unused)
{
	int cpu = smp_processor_id();

	per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
}

static void profile_flip_buffers(void)
{
	int i, j, cpu;

	mutex_lock(&profile_flip_mutex);
	j = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
		for (i = 0; i < NR_PROFILE_HIT; ++i) {
			if (!hits[i].hits) {
				if (hits[i].pc)
					hits[i].pc = 0;
				continue;
			}
			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
			hits[i].hits = hits[i].pc = 0;
		}
	}
	mutex_unlock(&profile_flip_mutex);
}

static void profile_discard_flip_buffers(void)
{
	int i, cpu;

	mutex_lock(&profile_flip_mutex);
	i = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
	}
	mutex_unlock(&profile_flip_mutex);
}

static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
	int i, j, cpu;
	struct profile_hit *hits;

	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	cpu = get_cpu();
	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
	if (!hits) {
		put_cpu();
		return;
	}
	/*
	 * We buffer the global profiler buffer into a per-CPU
	 * queue and thus reduce the number of global (and possibly
	 * NUMA-alien) accesses. The write-queue is self-coalescing:
	 */
	local_irq_save(flags);
	do {
		for (j = 0; j < PROFILE_GRPSZ; ++j) {
			if (hits[i + j].pc == pc) {
				hits[i + j].hits += nr_hits;
				goto out;
			} else if (!hits[i + j].hits) {
				hits[i + j].pc = pc;
				hits[i + j].hits = nr_hits;
				goto out;
			}
		}
		i = (i + secondary) & (NR_PROFILE_HIT - 1);
	} while (i != primary);

	/*
	 * Add the current hit(s) and flush the write-queue out
	 * to the global buffer:
	 */
	atomic_add(nr_hits, &prof_buffer[pc]);
	for (i = 0; i < NR_PROFILE_HIT; ++i) {
		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
		hits[i].pc = hits[i].hits = 0;
	}
out:
	local_irq_restore(flags);
	put_cpu();
}

static int profile_dead_cpu(unsigned int cpu)
{
	struct page *page;
	int i;

	if (cpumask_available(prof_cpu_mask))
		cpumask_clear_cpu(cpu, prof_cpu_mask);

	for (i = 0; i < 2; i++) {
		if (per_cpu(cpu_profile_hits, cpu)[i]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]);
			per_cpu(cpu_profile_hits, cpu)[i] = NULL;
			__free_page(page);
		}
	}
	return 0;
}

static int profile_prepare_cpu(unsigned int cpu)
{
	int i, node = cpu_to_mem(cpu);
	struct page *page;

	per_cpu(cpu_profile_flip, cpu) = 0;

	for (i = 0; i < 2; i++) {
		if (per_cpu(cpu_profile_hits, cpu)[i])
			continue;

		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
		if (!page) {
			profile_dead_cpu(cpu);
			return -ENOMEM;
		}
		per_cpu(cpu_profile_hits, cpu)[i] = page_address(page);

	}
	return 0;
}

static int profile_online_cpu(unsigned int cpu)
{
	if (cpumask_available(prof_cpu_mask))
		cpumask_set_cpu(cpu, prof_cpu_mask);

	return 0;
}

#else /* !CONFIG_SMP */
#define profile_flip_buffers()		do { } while (0)
#define profile_discard_flip_buffers()	do { } while (0)

static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	unsigned long pc;
	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
	atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
}
#endif /* !CONFIG_SMP */

void profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	if (prof_on != type || !prof_buffer)
		return;
	do_profile_hits(type, __pc, nr_hits);
}
EXPORT_SYMBOL_GPL(profile_hits);
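/*
 * Illustrative callers (exact call sites vary between kernel
 * versions): the scheduler accounts schedule() callers with something
 * like
 *
 *	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 *
 * and sleep profiling charges sleep time in ~ms units to the blocked
 * task's wait channel, roughly
 *
 *	profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk), delta >> 20);
 *
 * profile_hit() itself is simply profile_hits() with nr_hits == 1.
 */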

void profile_tick(int type)
{
	struct pt_regs *regs = get_irq_regs();

	if (!user_mode(regs) && cpumask_available(prof_cpu_mask) &&
	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
		profile_hit(type, (void *)profile_pc(regs));
}

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask));
	return 0;
}

static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, prof_cpu_mask_proc_show, NULL);
}

static ssize_t prof_cpu_mask_proc_write(struct file *file,
	const char __user *buffer, size_t count, loff_t *pos)
{
	cpumask_var_t new_value;
	int err;

	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	err = cpumask_parse_user(buffer, count, new_value);
	if (!err) {
		cpumask_copy(prof_cpu_mask, new_value);
		err = count;
	}
	free_cpumask_var(new_value);
	return err;
}

static const struct proc_ops prof_cpu_mask_proc_ops = {
	.proc_open	= prof_cpu_mask_proc_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
	.proc_write	= prof_cpu_mask_proc_write,
};

void create_prof_cpu_mask(void)
{
	/* create /proc/irq/prof_cpu_mask */
	proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops);
}
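/*
 * Usage sketch: the file takes a hexadecimal cpu mask, so restricting
 * the profiling tick to CPUs 0-1 looks like
 *
 *	echo 3 > /proc/irq/prof_cpu_mask
 *
 * profile_tick() above only records a hit when the current cpu is set
 * in this mask.
 */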

/*
 * This function accesses profiling information. The returned data is
 * binary: the sampling step and the actual contents of the profile
 * buffer. Use of the program readprofile is recommended in order to
 * get meaningful info out of these data.
 */
static ssize_t
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read;
	char *pnt;
	unsigned long sample_step = 1UL << prof_shift;
	profile_flip_buffers();
	if (p >= (prof_len+1)*sizeof(unsigned int))
		return 0;
	if (count > (prof_len+1)*sizeof(unsigned int) - p)
		count = (prof_len+1)*sizeof(unsigned int) - p;
	read = 0;

	while (p < sizeof(unsigned int) && count > 0) {
		if (put_user(*((char *)(&sample_step)+p), buf))
			return -EFAULT;
		buf++; p++; count--; read++;
	}
	pnt = (char *)prof_buffer + p - sizeof(atomic_t);
	if (copy_to_user(buf, (void *)pnt, count))
		return -EFAULT;
	read += count;
	*ppos += read;
	return read;
}
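/*
 * Minimal userspace reader, sketching the layout produced above. It
 * assumes sizeof(unsigned int) == sizeof(atomic_t) == 4, which the
 * offset arithmetic in read_profile() already relies on:
 *
 *	unsigned int step, count;
 *	unsigned long slot = 0;
 *	int fd = open("/proc/profile", O_RDONLY);
 *
 *	read(fd, &step, sizeof(step));		(step == 1 << prof_shift)
 *	while (read(fd, &count, sizeof(count)) == sizeof(count)) {
 *		if (count)
 *			printf("slot %lu: %u hits\n", slot, count);
 *		slot++;
 *	}
 *
 * Slot n covers kernel text from _stext + n*step up to the next slot;
 * readprofile(1) performs this decoding and maps slots back to symbols
 * using System.map.
 */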

/*
 * Writing to /proc/profile resets the counters
 *
 * Writing a 'profiling multiplier' value into it also re-sets the profiling
 * interrupt frequency, on architectures that support this.
 */
static ssize_t write_profile(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
#ifdef CONFIG_SMP
	extern int setup_profiling_timer(unsigned int multiplier);

	if (count == sizeof(int)) {
		unsigned int multiplier;

		if (copy_from_user(&multiplier, buf, sizeof(int)))
			return -EFAULT;

		if (setup_profiling_timer(multiplier))
			return -EINVAL;
	}
#endif
	profile_discard_flip_buffers();
	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
	return count;
}
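/*
 * Usage sketch: any write clears the accumulated counters, e.g.
 *
 *	echo > /proc/profile
 *
 * (readprofile -r does the same). A write of exactly sizeof(int) bytes
 * is additionally treated as the multiplier handed to
 * setup_profiling_timer() on SMP architectures that implement it.
 */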

static const struct proc_ops profile_proc_ops = {
	.proc_read	= read_profile,
	.proc_write	= write_profile,
	.proc_lseek	= default_llseek,
};

int __ref create_proc_profile(void)
{
	struct proc_dir_entry *entry;
#ifdef CONFIG_SMP
	enum cpuhp_state online_state;
#endif

	int err = 0;

	if (!prof_on)
		return 0;
#ifdef CONFIG_SMP
	err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE",
				profile_prepare_cpu, profile_dead_cpu);
	if (err)
		return err;

	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE",
				profile_online_cpu, NULL);
	if (err < 0)
		goto err_state_prep;
	online_state = err;
	err = 0;
#endif
	entry = proc_create("profile", S_IWUSR | S_IRUGO,
			    NULL, &profile_proc_ops);
	if (!entry)
		goto err_state_onl;
	proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));

	return err;
err_state_onl:
#ifdef CONFIG_SMP
	cpuhp_remove_state(online_state);
err_state_prep:
	cpuhp_remove_state(CPUHP_PROFILE_PREPARE);
#endif
	return err;
}
subsys_initcall(create_proc_profile);
#endif /* CONFIG_PROC_FS */