cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

acpi_pad.c (12127B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * acpi_pad.c ACPI Processor Aggregator Driver
 *
 * Copyright (c) 2009, Intel Corporation.
 */

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/tick.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/perf_event.h>
#include <asm/mwait.h>
#include <xen/xen.h>

#define ACPI_PROCESSOR_AGGREGATOR_CLASS	"acpi_pad"
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
static DEFINE_MUTEX(isolated_cpus_lock);
static DEFINE_MUTEX(round_robin_lock);

static unsigned long power_saving_mwait_eax;

static unsigned char tsc_detected_unstable;
static unsigned char tsc_marked_unstable;

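/*
 * Probe CPUID leaf 5 (MONITOR/MWAIT) and cache the MWAIT hint for the
 * deepest supported C-state in power_saving_mwait_eax.  Bail out if
 * MWAIT or its interrupt-break extension is unavailable.  Also note
 * whether the TSC may stop in deep C-states on this CPU.
 */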
static void power_saving_mwait_init(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (!boot_cpu_has(X86_FEATURE_MWAIT))
		return;
	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	power_saving_mwait_eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

#if defined(CONFIG_X86)
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_HYGON:
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
	case X86_VENDOR_ZHAOXIN:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			tsc_detected_unstable = 1;
		break;
	default:
		/* TSC could halt in idle */
		tsc_detected_unstable = 1;
	}
#endif
}

static unsigned long cpu_weight[NR_CPUS];
static int tsk_in_cpu[NR_CPUS] = {[0 ... NR_CPUS-1] = -1};
static DECLARE_BITMAP(pad_busy_cpus_bits, NR_CPUS);
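/*
 * Migrate pad kthread @tsk_index to the least-loaded online CPU that is
 * not already occupied by another pad thread, preferring CPUs whose HT
 * siblings are also free.  The chosen CPU is recorded in tsk_in_cpu[]
 * and pad_busy_cpus_bits, and its cpu_weight[] counter is bumped so
 * future rotations spread the load evenly.
 */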
static void round_robin_cpu(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);
	cpumask_var_t tmp;
	int cpu;
	unsigned long min_weight = -1;
	unsigned long preferred_cpu;

	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return;

	mutex_lock(&round_robin_lock);
	cpumask_clear(tmp);
	for_each_cpu(cpu, pad_busy_cpus)
		cpumask_or(tmp, tmp, topology_sibling_cpumask(cpu));
	cpumask_andnot(tmp, cpu_online_mask, tmp);
	/* avoid HT siblings if possible */
	if (cpumask_empty(tmp))
		cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
	if (cpumask_empty(tmp)) {
		mutex_unlock(&round_robin_lock);
		free_cpumask_var(tmp);
		return;
	}
	for_each_cpu(cpu, tmp) {
		if (cpu_weight[cpu] < min_weight) {
			min_weight = cpu_weight[cpu];
			preferred_cpu = cpu;
		}
	}

	if (tsk_in_cpu[tsk_index] != -1)
		cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = preferred_cpu;
	cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
	cpu_weight[preferred_cpu]++;
	mutex_unlock(&round_robin_lock);

	set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));

	free_cpumask_var(tmp);
}

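/* Release the CPU reserved for pad kthread @tsk_index when it exits. */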
static void exit_round_robin(unsigned int tsk_index)
{
	struct cpumask *pad_busy_cpus = to_cpumask(pad_busy_cpus_bits);

	cpumask_clear_cpu(tsk_in_cpu[tsk_index], pad_busy_cpus);
	tsk_in_cpu[tsk_index] = -1;
}

static unsigned int idle_pct = 5; /* percentage */
static unsigned int round_robin_time = 1; /* second */
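/*
 * Body of each "acpi_pad/N" kthread.  The thread runs at low FIFO
 * realtime priority, hops to a new CPU every round_robin_time seconds,
 * and keeps that CPU in a deep MWAIT C-state (with the broadcast timer
 * armed so wakeups still arrive).  It sleeps idle_pct percent of every
 * second so the RT throttling logic can keep other tasks alive.
 */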
static int power_saving_thread(void *data)
{
	int do_sleep;
	unsigned int tsk_index = (unsigned long)data;
	u64 last_jiffies = 0;

	sched_set_fifo_low(current);

	while (!kthread_should_stop()) {
		unsigned long expire_time;

		/* round robin to cpus */
		expire_time = last_jiffies + round_robin_time * HZ;
		if (time_before(expire_time, jiffies)) {
			last_jiffies = jiffies;
			round_robin_cpu(tsk_index);
		}

		do_sleep = 0;

		expire_time = jiffies + HZ * (100 - idle_pct) / 100;

		while (!need_resched()) {
			if (tsc_detected_unstable && !tsc_marked_unstable) {
				/* TSC could halt in idle, so notify users */
				mark_tsc_unstable("TSC halts in idle");
				tsc_marked_unstable = 1;
			}
			local_irq_disable();

			perf_lopwr_cb(true);

			tick_broadcast_enable();
			tick_broadcast_enter();
			stop_critical_timings();

			mwait_idle_with_hints(power_saving_mwait_eax, 1);

			start_critical_timings();
			tick_broadcast_exit();

			perf_lopwr_cb(false);

			local_irq_enable();

			if (time_before(expire_time, jiffies)) {
				do_sleep = 1;
				break;
			}
		}

		/*
		 * The RT scheduler throttles realtime tasks: once an RT task
		 * has used 95% of the CPU time, it is scheduled out for the
		 * remaining 5% so other tasks are not starved.  That only
		 * works when every CPU is running an RT task; if one CPU has
		 * none, RT tasks from other CPUs borrow its CPU time and end
		 * up using more than 95%.  To keep the anti-starvation logic
		 * working, take a nap here.
		 */
		if (unlikely(do_sleep))
			schedule_timeout_killable(HZ * idle_pct / 100);

		/* If an external event has set the need_resched flag, then
		 * we need to deal with it, or this loop will continue to
		 * spin without calling __mwait().
		 */
		if (unlikely(need_resched()))
			schedule();
	}

	exit_round_robin(tsk_index);
	return 0;
}

static struct task_struct *ps_tsks[NR_CPUS];
static unsigned int ps_tsk_num;
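/* Spawn one more "acpi_pad/N" kthread and track it in ps_tsks[]. */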
static int create_power_saving_task(void)
{
	int rc;

	ps_tsks[ps_tsk_num] = kthread_run(power_saving_thread,
		(void *)(unsigned long)ps_tsk_num,
		"acpi_pad/%d", ps_tsk_num);

	if (IS_ERR(ps_tsks[ps_tsk_num])) {
		rc = PTR_ERR(ps_tsks[ps_tsk_num]);
		ps_tsks[ps_tsk_num] = NULL;
	} else {
		rc = 0;
		ps_tsk_num++;
	}

	return rc;
}

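/* Stop the most recently created power-saving kthread, if any. */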
static void destroy_power_saving_task(void)
{
	if (ps_tsk_num > 0) {
		ps_tsk_num--;
		kthread_stop(ps_tsks[ps_tsk_num]);
		ps_tsks[ps_tsk_num] = NULL;
	}
}

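/* Grow or shrink the pool of power-saving kthreads to exactly @num. */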
static void set_power_saving_task_num(unsigned int num)
{
	if (num > ps_tsk_num) {
		while (ps_tsk_num < num) {
			if (create_power_saving_task())
				return;
		}
	} else if (num < ps_tsk_num) {
		while (ps_tsk_num > num)
			destroy_power_saving_task();
	}
}

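/*
 * Idle up to @num_cpus CPUs by adjusting the kthread pool.  The request
 * is clamped to the number of online CPUs; cpus_read_lock() keeps that
 * count stable while the pool is resized.
 */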
static void acpi_pad_idle_cpus(unsigned int num_cpus)
{
	cpus_read_lock();

	num_cpus = min_t(unsigned int, num_cpus, num_online_cpus());
	set_power_saving_task_num(num_cpus);

	cpus_read_unlock();
}

static uint32_t acpi_pad_idle_cpus_num(void)
{
	return ps_tsk_num;
}

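/*
 * Sysfs interface: "rrtime" (seconds between CPU rotations, 1-99),
 * "idlepct" (percentage of each second a pad thread yields, 1-99) and
 * "idlecpus" (write: number of CPUs to idle, read: mask of CPUs
 * currently occupied by pad threads).
 */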
static ssize_t rrtime_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long num;

	if (kstrtoul(buf, 0, &num))
		return -EINVAL;
	if (num < 1 || num >= 100)
		return -EINVAL;
	mutex_lock(&isolated_cpus_lock);
	round_robin_time = num;
	mutex_unlock(&isolated_cpus_lock);
	return count;
}

static ssize_t rrtime_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", round_robin_time);
}
static DEVICE_ATTR_RW(rrtime);

static ssize_t idlepct_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long num;

	if (kstrtoul(buf, 0, &num))
		return -EINVAL;
	if (num < 1 || num >= 100)
		return -EINVAL;
	mutex_lock(&isolated_cpus_lock);
	idle_pct = num;
	mutex_unlock(&isolated_cpus_lock);
	return count;
}

static ssize_t idlepct_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", idle_pct);
}
static DEVICE_ATTR_RW(idlepct);

static ssize_t idlecpus_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long num;

	if (kstrtoul(buf, 0, &num))
		return -EINVAL;
	mutex_lock(&isolated_cpus_lock);
	acpi_pad_idle_cpus(num);
	mutex_unlock(&isolated_cpus_lock);
	return count;
}

static ssize_t idlecpus_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(false, buf,
				       to_cpumask(pad_busy_cpus_bits));
}

static DEVICE_ATTR_RW(idlecpus);

static int acpi_pad_add_sysfs(struct acpi_device *device)
{
	int result;

	result = device_create_file(&device->dev, &dev_attr_idlecpus);
	if (result)
		return -ENODEV;
	result = device_create_file(&device->dev, &dev_attr_idlepct);
	if (result) {
		device_remove_file(&device->dev, &dev_attr_idlecpus);
		return -ENODEV;
	}
	result = device_create_file(&device->dev, &dev_attr_rrtime);
	if (result) {
		device_remove_file(&device->dev, &dev_attr_idlecpus);
		device_remove_file(&device->dev, &dev_attr_idlepct);
		return -ENODEV;
	}
	return 0;
}

static void acpi_pad_remove_sysfs(struct acpi_device *device)
{
	device_remove_file(&device->dev, &dev_attr_idlecpus);
	device_remove_file(&device->dev, &dev_attr_idlepct);
	device_remove_file(&device->dev, &dev_attr_rrtime);
}

/*
 * Query firmware how many CPUs should be idle
 * return -1 on failure
 */
static int acpi_pad_pur(acpi_handle handle)
{
	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
	union acpi_object *package;
	int num = -1;

	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer)))
		return num;

	if (!buffer.length || !buffer.pointer)
		return num;

	package = buffer.pointer;

	if (package->type == ACPI_TYPE_PACKAGE &&
		package->package.count == 2 &&
		package->package.elements[0].integer.value == 1) /* rev 1 */

		num = package->package.elements[1].integer.value;

	kfree(buffer.pointer);
	return num;
}

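/*
 * Handle the 0x80 notification: re-evaluate _PUR to learn how many CPUs
 * firmware wants idled, apply that, and report the number actually
 * idled back to firmware via _OST.
 */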
static void acpi_pad_handle_notify(acpi_handle handle)
{
	int num_cpus;
	uint32_t idle_cpus;
	struct acpi_buffer param = {
		.length = 4,
		.pointer = (void *)&idle_cpus,
	};

	mutex_lock(&isolated_cpus_lock);
	num_cpus = acpi_pad_pur(handle);
	if (num_cpus < 0) {
		mutex_unlock(&isolated_cpus_lock);
		return;
	}
	acpi_pad_idle_cpus(num_cpus);
	idle_cpus = acpi_pad_idle_cpus_num();
	acpi_evaluate_ost(handle, ACPI_PROCESSOR_AGGREGATOR_NOTIFY, 0, &param);
	mutex_unlock(&isolated_cpus_lock);
}

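/* ACPI notify handler: dispatch aggregator events and signal user space. */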
static void acpi_pad_notify(acpi_handle handle, u32 event,
	void *data)
{
	struct acpi_device *device = data;

	switch (event) {
	case ACPI_PROCESSOR_AGGREGATOR_NOTIFY:
		acpi_pad_handle_notify(handle);
		acpi_bus_generate_netlink_event(device->pnp.device_class,
			dev_name(&device->dev), event, 0);
		break;
	default:
		pr_warn("Unsupported event [0x%x]\n", event);
		break;
	}
}

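/* Device add callback: create sysfs attributes and install the notify handler. */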
static int acpi_pad_add(struct acpi_device *device)
{
	acpi_status status;

	strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
	strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);

	if (acpi_pad_add_sysfs(device))
		return -ENODEV;

	status = acpi_install_notify_handler(device->handle,
		ACPI_DEVICE_NOTIFY, acpi_pad_notify, device);
	if (ACPI_FAILURE(status)) {
		acpi_pad_remove_sysfs(device);
		return -ENODEV;
	}

	return 0;
}

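/* Device remove callback: release all idled CPUs and tear everything down. */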
static int acpi_pad_remove(struct acpi_device *device)
{
	mutex_lock(&isolated_cpus_lock);
	acpi_pad_idle_cpus(0);
	mutex_unlock(&isolated_cpus_lock);

	acpi_remove_notify_handler(device->handle,
		ACPI_DEVICE_NOTIFY, acpi_pad_notify);
	acpi_pad_remove_sysfs(device);
	return 0;
}

static const struct acpi_device_id pad_device_ids[] = {
	{"ACPI000C", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, pad_device_ids);

static struct acpi_driver acpi_pad_driver = {
	.name = "processor_aggregator",
	.class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
	.ids = pad_device_ids,
	.ops = {
		.add = acpi_pad_add,
		.remove = acpi_pad_remove,
	},
};

static int __init acpi_pad_init(void)
{
	/* Xen ACPI PAD is used when running as Xen Dom0. */
	if (xen_initial_domain())
		return -ENODEV;

	power_saving_mwait_init();
	if (power_saving_mwait_eax == 0)
		return -EINVAL;

	return acpi_bus_register_driver(&acpi_pad_driver);
}

static void __exit acpi_pad_exit(void)
{
	acpi_bus_unregister_driver(&acpi_pad_driver);
}

module_init(acpi_pad_init);
module_exit(acpi_pad_exit);
MODULE_AUTHOR("Shaohua Li <shaohua.li@intel.com>");
MODULE_DESCRIPTION("ACPI Processor Aggregator Driver");
MODULE_LICENSE("GPL");