cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

subcore.c (11071B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
 */

#define pr_fmt(fmt)	"powernv: " fmt

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/smp.h>

#include "subcore.h"
#include "powernv.h"


/*
 * Split/unsplit procedure:
 *
 * A core can be in one of three states: unsplit, 2-way split, and 4-way split.
 *
 * The mapping to subcores_per_core is simple:
 *
 *  State       | subcores_per_core
 *  ------------|------------------
 *  Unsplit     |        1
 *  2-way split |        2
 *  4-way split |        4
 *
 * The core is split along thread boundaries; the mapping between subcores and
 * threads is as follows:
 *
 *  Unsplit:
 *          ----------------------------
 *  Subcore |            0             |
 *          ----------------------------
 *  Thread  |  0  1  2  3  4  5  6  7  |
 *          ----------------------------
 *
 *  2-way split:
 *          -------------------------------------
 *  Subcore |        0        |        1        |
 *          -------------------------------------
 *  Thread  |  0   1   2   3  |  4   5   6   7  |
 *          -------------------------------------
 *
 *  4-way split:
 *          -----------------------------------------
 *  Subcore |    0    |    1    |    2    |    3    |
 *          -----------------------------------------
 *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  |
 *          -----------------------------------------
 *
 *
 * Transitions
 * -----------
 *
 * It is not possible to transition directly between the two split states; the
 * core must first be unsplit. The legal transitions are:
 *
 *  -----------          ---------------
 *  |         |  <---->  | 2-way split |
 *  |         |          ---------------
 *  | Unsplit |
 *  |         |          ---------------
 *  |         |  <---->  | 4-way split |
 *  -----------          ---------------
 *
 * Unsplitting
 * -----------
 *
 * Unsplitting is the simpler procedure. It requires thread 0 to request the
 * unsplit while all other threads NAP.
 *
 * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
 * the hardware that if all threads except 0 are napping, the hardware should
 * unsplit the core.
 *
 * Non-zero threads are sent to a NAP loop; they don't exit the loop until
 * they see the core unsplit.
 *
 * Thread 0 spins waiting for the hardware to see all the other threads
 * napping and perform the unsplit.
 *
 * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
 * out of NAP. They will then see the core unsplit and exit the NAP loop.
 *
 * Splitting
 * ---------
 *
 * The basic splitting procedure is fairly straightforward. However, it is
 * complicated by the fact that after the split occurs, the newly created
 * subcores are not in a fully initialised state.
 *
 * Most notably, the subcores do not have the correct value for SDR1, which
 * means they must not be running in virtual mode when the split occurs. The
 * subcores have separate timebase SPRs, but these are pre-synchronised by
 * opal.
 *
 * To begin with, secondary threads are sent to an assembly routine. There
 * they switch to real mode, so they are immune to the uninitialised SDR1
 * value. Once in real mode they indicate that they are in real mode, and
 * spin waiting to see the core split.
 *
 * Thread 0 waits to see that all secondaries are in real mode, and then
 * begins the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS,
 * which prevents the hardware from unsplitting. Then it sets the appropriate
 * HID bit to request the split, and spins waiting to see that the split has
 * happened.
 *
 * Concurrently the secondaries will notice the split. When they do, they set
 * up their SPRs, notably SDR1, and then they can return to virtual mode and
 * exit the procedure.
 */
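
/*
 * A minimal illustrative sketch (hypothetical helper, not used anywhere in
 * this file): the tables above boil down to
 * threads_per_core / subcores_per_core threads per subcore, with a thread's
 * subcore given by integer division.
 */
#if 0
static inline int example_subcore_of_thread(int thread, int threads_per_core,
					    int subcores_per_core)
{
	/* threads per subcore, e.g. 8 / 4 == 2 on a 4-way split POWER8 core */
	int tps = threads_per_core / subcores_per_core;

	return thread / tps;	/* 4-way split: thread 5 -> subcore 2 */
}
#endif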

/* Initialised at boot by subcore_init() */
static int subcores_per_core;

/*
 * Used to communicate to offline cpus that we want them to pop out of the
 * offline loop and do a split or unsplit.
 *
 * 0 - no split happening
 * 1 - unsplit in progress
 * 2 - split to 2 in progress
 * 4 - split to 4 in progress
 */
static int new_split_mode;

static cpumask_var_t cpu_offline_mask;

struct split_state {
	u8 step;
	u8 master;
};

static DEFINE_PER_CPU(struct split_state, split_state);

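/*
 * In this file wait_for_sync_step() is only ever run on thread 0 of a core
 * (the secondary threads take early-return paths in unsplit_core() and
 * split_core()), so cpu + 1 .. cpu + threads_per_core - 1 below are exactly
 * the caller's sibling threads.
 */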
static void wait_for_sync_step(int step)
{
	int i, cpu = smp_processor_id();

	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		while (per_cpu(split_state, i).step < step)
			barrier();

	/* Order the wait loop vs any subsequent loads/stores. */
	mb();
}

static void update_hid_in_slw(u64 hid0)
{
	u64 idle_states = pnv_get_supported_cpuidle_states();

	if (idle_states & OPAL_PM_WINKLE_ENABLED) {
		/* OPAL call to patch slw with the new HID0 value */
		u64 cpu_pir = hard_smp_processor_id();

		opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
	}
}

static inline void update_power8_hid0(unsigned long hid0)
{
	/*
	 * The HID0 update on Power8 should at the very least be
	 * preceded by a SYNC instruction followed by an ISYNC
	 * instruction
	 */
	asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
}

static void unsplit_core(void)
{
	u64 hid0, mask;
	int i, cpu;

	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		while (mfspr(SPRN_HID0) & mask)
			power7_idle_type(PNV_THREAD_NAP);

		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
		return;
	}

	hid0 = mfspr(SPRN_HID0);
	hid0 &= ~HID0_POWER8_DYNLPARDIS;
	update_power8_hid0(hid0);
	update_hid_in_slw(hid0);

	while (mfspr(SPRN_HID0) & mask)
		cpu_relax();

	/* Wake secondaries out of NAP */
	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		smp_send_reschedule(i);

	wait_for_sync_step(SYNC_STEP_UNSPLIT);
}

static void split_core(int new_mode)
{
	struct { u64 value; u64 mask; } split_parms[2] = {
		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
	};
	int i, cpu;
	u64 hid0;

	/* Convert new_mode (2 or 4) into an index into our parms array */
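	/* e.g. new_mode == 2 gives i == 0, new_mode == 4 gives i == 1 */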
	i = (new_mode >> 1) - 1;
	BUG_ON(i < 0 || i > 1);

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		split_core_secondary_loop(&per_cpu(split_state, cpu).step);
		return;
	}

	wait_for_sync_step(SYNC_STEP_REAL_MODE);

	/* Write new mode */
	hid0  = mfspr(SPRN_HID0);
	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
	update_power8_hid0(hid0);
	update_hid_in_slw(hid0);

	/* Wait for it to happen */
	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
		cpu_relax();
}

static void cpu_do_split(int new_mode)
{
	/*
	 * At boot subcores_per_core will be 0, so we will always unsplit at
	 * boot. In the usual case where the core is already unsplit it's a
	 * nop, and this just ensures the kernel's notion of the mode is
	 * consistent with the hardware.
	 */
	if (subcores_per_core != 1)
		unsplit_core();

	if (new_mode != 1)
		split_core(new_mode);

	mb();
	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}

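/*
 * Worked example of the transition rule above: a 4-way -> 2-way change
 * reaches cpu_do_split() with subcores_per_core == 4 and new_mode == 2, so
 * the core is first unsplit and then split to 2; there is never a direct
 * split-to-split transition.
 *
 * cpu_core_split_required() below is run by offline cpus once they are woken
 * out of the offline loop (see the comment above new_split_mode); it joins
 * the split/unsplit and returns true if a mode change was in progress.
 */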
bool cpu_core_split_required(void)
{
	smp_rmb();

	if (!new_split_mode)
		return false;

	cpu_do_split(new_split_mode);

	return true;
}

void update_subcore_sibling_mask(void)
{
	int cpu;
	/*
	 * Sibling mask for the first cpu. Left shift this by the required
	 * number of bits to get the sibling mask for the rest of the cpus.
	 */
	int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;

	for_each_possible_cpu(cpu) {
		int tid = cpu_thread_in_core(cpu);
		int offset = (tid / threads_per_subcore) * threads_per_subcore;
		int mask = sibling_mask_first_cpu << offset;

		paca_ptrs[cpu]->subcore_sibling_mask = mask;
	}
}
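
/*
 * Worked example with illustrative values: on a 4-way split,
 * threads_per_subcore == 2 and sibling_mask_first_cpu == 0x3. A cpu with
 * tid == 5 gets offset == (5 / 2) * 2 == 4, so its mask is
 * 0x3 << 4 == 0x30, i.e. threads 4 and 5, which is subcore 2 in the diagram
 * at the top of this file.
 */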

static int cpu_update_split_mode(void *data)
{
	int cpu, new_mode = *(int *)data;

	if (this_cpu_ptr(&split_state)->master) {
		new_split_mode = new_mode;
		smp_wmb();

		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
			       cpu_online_mask);

		/* This should work even though the cpu is offline */
		for_each_cpu(cpu, cpu_offline_mask)
			smp_send_reschedule(cpu);
	}

	cpu_do_split(new_mode);

	if (this_cpu_ptr(&split_state)->master) {
		/* Wait for all cpus to finish before we touch subcores_per_core */
		for_each_present_cpu(cpu) {
			if (cpu >= setup_max_cpus)
				break;

			while (per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
				barrier();
		}

		new_split_mode = 0;

		/* Make the new mode public */
		subcores_per_core = new_mode;
		threads_per_subcore = threads_per_core / subcores_per_core;
		update_subcore_sibling_mask();

		/* Make sure the new mode is written before we exit */
		mb();
	}

	return 0;
}

static int set_subcores_per_core(int new_mode)
{
	struct split_state *state;
	int cpu;

	if (kvm_hv_mode_active()) {
		pr_err("Unable to change split core mode while KVM active.\n");
		return -EBUSY;
	}

	/*
	 * We are only called at boot, or from the sysfs write. If that ever
	 * changes we'll need a lock here.
	 */
	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);

	for_each_present_cpu(cpu) {
		state = &per_cpu(split_state, cpu);
		state->step = SYNC_STEP_INITIAL;
		state->master = 0;
	}

	cpus_read_lock();

	/* This cpu will update the globals before exiting stop machine */
	this_cpu_ptr(&split_state)->master = 1;

	/* Ensure state is consistent before we call the other cpus */
	mb();

	stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
				cpu_online_mask);

	cpus_read_unlock();

	return 0;
}

static ssize_t __used store_subcores_per_core(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	unsigned long val;
	int rc;

	/* We are serialised by the attribute lock */

	rc = sscanf(buf, "%lx", &val);
	if (rc != 1)
		return -EINVAL;

	switch (val) {
	case 1:
	case 2:
	case 4:
		if (subcores_per_core == val)
			/* Nothing to do */
			goto out;
		break;
	default:
		return -EINVAL;
	}

	rc = set_subcores_per_core(val);
	if (rc)
		return rc;

out:
	return count;
}

static ssize_t show_subcores_per_core(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%x\n", subcores_per_core);
}

static DEVICE_ATTR(subcores_per_core, 0644,
		show_subcores_per_core, store_subcores_per_core);
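
/*
 * Usage sketch (assumes a POWER8 PowerNV machine with sysfs mounted at /sys;
 * the write fails with -EBUSY while KVM HV is active):
 *
 *	# echo 2 > /sys/devices/system/cpu/subcores_per_core
 *	# cat /sys/devices/system/cpu/subcores_per_core
 *	2
 *
 * The value is parsed with %lx, though the legal values 1, 2 and 4 read the
 * same in hex and decimal.
 */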

static int subcore_init(void)
{
	unsigned pvr_ver;

	pvr_ver = PVR_VER(mfspr(SPRN_PVR));

	if (pvr_ver != PVR_POWER8 &&
	    pvr_ver != PVR_POWER8E &&
	    pvr_ver != PVR_POWER8NVL)
		return 0;

	/*
	 * We need all threads in a core to be present to split/unsplit, so
	 * continue only if setup_max_cpus is a multiple of threads_per_core.
	 */
	if (setup_max_cpus % threads_per_core)
		return 0;

	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));

	set_subcores_per_core(1);

	return device_create_file(cpu_subsys.dev_root,
				  &dev_attr_subcores_per_core);
}
machine_device_initcall(powernv, subcore_init);