cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

smp.c (33563B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  SMP related functions
      4 *
      5 *    Copyright IBM Corp. 1999, 2012
      6 *    Author(s): Denis Joseph Barrow,
      7 *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
      8 *
      9 *  based on other smp stuff by
     10 *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
     11 *    (c) 1998 Ingo Molnar
     12 *
     13 * The code outside of smp.c uses logical cpu numbers, only smp.c does
     14 * the translation of logical to physical cpu ids. All new code that
     15 * operates on physical cpu numbers needs to go into smp.c.
     16 */
     17
     18#define KMSG_COMPONENT "cpu"
     19#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
     20
     21#include <linux/workqueue.h>
     22#include <linux/memblock.h>
     23#include <linux/export.h>
     24#include <linux/init.h>
     25#include <linux/mm.h>
     26#include <linux/err.h>
     27#include <linux/spinlock.h>
     28#include <linux/kernel_stat.h>
     29#include <linux/delay.h>
     30#include <linux/interrupt.h>
     31#include <linux/irqflags.h>
     32#include <linux/irq_work.h>
     33#include <linux/cpu.h>
     34#include <linux/slab.h>
     35#include <linux/sched/hotplug.h>
     36#include <linux/sched/task_stack.h>
     37#include <linux/crash_dump.h>
     38#include <linux/kprobes.h>
     39#include <asm/asm-offsets.h>
     40#include <asm/diag.h>
     41#include <asm/switch_to.h>
     42#include <asm/facility.h>
     43#include <asm/ipl.h>
     44#include <asm/setup.h>
     45#include <asm/irq.h>
     46#include <asm/tlbflush.h>
     47#include <asm/vtimer.h>
     48#include <asm/lowcore.h>
     49#include <asm/sclp.h>
     50#include <asm/debug.h>
     51#include <asm/os_info.h>
     52#include <asm/sigp.h>
     53#include <asm/idle.h>
     54#include <asm/nmi.h>
     55#include <asm/stacktrace.h>
     56#include <asm/topology.h>
     57#include <asm/vdso.h>
     58#include "entry.h"
     59
     60enum {
     61	ec_schedule = 0,
     62	ec_call_function_single,
     63	ec_stop_cpu,
     64	ec_mcck_pending,
     65	ec_irq_work,
     66};
     67
     68enum {
     69	CPU_STATE_STANDBY,
     70	CPU_STATE_CONFIGURED,
     71};
     72
     73static DEFINE_PER_CPU(struct cpu *, cpu_device);
     74
     75struct pcpu {
     76	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
     77	unsigned long ec_clk;		/* sigp timestamp for ec_xxx */
     78	signed char state;		/* physical cpu state */
     79	signed char polarization;	/* physical polarization */
     80	u16 address;			/* physical cpu address */
     81};
     82
     83static u8 boot_core_type;
     84static struct pcpu pcpu_devices[NR_CPUS];
     85
     86unsigned int smp_cpu_mt_shift;
     87EXPORT_SYMBOL(smp_cpu_mt_shift);
     88
     89unsigned int smp_cpu_mtid;
     90EXPORT_SYMBOL(smp_cpu_mtid);
     91
     92#ifdef CONFIG_CRASH_DUMP
     93__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
     94#endif
     95
     96static unsigned int smp_max_threads __initdata = -1U;
     97cpumask_t cpu_setup_mask;
     98
     99static int __init early_nosmt(char *s)
    100{
    101	smp_max_threads = 1;
    102	return 0;
    103}
    104early_param("nosmt", early_nosmt);
    105
    106static int __init early_smt(char *s)
    107{
    108	get_option(&s, &smp_max_threads);
    109	return 0;
    110}
    111early_param("smt", early_smt);
    112
    113/*
    114 * The smp_cpu_state_mutex must be held when changing the state or polarization
     115 * member of a pcpu data structure within the pcpu_devices array.
    116 */
    117DEFINE_MUTEX(smp_cpu_state_mutex);
    118
    119/*
    120 * Signal processor helper functions.
    121 */
    122static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
    123{
    124	int cc;
    125
    126	while (1) {
    127		cc = __pcpu_sigp(addr, order, parm, NULL);
    128		if (cc != SIGP_CC_BUSY)
    129			return cc;
    130		cpu_relax();
    131	}
    132}
    133
    134static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
    135{
    136	int cc, retry;
    137
    138	for (retry = 0; ; retry++) {
    139		cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
    140		if (cc != SIGP_CC_BUSY)
    141			break;
    142		if (retry >= 3)
    143			udelay(10);
    144	}
    145	return cc;
    146}
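/*
 * Both helpers resolve to a SIGP (signal processor) instruction whose
 * condition code encodes the outcome: 0 order accepted, 1 status stored,
 * 2 busy, 3 not operational.  __pcpu_sigp_relax() spins until the busy
 * condition clears, while pcpu_sigp_retry() only starts to back off with
 * udelay() after a few immediate retries.
 */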
    147
    148static inline int pcpu_stopped(struct pcpu *pcpu)
    149{
    150	u32 status;
    151
    152	if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
    153			0, &status) != SIGP_CC_STATUS_STORED)
    154		return 0;
    155	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
    156}
    157
    158static inline int pcpu_running(struct pcpu *pcpu)
    159{
    160	if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
    161			0, NULL) != SIGP_CC_STATUS_STORED)
    162		return 1;
    163	/* Status stored condition code is equivalent to cpu not running. */
    164	return 0;
    165}
    166
    167/*
    168 * Find struct pcpu by cpu address.
    169 */
    170static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
    171{
    172	int cpu;
    173
    174	for_each_cpu(cpu, mask)
    175		if (pcpu_devices[cpu].address == address)
    176			return pcpu_devices + cpu;
    177	return NULL;
    178}
    179
    180static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
    181{
    182	int order;
    183
    184	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
    185		return;
    186	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
    187	pcpu->ec_clk = get_tod_clock_fast();
    188	pcpu_sigp_retry(pcpu, order, 0);
    189}
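/*
 * pcpu_ec_call() is the sending half of the IPI scheme: the ec_bit is
 * latched in the target's ec_mask and the target is poked with an
 * external call if it is running, or an emergency signal if it is not.
 * The receiving half is smp_handle_ext_call() below, which clears the
 * whole mask with xchg() and dispatches every latched bit.
 */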
    190
    191static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
    192{
    193	unsigned long async_stack, nodat_stack, mcck_stack;
    194	struct lowcore *lc;
    195
    196	lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
    197	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
    198	async_stack = stack_alloc();
    199	mcck_stack = stack_alloc();
    200	if (!lc || !nodat_stack || !async_stack || !mcck_stack)
    201		goto out;
    202	memcpy(lc, &S390_lowcore, 512);
    203	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
    204	lc->async_stack = async_stack + STACK_INIT_OFFSET;
    205	lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
    206	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
    207	lc->cpu_nr = cpu;
    208	lc->spinlock_lockval = arch_spin_lockval(cpu);
    209	lc->spinlock_index = 0;
    210	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
    211	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
    212	lc->preempt_count = PREEMPT_DISABLED;
    213	if (nmi_alloc_mcesa(&lc->mcesad))
    214		goto out;
    215	lowcore_ptr[cpu] = lc;
    216	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
    217	return 0;
    218
    219out:
    220	stack_free(mcck_stack);
    221	stack_free(async_stack);
    222	free_pages(nodat_stack, THREAD_SIZE_ORDER);
    223	free_pages((unsigned long) lc, LC_ORDER);
    224	return -ENOMEM;
    225}
    226
    227static void pcpu_free_lowcore(struct pcpu *pcpu)
    228{
    229	unsigned long async_stack, nodat_stack, mcck_stack;
    230	struct lowcore *lc;
    231	int cpu;
    232
    233	cpu = pcpu - pcpu_devices;
    234	lc = lowcore_ptr[cpu];
    235	nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
    236	async_stack = lc->async_stack - STACK_INIT_OFFSET;
    237	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
    238	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
    239	lowcore_ptr[cpu] = NULL;
    240	nmi_free_mcesa(&lc->mcesad);
    241	stack_free(async_stack);
    242	stack_free(mcck_stack);
    243	free_pages(nodat_stack, THREAD_SIZE_ORDER);
    244	free_pages((unsigned long) lc, LC_ORDER);
    245}
    246
    247static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
    248{
    249	struct lowcore *lc = lowcore_ptr[cpu];
    250
    251	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
    252	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
    253	lc->cpu_nr = cpu;
    254	lc->restart_flags = RESTART_FLAG_CTLREGS;
    255	lc->spinlock_lockval = arch_spin_lockval(cpu);
    256	lc->spinlock_index = 0;
    257	lc->percpu_offset = __per_cpu_offset[cpu];
    258	lc->kernel_asce = S390_lowcore.kernel_asce;
    259	lc->user_asce = s390_invalid_asce;
    260	lc->machine_flags = S390_lowcore.machine_flags;
    261	lc->user_timer = lc->system_timer =
    262		lc->steal_timer = lc->avg_steal_timer = 0;
    263	__ctl_store(lc->cregs_save_area, 0, 15);
    264	lc->cregs_save_area[1] = lc->kernel_asce;
    265	lc->cregs_save_area[7] = lc->user_asce;
    266	save_access_regs((unsigned int *) lc->access_regs_save_area);
    267	arch_spin_lock_setup(cpu);
    268}
    269
    270static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
    271{
    272	struct lowcore *lc;
    273	int cpu;
    274
    275	cpu = pcpu - pcpu_devices;
    276	lc = lowcore_ptr[cpu];
    277	lc->kernel_stack = (unsigned long) task_stack_page(tsk)
    278		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
    279	lc->current_task = (unsigned long) tsk;
    280	lc->lpp = LPP_MAGIC;
    281	lc->current_pid = tsk->pid;
    282	lc->user_timer = tsk->thread.user_timer;
    283	lc->guest_timer = tsk->thread.guest_timer;
    284	lc->system_timer = tsk->thread.system_timer;
    285	lc->hardirq_timer = tsk->thread.hardirq_timer;
    286	lc->softirq_timer = tsk->thread.softirq_timer;
    287	lc->steal_timer = 0;
    288}
    289
    290static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
    291{
    292	struct lowcore *lc;
    293	int cpu;
    294
    295	cpu = pcpu - pcpu_devices;
    296	lc = lowcore_ptr[cpu];
    297	lc->restart_stack = lc->kernel_stack;
    298	lc->restart_fn = (unsigned long) func;
    299	lc->restart_data = (unsigned long) data;
    300	lc->restart_source = -1U;
    301	pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
    302}
    303
    304typedef void (pcpu_delegate_fn)(void *);
    305
    306/*
    307 * Call function via PSW restart on pcpu and stop the current cpu.
    308 */
    309static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
    310{
    311	func(data);	/* should not return */
    312}
    313
    314static void pcpu_delegate(struct pcpu *pcpu,
    315			  pcpu_delegate_fn *func,
    316			  void *data, unsigned long stack)
    317{
    318	struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
    319	unsigned int source_cpu = stap();
    320
    321	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
    322	if (pcpu->address == source_cpu) {
    323		call_on_stack(2, stack, void, __pcpu_delegate,
    324			      pcpu_delegate_fn *, func, void *, data);
    325	}
    326	/* Stop target cpu (if func returns this stops the current cpu). */
    327	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
    328	/* Restart func on the target cpu and stop the current cpu. */
    329	if (lc) {
    330		lc->restart_stack = stack;
    331		lc->restart_fn = (unsigned long)func;
    332		lc->restart_data = (unsigned long)data;
    333		lc->restart_source = source_cpu;
    334	} else {
    335		put_abs_lowcore(restart_stack, stack);
    336		put_abs_lowcore(restart_fn, (unsigned long)func);
    337		put_abs_lowcore(restart_data, (unsigned long)data);
    338		put_abs_lowcore(restart_source, source_cpu);
    339	}
    340	__bpon();
    341	asm volatile(
    342		"0:	sigp	0,%0,%2	# sigp restart to target cpu\n"
    343		"	brc	2,0b	# busy, try again\n"
    344		"1:	sigp	0,%1,%3	# sigp stop to current cpu\n"
    345		"	brc	2,1b	# busy, try again\n"
    346		: : "d" (pcpu->address), "d" (source_cpu),
    347		    "K" (SIGP_RESTART), "K" (SIGP_STOP)
    348		: "0", "1", "cc");
    349	for (;;) ;
    350}
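/*
 * pcpu_delegate() moves execution to another physical cpu by loading
 * func/data/stack into the target's restart area and sending a SIGP
 * restart, then stopping the current cpu; if the target is the current
 * cpu, func is simply called on the supplied stack.  A minimal sketch of
 * a call site (my_dump_fn is a hypothetical callback, not part of this
 * file):
 *
 *	pcpu_delegate(&pcpu_devices[0], my_dump_fn, NULL,
 *		      (unsigned long) restart_stack);
 */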
    351
    352/*
    353 * Enable additional logical cpus for multi-threading.
    354 */
    355static int pcpu_set_smt(unsigned int mtid)
    356{
    357	int cc;
    358
    359	if (smp_cpu_mtid == mtid)
    360		return 0;
    361	cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
    362	if (cc == 0) {
    363		smp_cpu_mtid = mtid;
    364		smp_cpu_mt_shift = 0;
    365		while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
    366			smp_cpu_mt_shift++;
    367		pcpu_devices[0].address = stap();
    368	}
    369	return cc;
    370}
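/*
 * A worked example of the mtid -> mt_shift mapping computed above: with
 * mtid = 1 (two threads per core) the loop yields smp_cpu_mt_shift = 1,
 * with mtid = 2 or 3 it yields 2, and so on.  Physical cpu addresses are
 * laid out as (core_id << smp_cpu_mt_shift) + thread, so with mtid = 1
 * core 5 owns the addresses 10 and 11 (see smp_add_core()).
 */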
    371
    372/*
    373 * Call function on an online CPU.
    374 */
    375void smp_call_online_cpu(void (*func)(void *), void *data)
    376{
    377	struct pcpu *pcpu;
    378
    379	/* Use the current cpu if it is online. */
    380	pcpu = pcpu_find_address(cpu_online_mask, stap());
    381	if (!pcpu)
    382		/* Use the first online cpu. */
    383		pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
    384	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
    385}
    386
    387/*
    388 * Call function on the ipl CPU.
    389 */
    390void smp_call_ipl_cpu(void (*func)(void *), void *data)
    391{
    392	struct lowcore *lc = lowcore_ptr[0];
    393
    394	if (pcpu_devices[0].address == stap())
    395		lc = &S390_lowcore;
    396
    397	pcpu_delegate(&pcpu_devices[0], func, data,
    398		      lc->nodat_stack);
    399}
    400
    401int smp_find_processor_id(u16 address)
    402{
    403	int cpu;
    404
    405	for_each_present_cpu(cpu)
    406		if (pcpu_devices[cpu].address == address)
    407			return cpu;
    408	return -1;
    409}
    410
    411void schedule_mcck_handler(void)
    412{
    413	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
    414}
    415
    416bool notrace arch_vcpu_is_preempted(int cpu)
    417{
    418	if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
    419		return false;
    420	if (pcpu_running(pcpu_devices + cpu))
    421		return false;
    422	return true;
    423}
    424EXPORT_SYMBOL(arch_vcpu_is_preempted);
    425
    426void notrace smp_yield_cpu(int cpu)
    427{
    428	if (!MACHINE_HAS_DIAG9C)
    429		return;
    430	diag_stat_inc_norecursion(DIAG_STAT_X09C);
    431	asm volatile("diag %0,0,0x9c"
    432		     : : "d" (pcpu_devices[cpu].address));
    433}
    434EXPORT_SYMBOL_GPL(smp_yield_cpu);
    435
    436/*
    437 * Send cpus emergency shutdown signal. This gives the cpus the
    438 * opportunity to complete outstanding interrupts.
    439 */
    440void notrace smp_emergency_stop(void)
    441{
    442	static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
    443	static cpumask_t cpumask;
    444	u64 end;
    445	int cpu;
    446
    447	arch_spin_lock(&lock);
    448	cpumask_copy(&cpumask, cpu_online_mask);
    449	cpumask_clear_cpu(smp_processor_id(), &cpumask);
    450
    451	end = get_tod_clock() + (1000000UL << 12);
    452	for_each_cpu(cpu, &cpumask) {
    453		struct pcpu *pcpu = pcpu_devices + cpu;
    454		set_bit(ec_stop_cpu, &pcpu->ec_mask);
    455		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
    456				   0, NULL) == SIGP_CC_BUSY &&
    457		       get_tod_clock() < end)
    458			cpu_relax();
    459	}
    460	while (get_tod_clock() < end) {
    461		for_each_cpu(cpu, &cpumask)
    462			if (pcpu_stopped(pcpu_devices + cpu))
    463				cpumask_clear_cpu(cpu, &cpumask);
    464		if (cpumask_empty(&cpumask))
    465			break;
    466		cpu_relax();
    467	}
    468	arch_spin_unlock(&lock);
    469}
    470NOKPROBE_SYMBOL(smp_emergency_stop);
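/*
 * The timeout above is one second: bit 51 of the TOD clock ticks once
 * per microsecond, i.e. 1 microsecond == 1 << 12 TOD units, so
 * 1000000UL << 12 is one second worth of TOD clock ticks.
 */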
    471
    472/*
    473 * Stop all cpus but the current one.
    474 */
    475void smp_send_stop(void)
    476{
    477	int cpu;
    478
    479	/* Disable all interrupts/machine checks */
    480	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
    481	trace_hardirqs_off();
    482
    483	debug_set_critical();
    484
    485	if (oops_in_progress)
    486		smp_emergency_stop();
    487
    488	/* stop all processors */
    489	for_each_online_cpu(cpu) {
    490		if (cpu == smp_processor_id())
    491			continue;
    492		pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
    493		while (!pcpu_stopped(pcpu_devices + cpu))
    494			cpu_relax();
    495	}
    496}
    497
    498/*
    499 * This is the main routine where commands issued by other
    500 * cpus are handled.
    501 */
    502static void smp_handle_ext_call(void)
    503{
    504	unsigned long bits;
    505
    506	/* handle bit signal external calls */
    507	bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
    508	if (test_bit(ec_stop_cpu, &bits))
    509		smp_stop_cpu();
    510	if (test_bit(ec_schedule, &bits))
    511		scheduler_ipi();
    512	if (test_bit(ec_call_function_single, &bits))
    513		generic_smp_call_function_single_interrupt();
    514	if (test_bit(ec_mcck_pending, &bits))
    515		__s390_handle_mcck();
    516	if (test_bit(ec_irq_work, &bits))
    517		irq_work_run();
    518}
    519
    520static void do_ext_call_interrupt(struct ext_code ext_code,
    521				  unsigned int param32, unsigned long param64)
    522{
    523	inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
    524	smp_handle_ext_call();
    525}
    526
    527void arch_send_call_function_ipi_mask(const struct cpumask *mask)
    528{
    529	int cpu;
    530
    531	for_each_cpu(cpu, mask)
    532		pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
    533}
    534
    535void arch_send_call_function_single_ipi(int cpu)
    536{
    537	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
    538}
    539
    540/*
     541 * This function sends a 'reschedule' IPI to another CPU.
     542 * It goes straight through and wastes no time serializing
    543 * anything. Worst case is that we lose a reschedule ...
    544 */
    545void smp_send_reschedule(int cpu)
    546{
    547	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
    548}
    549
    550#ifdef CONFIG_IRQ_WORK
    551void arch_irq_work_raise(void)
    552{
    553	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
    554}
    555#endif
    556
    557/*
    558 * parameter area for the set/clear control bit callbacks
    559 */
    560struct ec_creg_mask_parms {
    561	unsigned long orval;
    562	unsigned long andval;
    563	int cr;
    564};
    565
    566/*
    567 * callback for setting/clearing control bits
    568 */
    569static void smp_ctl_bit_callback(void *info)
    570{
    571	struct ec_creg_mask_parms *pp = info;
    572	unsigned long cregs[16];
    573
    574	__ctl_store(cregs, 0, 15);
    575	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
    576	__ctl_load(cregs, 0, 15);
    577}
    578
    579static DEFINE_SPINLOCK(ctl_lock);
    580
    581void smp_ctl_set_clear_bit(int cr, int bit, bool set)
    582{
    583	struct ec_creg_mask_parms parms = { .cr = cr, };
    584	u64 ctlreg;
    585
    586	if (set) {
    587		parms.orval = 1UL << bit;
    588		parms.andval = -1UL;
    589	} else {
    590		parms.orval = 0;
    591		parms.andval = ~(1UL << bit);
    592	}
    593	spin_lock(&ctl_lock);
    594	get_abs_lowcore(ctlreg, cregs_save_area[cr]);
    595	ctlreg = (ctlreg & parms.andval) | parms.orval;
    596	put_abs_lowcore(cregs_save_area[cr], ctlreg);
    597	spin_unlock(&ctl_lock);
    598	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
    599}
    600EXPORT_SYMBOL(smp_ctl_set_clear_bit);
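/*
 * A minimal usage sketch (the bit number is purely illustrative): the
 * helper keeps the control register image in the absolute lowcore in
 * sync and then updates the live control registers on every online cpu
 * via on_each_cpu().
 */
static __maybe_unused void example_set_cr0_bit(void)
{
	smp_ctl_set_clear_bit(0, 10, true);	/* set bit 10 of CR0 on all cpus */
}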
    601
    602#ifdef CONFIG_CRASH_DUMP
    603
    604int smp_store_status(int cpu)
    605{
    606	struct lowcore *lc;
    607	struct pcpu *pcpu;
    608	unsigned long pa;
    609
    610	pcpu = pcpu_devices + cpu;
    611	lc = lowcore_ptr[cpu];
    612	pa = __pa(&lc->floating_pt_save_area);
    613	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
    614			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
    615		return -EIO;
    616	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
    617		return 0;
    618	pa = lc->mcesad & MCESA_ORIGIN_MASK;
    619	if (MACHINE_HAS_GS)
    620		pa |= lc->mcesad & MCESA_LC_MASK;
    621	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
    622			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
    623		return -EIO;
    624	return 0;
    625}
    626
    627/*
    628 * Collect CPU state of the previous, crashed system.
    629 * There are four cases:
    630 * 1) standard zfcp/nvme dump
    631 *    condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
    632 *    The state for all CPUs except the boot CPU needs to be collected
    633 *    with sigp stop-and-store-status. The boot CPU state is located in
    634 *    the absolute lowcore of the memory stored in the HSA. The zcore code
    635 *    will copy the boot CPU state from the HSA.
    636 * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
    637 *    condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
    638 *    The state for all CPUs except the boot CPU needs to be collected
    639 *    with sigp stop-and-store-status. The firmware or the boot-loader
    640 *    stored the registers of the boot CPU in the absolute lowcore in the
    641 *    memory of the old system.
    642 * 3) kdump and the old kernel did not store the CPU state,
    643 *    or stand-alone kdump for DASD
    644 *    condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
    645 *    The state for all CPUs except the boot CPU needs to be collected
    646 *    with sigp stop-and-store-status. The kexec code or the boot-loader
    647 *    stored the registers of the boot CPU in the memory of the old system.
    648 * 4) kdump and the old kernel stored the CPU state
    649 *    condition: OLDMEM_BASE != NULL && is_kdump_kernel()
    650 *    This case does not exist for s390 anymore, setup_arch explicitly
    651 *    deactivates the elfcorehdr= kernel parameter
    652 */
    653static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
    654				     bool is_boot_cpu, __vector128 *vxrs)
    655{
    656	if (is_boot_cpu)
    657		vxrs = boot_cpu_vector_save_area;
    658	else
    659		__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
    660	save_area_add_vxrs(sa, vxrs);
    661}
    662
    663static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
    664				     bool is_boot_cpu, void *regs)
    665{
    666	if (is_boot_cpu)
    667		copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
    668	else
    669		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
    670	save_area_add_regs(sa, regs);
    671}
    672
    673void __init smp_save_dump_cpus(void)
    674{
    675	int addr, boot_cpu_addr, max_cpu_addr;
    676	struct save_area *sa;
    677	bool is_boot_cpu;
    678	void *page;
    679
    680	if (!(oldmem_data.start || is_ipl_type_dump()))
    681		/* No previous system present, normal boot. */
    682		return;
    683	/* Allocate a page as dumping area for the store status sigps */
    684	page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
    685	if (!page)
    686		panic("ERROR: Failed to allocate %lx bytes below %lx\n",
    687		      PAGE_SIZE, 1UL << 31);
    688
    689	/* Set multi-threading state to the previous system. */
    690	pcpu_set_smt(sclp.mtid_prev);
    691	boot_cpu_addr = stap();
    692	max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
    693	for (addr = 0; addr <= max_cpu_addr; addr++) {
    694		if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
    695		    SIGP_CC_NOT_OPERATIONAL)
    696			continue;
    697		is_boot_cpu = (addr == boot_cpu_addr);
    698		/* Allocate save area */
    699		sa = save_area_alloc(is_boot_cpu);
    700		if (!sa)
    701			panic("could not allocate memory for save area\n");
    702		if (MACHINE_HAS_VX)
    703			/* Get the vector registers */
    704			smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
    705		/*
    706		 * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
    707		 * of the boot CPU are stored in the HSA. To retrieve
    708		 * these registers an SCLP request is required which is
    709		 * done by drivers/s390/char/zcore.c:init_cpu_info()
    710		 */
    711		if (!is_boot_cpu || oldmem_data.start)
    712			/* Get the CPU registers */
    713			smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
    714	}
    715	memblock_free(page, PAGE_SIZE);
    716	diag_amode31_ops.diag308_reset();
    717	pcpu_set_smt(0);
    718}
    719#endif /* CONFIG_CRASH_DUMP */
    720
    721void smp_cpu_set_polarization(int cpu, int val)
    722{
    723	pcpu_devices[cpu].polarization = val;
    724}
    725
    726int smp_cpu_get_polarization(int cpu)
    727{
    728	return pcpu_devices[cpu].polarization;
    729}
    730
    731int smp_cpu_get_cpu_address(int cpu)
    732{
    733	return pcpu_devices[cpu].address;
    734}
    735
    736static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
    737{
    738	static int use_sigp_detection;
    739	int address;
    740
    741	if (use_sigp_detection || sclp_get_core_info(info, early)) {
    742		use_sigp_detection = 1;
    743		for (address = 0;
    744		     address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
    745		     address += (1U << smp_cpu_mt_shift)) {
    746			if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
    747			    SIGP_CC_NOT_OPERATIONAL)
    748				continue;
    749			info->core[info->configured].core_id =
    750				address >> smp_cpu_mt_shift;
    751			info->configured++;
    752		}
    753		info->combined = info->configured;
    754	}
    755}
    756
    757static int smp_add_present_cpu(int cpu);
    758
    759static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
    760			bool configured, bool early)
    761{
    762	struct pcpu *pcpu;
    763	int cpu, nr, i;
    764	u16 address;
    765
    766	nr = 0;
    767	if (sclp.has_core_type && core->type != boot_core_type)
    768		return nr;
    769	cpu = cpumask_first(avail);
    770	address = core->core_id << smp_cpu_mt_shift;
    771	for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
    772		if (pcpu_find_address(cpu_present_mask, address + i))
    773			continue;
    774		pcpu = pcpu_devices + cpu;
    775		pcpu->address = address + i;
    776		if (configured)
    777			pcpu->state = CPU_STATE_CONFIGURED;
    778		else
    779			pcpu->state = CPU_STATE_STANDBY;
    780		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
    781		set_cpu_present(cpu, true);
    782		if (!early && smp_add_present_cpu(cpu) != 0)
    783			set_cpu_present(cpu, false);
    784		else
    785			nr++;
    786		cpumask_clear_cpu(cpu, avail);
    787		cpu = cpumask_next(cpu, avail);
    788	}
    789	return nr;
    790}
    791
    792static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
    793{
    794	struct sclp_core_entry *core;
    795	static cpumask_t avail;
    796	bool configured;
    797	u16 core_id;
    798	int nr, i;
    799
    800	cpus_read_lock();
    801	mutex_lock(&smp_cpu_state_mutex);
    802	nr = 0;
    803	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
    804	/*
    805	 * Add IPL core first (which got logical CPU number 0) to make sure
    806	 * that all SMT threads get subsequent logical CPU numbers.
    807	 */
    808	if (early) {
    809		core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
    810		for (i = 0; i < info->configured; i++) {
    811			core = &info->core[i];
    812			if (core->core_id == core_id) {
    813				nr += smp_add_core(core, &avail, true, early);
    814				break;
    815			}
    816		}
    817	}
    818	for (i = 0; i < info->combined; i++) {
    819		configured = i < info->configured;
    820		nr += smp_add_core(&info->core[i], &avail, configured, early);
    821	}
    822	mutex_unlock(&smp_cpu_state_mutex);
    823	cpus_read_unlock();
    824	return nr;
    825}
    826
    827void __init smp_detect_cpus(void)
    828{
    829	unsigned int cpu, mtid, c_cpus, s_cpus;
    830	struct sclp_core_info *info;
    831	u16 address;
    832
    833	/* Get CPU information */
    834	info = memblock_alloc(sizeof(*info), 8);
    835	if (!info)
    836		panic("%s: Failed to allocate %zu bytes align=0x%x\n",
    837		      __func__, sizeof(*info), 8);
    838	smp_get_core_info(info, 1);
    839	/* Find boot CPU type */
    840	if (sclp.has_core_type) {
    841		address = stap();
    842		for (cpu = 0; cpu < info->combined; cpu++)
    843			if (info->core[cpu].core_id == address) {
    844				/* The boot cpu dictates the cpu type. */
    845				boot_core_type = info->core[cpu].type;
    846				break;
    847			}
    848		if (cpu >= info->combined)
    849			panic("Could not find boot CPU type");
    850	}
    851
    852	/* Set multi-threading state for the current system */
    853	mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
    854	mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
    855	pcpu_set_smt(mtid);
    856
    857	/* Print number of CPUs */
    858	c_cpus = s_cpus = 0;
    859	for (cpu = 0; cpu < info->combined; cpu++) {
    860		if (sclp.has_core_type &&
    861		    info->core[cpu].type != boot_core_type)
    862			continue;
    863		if (cpu < info->configured)
    864			c_cpus += smp_cpu_mtid + 1;
    865		else
    866			s_cpus += smp_cpu_mtid + 1;
    867	}
    868	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
    869
    870	/* Add CPUs present at boot */
    871	__smp_rescan_cpus(info, true);
    872	memblock_free(info, sizeof(*info));
    873}
    874
    875/*
    876 *	Activate a secondary processor.
    877 */
    878static void smp_start_secondary(void *cpuvoid)
    879{
    880	int cpu = raw_smp_processor_id();
    881
    882	S390_lowcore.last_update_clock = get_tod_clock();
    883	S390_lowcore.restart_stack = (unsigned long)restart_stack;
    884	S390_lowcore.restart_fn = (unsigned long)do_restart;
    885	S390_lowcore.restart_data = 0;
    886	S390_lowcore.restart_source = -1U;
    887	S390_lowcore.restart_flags = 0;
    888	restore_access_regs(S390_lowcore.access_regs_save_area);
    889	cpu_init();
    890	rcu_cpu_starting(cpu);
    891	init_cpu_timer();
    892	vtime_init();
    893	vdso_getcpu_init();
    894	pfault_init();
    895	cpumask_set_cpu(cpu, &cpu_setup_mask);
    896	update_cpu_masks();
    897	notify_cpu_starting(cpu);
    898	if (topology_cpu_dedicated(cpu))
    899		set_cpu_flag(CIF_DEDICATED_CPU);
    900	else
    901		clear_cpu_flag(CIF_DEDICATED_CPU);
    902	set_cpu_online(cpu, true);
    903	inc_irq_stat(CPU_RST);
    904	local_irq_enable();
    905	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
    906}
    907
    908/* Upping and downing of CPUs */
    909int __cpu_up(unsigned int cpu, struct task_struct *tidle)
    910{
    911	struct pcpu *pcpu = pcpu_devices + cpu;
    912	int rc;
    913
    914	if (pcpu->state != CPU_STATE_CONFIGURED)
    915		return -EIO;
    916	if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
    917	    SIGP_CC_ORDER_CODE_ACCEPTED)
    918		return -EIO;
    919
    920	rc = pcpu_alloc_lowcore(pcpu, cpu);
    921	if (rc)
    922		return rc;
    923	pcpu_prepare_secondary(pcpu, cpu);
    924	pcpu_attach_task(pcpu, tidle);
    925	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
    926	/* Wait until cpu puts itself in the online & active maps */
    927	while (!cpu_online(cpu))
    928		cpu_relax();
    929	return 0;
    930}
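/*
 * __cpu_up() is the arch side of generic cpu hotplug bring-up: check the
 * configured state, reset the target with SIGP initial-cpu-reset,
 * allocate and install its lowcore (SIGP set-prefix), attach the idle
 * task's kernel stack, then SIGP restart the target into
 * smp_start_secondary() and spin until it marks itself online.
 */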
    931
    932static unsigned int setup_possible_cpus __initdata;
    933
    934static int __init _setup_possible_cpus(char *s)
    935{
    936	get_option(&s, &setup_possible_cpus);
    937	return 0;
    938}
    939early_param("possible_cpus", _setup_possible_cpus);
    940
    941int __cpu_disable(void)
    942{
    943	unsigned long cregs[16];
    944	int cpu;
    945
    946	/* Handle possible pending IPIs */
    947	smp_handle_ext_call();
    948	cpu = smp_processor_id();
    949	set_cpu_online(cpu, false);
    950	cpumask_clear_cpu(cpu, &cpu_setup_mask);
    951	update_cpu_masks();
    952	/* Disable pseudo page faults on this cpu. */
    953	pfault_fini();
    954	/* Disable interrupt sources via control register. */
    955	__ctl_store(cregs, 0, 15);
    956	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */
    957	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
    958	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
    959	__ctl_load(cregs, 0, 15);
    960	clear_cpu_flag(CIF_NOHZ_DELAY);
    961	return 0;
    962}
    963
    964void __cpu_die(unsigned int cpu)
    965{
    966	struct pcpu *pcpu;
    967
    968	/* Wait until target cpu is down */
    969	pcpu = pcpu_devices + cpu;
    970	while (!pcpu_stopped(pcpu))
    971		cpu_relax();
    972	pcpu_free_lowcore(pcpu);
    973	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
    974	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
    975}
    976
    977void __noreturn cpu_die(void)
    978{
    979	idle_task_exit();
    980	__bpon();
    981	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
    982	for (;;) ;
    983}
    984
    985void __init smp_fill_possible_mask(void)
    986{
    987	unsigned int possible, sclp_max, cpu;
    988
    989	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
    990	sclp_max = min(smp_max_threads, sclp_max);
    991	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
    992	possible = setup_possible_cpus ?: nr_cpu_ids;
    993	possible = min(possible, sclp_max);
    994	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
    995		set_cpu_possible(cpu, true);
    996}
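/*
 * A worked example of the sizing above: with sclp.max_cores = 8,
 * max(sclp.mtid, sclp.mtid_cp) = 1 (two threads per core) and no "smt="
 * or "possible_cpus=" limit, sclp_max becomes 8 * 2 = 16, so cpus 0..15
 * are marked possible (further capped by nr_cpu_ids).
 */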
    997
    998void __init smp_prepare_cpus(unsigned int max_cpus)
    999{
   1000	/* request the 0x1201 emergency signal external interrupt */
   1001	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
   1002		panic("Couldn't request external interrupt 0x1201");
   1003	/* request the 0x1202 external call external interrupt */
   1004	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
   1005		panic("Couldn't request external interrupt 0x1202");
   1006}
   1007
   1008void __init smp_prepare_boot_cpu(void)
   1009{
   1010	struct pcpu *pcpu = pcpu_devices;
   1011
   1012	WARN_ON(!cpu_present(0) || !cpu_online(0));
   1013	pcpu->state = CPU_STATE_CONFIGURED;
   1014	S390_lowcore.percpu_offset = __per_cpu_offset[0];
   1015	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
   1016}
   1017
   1018void __init smp_setup_processor_id(void)
   1019{
   1020	pcpu_devices[0].address = stap();
   1021	S390_lowcore.cpu_nr = 0;
   1022	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
   1023	S390_lowcore.spinlock_index = 0;
   1024}
   1025
   1026/*
    1027 * The frequency of the profiling timer can be changed
    1028 * by writing a multiplier value into /proc/profile.
    1029 *
    1030 * Usually you want to run this on all CPUs ;)
   1031 */
   1032int setup_profiling_timer(unsigned int multiplier)
   1033{
   1034	return 0;
   1035}
   1036
   1037static ssize_t cpu_configure_show(struct device *dev,
   1038				  struct device_attribute *attr, char *buf)
   1039{
   1040	ssize_t count;
   1041
   1042	mutex_lock(&smp_cpu_state_mutex);
   1043	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
   1044	mutex_unlock(&smp_cpu_state_mutex);
   1045	return count;
   1046}
   1047
   1048static ssize_t cpu_configure_store(struct device *dev,
   1049				   struct device_attribute *attr,
   1050				   const char *buf, size_t count)
   1051{
   1052	struct pcpu *pcpu;
   1053	int cpu, val, rc, i;
   1054	char delim;
   1055
   1056	if (sscanf(buf, "%d %c", &val, &delim) != 1)
   1057		return -EINVAL;
   1058	if (val != 0 && val != 1)
   1059		return -EINVAL;
   1060	cpus_read_lock();
   1061	mutex_lock(&smp_cpu_state_mutex);
   1062	rc = -EBUSY;
   1063	/* disallow configuration changes of online cpus and cpu 0 */
   1064	cpu = dev->id;
   1065	cpu = smp_get_base_cpu(cpu);
   1066	if (cpu == 0)
   1067		goto out;
   1068	for (i = 0; i <= smp_cpu_mtid; i++)
   1069		if (cpu_online(cpu + i))
   1070			goto out;
   1071	pcpu = pcpu_devices + cpu;
   1072	rc = 0;
   1073	switch (val) {
   1074	case 0:
   1075		if (pcpu->state != CPU_STATE_CONFIGURED)
   1076			break;
   1077		rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
   1078		if (rc)
   1079			break;
   1080		for (i = 0; i <= smp_cpu_mtid; i++) {
   1081			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
   1082				continue;
   1083			pcpu[i].state = CPU_STATE_STANDBY;
   1084			smp_cpu_set_polarization(cpu + i,
   1085						 POLARIZATION_UNKNOWN);
   1086		}
   1087		topology_expect_change();
   1088		break;
   1089	case 1:
   1090		if (pcpu->state != CPU_STATE_STANDBY)
   1091			break;
   1092		rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
   1093		if (rc)
   1094			break;
   1095		for (i = 0; i <= smp_cpu_mtid; i++) {
   1096			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
   1097				continue;
   1098			pcpu[i].state = CPU_STATE_CONFIGURED;
   1099			smp_cpu_set_polarization(cpu + i,
   1100						 POLARIZATION_UNKNOWN);
   1101		}
   1102		topology_expect_change();
   1103		break;
   1104	default:
   1105		break;
   1106	}
   1107out:
   1108	mutex_unlock(&smp_cpu_state_mutex);
   1109	cpus_read_unlock();
   1110	return rc ? rc : count;
   1111}
   1112static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
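/*
 * The attribute above should show up as /sys/devices/system/cpu/cpuN/configure.
 * Writing 1 asks the SCLP to configure a standby core, writing 0
 * deconfigures it; both are rejected for cpu 0 and for cores that still
 * have online threads, e.g.:
 *
 *	echo 1 > /sys/devices/system/cpu/cpu2/configure
 */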
   1113
   1114static ssize_t show_cpu_address(struct device *dev,
   1115				struct device_attribute *attr, char *buf)
   1116{
   1117	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
   1118}
   1119static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
   1120
   1121static struct attribute *cpu_common_attrs[] = {
   1122	&dev_attr_configure.attr,
   1123	&dev_attr_address.attr,
   1124	NULL,
   1125};
   1126
   1127static struct attribute_group cpu_common_attr_group = {
   1128	.attrs = cpu_common_attrs,
   1129};
   1130
   1131static struct attribute *cpu_online_attrs[] = {
   1132	&dev_attr_idle_count.attr,
   1133	&dev_attr_idle_time_us.attr,
   1134	NULL,
   1135};
   1136
   1137static struct attribute_group cpu_online_attr_group = {
   1138	.attrs = cpu_online_attrs,
   1139};
   1140
   1141static int smp_cpu_online(unsigned int cpu)
   1142{
   1143	struct device *s = &per_cpu(cpu_device, cpu)->dev;
   1144
   1145	return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
   1146}
   1147
   1148static int smp_cpu_pre_down(unsigned int cpu)
   1149{
   1150	struct device *s = &per_cpu(cpu_device, cpu)->dev;
   1151
   1152	sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
   1153	return 0;
   1154}
   1155
   1156static int smp_add_present_cpu(int cpu)
   1157{
   1158	struct device *s;
   1159	struct cpu *c;
   1160	int rc;
   1161
   1162	c = kzalloc(sizeof(*c), GFP_KERNEL);
   1163	if (!c)
   1164		return -ENOMEM;
   1165	per_cpu(cpu_device, cpu) = c;
   1166	s = &c->dev;
   1167	c->hotpluggable = 1;
   1168	rc = register_cpu(c, cpu);
   1169	if (rc)
   1170		goto out;
   1171	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
   1172	if (rc)
   1173		goto out_cpu;
   1174	rc = topology_cpu_init(c);
   1175	if (rc)
   1176		goto out_topology;
   1177	return 0;
   1178
   1179out_topology:
   1180	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
   1181out_cpu:
   1182	unregister_cpu(c);
   1183out:
   1184	return rc;
   1185}
   1186
   1187int __ref smp_rescan_cpus(void)
   1188{
   1189	struct sclp_core_info *info;
   1190	int nr;
   1191
   1192	info = kzalloc(sizeof(*info), GFP_KERNEL);
   1193	if (!info)
   1194		return -ENOMEM;
   1195	smp_get_core_info(info, 0);
   1196	nr = __smp_rescan_cpus(info, false);
   1197	kfree(info);
   1198	if (nr)
   1199		topology_schedule_update();
   1200	return 0;
   1201}
   1202
   1203static ssize_t __ref rescan_store(struct device *dev,
   1204				  struct device_attribute *attr,
   1205				  const char *buf,
   1206				  size_t count)
   1207{
   1208	int rc;
   1209
   1210	rc = lock_device_hotplug_sysfs();
   1211	if (rc)
   1212		return rc;
   1213	rc = smp_rescan_cpus();
   1214	unlock_device_hotplug();
   1215	return rc ? rc : count;
   1216}
   1217static DEVICE_ATTR_WO(rescan);
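/*
 * rescan_store() backs the write-only /sys/devices/system/cpu/rescan
 * attribute: writing anything re-reads the core list from the SCLP and
 * registers newly available standby cpus, scheduling a topology update
 * if something was added, e.g.:
 *
 *	echo 1 > /sys/devices/system/cpu/rescan
 */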
   1218
   1219static int __init s390_smp_init(void)
   1220{
   1221	int cpu, rc = 0;
   1222
   1223	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
   1224	if (rc)
   1225		return rc;
   1226	for_each_present_cpu(cpu) {
   1227		rc = smp_add_present_cpu(cpu);
   1228		if (rc)
   1229			goto out;
   1230	}
   1231
   1232	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
   1233			       smp_cpu_online, smp_cpu_pre_down);
   1234	rc = rc <= 0 ? rc : 0;
   1235out:
   1236	return rc;
   1237}
   1238subsys_initcall(s390_smp_init);
   1239
   1240static __always_inline void set_new_lowcore(struct lowcore *lc)
   1241{
   1242	union register_pair dst, src;
   1243	u32 pfx;
   1244
   1245	src.even = (unsigned long) &S390_lowcore;
   1246	src.odd  = sizeof(S390_lowcore);
   1247	dst.even = (unsigned long) lc;
   1248	dst.odd  = sizeof(*lc);
   1249	pfx = __pa(lc);
   1250
   1251	asm volatile(
   1252		"	mvcl	%[dst],%[src]\n"
   1253		"	spx	%[pfx]\n"
   1254		: [dst] "+&d" (dst.pair), [src] "+&d" (src.pair)
   1255		: [pfx] "Q" (pfx)
   1256		: "memory", "cc");
   1257}
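/*
 * set_new_lowcore() copies the current prefix page contents into the
 * freshly allocated lowcore with MVCL and then issues SET PREFIX (spx),
 * redirecting this cpu's low real addresses to the new copy before the
 * memblock-allocated IPL lowcore is freed below.
 */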
   1258
   1259static int __init smp_reinit_ipl_cpu(void)
   1260{
   1261	unsigned long async_stack, nodat_stack, mcck_stack;
   1262	struct lowcore *lc, *lc_ipl;
   1263	unsigned long flags, cr0;
   1264	u64 mcesad;
   1265
   1266	lc_ipl = lowcore_ptr[0];
   1267	lc = (struct lowcore *)	__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
   1268	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
   1269	async_stack = stack_alloc();
   1270	mcck_stack = stack_alloc();
   1271	if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
   1272		panic("Couldn't allocate memory");
   1273
   1274	local_irq_save(flags);
   1275	local_mcck_disable();
   1276	set_new_lowcore(lc);
   1277	S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
   1278	S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
   1279	S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
   1280	__ctl_store(cr0, 0, 0);
   1281	__ctl_clear_bit(0, 28); /* disable lowcore protection */
   1282	S390_lowcore.mcesad = mcesad;
   1283	__ctl_load(cr0, 0, 0);
   1284	lowcore_ptr[0] = lc;
   1285	local_mcck_enable();
   1286	local_irq_restore(flags);
   1287
   1288	free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER);
   1289	memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE);
   1290	memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl));
   1291
   1292	return 0;
   1293}
   1294early_initcall(smp_reinit_ipl_cpu);