cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

setup.c (29669B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  64-bit pSeries and RS/6000 setup code.
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Adapted from 'alpha' version by Gary Thomas
 *  Modified by Cort Dougan (cort@cs.nmt.edu)
 *  Modified by PPC64 Team, IBM Corp
 */

/*
 * bootup setup stuff..
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/major.h>
#include <linux/interrupt.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/pci.h>
#include <linux/utsname.h>
#include <linux/adb.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_pci.h>
#include <linux/memblock.h>
#include <linux/swiotlb.h>

#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/rtas.h>
#include <asm/pci-bridge.h>
#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/time.h>
#include <asm/nvram.h>
#include <asm/pmc.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
#include <asm/smp.h>
#include <asm/firmware.h>
#include <asm/eeh.h>
#include <asm/reg.h>
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/isa-bridge.h>
#include <asm/security_features.h>
#include <asm/asm-const.h>
#include <asm/idle.h>
#include <asm/swiotlb.h>
#include <asm/svm.h>
#include <asm/dtl.h>
#include <asm/hvconsole.h>

#include "pseries.h"

DEFINE_STATIC_KEY_FALSE(shared_processor);
EXPORT_SYMBOL(shared_processor);

int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
EXPORT_SYMBOL(CMO_PageSize);

int fwnmi_active;  /* TRUE if an FWNMI handler is present */
int ibm_nmi_interlock_token;
u32 pseries_security_flavor;

static void pSeries_show_cpuinfo(struct seq_file *m)
{
	struct device_node *root;
	const char *model = "";

	root = of_find_node_by_path("/");
	if (root)
		model = of_get_property(root, "model", NULL);
	seq_printf(m, "machine\t\t: CHRP %s\n", model);
	of_node_put(root);
	if (radix_enabled())
		seq_printf(m, "MMU\t\t: Radix\n");
	else
		seq_printf(m, "MMU\t\t: Hash\n");
}

/* Initialize firmware assisted non-maskable interrupts if
 * the firmware supports this feature.
 */
static void __init fwnmi_init(void)
{
	unsigned long system_reset_addr, machine_check_addr;
	u8 *mce_data_buf;
	unsigned int i;
	int nr_cpus = num_possible_cpus();
#ifdef CONFIG_PPC_64S_HASH_MMU
	struct slb_entry *slb_ptr;
	size_t size;
#endif
	int ibm_nmi_register_token;

	ibm_nmi_register_token = rtas_token("ibm,nmi-register");
	if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE)
		return;

	ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock");
	if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE))
		return;

	/* If the kernel's not linked at zero we point the firmware at low
	 * addresses anyway, and use a trampoline to get to the real code. */
	system_reset_addr  = __pa(system_reset_fwnmi) - PHYSICAL_START;
	machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;

	if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL,
			   system_reset_addr, machine_check_addr))
		fwnmi_active = 1;

	/*
	 * Allocate a chunk for per cpu buffer to hold rtas errorlog.
	 * It will be used in real mode mce handler, hence it needs to be
	 * below RMA.
	 */
	mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus,
					RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT,
					ppc64_rma_size, NUMA_NO_NODE);
	if (!mce_data_buf)
		panic("Failed to allocate %d bytes below %pa for MCE buffer\n",
		      RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size);

	for_each_possible_cpu(i) {
		paca_ptrs[i]->mce_data_buf = mce_data_buf +
						(RTAS_ERROR_LOG_MAX * i);
	}

#ifdef CONFIG_PPC_64S_HASH_MMU
	if (!radix_enabled()) {
		/* Allocate per cpu area to save old slb contents during MCE */
		size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
		slb_ptr = memblock_alloc_try_nid_raw(size,
				sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
				ppc64_rma_size, NUMA_NO_NODE);
		if (!slb_ptr)
			panic("Failed to allocate %zu bytes below %pa for slb area\n",
			      size, &ppc64_rma_size);

		for_each_possible_cpu(i)
			paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
	}
#endif
}
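
/*
 * Layout sketch for the MCE buffer carved up above (illustrative only,
 * assuming RTAS_ERROR_LOG_MAX is 2048 as on ppc64 and four possible cpus):
 *
 *	cpu0: mce_data_buf + 0
 *	cpu1: mce_data_buf + 2048
 *	cpu2: mce_data_buf + 4096
 *	cpu3: mce_data_buf + 6144
 *
 * One contiguous memblock allocation below the RMA keeps every per-cpu
 * errorlog reachable from the real-mode machine check handler.
 */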

static void pseries_8259_cascade(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	unsigned int cascade_irq = i8259_irq();

	if (cascade_irq)
		generic_handle_irq(cascade_irq);

	chip->irq_eoi(&desc->irq_data);
}

static void __init pseries_setup_i8259_cascade(void)
{
	struct device_node *np, *old, *found = NULL;
	unsigned int cascade;
	const u32 *addrp;
	unsigned long intack = 0;
	int naddr;

	for_each_node_by_type(np, "interrupt-controller") {
		if (of_device_is_compatible(np, "chrp,iic")) {
			found = np;
			break;
		}
	}

	if (found == NULL) {
		printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
		return;
	}

	cascade = irq_of_parse_and_map(found, 0);
	if (!cascade) {
		printk(KERN_ERR "pic: failed to map cascade interrupt\n");
		return;
	}
	pr_debug("pic: cascade mapped to irq %d\n", cascade);

	for (old = of_node_get(found); old != NULL ; old = np) {
		np = of_get_parent(old);
		of_node_put(old);
		if (np == NULL)
			break;
		if (!of_node_name_eq(np, "pci"))
			continue;
		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
		if (addrp == NULL)
			continue;
		naddr = of_n_addr_cells(np);
		intack = addrp[naddr-1];
		if (naddr > 1)
			intack |= ((unsigned long)addrp[naddr-2]) << 32;
	}
	if (intack)
		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
	i8259_init(found, intack);
	of_node_put(found);
	irq_set_chained_handler(cascade, pseries_8259_cascade);
}
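
/*
 * Worked example for the intack assembly above (values are made up): with
 * naddr == 3 and addrp == { 0x0, 0xf, 0x80000000 }, the low word is
 * addrp[2] == 0x80000000 and the high word addrp[1] == 0xf is shifted up,
 * giving intack == 0xf80000000. Only the last two address cells matter,
 * which is why a single value is assembled regardless of #address-cells.
 */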

static void __init pseries_init_irq(void)
{
	/* Try using a XIVE if available, otherwise use a XICS */
	if (!xive_spapr_init()) {
		xics_init();
		pseries_setup_i8259_cascade();
	}
}

static void pseries_lpar_enable_pmcs(void)
{
	unsigned long set, reset;

	set = 1UL << 63;
	reset = 0;
	plpar_hcall_norets(H_PERFMON, set, reset);
}
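
/*
 * Note on the H_PERFMON arguments above (hedged reading of the hypercall
 * interface): PAPR numbers bits from the most significant end, so
 * 1UL << 63 sets what the architecture calls bit 0 of the "set" mask,
 * i.e. the enable bit for performance monitoring. The authoritative bit
 * definitions live in PAPR, not in this file.
 */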

static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *parent, *np = rd->dn;
	struct pci_dn *pdn;
	int err = NOTIFY_OK;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		parent = of_get_parent(np);
		pdn = parent ? PCI_DN(parent) : NULL;
		if (pdn)
			pci_add_device_node_info(pdn->phb, np);

		of_node_put(parent);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pdn = PCI_DN(np);
		if (pdn)
			list_del(&pdn->list);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block pci_dn_reconfig_nb = {
	.notifier_call = pci_dn_reconfig_notifier,
};

struct kmem_cache *dtl_cache;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Allocate space for the dispatch trace log for all possible cpus
 * and register the buffers with the hypervisor.  This is used for
 * computing time stolen by the hypervisor.
 */
static int alloc_dispatch_logs(void)
{
	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (!dtl_cache)
		return 0;

	alloc_dtl_buffers(0);

	/* Register the DTL for the current (boot) cpu */
	register_dtl_buffer(smp_processor_id());

	return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
	return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

static int alloc_dispatch_log_kmem_cache(void)
{
	void (*ctor)(void *) = get_dtl_cache_ctor();

	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
						DISPATCH_LOG_BYTES, 0, ctor);
	if (!dtl_cache) {
		pr_warn("Failed to create dispatch trace log buffer cache\n");
		pr_warn("Stolen time statistics will be unreliable\n");
		return 0;
	}

	return alloc_dispatch_logs();
}
machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
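
/*
 * Design note (hedged): the "dtl" cache above is created with its align
 * argument equal to DISPATCH_LOG_BYTES, so each dispatch trace log buffer
 * is naturally aligned to its own size. That guarantees a buffer never
 * straddles an alignment boundary, which the hypervisor-side registration
 * of DTL buffers expects.
 */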

DEFINE_PER_CPU(u64, idle_spurr_cycles);
DEFINE_PER_CPU(u64, idle_entry_purr_snap);
DEFINE_PER_CPU(u64, idle_entry_spurr_snap);
static void pseries_lpar_idle(void)
{
	/*
	 * Default handler to go into low thread priority and possibly
	 * low power mode by ceding processor to hypervisor
	 */

	if (!prep_irq_for_idle())
		return;

	/* Indicate to hypervisor that we are idle. */
	pseries_idle_prolog();

	/*
	 * Yield the processor to the hypervisor.  We return if
	 * an external interrupt occurs (which are driven prior
	 * to returning here) or if a prod occurs from another
	 * processor. When returning here, external interrupts
	 * are enabled.
	 */
	cede_processor();

	pseries_idle_epilog();
}

static bool pseries_reloc_on_exception_enabled;

bool pseries_reloc_on_exception(void)
{
	return pseries_reloc_on_exception_enabled;
}
EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);

/*
 * Enable relocation on during exceptions. This has partition wide scope and
 * may take a while to complete, if it takes longer than one second we will
 * just give up rather than wasting any more time on this - if that turns out
 * to ever be a problem in practice we can move this into a kernel thread to
 * finish off the process later in boot.
 */
bool pseries_enable_reloc_on_exc(void)
{
	long rc;
	unsigned int delay, total_delay = 0;

	while (1) {
		rc = enable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc)) {
			if (rc == H_P2) {
				pr_info("Relocation on exceptions not"
					" supported\n");
				return false;
			} else if (rc != H_SUCCESS) {
				pr_warn("Unable to enable relocation"
					" on exceptions: %ld\n", rc);
				return false;
			}
			pseries_reloc_on_exception_enabled = true;
			return true;
		}

		delay = get_longbusy_msecs(rc);
		total_delay += delay;
		if (total_delay > 1000) {
			pr_warn("Warning: Giving up waiting to enable "
				"relocation on exceptions (%u msec)!\n",
				total_delay);
			return false;
		}

		mdelay(delay);
	}
}
EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
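
/*
 * The loop above is the standard PAPR "long busy" retry idiom: a hypercall
 * may return one of the H_LONG_BUSY_* codes, and get_longbusy_msecs()
 * translates that code into a suggested wait. A minimal sketch of the
 * pattern, assuming some hcall wrapper my_hcall() exists (hypothetical
 * name, for illustration only):
 *
 *	do {
 *		rc = my_hcall();
 *		if (H_IS_LONG_BUSY(rc))
 *			mdelay(get_longbusy_msecs(rc));
 *	} while (H_IS_LONG_BUSY(rc));
 *
 * pseries_enable_reloc_on_exc() additionally bounds the total wait at one
 * second before giving up.
 */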

void pseries_disable_reloc_on_exc(void)
{
	long rc;

	while (1) {
		rc = disable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc == H_SUCCESS)
		pseries_reloc_on_exception_enabled = false;
	else
		pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
			rc);
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);

#ifdef __LITTLE_ENDIAN__
void pseries_big_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_big_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}

	/*
	 * At this point it is unlikely panic() will get anything
	 * out to the user, since this is called very late in kexec
	 * but at least this will stop us from continuing on further
	 * and creating an even more difficult to debug situation.
	 *
	 * There is a known problem when kdump'ing, if cpus are offline
	 * the above call will fail. Rather than panicking again, keep
	 * going and hope the kdump kernel is also little endian, which
	 * it usually is.
	 */
	if (rc && !kdump_in_progress())
		panic("Could not enable big endian exceptions");
}

void __init pseries_little_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_little_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc) {
		ppc_md.progress("H_SET_MODE LE exception fail", 0);
		panic("Could not enable little endian exceptions");
	}
}
#endif

static void __init pSeries_discover_phbs(void)
{
	struct device_node *node;
	struct pci_controller *phb;
	struct device_node *root = of_find_node_by_path("/");

	for_each_child_of_node(root, node) {
		if (!of_node_is_type(node, "pci") &&
		    !of_node_is_type(node, "pciex"))
			continue;

		phb = pcibios_alloc_controller(node);
		if (!phb)
			continue;
		rtas_setup_phb(phb);
		pci_process_bridge_OF_ranges(phb, node, 0);
		isa_bridge_find_early(phb);
		phb->controller_ops = pseries_pci_controller_ops;

		/* create pci_dn's for DT nodes under this PHB */
		pci_devs_phb_init_dynamic(phb);

		pseries_msi_allocate_domains(phb);
	}

	of_node_put(root);

	/*
	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
	 * in chosen.
	 */
	of_pci_check_probe_only();
}

static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
{
	/*
	 * The features below are disabled by default, so we instead look to see
	 * if firmware has *enabled* them, and set them if so.
	 */
	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);

	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);

	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);

	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);

	if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);

	if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
		security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);

	if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);

	if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);

	/*
	 * The features below are enabled by default, so we instead look to see
	 * if firmware has *disabled* them, and clear them if so.
	 * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
	 * H_CPU_BEHAV_FAVOUR_SECURITY is.
	 */
	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) {
		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
		pseries_security_flavor = 0;
	} else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
		pseries_security_flavor = 1;
	else
		pseries_security_flavor = 2;

	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);

	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);

	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);

	if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
		security_ftr_clear(SEC_FTR_STF_BARRIER);

	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
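
/*
 * Summary of the convention above: "character" bits advertise mitigation
 * mechanisms the firmware provides, so they set feature flags; "behaviour"
 * bits advertise recommended behaviour, so they mostly clear default-on
 * flags when a mitigation is declared unnecessary. pseries_security_flavor
 * simply records which of the three FAVOUR_SECURITY cases matched (0, 1 or
 * 2); its user-visible meaning is defined by the lparcfg interface, not
 * here.
 */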

void pseries_setup_security_mitigations(void)
{
	struct h_cpu_char_result result;
	enum l1d_flush_type types;
	bool enable;
	long rc;

	/*
	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
	 * so it can set/clear again any features that might have changed after
	 * migration, and in case the hypercall fails and it is not even called.
	 */
	powerpc_security_features = SEC_FTR_DEFAULT;

	rc = plpar_get_cpu_characteristics(&result);
	if (rc == H_SUCCESS)
		init_cpu_char_feature_flags(&result);

	/*
	 * We're the guest so this doesn't apply to us, clear it to simplify
	 * handling of it elsewhere.
	 */
	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);

	types = L1D_FLUSH_FALLBACK;

	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
		types |= L1D_FLUSH_MTTRIG;

	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
		types |= L1D_FLUSH_ORI;

	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);

	setup_rfi_flush(types, enable);
	setup_count_cache_flush();

	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
	setup_entry_flush(enable);

	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
	setup_uaccess_flush(enable);

	setup_stf_barrier();
}

#ifdef CONFIG_PCI_IOV
enum rtas_iov_fw_value_map {
	NUM_RES_PROPERTY  = 0, /* Number of Resources */
	LOW_INT           = 1, /* Lowest 32 bits of Address */
	START_OF_ENTRIES  = 2, /* Always start of entry */
	APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */
	WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
	NEXT_ENTRY        = 7  /* Go to next entry on array */
};

enum get_iov_fw_value_index {
	BAR_ADDRS     = 1,    /* Get Bar Address */
	APERTURE_SIZE = 2,    /* Get Aperture Size */
	WDW_SIZE      = 3     /* Get Window Size */
};

static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
						enum get_iov_fw_value_index value)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(dev);
	int i, num_res, ret = 0;

	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (!indexes)
		return 0;

	/*
	 * First element in the array is the number of Bars
	 * returned.  Search through the list to find the matching
	 * bar.
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	if (resno >= num_res)
		return 0; /* or an error */

	i = START_OF_ENTRIES + NEXT_ENTRY * resno;
	switch (value) {
	case BAR_ADDRS:
		ret = of_read_number(&indexes[i], 2);
		break;
	case APERTURE_SIZE:
		ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		break;
	case WDW_SIZE:
		ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		break;
	}

	return ret;
}
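
/*
 * Index arithmetic example for the property layout consumed above: each
 * VF BAR entry occupies NEXT_ENTRY (7) cells, so for resno == 1 the entry
 * starts at i = START_OF_ENTRIES + 7 * 1 = 9. The 64-bit BAR address is
 * then read from cells 9-10, the aperture size from cells 11-12
 * (i + APERTURE_PROPERTY) and the window size from cells 13-14
 * (i + WDW_SIZE_PROPERTY).
 */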

static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
{
	struct resource *res;
	resource_size_t base, size;
	int i, r, num_res;

	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		res->flags = pci_parse_of_flags(of_read_number
						(&indexes[i + LOW_INT], 1), 0);
		res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
	}
}

static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
{
	struct resource *res, *root, *conflict;
	resource_size_t base, size;
	int i, r, num_res;

	/*
	 * First element in the array is the number of Bars
	 * returned.  Search through the list to find the matching
	 * bars and assign them from firmware into the resources structure.
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
		root = &iomem_resource;
		dev_dbg(&dev->dev,
			"pSeries IOV BAR %d: trying firmware assignment %pR\n",
			 r + PCI_IOV_RESOURCES, res);
		conflict = request_resource_conflict(root, res);
		if (conflict) {
			dev_info(&dev->dev,
				 "BAR %d: %pR conflicts with %s %pR\n",
				 r + PCI_IOV_RESOURCES, res,
				 conflict->name, conflict);
			res->flags |= IORESOURCE_UNSET;
		}
	}
}

static void pseries_disable_sriov_resources(struct pci_dev *pdev)
{
	int i;

	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
}

static void pseries_pci_fixup_resources(struct pci_dev *pdev)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	/* Firmware must support open sriov otherwise don't configure */
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (indexes)
		of_pci_set_vf_bar_size(pdev, indexes);
	else
		pseries_disable_sriov_resources(pdev);
}

static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	if (!pdev->is_physfn)
		return;
	/* Firmware must support open sriov otherwise don't configure */
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (indexes)
		of_pci_parse_iov_addrs(pdev, indexes);
	else
		pseries_disable_sriov_resources(pdev);
}

static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
							  int resno)
{
	const __be32 *reg;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	/* Firmware must support open sriov otherwise report regular alignment */
	reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
	if (!reg)
		return pci_iov_resource_size(pdev, resno);

	if (!pdev->is_physfn)
		return 0;
	return pseries_get_iov_fw_value(pdev,
					resno - PCI_IOV_RESOURCES,
					APERTURE_SIZE);
}
#endif
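
/*
 * How the three hooks above are used (sketch): pSeries_setup_arch() below
 * wires them into ppc_md on LPAR, so the generic PCI code asks this
 * platform for VF BAR sizes, firmware-assigned addresses and per-resource
 * alignment instead of sizing SR-IOV BARs itself.
 */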

static void __init pSeries_setup_arch(void)
{
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	/* Discover PIC type and setup ppc_md accordingly */
	smp_init_pseries();

	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
			panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");

	/* openpic global configuration register (64-bit format). */
	/* openpic Interrupt Source Unit pointer (64-bit format). */
	/* python0 facility area (mmio) (64-bit format) REAL address. */

	/* init to some ~sane value until calibrate_delay() runs */
	loops_per_jiffy = 50000000;

	fwnmi_init();

	pseries_setup_security_mitigations();
#ifdef CONFIG_PPC_64S_HASH_MMU
	pseries_lpar_read_hblkrm_characteristics();
#endif

	/* By default, only probe PCI (can be overridden by rtas_pci) */
	pci_add_flags(PCI_PROBE_ONLY);

	/* Find and initialize PCI host bridges */
	init_pci_config_tokens();
	of_reconfig_notifier_register(&pci_dn_reconfig_nb);

	pSeries_nvram_init();

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		vpa_init(boot_cpuid);

		if (lppaca_shared_proc(get_lppaca())) {
			static_branch_enable(&shared_processor);
			pv_spinlocks_init();
		}

		ppc_md.power_save = pseries_lpar_idle;
		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
		ppc_md.pcibios_fixup_resources =
			pseries_pci_fixup_resources;
		ppc_md.pcibios_fixup_sriov =
			pseries_pci_fixup_iov_resources;
		ppc_md.pcibios_iov_resource_alignment =
			pseries_pci_iov_resource_alignment;
#endif
	} else {
		/* No special idle routine */
		ppc_md.enable_pmcs = power4_enable_pmcs;
	}

	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
	pseries_rng_init();
}

static void pseries_panic(char *str)
{
	panic_flush_kmsg_end();
	rtas_os_term(str);
}

static int __init pSeries_init_panel(void)
{
	/* Manually leave the kernel version on the panel. */
#ifdef __BIG_ENDIAN__
	ppc_md.progress("Linux ppc64\n", 0);
#else
	ppc_md.progress("Linux ppc64le\n", 0);
#endif
	ppc_md.progress(init_utsname()->version, 0);

	return 0;
}
machine_arch_initcall(pseries, pSeries_init_panel);

static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
{
	return plpar_hcall_norets(H_SET_DABR, dabr);
}

static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
{
	/* Have to set at least one bit in the DABRX according to PAPR */
	if (dabrx == 0 && dabr == 0)
		dabrx = DABRX_USER;
	/* PAPR says we can only set kernel and user bits */
	dabrx &= DABRX_KERNEL | DABRX_USER;

	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
}

static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
{
	/* PAPR says we can't set HYP */
	dawrx &= ~DAWRX_HYP;

	if (nr == 0)
		return plpar_set_watchpoint0(dawr, dawrx);
	else
		return plpar_set_watchpoint1(dawr, dawrx);
}

#define CMO_CHARACTERISTICS_TOKEN 44
#define CMO_MAXLENGTH 1026

void pSeries_coalesce_init(void)
{
	struct hvcall_mpp_x_data mpp_x_data;

	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
		powerpc_firmware_features |= FW_FEATURE_XCMO;
	else
		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
}

/**
 * pSeries_cmo_feature_init - detect Cooperative Memory Overcommitment support
 *
 * FW_FEATURE_CMO is not stored in ibm,hypertas-functions, so handle it
 * here instead. (Stolen from parse_system_parameter_string)
 */
static void __init pSeries_cmo_feature_init(void)
{
	char *ptr, *key, *value, *end;
	int call_status;
	int page_order = IOMMU_PAGE_SHIFT_4K;

	pr_debug(" -> fw_cmo_feature_init()\n");
	spin_lock(&rtas_data_buf_lock);
	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
				NULL,
				CMO_CHARACTERISTICS_TOKEN,
				__pa(rtas_data_buf),
				RTAS_DATA_BUF_SIZE);

	if (call_status != 0) {
		spin_unlock(&rtas_data_buf_lock);
		pr_debug("CMO not available\n");
		pr_debug(" <- fw_cmo_feature_init()\n");
		return;
	}

	end = rtas_data_buf + CMO_MAXLENGTH - 2;
	ptr = rtas_data_buf + 2;	/* step over strlen value */
	key = value = ptr;

	while (*ptr && (ptr <= end)) {
		/* Separate the key and value by replacing '=' with '\0' and
		 * point the value at the string after the '='
		 */
		if (ptr[0] == '=') {
			ptr[0] = '\0';
			value = ptr + 1;
		} else if (ptr[0] == '\0' || ptr[0] == ',') {
			/* Terminate the string containing the key/value pair */
			ptr[0] = '\0';

			if (key == value) {
				pr_debug("Malformed key/value pair\n");
				/* Never found a '=', end processing */
				break;
			}

			if (0 == strcmp(key, "CMOPageSize"))
				page_order = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "PrPSP"))
				CMO_PrPSP = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "SecPSP"))
				CMO_SecPSP = simple_strtol(value, NULL, 10);
			value = key = ptr + 1;
		}
		ptr++;
	}

	/* Page size is returned as the power of 2 of the page size,
	 * convert to the page size in bytes before returning
	 */
	CMO_PageSize = 1 << page_order;
	pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);

	if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
		pr_info("CMO enabled\n");
		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
			 CMO_SecPSP);
		powerpc_firmware_features |= FW_FEATURE_CMO;
		pSeries_coalesce_init();
	} else
		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
			 CMO_SecPSP);
	spin_unlock(&rtas_data_buf_lock);
	pr_debug(" <- fw_cmo_feature_init()\n");
}
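
/*
 * Worked example for the parser above (buffer contents are illustrative):
 * an ibm,get-system-parameter result of "CMOPageSize=12,PrPSP=41,SecPSP=50"
 * splits into CMOPageSize -> page_order 12 (so CMO_PageSize becomes
 * 1 << 12 = 4096 bytes), PrPSP -> 41 and SecPSP -> 50; since both PSP
 * values differ from -1, FW_FEATURE_CMO is set and coalescing is probed.
 */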

/*
 * Early initialization.  Relocation is on but do not reference unbolted pages
 */
static void __init pseries_init(void)
{
	pr_debug(" -> pseries_init()\n");

#ifdef CONFIG_HVC_CONSOLE
	if (firmware_has_feature(FW_FEATURE_LPAR))
		hvc_vio_init_early();
#endif
	if (firmware_has_feature(FW_FEATURE_XDABR))
		ppc_md.set_dabr = pseries_set_xdabr;
	else if (firmware_has_feature(FW_FEATURE_DABR))
		ppc_md.set_dabr = pseries_set_dabr;

	if (firmware_has_feature(FW_FEATURE_SET_MODE))
		ppc_md.set_dawr = pseries_set_dawr;

	pSeries_cmo_feature_init();
	iommu_init_early_pSeries();

	pr_debug(" <- pseries_init()\n");
}

/**
 * pseries_power_off - tell firmware about how to power off the system.
 *
 * This function calls either the power-off rtas token in normal cases
 * or the ibm,power-off-ups token (if present & requested) in case of
 * a power failure. If power-off token is used, power on will only be
 * possible with power button press. If ibm,power-off-ups token is used
 * it will allow auto poweron after power is restored.
 */
static void pseries_power_off(void)
{
	int rc;
	int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups");

	if (rtas_flash_term_hook)
		rtas_flash_term_hook(SYS_POWER_OFF);

	if (rtas_poweron_auto == 0 ||
		rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) {
		rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1);
		printk(KERN_INFO "RTAS power-off returned %d\n", rc);
	} else {
		rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL);
		printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
	}
	for (;;);
}
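
/*
 * Note: the trailing for (;;) deliberately parks the cpu, since control is
 * not expected to return once firmware accepts the power-off request. (The
 * -1, -1 arguments follow what PAPR defines for the power-off call; their
 * exact meaning lives in the RTAS spec, not here.)
 */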

static int __init pSeries_probe(void)
{
	if (!of_node_is_type(of_root, "chrp"))
		return 0;

	/* Cell blades firmware claims to be chrp while it's not. Until this
	 * is fixed, we need to avoid those here.
	 */
	if (of_machine_is_compatible("IBM,CPBW-1.0") ||
	    of_machine_is_compatible("IBM,CBEA"))
		return 0;

	pm_power_off = pseries_power_off;

	pr_debug("Machine is%s LPAR !\n",
	         (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");

	pseries_init();

	return 1;
}

static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
	if (firmware_has_feature(FW_FEATURE_LPAR))
		return PCI_PROBE_DEVTREE;
	return PCI_PROBE_NORMAL;
}

struct pci_controller_ops pseries_pci_controller_ops = {
	.probe_mode		= pSeries_pci_probe_mode,
};

define_machine(pseries) {
	.name			= "pSeries",
	.probe			= pSeries_probe,
	.setup_arch		= pSeries_setup_arch,
	.init_IRQ		= pseries_init_irq,
	.show_cpuinfo		= pSeries_show_cpuinfo,
	.log_error		= pSeries_log_error,
	.discover_phbs		= pSeries_discover_phbs,
	.pcibios_fixup		= pSeries_final_fixup,
	.restart		= rtas_restart,
	.halt			= rtas_halt,
	.panic			= pseries_panic,
	.get_boot_time		= rtas_get_boot_time,
	.get_rtc_time		= rtas_get_rtc_time,
	.set_rtc_time		= rtas_set_rtc_time,
	.calibrate_decr		= generic_calibrate_decr,
	.progress		= rtas_progress,
	.system_reset_exception = pSeries_system_reset_exception,
	.machine_check_early	= pseries_machine_check_realmode,
	.machine_check_exception = pSeries_machine_check_exception,
	.machine_check_log_err	= pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
	.machine_kexec          = pseries_machine_kexec,
	.kexec_cpu_down         = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
	.memory_block_size	= pseries_memory_block_size,
#endif
};