cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

core.c (24885B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Resource Director Technology(RDT)
      4 * - Cache Allocation code.
      5 *
      6 * Copyright (C) 2016 Intel Corporation
      7 *
      8 * Authors:
      9 *    Fenghua Yu <fenghua.yu@intel.com>
     10 *    Tony Luck <tony.luck@intel.com>
     11 *    Vikas Shivappa <vikas.shivappa@intel.com>
     12 *
      13 * More information about RDT can be found in the Intel(R) x86 Architecture
      14 * Software Developer's Manual, June 2016, volume 3, section 17.17.
     15 */
     16
     17#define pr_fmt(fmt)	"resctrl: " fmt
     18
     19#include <linux/slab.h>
     20#include <linux/err.h>
     21#include <linux/cacheinfo.h>
     22#include <linux/cpuhotplug.h>
     23
     24#include <asm/intel-family.h>
     25#include <asm/resctrl.h>
     26#include "internal.h"
     27
     28/* Mutex to protect rdtgroup access. */
     29DEFINE_MUTEX(rdtgroup_mutex);
     30
     31/*
     32 * The cached resctrl_pqr_state is strictly per CPU and can never be
     33 * updated from a remote CPU. Functions which modify the state
     34 * are called with interrupts disabled and no preemption, which
     35 * is sufficient for the protection.
     36 */
     37DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
     38
     39/*
     40 * Used to store the max resource name width and max resource data width
     41 * to display the schemata in a tabular format
     42 */
     43int max_name_width, max_data_width;
     44
     45/*
     46 * Global boolean for rdt_alloc which is true if any
     47 * resource allocation is enabled.
     48 */
     49bool rdt_alloc_capable;
     50
     51static void
     52mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
     53		struct rdt_resource *r);
     54static void
     55cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
     56static void
     57mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
     58	      struct rdt_resource *r);
     59
     60#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
     61
     62struct rdt_hw_resource rdt_resources_all[] = {
     63	[RDT_RESOURCE_L3] =
     64	{
     65		.r_resctrl = {
     66			.rid			= RDT_RESOURCE_L3,
     67			.name			= "L3",
     68			.cache_level		= 3,
     69			.cache = {
     70				.min_cbm_bits	= 1,
     71			},
     72			.domains		= domain_init(RDT_RESOURCE_L3),
     73			.parse_ctrlval		= parse_cbm,
     74			.format_str		= "%d=%0*x",
     75			.fflags			= RFTYPE_RES_CACHE,
     76		},
     77		.msr_base		= MSR_IA32_L3_CBM_BASE,
     78		.msr_update		= cat_wrmsr,
     79	},
     80	[RDT_RESOURCE_L2] =
     81	{
     82		.r_resctrl = {
     83			.rid			= RDT_RESOURCE_L2,
     84			.name			= "L2",
     85			.cache_level		= 2,
     86			.cache = {
     87				.min_cbm_bits	= 1,
     88			},
     89			.domains		= domain_init(RDT_RESOURCE_L2),
     90			.parse_ctrlval		= parse_cbm,
     91			.format_str		= "%d=%0*x",
     92			.fflags			= RFTYPE_RES_CACHE,
     93		},
     94		.msr_base		= MSR_IA32_L2_CBM_BASE,
     95		.msr_update		= cat_wrmsr,
     96	},
     97	[RDT_RESOURCE_MBA] =
     98	{
     99		.r_resctrl = {
    100			.rid			= RDT_RESOURCE_MBA,
    101			.name			= "MB",
    102			.cache_level		= 3,
    103			.domains		= domain_init(RDT_RESOURCE_MBA),
    104			.parse_ctrlval		= parse_bw,
    105			.format_str		= "%d=%*u",
    106			.fflags			= RFTYPE_RES_MB,
    107		},
    108	},
    109};
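        /*
         * The MBA resource's msr_base and msr_update are vendor specific and
         * are filled in later by rdt_init_res_defs_intel() or
         * rdt_init_res_defs_amd().
         */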
    110
    111/*
     112 * cache_alloc_hsw_probe() - Have to probe for Intel Haswell server CPUs
     113 * as they do not have CPUID enumeration support for Cache Allocation.
    114 * The check for Vendor/Family/Model is not enough to guarantee that
    115 * the MSRs won't #GP fault because only the following SKUs support
    116 * CAT:
    117 *	Intel(R) Xeon(R)  CPU E5-2658  v3  @  2.20GHz
    118 *	Intel(R) Xeon(R)  CPU E5-2648L v3  @  1.80GHz
    119 *	Intel(R) Xeon(R)  CPU E5-2628L v3  @  2.00GHz
    120 *	Intel(R) Xeon(R)  CPU E5-2618L v3  @  2.30GHz
    121 *	Intel(R) Xeon(R)  CPU E5-2608L v3  @  2.00GHz
    122 *	Intel(R) Xeon(R)  CPU E5-2658A v3  @  2.20GHz
    123 *
    124 * Probe by trying to write the first of the L3 cache mask registers
    125 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
    126 * is always 20 on hsw server parts. The minimum cache bitmask length
    127 * allowed for HSW server is always 2 bits. Hardcode all of them.
    128 */
    129static inline void cache_alloc_hsw_probe(void)
    130{
    131	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
    132	struct rdt_resource *r  = &hw_res->r_resctrl;
    133	u32 l, h, max_cbm = BIT_MASK(20) - 1;
    134
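        	/*
        	 * max_cbm is the full 20-bit mask (0xfffff). wrmsr_safe() fails
        	 * (returns non-zero) if the write #GP faults, i.e. the L3 CBM
        	 * MSRs are not implemented and CAT cannot be supported here.
        	 */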
    135	if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
    136		return;
    137
    138	rdmsr(MSR_IA32_L3_CBM_BASE, l, h);
    139
     140	/* If the bits did not stick, CAT is not supported - bail out */
    141	if (l != max_cbm)
    142		return;
    143
    144	hw_res->num_closid = 4;
    145	r->default_ctrl = max_cbm;
    146	r->cache.cbm_len = 20;
    147	r->cache.shareable_bits = 0xc0000;
    148	r->cache.min_cbm_bits = 2;
    149	r->alloc_capable = true;
    150	r->alloc_enabled = true;
    151
    152	rdt_alloc_capable = true;
    153}
    154
    155bool is_mba_sc(struct rdt_resource *r)
    156{
    157	if (!r)
    158		return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
    159
    160	return r->membw.mba_sc;
    161}
    162
    163/*
     164 * rdt_get_mb_table() - Get a mapping between the bandwidth (b/w) percentage
     165 * values exposed to the user interface and the h/w understandable delay
     166 * values.
     167 *
     168 * The non-linear delay values have a power-of-two granularity, and the h/w
     169 * does not guarantee a linear relation between the configured delay values
     170 * and the actual b/w enforced. Hence we need a pre-calibrated mapping so
     171 * the user can express the memory b/w as a percentage value.
    172 */
    173static inline bool rdt_get_mb_table(struct rdt_resource *r)
    174{
    175	/*
     176	 * There are currently no Intel SKUs that support non-linear delay.
    177	 */
     178	pr_info("MBA b/w map not implemented for cpu:%d, model:%d\n",
    179		boot_cpu_data.x86, boot_cpu_data.x86_model);
    180
    181	return false;
    182}
    183
    184static bool __get_mem_config_intel(struct rdt_resource *r)
    185{
    186	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    187	union cpuid_0x10_3_eax eax;
    188	union cpuid_0x10_x_edx edx;
    189	u32 ebx, ecx, max_delay;
    190
    191	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
    192	hw_res->num_closid = edx.split.cos_max + 1;
    193	max_delay = eax.split.max_delay + 1;
    194	r->default_ctrl = MAX_MBA_BW;
    195	r->membw.arch_needs_linear = true;
    196	if (ecx & MBA_IS_LINEAR) {
    197		r->membw.delay_linear = true;
    198		r->membw.min_bw = MAX_MBA_BW - max_delay;
    199		r->membw.bw_gran = MAX_MBA_BW - max_delay;
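        		/*
        		 * Example (values illustrative): if CPUID reports a max
        		 * delay of 89, max_delay is 90, so min_bw and bw_gran are
        		 * both 100 - 90 = 10, i.e. b/w is configurable from 10%
        		 * to 100% in 10% steps.
        		 */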
    200	} else {
    201		if (!rdt_get_mb_table(r))
    202			return false;
    203		r->membw.arch_needs_linear = false;
    204	}
    205	r->data_width = 3;
    206
    207	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
    208		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
    209	else
    210		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
    211	thread_throttle_mode_init();
    212
    213	r->alloc_capable = true;
    214	r->alloc_enabled = true;
    215
    216	return true;
    217}
    218
    219static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
    220{
    221	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    222	union cpuid_0x10_3_eax eax;
    223	union cpuid_0x10_x_edx edx;
    224	u32 ebx, ecx;
    225
    226	cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full);
    227	hw_res->num_closid = edx.split.cos_max + 1;
    228	r->default_ctrl = MAX_MBA_BW_AMD;
    229
    230	/* AMD does not use delay */
    231	r->membw.delay_linear = false;
    232	r->membw.arch_needs_linear = false;
    233
    234	/*
     235	 * AMD does not use the memory delay throttle model that Intel
     236	 * uses to control the allocation.
    237	 */
    238	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
    239	r->membw.min_bw = 0;
    240	r->membw.bw_gran = 1;
     241	/* Max value is 2048, so the data width is 4 decimal digits */
    242	r->data_width = 4;
    243
    244	r->alloc_capable = true;
    245	r->alloc_enabled = true;
    246
    247	return true;
    248}
    249
    250static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
    251{
    252	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    253	union cpuid_0x10_1_eax eax;
    254	union cpuid_0x10_x_edx edx;
    255	u32 ebx, ecx;
    256
    257	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
    258	hw_res->num_closid = edx.split.cos_max + 1;
    259	r->cache.cbm_len = eax.split.cbm_len + 1;
    260	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
    261	r->cache.shareable_bits = ebx & r->default_ctrl;
    262	r->data_width = (r->cache.cbm_len + 3) / 4;
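        	/*
        	 * Example (illustrative): a 20-bit CBM gives default_ctrl =
        	 * 0xfffff and a data width of (20 + 3) / 4 = 5 hex digits.
        	 */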
    263	r->alloc_capable = true;
    264	r->alloc_enabled = true;
    265}
    266
    267static void rdt_get_cdp_config(int level)
    268{
    269	/*
     270	 * By default, CDP is disabled. CDP can be enabled with the "cdp"
     271	 * mount option when the resctrl file system is mounted.
    272	 */
    273	rdt_resources_all[level].cdp_enabled = false;
    274	rdt_resources_all[level].r_resctrl.cdp_capable = true;
    275}
    276
    277static void rdt_get_cdp_l3_config(void)
    278{
    279	rdt_get_cdp_config(RDT_RESOURCE_L3);
    280}
    281
    282static void rdt_get_cdp_l2_config(void)
    283{
    284	rdt_get_cdp_config(RDT_RESOURCE_L2);
    285}
    286
    287static void
    288mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
    289{
    290	unsigned int i;
    291	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
    292	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    293
    294	for (i = m->low; i < m->high; i++)
    295		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
    296}
    297
    298/*
    299 * Map the memory b/w percentage value to delay values
    300 * that can be written to QOS_MSRs.
     301 * There are currently no SKUs which support non-linear delay values.
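         * For linear delay, a requested b/w of e.g. 70% is written as a delay
         * value of MAX_MBA_BW - 70 = 30.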
    302 */
    303u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
    304{
    305	if (r->membw.delay_linear)
    306		return MAX_MBA_BW - bw;
    307
    308	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
    309	return r->default_ctrl;
    310}
    311
    312static void
    313mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
    314		struct rdt_resource *r)
    315{
    316	unsigned int i;
    317	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
    318	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    319
     320	/* Write the delay values for MBA. */
    321	for (i = m->low; i < m->high; i++)
    322		wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r));
    323}
    324
    325static void
    326cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
    327{
    328	unsigned int i;
    329	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
    330	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    331
    332	for (i = m->low; i < m->high; i++)
    333		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
    334}
    335
    336struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
    337{
    338	struct rdt_domain *d;
    339
    340	list_for_each_entry(d, &r->domains, list) {
    341		/* Find the domain that contains this CPU */
    342		if (cpumask_test_cpu(cpu, &d->cpu_mask))
    343			return d;
    344	}
    345
    346	return NULL;
    347}
    348
    349u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
    350{
    351	return resctrl_to_arch_res(r)->num_closid;
    352}
    353
    354void rdt_ctrl_update(void *arg)
    355{
    356	struct msr_param *m = arg;
    357	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
    358	struct rdt_resource *r = m->res;
    359	int cpu = smp_processor_id();
    360	struct rdt_domain *d;
    361
    362	d = get_domain_from_cpu(cpu, r);
    363	if (d) {
    364		hw_res->msr_update(d, m, r);
    365		return;
    366	}
    367	pr_warn_once("cpu %d not found in any domain for resource %s\n",
    368		     cpu, r->name);
    369}
    370
    371/*
     372 * rdt_find_domain - Find a domain in a resource that matches the input id
     373 *
     374 * Search resource r's domain list for the given resource id. If a domain
     375 * with that id is found, return it. Otherwise return NULL and, if requested
     376 * by the caller, store the insertion position for that id in *pos.
    377 * The domain list is sorted by id in ascending order.
    378 */
    379struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
    380				   struct list_head **pos)
    381{
    382	struct rdt_domain *d;
    383	struct list_head *l;
    384
    385	if (id < 0)
    386		return ERR_PTR(-ENODEV);
    387
    388	list_for_each(l, &r->domains) {
    389		d = list_entry(l, struct rdt_domain, list);
    390		/* When id is found, return its domain. */
    391		if (id == d->id)
    392			return d;
    393		/* Stop searching when finding id's position in sorted list. */
    394		if (id < d->id)
    395			break;
    396	}
    397
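        	/*
        	 * Not found: "l" now points at the first domain with a larger id
        	 * (or back at the list head), i.e. the insertion position for id.
        	 */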
    398	if (pos)
    399		*pos = l;
    400
    401	return NULL;
    402}
    403
    404void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
    405{
    406	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    407	int i;
    408
    409	/*
     410	 * Initialize the control values so that they impose no restriction:
     411	 * For Cache Allocation: set all bits in the cbm.
     412	 * For Memory Allocation: set the requested b/w to 100% and the
     413	 * bandwidth in MBps to U32_MAX.
    414	 */
    415	for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) {
    416		*dc = r->default_ctrl;
    417		*dm = MBA_MAX_MBPS;
    418	}
    419}
    420
    421static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
    422{
    423	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
    424	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
    425	struct msr_param m;
    426	u32 *dc, *dm;
    427
    428	dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
    429			   GFP_KERNEL);
    430	if (!dc)
    431		return -ENOMEM;
    432
    433	dm = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val),
    434			   GFP_KERNEL);
    435	if (!dm) {
    436		kfree(dc);
    437		return -ENOMEM;
    438	}
    439
    440	hw_dom->ctrl_val = dc;
    441	hw_dom->mbps_val = dm;
    442	setup_default_ctrlval(r, dc, dm);
    443
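        	/*
        	 * Program the MSRs for every CLOSID in [0, num_closid) of this
        	 * domain with the default control values set up above.
        	 */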
    444	m.low = 0;
    445	m.high = hw_res->num_closid;
    446	hw_res->msr_update(d, &m, r);
    447	return 0;
    448}
    449
    450static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
    451{
    452	size_t tsize;
    453
    454	if (is_llc_occupancy_enabled()) {
    455		d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
    456		if (!d->rmid_busy_llc)
    457			return -ENOMEM;
    458		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
    459	}
    460	if (is_mbm_total_enabled()) {
    461		tsize = sizeof(*d->mbm_total);
    462		d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
    463		if (!d->mbm_total) {
    464			bitmap_free(d->rmid_busy_llc);
    465			return -ENOMEM;
    466		}
    467	}
    468	if (is_mbm_local_enabled()) {
    469		tsize = sizeof(*d->mbm_local);
    470		d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
    471		if (!d->mbm_local) {
    472			bitmap_free(d->rmid_busy_llc);
    473			kfree(d->mbm_total);
    474			return -ENOMEM;
    475		}
    476	}
    477
    478	if (is_mbm_enabled()) {
    479		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
    480		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
    481	}
    482
    483	return 0;
    484}
    485
    486/*
    487 * domain_add_cpu - Add a cpu to a resource's domain list.
    488 *
    489 * If an existing domain in the resource r's domain list matches the cpu's
     490 * resource id, add the cpu to the domain.
    491 *
    492 * Otherwise, a new domain is allocated and inserted into the right position
    493 * in the domain list sorted by id in ascending order.
    494 *
    495 * The order in the domain list is visible to users when we print entries
    496 * in the schemata file and schemata input is validated to have the same order
    497 * as this list.
    498 */
    499static void domain_add_cpu(int cpu, struct rdt_resource *r)
    500{
    501	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
    502	struct list_head *add_pos = NULL;
    503	struct rdt_hw_domain *hw_dom;
    504	struct rdt_domain *d;
    505
    506	d = rdt_find_domain(r, id, &add_pos);
    507	if (IS_ERR(d)) {
    508		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
    509		return;
    510	}
    511
    512	if (d) {
    513		cpumask_set_cpu(cpu, &d->cpu_mask);
    514		if (r->cache.arch_has_per_cpu_cfg)
    515			rdt_domain_reconfigure_cdp(r);
    516		return;
    517	}
    518
    519	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
    520	if (!hw_dom)
    521		return;
    522
    523	d = &hw_dom->d_resctrl;
    524	d->id = id;
    525	cpumask_set_cpu(cpu, &d->cpu_mask);
    526
    527	rdt_domain_reconfigure_cdp(r);
    528
    529	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
    530		kfree(hw_dom);
    531		return;
    532	}
    533
    534	if (r->mon_capable && domain_setup_mon_state(r, d)) {
    535		kfree(hw_dom->ctrl_val);
    536		kfree(hw_dom->mbps_val);
    537		kfree(hw_dom);
    538		return;
    539	}
    540
    541	list_add_tail(&d->list, add_pos);
    542
    543	/*
    544	 * If resctrl is mounted, add
    545	 * per domain monitor data directories.
    546	 */
    547	if (static_branch_unlikely(&rdt_mon_enable_key))
    548		mkdir_mondata_subdir_allrdtgrp(r, d);
    549}
    550
    551static void domain_remove_cpu(int cpu, struct rdt_resource *r)
    552{
    553	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
    554	struct rdt_hw_domain *hw_dom;
    555	struct rdt_domain *d;
    556
    557	d = rdt_find_domain(r, id, NULL);
    558	if (IS_ERR_OR_NULL(d)) {
    559		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
    560		return;
    561	}
    562	hw_dom = resctrl_to_arch_dom(d);
    563
    564	cpumask_clear_cpu(cpu, &d->cpu_mask);
    565	if (cpumask_empty(&d->cpu_mask)) {
    566		/*
    567		 * If resctrl is mounted, remove all the
    568		 * per domain monitor data directories.
    569		 */
    570		if (static_branch_unlikely(&rdt_mon_enable_key))
    571			rmdir_mondata_subdir_allrdtgrp(r, d->id);
    572		list_del(&d->list);
    573		if (r->mon_capable && is_mbm_enabled())
    574			cancel_delayed_work(&d->mbm_over);
    575		if (is_llc_occupancy_enabled() &&  has_busy_rmid(r, d)) {
    576			/*
    577			 * When a package is going down, forcefully
    578			 * decrement rmid->ebusy. There is no way to know
    579			 * that the L3 was flushed and hence may lead to
    580			 * incorrect counts in rare scenarios, but leaving
    581			 * the RMID as busy creates RMID leaks if the
    582			 * package never comes back.
    583			 */
    584			__check_limbo(d, true);
    585			cancel_delayed_work(&d->cqm_limbo);
    586		}
    587
    588		/*
    589		 * rdt_domain "d" is going to be freed below, so clear
    590		 * its pointer from pseudo_lock_region struct.
    591		 */
    592		if (d->plr)
    593			d->plr->d = NULL;
    594
    595		kfree(hw_dom->ctrl_val);
    596		kfree(hw_dom->mbps_val);
    597		bitmap_free(d->rmid_busy_llc);
    598		kfree(d->mbm_total);
    599		kfree(d->mbm_local);
    600		kfree(hw_dom);
    601		return;
    602	}
    603
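        	/*
        	 * The domain still has online CPUs. For the L3 resource, if the
        	 * departing CPU was running the MBM overflow or CQM limbo worker,
        	 * restart that worker immediately so it moves to another CPU in
        	 * the domain.
        	 */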
    604	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
    605		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
    606			cancel_delayed_work(&d->mbm_over);
    607			mbm_setup_overflow_handler(d, 0);
    608		}
    609		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
    610		    has_busy_rmid(r, d)) {
    611			cancel_delayed_work(&d->cqm_limbo);
    612			cqm_setup_limbo_handler(d, 0);
    613		}
    614	}
    615}
    616
    617static void clear_closid_rmid(int cpu)
    618{
    619	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
    620
    621	state->default_closid = 0;
    622	state->default_rmid = 0;
    623	state->cur_closid = 0;
    624	state->cur_rmid = 0;
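        	/*
        	 * IA32_PQR_ASSOC holds the active RMID in its lower 32 bits and
        	 * the active CLOSID in its upper 32 bits; writing 0/0 resets both.
        	 */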
    625	wrmsr(IA32_PQR_ASSOC, 0, 0);
    626}
    627
    628static int resctrl_online_cpu(unsigned int cpu)
    629{
    630	struct rdt_resource *r;
    631
    632	mutex_lock(&rdtgroup_mutex);
    633	for_each_capable_rdt_resource(r)
    634		domain_add_cpu(cpu, r);
     635	/* The CPU is placed in the default rdtgroup when it comes online. */
    636	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
    637	clear_closid_rmid(cpu);
    638	mutex_unlock(&rdtgroup_mutex);
    639
    640	return 0;
    641}
    642
    643static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
    644{
    645	struct rdtgroup *cr;
    646
    647	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
    648		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
    649			break;
    650		}
    651	}
    652}
    653
    654static int resctrl_offline_cpu(unsigned int cpu)
    655{
    656	struct rdtgroup *rdtgrp;
    657	struct rdt_resource *r;
    658
    659	mutex_lock(&rdtgroup_mutex);
    660	for_each_capable_rdt_resource(r)
    661		domain_remove_cpu(cpu, r);
    662	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
    663		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
    664			clear_childcpus(rdtgrp, cpu);
    665			break;
    666		}
    667	}
    668	clear_closid_rmid(cpu);
    669	mutex_unlock(&rdtgroup_mutex);
    670
    671	return 0;
    672}
    673
    674/*
    675 * Choose a width for the resource name and resource data based on the
     676 * resource that has the widest name and cbm.
    677 */
    678static __init void rdt_init_padding(void)
    679{
    680	struct rdt_resource *r;
    681
    682	for_each_alloc_capable_rdt_resource(r) {
    683		if (r->data_width > max_data_width)
    684			max_data_width = r->data_width;
    685	}
    686}
    687
    688enum {
    689	RDT_FLAG_CMT,
    690	RDT_FLAG_MBM_TOTAL,
    691	RDT_FLAG_MBM_LOCAL,
    692	RDT_FLAG_L3_CAT,
    693	RDT_FLAG_L3_CDP,
    694	RDT_FLAG_L2_CAT,
    695	RDT_FLAG_L2_CDP,
    696	RDT_FLAG_MBA,
    697};
    698
    699#define RDT_OPT(idx, n, f)	\
    700[idx] = {			\
    701	.name = n,		\
    702	.flag = f		\
    703}
    704
    705struct rdt_options {
    706	char	*name;
    707	int	flag;
    708	bool	force_off, force_on;
    709};
    710
    711static struct rdt_options rdt_options[]  __initdata = {
    712	RDT_OPT(RDT_FLAG_CMT,	    "cmt",	X86_FEATURE_CQM_OCCUP_LLC),
    713	RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
    714	RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
    715	RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",	X86_FEATURE_CAT_L3),
    716	RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",	X86_FEATURE_CDP_L3),
    717	RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",	X86_FEATURE_CAT_L2),
    718	RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",	X86_FEATURE_CDP_L2),
    719	RDT_OPT(RDT_FLAG_MBA,	    "mba",	X86_FEATURE_MBA),
    720};
    721#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
    722
    723static int __init set_rdt_options(char *str)
    724{
    725	struct rdt_options *o;
    726	bool force_off;
    727	char *tok;
    728
    729	if (*str == '=')
    730		str++;
    731	while ((tok = strsep(&str, ",")) != NULL) {
    732		force_off = *tok == '!';
    733		if (force_off)
    734			tok++;
    735		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
    736			if (strcmp(tok, o->name) == 0) {
    737				if (force_off)
    738					o->force_off = true;
    739				else
    740					o->force_on = true;
    741				break;
    742			}
    743		}
    744	}
    745	return 1;
    746}
    747__setup("rdt", set_rdt_options);
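        /*
         * Example (illustrative): booting with "rdt=!l3cat,!mba" force-disables
         * L3 CAT and MBA detection even if the CPU enumerates them.
         */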
    748
    749static bool __init rdt_cpu_has(int flag)
    750{
    751	bool ret = boot_cpu_has(flag);
    752	struct rdt_options *o;
    753
    754	if (!ret)
    755		return ret;
    756
    757	for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
    758		if (flag == o->flag) {
    759			if (o->force_off)
    760				ret = false;
    761			if (o->force_on)
    762				ret = true;
    763			break;
    764		}
    765	}
    766	return ret;
    767}
    768
    769static __init bool get_mem_config(void)
    770{
    771	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
    772
    773	if (!rdt_cpu_has(X86_FEATURE_MBA))
    774		return false;
    775
    776	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
    777		return __get_mem_config_intel(&hw_res->r_resctrl);
    778	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
    779		return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
    780
    781	return false;
    782}
    783
    784static __init bool get_rdt_alloc_resources(void)
    785{
    786	struct rdt_resource *r;
    787	bool ret = false;
    788
    789	if (rdt_alloc_capable)
    790		return true;
    791
    792	if (!boot_cpu_has(X86_FEATURE_RDT_A))
    793		return false;
    794
    795	if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
    796		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
    797		rdt_get_cache_alloc_cfg(1, r);
    798		if (rdt_cpu_has(X86_FEATURE_CDP_L3))
    799			rdt_get_cdp_l3_config();
    800		ret = true;
    801	}
    802	if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
     803		/* CPUID 0x10.2 fields have the same format as 0x10.1 */
    804		r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
    805		rdt_get_cache_alloc_cfg(2, r);
    806		if (rdt_cpu_has(X86_FEATURE_CDP_L2))
    807			rdt_get_cdp_l2_config();
    808		ret = true;
    809	}
    810
    811	if (get_mem_config())
    812		ret = true;
    813
    814	return ret;
    815}
    816
    817static __init bool get_rdt_mon_resources(void)
    818{
    819	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
    820
    821	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
    822		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
    823	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
    824		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
    825	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
    826		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
    827
    828	if (!rdt_mon_features)
    829		return false;
    830
    831	return !rdt_get_mon_l3_config(r);
    832}
    833
    834static __init void __check_quirks_intel(void)
    835{
    836	switch (boot_cpu_data.x86_model) {
    837	case INTEL_FAM6_HASWELL_X:
    838		if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
    839			cache_alloc_hsw_probe();
    840		break;
    841	case INTEL_FAM6_SKYLAKE_X:
    842		if (boot_cpu_data.x86_stepping <= 4)
    843			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
    844		else
    845			set_rdt_options("!l3cat");
    846		fallthrough;
    847	case INTEL_FAM6_BROADWELL_X:
    848		intel_rdt_mbm_apply_quirk();
    849		break;
    850	}
    851}
    852
    853static __init void check_quirks(void)
    854{
    855	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
    856		__check_quirks_intel();
    857}
    858
    859static __init bool get_rdt_resources(void)
    860{
    861	rdt_alloc_capable = get_rdt_alloc_resources();
    862	rdt_mon_capable = get_rdt_mon_resources();
    863
    864	return (rdt_mon_capable || rdt_alloc_capable);
    865}
    866
    867static __init void rdt_init_res_defs_intel(void)
    868{
    869	struct rdt_hw_resource *hw_res;
    870	struct rdt_resource *r;
    871
    872	for_each_rdt_resource(r) {
    873		hw_res = resctrl_to_arch_res(r);
    874
    875		if (r->rid == RDT_RESOURCE_L3 ||
    876		    r->rid == RDT_RESOURCE_L2) {
    877			r->cache.arch_has_sparse_bitmaps = false;
    878			r->cache.arch_has_empty_bitmaps = false;
    879			r->cache.arch_has_per_cpu_cfg = false;
    880		} else if (r->rid == RDT_RESOURCE_MBA) {
    881			hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
    882			hw_res->msr_update = mba_wrmsr_intel;
    883		}
    884	}
    885}
    886
    887static __init void rdt_init_res_defs_amd(void)
    888{
    889	struct rdt_hw_resource *hw_res;
    890	struct rdt_resource *r;
    891
    892	for_each_rdt_resource(r) {
    893		hw_res = resctrl_to_arch_res(r);
    894
    895		if (r->rid == RDT_RESOURCE_L3 ||
    896		    r->rid == RDT_RESOURCE_L2) {
    897			r->cache.arch_has_sparse_bitmaps = true;
    898			r->cache.arch_has_empty_bitmaps = true;
    899			r->cache.arch_has_per_cpu_cfg = true;
    900		} else if (r->rid == RDT_RESOURCE_MBA) {
    901			hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
    902			hw_res->msr_update = mba_wrmsr_amd;
    903		}
    904	}
    905}
    906
    907static __init void rdt_init_res_defs(void)
    908{
    909	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
    910		rdt_init_res_defs_intel();
    911	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
    912		rdt_init_res_defs_amd();
    913}
    914
    915static enum cpuhp_state rdt_online;
    916
    917/* Runs once on the BSP during boot. */
    918void resctrl_cpu_detect(struct cpuinfo_x86 *c)
    919{
    920	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
    921		c->x86_cache_max_rmid  = -1;
    922		c->x86_cache_occ_scale = -1;
    923		c->x86_cache_mbm_width_offset = -1;
    924		return;
    925	}
    926
    927	/* will be overridden if occupancy monitoring exists */
    928	c->x86_cache_max_rmid = cpuid_ebx(0xf);
    929
    930	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
    931	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
    932	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
    933		u32 eax, ebx, ecx, edx;
    934
    935		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
    936		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
    937
    938		c->x86_cache_max_rmid  = ecx;
    939		c->x86_cache_occ_scale = ebx;
    940		c->x86_cache_mbm_width_offset = eax & 0xff;
    941
    942		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
    943			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
    944	}
    945}
    946
    947static int __init resctrl_late_init(void)
    948{
    949	struct rdt_resource *r;
    950	int state, ret;
    951
    952	/*
     953	 * Initialize functions (or definitions) that differ
    954	 * between vendors here.
    955	 */
    956	rdt_init_res_defs();
    957
    958	check_quirks();
    959
    960	if (!get_rdt_resources())
    961		return -ENODEV;
    962
    963	rdt_init_padding();
    964
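        	/*
        	 * CPUHP_AP_ONLINE_DYN allocates a dynamic hotplug state;
        	 * resctrl_online_cpu() also runs for CPUs that are already online
        	 * when the state is registered.
        	 */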
    965	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
    966				  "x86/resctrl/cat:online:",
    967				  resctrl_online_cpu, resctrl_offline_cpu);
    968	if (state < 0)
    969		return state;
    970
    971	ret = rdtgroup_init();
    972	if (ret) {
    973		cpuhp_remove_state(state);
    974		return ret;
    975	}
    976	rdt_online = state;
    977
    978	for_each_alloc_capable_rdt_resource(r)
    979		pr_info("%s allocation detected\n", r->name);
    980
    981	for_each_mon_capable_rdt_resource(r)
    982		pr_info("%s monitoring detected\n", r->name);
    983
    984	return 0;
    985}
    986
    987late_initcall(resctrl_late_init);
    988
    989static void __exit resctrl_exit(void)
    990{
    991	cpuhp_remove_state(rdt_online);
    992	rdtgroup_exit();
    993}
    994
    995__exitcall(resctrl_exit);