cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cacheinfo.c (28949B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
       3 *	Routines to identify caches on Intel CPUs.
      4 *
      5 *	Changes:
      6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
      7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
      8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
      9 */
     10
     11#include <linux/slab.h>
     12#include <linux/cacheinfo.h>
     13#include <linux/cpu.h>
     14#include <linux/sched.h>
     15#include <linux/capability.h>
     16#include <linux/sysfs.h>
     17#include <linux/pci.h>
     18
     19#include <asm/cpufeature.h>
     20#include <asm/cacheinfo.h>
     21#include <asm/amd_nb.h>
     22#include <asm/smp.h>
     23
     24#include "cpu.h"
     25
     26#define LVL_1_INST	1
     27#define LVL_1_DATA	2
     28#define LVL_2		3
     29#define LVL_3		4
     30#define LVL_TRACE	5
     31
     32struct _cache_table {
     33	unsigned char descriptor;
     34	char cache_type;
     35	short size;
     36};
     37
     38#define MB(x)	((x) * 1024)
     39
      40/* All the cache descriptor types we care about (no TLB entries);
      41   sizes are in KB, or K-uops for the trace-cache descriptors */
     42
     43static const struct _cache_table cache_table[] =
     44{
     45	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
     46	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
     47	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
     48	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
     49	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
     50	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
     51	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
     52	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
     53	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     54	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
     55	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
     56	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
     57	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
     58	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
     59	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     60	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
     61	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
     62	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     63	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
     64	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     65	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
     66	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
     67	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
     68	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
     69	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
     70	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
     71	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
     72	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
     73	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
     74	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
     75	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
     76	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
     77	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
     78	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
     79	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
     80	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
     81	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     82	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     83	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
     84	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
     85	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
     86	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
     87	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
     88	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
     89	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
     90	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
     91	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
     92	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
     93	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
     94	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
     95	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
     96	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
     97	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
     98	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
     99	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
    100	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
    101	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
    102	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
    103	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
    104	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
    105	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
    106	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
    107	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
    108	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
    109	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
    110	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
    111	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
    112	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
    113	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
    114	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
    115	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
    116	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
    117	{ 0x00, 0, 0}
    118};
    119
    120
    121enum _cache_type {
    122	CTYPE_NULL = 0,
    123	CTYPE_DATA = 1,
    124	CTYPE_INST = 2,
    125	CTYPE_UNIFIED = 3
    126};
    127
    128union _cpuid4_leaf_eax {
    129	struct {
    130		enum _cache_type	type:5;
    131		unsigned int		level:3;
    132		unsigned int		is_self_initializing:1;
    133		unsigned int		is_fully_associative:1;
    134		unsigned int		reserved:4;
    135		unsigned int		num_threads_sharing:12;
    136		unsigned int		num_cores_on_die:6;
    137	} split;
    138	u32 full;
    139};
    140
    141union _cpuid4_leaf_ebx {
    142	struct {
    143		unsigned int		coherency_line_size:12;
    144		unsigned int		physical_line_partition:10;
    145		unsigned int		ways_of_associativity:10;
    146	} split;
    147	u32 full;
    148};
    149
    150union _cpuid4_leaf_ecx {
    151	struct {
    152		unsigned int		number_of_sets:32;
    153	} split;
    154	u32 full;
    155};
    156
    157struct _cpuid4_info_regs {
    158	union _cpuid4_leaf_eax eax;
    159	union _cpuid4_leaf_ebx ebx;
    160	union _cpuid4_leaf_ecx ecx;
    161	unsigned int id;
    162	unsigned long size;
    163	struct amd_northbridge *nb;
    164};
    165
    166static unsigned short num_cache_leaves;
    167
    168/* AMD doesn't have CPUID4. Emulate it here to report the same
    169   information to the user.  This makes some assumptions about the machine:
     170   L2 not shared, no SMT, etc., which is currently true on AMD CPUs.
    171
    172   In theory the TLBs could be reported as fake type (they are in "dummy").
    173   Maybe later */
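        /* The l1/l2/l3 unions below mirror the registers returned by
           CPUID 0x80000005 (L1D in ECX, L1I in EDX) and CPUID 0x80000006
           (L2 in ECX, L3 in EDX), as read by amd_cpuid4() below. */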
    174union l1_cache {
    175	struct {
    176		unsigned line_size:8;
    177		unsigned lines_per_tag:8;
    178		unsigned assoc:8;
    179		unsigned size_in_kb:8;
    180	};
    181	unsigned val;
    182};
    183
    184union l2_cache {
    185	struct {
    186		unsigned line_size:8;
    187		unsigned lines_per_tag:4;
    188		unsigned assoc:4;
    189		unsigned size_in_kb:16;
    190	};
    191	unsigned val;
    192};
    193
    194union l3_cache {
    195	struct {
    196		unsigned line_size:8;
    197		unsigned lines_per_tag:4;
    198		unsigned assoc:4;
    199		unsigned res:2;
    200		unsigned size_encoded:14;
    201	};
    202	unsigned val;
    203};
    204
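        /* Associativity encoding used by CPUID 0x80000006 for L2/L3: the raw
           4-bit field indexes this table to yield the number of ways. */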
    205static const unsigned short assocs[] = {
    206	[1] = 1,
    207	[2] = 2,
    208	[4] = 4,
    209	[6] = 8,
    210	[8] = 16,
    211	[0xa] = 32,
    212	[0xb] = 48,
    213	[0xc] = 64,
    214	[0xd] = 96,
    215	[0xe] = 128,
    216	[0xf] = 0xffff /* fully associative - no way to show this currently */
    217};
    218
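        /* Per emulated leaf (0 = L1D, 1 = L1I, 2 = L2, 3 = L3): the cache level
           and the CPUID-4 style type (1 = data, 2 = inst, 3 = unified). */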
    219static const unsigned char levels[] = { 1, 1, 2, 3 };
    220static const unsigned char types[] = { 1, 2, 3, 3 };
    221
    222static const enum cache_type cache_type_map[] = {
    223	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
    224	[CTYPE_DATA] = CACHE_TYPE_DATA,
    225	[CTYPE_INST] = CACHE_TYPE_INST,
    226	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
    227};
    228
    229static void
    230amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
    231		     union _cpuid4_leaf_ebx *ebx,
    232		     union _cpuid4_leaf_ecx *ecx)
    233{
    234	unsigned dummy;
    235	unsigned line_size, lines_per_tag, assoc, size_in_kb;
    236	union l1_cache l1i, l1d;
    237	union l2_cache l2;
    238	union l3_cache l3;
    239	union l1_cache *l1 = &l1d;
    240
    241	eax->full = 0;
    242	ebx->full = 0;
    243	ecx->full = 0;
    244
    245	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
    246	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
    247
    248	switch (leaf) {
    249	case 1:
    250		l1 = &l1i;
    251		fallthrough;
    252	case 0:
    253		if (!l1->val)
    254			return;
    255		assoc = assocs[l1->assoc];
    256		line_size = l1->line_size;
    257		lines_per_tag = l1->lines_per_tag;
    258		size_in_kb = l1->size_in_kb;
    259		break;
    260	case 2:
    261		if (!l2.val)
    262			return;
    263		assoc = assocs[l2.assoc];
    264		line_size = l2.line_size;
    265		lines_per_tag = l2.lines_per_tag;
    266		/* cpu_data has errata corrections for K7 applied */
    267		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
    268		break;
    269	case 3:
    270		if (!l3.val)
    271			return;
    272		assoc = assocs[l3.assoc];
    273		line_size = l3.line_size;
    274		lines_per_tag = l3.lines_per_tag;
    275		size_in_kb = l3.size_encoded * 512;
    276		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
    277			size_in_kb = size_in_kb >> 1;
    278			assoc = assoc >> 1;
    279		}
    280		break;
    281	default:
    282		return;
    283	}
    284
    285	eax->split.is_self_initializing = 1;
    286	eax->split.type = types[leaf];
    287	eax->split.level = levels[leaf];
    288	eax->split.num_threads_sharing = 0;
    289	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
    290
    291
    292	if (assoc == 0xffff)
    293		eax->split.is_fully_associative = 1;
    294	ebx->split.coherency_line_size = line_size - 1;
    295	ebx->split.ways_of_associativity = assoc - 1;
    296	ebx->split.physical_line_partition = lines_per_tag - 1;
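        	/*
        	 * sets = size / (line size * ways); as in CPUID leaf 4, the
        	 * stored field holds the count minus one.
        	 */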
    297	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
    298		(ebx->split.ways_of_associativity + 1) - 1;
    299}
    300
    301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
    302
    303/*
    304 * L3 cache descriptors
    305 */
    306static void amd_calc_l3_indices(struct amd_northbridge *nb)
    307{
    308	struct amd_l3_cache *l3 = &nb->l3_cache;
    309	unsigned int sc0, sc1, sc2, sc3;
    310	u32 val = 0;
    311
    312	pci_read_config_dword(nb->misc, 0x1C4, &val);
    313
    314	/* calculate subcache sizes */
    315	l3->subcaches[0] = sc0 = !(val & BIT(0));
    316	l3->subcaches[1] = sc1 = !(val & BIT(4));
    317
    318	if (boot_cpu_data.x86 == 0x15) {
    319		l3->subcaches[0] = sc0 += !(val & BIT(1));
    320		l3->subcaches[1] = sc1 += !(val & BIT(5));
    321	}
    322
    323	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
    324	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
    325
    326	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
    327}
    328
    329/*
     330 * Check whether a slot used for disabling an L3 index is occupied.
     331 * @nb: northbridge descriptor for the node containing the L3 cache
     332 * @slot: slot number (0..1)
     333 *
     334 * @returns: the disabled index if the slot is in use, or a negative value if it is free.
    335 */
    336static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
    337{
    338	unsigned int reg = 0;
    339
    340	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
    341
    342	/* check whether this slot is activated already */
    343	if (reg & (3UL << 30))
    344		return reg & 0xfff;
    345
    346	return -1;
    347}
    348
    349static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
    350				  unsigned int slot)
    351{
    352	int index;
    353	struct amd_northbridge *nb = this_leaf->priv;
    354
    355	index = amd_get_l3_disable_slot(nb, slot);
    356	if (index >= 0)
    357		return sprintf(buf, "%d\n", index);
    358
    359	return sprintf(buf, "FREE\n");
    360}
    361
    362#define SHOW_CACHE_DISABLE(slot)					\
    363static ssize_t								\
    364cache_disable_##slot##_show(struct device *dev,				\
    365			    struct device_attribute *attr, char *buf)	\
    366{									\
    367	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
    368	return show_cache_disable(this_leaf, buf, slot);		\
    369}
    370SHOW_CACHE_DISABLE(0)
    371SHOW_CACHE_DISABLE(1)
    372
    373static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
    374				 unsigned slot, unsigned long idx)
    375{
    376	int i;
    377
    378	idx |= BIT(30);
    379
    380	/*
     381	 * Disable the index in all 4 subcaches.
    382	 */
    383	for (i = 0; i < 4; i++) {
    384		u32 reg = idx | (i << 20);
    385
    386		if (!nb->l3_cache.subcaches[i])
    387			continue;
    388
    389		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
    390
    391		/*
     392		 * We need to WBINVD on a core on the node containing the L3
     393		 * cache whose indices we disable; a plain wbinvd() on the
     394		 * current CPU is therefore not sufficient.
    395		 */
    396		wbinvd_on_cpu(cpu);
    397
    398		reg |= BIT(31);
    399		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
    400	}
    401}
    402
    403/*
     404 * Disable an L3 cache index by using a disable-slot.
     405 *
     406 * @nb:    northbridge descriptor for the node containing the L3 cache
    407 * @cpu:   A CPU on the node containing the L3 cache
    408 * @slot:  slot number (0..1)
    409 * @index: index to disable
    410 *
    411 * @return: 0 on success, error status on failure
    412 */
    413static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
    414			    unsigned slot, unsigned long index)
    415{
    416	int ret = 0;
    417
    418	/*  check if @slot is already used or the index is already disabled */
    419	ret = amd_get_l3_disable_slot(nb, slot);
    420	if (ret >= 0)
    421		return -EEXIST;
    422
    423	if (index > nb->l3_cache.indices)
    424		return -EINVAL;
    425
    426	/* check whether the other slot has disabled the same index already */
    427	if (index == amd_get_l3_disable_slot(nb, !slot))
    428		return -EEXIST;
    429
    430	amd_l3_disable_index(nb, cpu, slot, index);
    431
    432	return 0;
    433}
    434
    435static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
    436				   const char *buf, size_t count,
    437				   unsigned int slot)
    438{
    439	unsigned long val = 0;
    440	int cpu, err = 0;
    441	struct amd_northbridge *nb = this_leaf->priv;
    442
    443	if (!capable(CAP_SYS_ADMIN))
    444		return -EPERM;
    445
    446	cpu = cpumask_first(&this_leaf->shared_cpu_map);
    447
    448	if (kstrtoul(buf, 10, &val) < 0)
    449		return -EINVAL;
    450
    451	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
    452	if (err) {
    453		if (err == -EEXIST)
    454			pr_warn("L3 slot %d in use/index already disabled!\n",
    455				   slot);
    456		return err;
    457	}
    458	return count;
    459}
    460
    461#define STORE_CACHE_DISABLE(slot)					\
    462static ssize_t								\
    463cache_disable_##slot##_store(struct device *dev,			\
    464			     struct device_attribute *attr,		\
    465			     const char *buf, size_t count)		\
    466{									\
    467	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
    468	return store_cache_disable(this_leaf, buf, count, slot);	\
    469}
    470STORE_CACHE_DISABLE(0)
    471STORE_CACHE_DISABLE(1)
    472
    473static ssize_t subcaches_show(struct device *dev,
    474			      struct device_attribute *attr, char *buf)
    475{
    476	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
    477	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
    478
    479	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
    480}
    481
    482static ssize_t subcaches_store(struct device *dev,
    483			       struct device_attribute *attr,
    484			       const char *buf, size_t count)
    485{
    486	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
    487	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
    488	unsigned long val;
    489
    490	if (!capable(CAP_SYS_ADMIN))
    491		return -EPERM;
    492
    493	if (kstrtoul(buf, 16, &val) < 0)
    494		return -EINVAL;
    495
    496	if (amd_set_subcaches(cpu, val))
    497		return -EINVAL;
    498
    499	return count;
    500}
    501
    502static DEVICE_ATTR_RW(cache_disable_0);
    503static DEVICE_ATTR_RW(cache_disable_1);
    504static DEVICE_ATTR_RW(subcaches);
    505
    506static umode_t
    507cache_private_attrs_is_visible(struct kobject *kobj,
    508			       struct attribute *attr, int unused)
    509{
    510	struct device *dev = kobj_to_dev(kobj);
    511	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
    512	umode_t mode = attr->mode;
    513
    514	if (!this_leaf->priv)
    515		return 0;
    516
    517	if ((attr == &dev_attr_subcaches.attr) &&
    518	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
    519		return mode;
    520
    521	if ((attr == &dev_attr_cache_disable_0.attr ||
    522	     attr == &dev_attr_cache_disable_1.attr) &&
    523	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
    524		return mode;
    525
    526	return 0;
    527}
    528
    529static struct attribute_group cache_private_group = {
    530	.is_visible = cache_private_attrs_is_visible,
    531};
    532
    533static void init_amd_l3_attrs(void)
    534{
    535	int n = 1;
    536	static struct attribute **amd_l3_attrs;
    537
    538	if (amd_l3_attrs) /* already initialized */
    539		return;
    540
    541	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
    542		n += 2;
    543	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
    544		n += 1;
    545
    546	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
    547	if (!amd_l3_attrs)
    548		return;
    549
    550	n = 0;
    551	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
    552		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
    553		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
    554	}
    555	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
    556		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
    557
    558	cache_private_group.attrs = amd_l3_attrs;
    559}
    560
    561const struct attribute_group *
    562cache_get_priv_group(struct cacheinfo *this_leaf)
    563{
    564	struct amd_northbridge *nb = this_leaf->priv;
    565
    566	if (this_leaf->level < 3 || !nb)
    567		return NULL;
    568
    569	if (nb && nb->l3_cache.indices)
    570		init_amd_l3_attrs();
    571
    572	return &cache_private_group;
    573}
    574
    575static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
    576{
    577	int node;
    578
    579	/* only for L3, and not in virtualized environments */
    580	if (index < 3)
    581		return;
    582
    583	node = topology_die_id(smp_processor_id());
    584	this_leaf->nb = node_to_amd_nb(node);
    585	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
    586		amd_calc_l3_indices(this_leaf->nb);
    587}
    588#else
    589#define amd_init_l3_cache(x, y)
    590#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
    591
    592static int
    593cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
    594{
    595	union _cpuid4_leaf_eax	eax;
    596	union _cpuid4_leaf_ebx	ebx;
    597	union _cpuid4_leaf_ecx	ecx;
    598	unsigned		edx;
    599
    600	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
    601		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
    602			cpuid_count(0x8000001d, index, &eax.full,
    603				    &ebx.full, &ecx.full, &edx);
    604		else
    605			amd_cpuid4(index, &eax, &ebx, &ecx);
    606		amd_init_l3_cache(this_leaf, index);
    607	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
    608		cpuid_count(0x8000001d, index, &eax.full,
    609			    &ebx.full, &ecx.full, &edx);
    610		amd_init_l3_cache(this_leaf, index);
    611	} else {
    612		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
    613	}
    614
    615	if (eax.split.type == CTYPE_NULL)
    616		return -EIO; /* better error ? */
    617
    618	this_leaf->eax = eax;
    619	this_leaf->ebx = ebx;
    620	this_leaf->ecx = ecx;
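        	/*
        	 * Total size = sets * line size * physical line partitions * ways;
        	 * each CPUID field holds its value minus one.
        	 */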
    621	this_leaf->size = (ecx.split.number_of_sets          + 1) *
    622			  (ebx.split.coherency_line_size     + 1) *
    623			  (ebx.split.physical_line_partition + 1) *
    624			  (ebx.split.ways_of_associativity   + 1);
    625	return 0;
    626}
    627
    628static int find_num_cache_leaves(struct cpuinfo_x86 *c)
    629{
    630	unsigned int		eax, ebx, ecx, edx, op;
    631	union _cpuid4_leaf_eax	cache_eax;
    632	int 			i = -1;
    633
    634	if (c->x86_vendor == X86_VENDOR_AMD ||
    635	    c->x86_vendor == X86_VENDOR_HYGON)
    636		op = 0x8000001d;
    637	else
    638		op = 4;
    639
    640	do {
    641		++i;
    642		/* Do cpuid(op) loop to find out num_cache_leaves */
    643		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
    644		cache_eax.full = eax;
    645	} while (cache_eax.split.type != CTYPE_NULL);
    646	return i;
    647}
    648
    649void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
    650{
    651	/*
    652	 * We may have multiple LLCs if L3 caches exist, so check if we
    653	 * have an L3 cache by looking at the L3 cache CPUID leaf.
    654	 */
    655	if (!cpuid_edx(0x80000006))
    656		return;
    657
    658	if (c->x86 < 0x17) {
    659		/* LLC is at the node level. */
    660		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
    661	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
    662		/*
    663		 * LLC is at the core complex level.
    664		 * Core complex ID is ApicId[3] for these processors.
    665		 */
    666		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
    667	} else {
    668		/*
    669		 * LLC ID is calculated from the number of threads sharing the
    670		 * cache.
     671		 */
    672		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
    673		u32 llc_index = find_num_cache_leaves(c) - 1;
    674
    675		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
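        		/*
        		 * EAX[25:14] of CPUID 0x8000001d holds the number of threads
        		 * sharing this cache, minus one.
        		 */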
    676		if (eax)
    677			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
    678
    679		if (num_sharing_cache) {
    680			int bits = get_count_order(num_sharing_cache);
    681
    682			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
    683		}
    684	}
    685}
    686
    687void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
    688{
    689	/*
    690	 * We may have multiple LLCs if L3 caches exist, so check if we
    691	 * have an L3 cache by looking at the L3 cache CPUID leaf.
    692	 */
    693	if (!cpuid_edx(0x80000006))
    694		return;
    695
    696	/*
    697	 * LLC is at the core complex level.
    698	 * Core complex ID is ApicId[3] for these processors.
    699	 */
    700	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
    701}
    702
    703void init_amd_cacheinfo(struct cpuinfo_x86 *c)
    704{
    705
    706	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
    707		num_cache_leaves = find_num_cache_leaves(c);
    708	} else if (c->extended_cpuid_level >= 0x80000006) {
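        		/*
        		 * EDX[15:12] of CPUID 0x80000006 encodes the L3 associativity;
        		 * a nonzero value means an L3 cache is present.
        		 */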
    709		if (cpuid_edx(0x80000006) & 0xf000)
    710			num_cache_leaves = 4;
    711		else
    712			num_cache_leaves = 3;
    713	}
    714}
    715
    716void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
    717{
    718	num_cache_leaves = find_num_cache_leaves(c);
    719}
    720
    721void init_intel_cacheinfo(struct cpuinfo_x86 *c)
    722{
    723	/* Cache sizes */
    724	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
    725	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
    726	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
    727	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
    728#ifdef CONFIG_SMP
    729	unsigned int cpu = c->cpu_index;
    730#endif
    731
    732	if (c->cpuid_level > 3) {
    733		static int is_initialized;
    734
    735		if (is_initialized == 0) {
    736			/* Init num_cache_leaves from boot CPU */
    737			num_cache_leaves = find_num_cache_leaves(c);
    738			is_initialized++;
    739		}
    740
    741		/*
    742		 * Whenever possible use cpuid(4), deterministic cache
    743		 * parameters cpuid leaf to find the cache details
    744		 */
    745		for (i = 0; i < num_cache_leaves; i++) {
    746			struct _cpuid4_info_regs this_leaf = {};
    747			int retval;
    748
    749			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
    750			if (retval < 0)
    751				continue;
    752
    753			switch (this_leaf.eax.split.level) {
    754			case 1:
    755				if (this_leaf.eax.split.type == CTYPE_DATA)
    756					new_l1d = this_leaf.size/1024;
    757				else if (this_leaf.eax.split.type == CTYPE_INST)
    758					new_l1i = this_leaf.size/1024;
    759				break;
    760			case 2:
    761				new_l2 = this_leaf.size/1024;
    762				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
    763				index_msb = get_count_order(num_threads_sharing);
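        				/* CPUs sharing the cache agree on APIC ID bits above index_msb */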
    764				l2_id = c->apicid & ~((1 << index_msb) - 1);
    765				break;
    766			case 3:
    767				new_l3 = this_leaf.size/1024;
    768				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
    769				index_msb = get_count_order(num_threads_sharing);
    770				l3_id = c->apicid & ~((1 << index_msb) - 1);
    771				break;
    772			default:
    773				break;
    774			}
    775		}
    776	}
    777	/*
    778	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
     779	 * the trace cache.
    780	 */
    781	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
    782		/* supports eax=2  call */
    783		int j, n;
    784		unsigned int regs[4];
    785		unsigned char *dp = (unsigned char *)regs;
    786		int only_trace = 0;
    787
    788		if (num_cache_leaves != 0 && c->x86 == 15)
    789			only_trace = 1;
    790
    791		/* Number of times to iterate */
    792		n = cpuid_eax(2) & 0xFF;
    793
    794		for (i = 0 ; i < n ; i++) {
    795			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
    796
    797			/* If bit 31 is set, this is an unknown format */
    798			for (j = 0 ; j < 3 ; j++)
    799				if (regs[j] & (1 << 31))
    800					regs[j] = 0;
    801
     802			/* Byte 0 of EAX is the iteration count, not a descriptor */
    803			for (j = 1 ; j < 16 ; j++) {
    804				unsigned char des = dp[j];
    805				unsigned char k = 0;
    806
    807				/* look up this descriptor in the table */
    808				while (cache_table[k].descriptor != 0) {
    809					if (cache_table[k].descriptor == des) {
    810						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
    811							break;
    812						switch (cache_table[k].cache_type) {
    813						case LVL_1_INST:
    814							l1i += cache_table[k].size;
    815							break;
    816						case LVL_1_DATA:
    817							l1d += cache_table[k].size;
    818							break;
    819						case LVL_2:
    820							l2 += cache_table[k].size;
    821							break;
    822						case LVL_3:
    823							l3 += cache_table[k].size;
    824							break;
    825						case LVL_TRACE:
    826							trace += cache_table[k].size;
    827							break;
    828						}
    829
    830						break;
    831					}
    832
    833					k++;
    834				}
    835			}
    836		}
    837	}
    838
    839	if (new_l1d)
    840		l1d = new_l1d;
    841
    842	if (new_l1i)
    843		l1i = new_l1i;
    844
    845	if (new_l2) {
    846		l2 = new_l2;
    847#ifdef CONFIG_SMP
    848		per_cpu(cpu_llc_id, cpu) = l2_id;
    849		per_cpu(cpu_l2c_id, cpu) = l2_id;
    850#endif
    851	}
    852
    853	if (new_l3) {
    854		l3 = new_l3;
    855#ifdef CONFIG_SMP
    856		per_cpu(cpu_llc_id, cpu) = l3_id;
    857#endif
    858	}
    859
    860#ifdef CONFIG_SMP
    861	/*
    862	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
     863	 * turn means that the only possibility is SMT (as indicated in
    864	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
    865	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
    866	 * c->phys_proc_id.
    867	 */
    868	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
    869		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
    870#endif
    871
    872	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
    873
    874	if (!l2)
    875		cpu_detect_cache_sizes(c);
    876}
    877
    878static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
    879				    struct _cpuid4_info_regs *base)
    880{
    881	struct cpu_cacheinfo *this_cpu_ci;
    882	struct cacheinfo *this_leaf;
    883	int i, sibling;
    884
    885	/*
    886	 * For L3, always use the pre-calculated cpu_llc_shared_mask
    887	 * to derive shared_cpu_map.
    888	 */
    889	if (index == 3) {
    890		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
    891			this_cpu_ci = get_cpu_cacheinfo(i);
    892			if (!this_cpu_ci->info_list)
    893				continue;
    894			this_leaf = this_cpu_ci->info_list + index;
    895			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
    896				if (!cpu_online(sibling))
    897					continue;
    898				cpumask_set_cpu(sibling,
    899						&this_leaf->shared_cpu_map);
    900			}
    901		}
    902	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
    903		unsigned int apicid, nshared, first, last;
    904
    905		nshared = base->eax.split.num_threads_sharing + 1;
    906		apicid = cpu_data(cpu).apicid;
    907		first = apicid - (apicid % nshared);
    908		last = first + nshared - 1;
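        		/* CPUs whose APIC IDs fall within [first, last] share this cache */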
    909
    910		for_each_online_cpu(i) {
    911			this_cpu_ci = get_cpu_cacheinfo(i);
    912			if (!this_cpu_ci->info_list)
    913				continue;
    914
    915			apicid = cpu_data(i).apicid;
    916			if ((apicid < first) || (apicid > last))
    917				continue;
    918
    919			this_leaf = this_cpu_ci->info_list + index;
    920
    921			for_each_online_cpu(sibling) {
    922				apicid = cpu_data(sibling).apicid;
    923				if ((apicid < first) || (apicid > last))
    924					continue;
    925				cpumask_set_cpu(sibling,
    926						&this_leaf->shared_cpu_map);
    927			}
    928		}
    929	} else
    930		return 0;
    931
    932	return 1;
    933}
    934
    935static void __cache_cpumap_setup(unsigned int cpu, int index,
    936				 struct _cpuid4_info_regs *base)
    937{
    938	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
    939	struct cacheinfo *this_leaf, *sibling_leaf;
    940	unsigned long num_threads_sharing;
    941	int index_msb, i;
    942	struct cpuinfo_x86 *c = &cpu_data(cpu);
    943
    944	if (c->x86_vendor == X86_VENDOR_AMD ||
    945	    c->x86_vendor == X86_VENDOR_HYGON) {
    946		if (__cache_amd_cpumap_setup(cpu, index, base))
    947			return;
    948	}
    949
    950	this_leaf = this_cpu_ci->info_list + index;
    951	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
    952
    953	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
    954	if (num_threads_sharing == 1)
    955		return;
    956
    957	index_msb = get_count_order(num_threads_sharing);
    958
    959	for_each_online_cpu(i)
    960		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
    961			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
    962
    963			if (i == cpu || !sib_cpu_ci->info_list)
     964				continue; /* skip itself and CPUs without cacheinfo */
    965			sibling_leaf = sib_cpu_ci->info_list + index;
    966			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
    967			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
    968		}
    969}
    970
    971static void ci_leaf_init(struct cacheinfo *this_leaf,
    972			 struct _cpuid4_info_regs *base)
    973{
    974	this_leaf->id = base->id;
    975	this_leaf->attributes = CACHE_ID;
    976	this_leaf->level = base->eax.split.level;
    977	this_leaf->type = cache_type_map[base->eax.split.type];
    978	this_leaf->coherency_line_size =
    979				base->ebx.split.coherency_line_size + 1;
    980	this_leaf->ways_of_associativity =
    981				base->ebx.split.ways_of_associativity + 1;
    982	this_leaf->size = base->size;
    983	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
    984	this_leaf->physical_line_partition =
    985				base->ebx.split.physical_line_partition + 1;
    986	this_leaf->priv = base->nb;
    987}
    988
    989int init_cache_level(unsigned int cpu)
    990{
    991	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
    992
    993	if (!num_cache_leaves)
    994		return -ENOENT;
    995	if (!this_cpu_ci)
    996		return -EINVAL;
    997	this_cpu_ci->num_levels = 3;
    998	this_cpu_ci->num_leaves = num_cache_leaves;
    999	return 0;
   1000}
   1001
   1002/*
    1003 * The maximum number of threads sharing the cache comes from CPUID.4:EAX[25:14],
    1004 * with ECX selecting the cache index. Right-shifting the APIC ID by that
    1005 * number's order gives the cache id for this cache node.
   1006 */
   1007static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
   1008{
   1009	struct cpuinfo_x86 *c = &cpu_data(cpu);
   1010	unsigned long num_threads_sharing;
   1011	int index_msb;
   1012
   1013	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
   1014	index_msb = get_count_order(num_threads_sharing);
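        	/* e.g. 12 sharing threads -> index_msb = 4 -> id = apicid >> 4 */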
   1015	id4_regs->id = c->apicid >> index_msb;
   1016}
   1017
   1018int populate_cache_leaves(unsigned int cpu)
   1019{
   1020	unsigned int idx, ret;
   1021	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
   1022	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
   1023	struct _cpuid4_info_regs id4_regs = {};
   1024
   1025	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
   1026		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
   1027		if (ret)
   1028			return ret;
   1029		get_cache_id(cpu, &id4_regs);
   1030		ci_leaf_init(this_leaf++, &id4_regs);
   1031		__cache_cpumap_setup(cpu, idx, &id4_regs);
   1032	}
   1033	this_cpu_ci->cpu_map_populated = true;
   1034
   1035	return 0;
   1036}