cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

uncore.c (48157B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2#include <linux/module.h>
      3
      4#include <asm/cpu_device_id.h>
      5#include <asm/intel-family.h>
      6#include "uncore.h"
      7#include "uncore_discovery.h"
      8
      9static bool uncore_no_discover;
     10module_param(uncore_no_discover, bool, 0);
     11MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
     12				     "(default: enable the discovery mechanism).");
     13struct intel_uncore_type *empty_uncore[] = { NULL, };
     14struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
     15struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
     16struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
     17
     18static bool pcidrv_registered;
     19struct pci_driver *uncore_pci_driver;
     20/* The PCI driver for the devices which the uncore driver doesn't own. */
     21struct pci_driver *uncore_pci_sub_driver;
     22/* pci bus to socket mapping */
     23DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
     24struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
     25struct pci_extra_dev *uncore_extra_pci_dev;
     26int __uncore_max_dies;
     27
     28/* mask of cpus that collect uncore events */
     29static cpumask_t uncore_cpu_mask;
     30
     31/* constraint for the fixed counter */
     32static struct event_constraint uncore_constraint_fixed =
     33	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
     34struct event_constraint uncore_constraint_empty =
     35	EVENT_CONSTRAINT(0, 0, 0);
     36
     37MODULE_LICENSE("GPL");
     38
     39int uncore_pcibus_to_dieid(struct pci_bus *bus)
     40{
     41	struct pci2phy_map *map;
     42	int die_id = -1;
     43
     44	raw_spin_lock(&pci2phy_map_lock);
     45	list_for_each_entry(map, &pci2phy_map_head, list) {
     46		if (map->segment == pci_domain_nr(bus)) {
     47			die_id = map->pbus_to_dieid[bus->number];
     48			break;
     49		}
     50	}
     51	raw_spin_unlock(&pci2phy_map_lock);
     52
     53	return die_id;
     54}
     55
     56int uncore_die_to_segment(int die)
     57{
     58	struct pci_bus *bus = NULL;
     59
     60	/* Find first pci bus which attributes to specified die. */
     61	while ((bus = pci_find_next_bus(bus)) &&
     62	       (die != uncore_pcibus_to_dieid(bus)))
     63		;
     64
     65	return bus ? pci_domain_nr(bus) : -EINVAL;
     66}
     67
     68static void uncore_free_pcibus_map(void)
     69{
     70	struct pci2phy_map *map, *tmp;
     71
     72	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
     73		list_del(&map->list);
     74		kfree(map);
     75	}
     76}
     77
     78struct pci2phy_map *__find_pci2phy_map(int segment)
     79{
     80	struct pci2phy_map *map, *alloc = NULL;
     81	int i;
     82
     83	lockdep_assert_held(&pci2phy_map_lock);
     84
     85lookup:
     86	list_for_each_entry(map, &pci2phy_map_head, list) {
     87		if (map->segment == segment)
     88			goto end;
     89	}
     90
     91	if (!alloc) {
     92		raw_spin_unlock(&pci2phy_map_lock);
     93		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
     94		raw_spin_lock(&pci2phy_map_lock);
     95
     96		if (!alloc)
     97			return NULL;
     98
     99		goto lookup;
    100	}
    101
    102	map = alloc;
    103	alloc = NULL;
    104	map->segment = segment;
    105	for (i = 0; i < 256; i++)
    106		map->pbus_to_dieid[i] = -1;
    107	list_add_tail(&map->list, &pci2phy_map_head);
    108
    109end:
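	/*
	 * If another CPU installed a map for this segment while the lock was
	 * dropped for the allocation, the spare allocation is freed here
	 * (kfree(NULL) is a harmless no-op on the other paths).
	 */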
    110	kfree(alloc);
    111	return map;
    112}
    113
    114ssize_t uncore_event_show(struct device *dev,
    115			  struct device_attribute *attr, char *buf)
    116{
    117	struct uncore_event_desc *event =
    118		container_of(attr, struct uncore_event_desc, attr);
    119	return sprintf(buf, "%s", event->config);
    120}
    121
    122struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
    123{
    124	unsigned int dieid = topology_logical_die_id(cpu);
    125
    126	/*
    127	 * The unsigned check also catches the '-1' return value for
    128	 * non-existent mappings in the topology map.
    129	 */
    130	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
    131}
    132
    133u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
    134{
    135	u64 count;
    136
    137	rdmsrl(event->hw.event_base, count);
    138
    139	return count;
    140}
    141
    142void uncore_mmio_exit_box(struct intel_uncore_box *box)
    143{
    144	if (box->io_addr)
    145		iounmap(box->io_addr);
    146}
    147
    148u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
    149			     struct perf_event *event)
    150{
    151	if (!box->io_addr)
    152		return 0;
    153
    154	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
    155		return 0;
    156
    157	return readq(box->io_addr + event->hw.event_base);
    158}
    159
    160/*
    161 * generic get constraint function for shared match/mask registers.
    162 */
    163struct event_constraint *
    164uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
    165{
    166	struct intel_uncore_extra_reg *er;
    167	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
    168	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
    169	unsigned long flags;
    170	bool ok = false;
    171
    172	/*
    173	 * reg->alloc can be set due to existing state, so for fake box we
    174	 * need to ignore this, otherwise we might fail to allocate proper
    175	 * fake state for this extra reg constraint.
    176	 */
    177	if (reg1->idx == EXTRA_REG_NONE ||
    178	    (!uncore_box_is_fake(box) && reg1->alloc))
    179		return NULL;
    180
    181	er = &box->shared_regs[reg1->idx];
    182	raw_spin_lock_irqsave(&er->lock, flags);
    183	if (!atomic_read(&er->ref) ||
    184	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
    185		atomic_inc(&er->ref);
    186		er->config1 = reg1->config;
    187		er->config2 = reg2->config;
    188		ok = true;
    189	}
    190	raw_spin_unlock_irqrestore(&er->lock, flags);
    191
    192	if (ok) {
    193		if (!uncore_box_is_fake(box))
    194			reg1->alloc = 1;
    195		return NULL;
    196	}
    197
    198	return &uncore_constraint_empty;
    199}
    200
    201void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
    202{
    203	struct intel_uncore_extra_reg *er;
    204	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
    205
    206	/*
    207	 * Only put constraint if extra reg was actually allocated. Also
    208	 * takes care of events which do not use an extra shared reg.
    209	 *
    210	 * Also, if this is a fake box we shouldn't touch any event state
    211	 * (reg->alloc) and we don't care about leaving inconsistent box
    212	 * state either since it will be thrown out.
    213	 */
    214	if (uncore_box_is_fake(box) || !reg1->alloc)
    215		return;
    216
    217	er = &box->shared_regs[reg1->idx];
    218	atomic_dec(&er->ref);
    219	reg1->alloc = 0;
    220}
    221
    222u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
    223{
    224	struct intel_uncore_extra_reg *er;
    225	unsigned long flags;
    226	u64 config;
    227
    228	er = &box->shared_regs[idx];
    229
    230	raw_spin_lock_irqsave(&er->lock, flags);
    231	config = er->config;
    232	raw_spin_unlock_irqrestore(&er->lock, flags);
    233
    234	return config;
    235}
    236
    237static void uncore_assign_hw_event(struct intel_uncore_box *box,
    238				   struct perf_event *event, int idx)
    239{
    240	struct hw_perf_event *hwc = &event->hw;
    241
    242	hwc->idx = idx;
    243	hwc->last_tag = ++box->tags[idx];
    244
    245	if (uncore_pmc_fixed(hwc->idx)) {
    246		hwc->event_base = uncore_fixed_ctr(box);
    247		hwc->config_base = uncore_fixed_ctl(box);
    248		return;
    249	}
    250
    251	hwc->config_base = uncore_event_ctl(box, hwc->idx);
    252	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
    253}
    254
    255void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
    256{
    257	u64 prev_count, new_count, delta;
    258	int shift;
    259
    260	if (uncore_pmc_freerunning(event->hw.idx))
    261		shift = 64 - uncore_freerunning_bits(box, event);
    262	else if (uncore_pmc_fixed(event->hw.idx))
    263		shift = 64 - uncore_fixed_ctr_bits(box);
    264	else
    265		shift = 64 - uncore_perf_ctr_bits(box);
    266
    267	/* the hrtimer might modify the previous event value */
    268again:
    269	prev_count = local64_read(&event->hw.prev_count);
    270	new_count = uncore_read_counter(box, event);
    271	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
    272		goto again;
    273
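	/*
	 * The counters are narrower than 64 bits: shifting the raw values up
	 * to bit 63 before subtracting, then shifting the difference back
	 * down, yields the delta modulo the counter width, so a hardware
	 * counter wrap is accounted for correctly.
	 */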
    274	delta = (new_count << shift) - (prev_count << shift);
    275	delta >>= shift;
    276
    277	local64_add(delta, &event->count);
    278}
    279
    280/*
    281 * The overflow interrupt is unavailable for SandyBridge-EP and broken
    282 * on SandyBridge, so we use an hrtimer to periodically poll the
    283 * counters before they can overflow.
    284 */
    285static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
    286{
    287	struct intel_uncore_box *box;
    288	struct perf_event *event;
    289	unsigned long flags;
    290	int bit;
    291
    292	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
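	/*
	 * Stop polling if the box no longer has active events or if uncore
	 * event collection was migrated to another CPU after this pinned
	 * timer was armed.
	 */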
    293	if (!box->n_active || box->cpu != smp_processor_id())
    294		return HRTIMER_NORESTART;
    295	/*
    296	 * disable local interrupts to prevent uncore_pmu_event_start/stop
    297	 * from interrupting the update process
    298	 */
    299	local_irq_save(flags);
    300
    301	/*
    302	 * handle boxes with an active event list as opposed to active
    303	 * counters
    304	 */
    305	list_for_each_entry(event, &box->active_list, active_entry) {
    306		uncore_perf_event_update(box, event);
    307	}
    308
    309	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
    310		uncore_perf_event_update(box, box->events[bit]);
    311
    312	local_irq_restore(flags);
    313
    314	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
    315	return HRTIMER_RESTART;
    316}
    317
    318void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
    319{
    320	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
    321		      HRTIMER_MODE_REL_PINNED);
    322}
    323
    324void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
    325{
    326	hrtimer_cancel(&box->hrtimer);
    327}
    328
    329static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
    330{
    331	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    332	box->hrtimer.function = uncore_pmu_hrtimer;
    333}
    334
    335static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
    336						 int node)
    337{
    338	int i, size, numshared = type->num_shared_regs;
    339	struct intel_uncore_box *box;
    340
    341	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
    342
    343	box = kzalloc_node(size, GFP_KERNEL, node);
    344	if (!box)
    345		return NULL;
    346
    347	for (i = 0; i < numshared; i++)
    348		raw_spin_lock_init(&box->shared_regs[i].lock);
    349
    350	uncore_pmu_init_hrtimer(box);
    351	box->cpu = -1;
    352	box->dieid = -1;
    353
    354	/* set default hrtimer timeout */
    355	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
    356
    357	INIT_LIST_HEAD(&box->active_list);
    358
    359	return box;
    360}
    361
    362/*
    363 * Use the uncore_pmu_event_init() pmu event_init callback
    364 * as a detection point for uncore events.
    365 */
    366static int uncore_pmu_event_init(struct perf_event *event);
    367
    368static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
    369{
    370	return &box->pmu->pmu == event->pmu;
    371}
    372
    373static int
    374uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
    375		      bool dogrp)
    376{
    377	struct perf_event *event;
    378	int n, max_count;
    379
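	/* One slot per generic counter, plus one for the fixed counter if present. */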
    380	max_count = box->pmu->type->num_counters;
    381	if (box->pmu->type->fixed_ctl)
    382		max_count++;
    383
    384	if (box->n_events >= max_count)
    385		return -EINVAL;
    386
    387	n = box->n_events;
    388
    389	if (is_box_event(box, leader)) {
    390		box->event_list[n] = leader;
    391		n++;
    392	}
    393
    394	if (!dogrp)
    395		return n;
    396
    397	for_each_sibling_event(event, leader) {
    398		if (!is_box_event(box, event) ||
    399		    event->state <= PERF_EVENT_STATE_OFF)
    400			continue;
    401
    402		if (n >= max_count)
    403			return -EINVAL;
    404
    405		box->event_list[n] = event;
    406		n++;
    407	}
    408	return n;
    409}
    410
    411static struct event_constraint *
    412uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
    413{
    414	struct intel_uncore_type *type = box->pmu->type;
    415	struct event_constraint *c;
    416
    417	if (type->ops->get_constraint) {
    418		c = type->ops->get_constraint(box, event);
    419		if (c)
    420			return c;
    421	}
    422
    423	if (event->attr.config == UNCORE_FIXED_EVENT)
    424		return &uncore_constraint_fixed;
    425
    426	if (type->constraints) {
    427		for_each_event_constraint(c, type->constraints) {
    428			if ((event->hw.config & c->cmask) == c->code)
    429				return c;
    430		}
    431	}
    432
    433	return &type->unconstrainted;
    434}
    435
    436static void uncore_put_event_constraint(struct intel_uncore_box *box,
    437					struct perf_event *event)
    438{
    439	if (box->pmu->type->ops->put_constraint)
    440		box->pmu->type->ops->put_constraint(box, event);
    441}
    442
    443static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
    444{
    445	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
    446	struct event_constraint *c;
    447	int i, wmin, wmax, ret = 0;
    448	struct hw_perf_event *hwc;
    449
    450	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
    451
    452	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
    453		c = uncore_get_event_constraint(box, box->event_list[i]);
    454		box->event_constraint[i] = c;
    455		wmin = min(wmin, c->weight);
    456		wmax = max(wmax, c->weight);
    457	}
    458
    459	/* fastpath, try to reuse previous register */
    460	for (i = 0; i < n; i++) {
    461		hwc = &box->event_list[i]->hw;
    462		c = box->event_constraint[i];
    463
    464		/* never assigned */
    465		if (hwc->idx == -1)
    466			break;
    467
    468		/* constraint still honored */
    469		if (!test_bit(hwc->idx, c->idxmsk))
    470			break;
    471
    472		/* not already used */
    473		if (test_bit(hwc->idx, used_mask))
    474			break;
    475
    476		__set_bit(hwc->idx, used_mask);
    477		if (assign)
    478			assign[i] = hwc->idx;
    479	}
    480	/* slow path */
    481	if (i != n)
    482		ret = perf_assign_events(box->event_constraint, n,
    483					 wmin, wmax, n, assign);
    484
    485	if (!assign || ret) {
    486		for (i = 0; i < n; i++)
    487			uncore_put_event_constraint(box, box->event_list[i]);
    488	}
    489	return ret ? -EINVAL : 0;
    490}
    491
    492void uncore_pmu_event_start(struct perf_event *event, int flags)
    493{
    494	struct intel_uncore_box *box = uncore_event_to_box(event);
    495	int idx = event->hw.idx;
    496
    497	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
    498		return;
    499
    500	/*
    501	 * Free running counter is read-only and always active.
    502	 * Use the current counter value as start point.
    503	 * There is no overflow interrupt for free running counter.
    504	 * Use hrtimer to periodically poll the counter to avoid overflow.
    505	 */
    506	if (uncore_pmc_freerunning(event->hw.idx)) {
    507		list_add_tail(&event->active_entry, &box->active_list);
    508		local64_set(&event->hw.prev_count,
    509			    uncore_read_counter(box, event));
    510		if (box->n_active++ == 0)
    511			uncore_pmu_start_hrtimer(box);
    512		return;
    513	}
    514
    515	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
    516		return;
    517
    518	event->hw.state = 0;
    519	box->events[idx] = event;
    520	box->n_active++;
    521	__set_bit(idx, box->active_mask);
    522
    523	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
    524	uncore_enable_event(box, event);
    525
    526	if (box->n_active == 1)
    527		uncore_pmu_start_hrtimer(box);
    528}
    529
    530void uncore_pmu_event_stop(struct perf_event *event, int flags)
    531{
    532	struct intel_uncore_box *box = uncore_event_to_box(event);
    533	struct hw_perf_event *hwc = &event->hw;
    534
    535	/* Cannot disable free running counter which is read-only */
    536	if (uncore_pmc_freerunning(hwc->idx)) {
    537		list_del(&event->active_entry);
    538		if (--box->n_active == 0)
    539			uncore_pmu_cancel_hrtimer(box);
    540		uncore_perf_event_update(box, event);
    541		return;
    542	}
    543
    544	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
    545		uncore_disable_event(box, event);
    546		box->n_active--;
    547		box->events[hwc->idx] = NULL;
    548		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
    549		hwc->state |= PERF_HES_STOPPED;
    550
    551		if (box->n_active == 0)
    552			uncore_pmu_cancel_hrtimer(box);
    553	}
    554
    555	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
    556		/*
    557		 * Drain the remaining delta count out of an event
    558		 * that we are disabling:
    559		 */
    560		uncore_perf_event_update(box, event);
    561		hwc->state |= PERF_HES_UPTODATE;
    562	}
    563}
    564
    565int uncore_pmu_event_add(struct perf_event *event, int flags)
    566{
    567	struct intel_uncore_box *box = uncore_event_to_box(event);
    568	struct hw_perf_event *hwc = &event->hw;
    569	int assign[UNCORE_PMC_IDX_MAX];
    570	int i, n, ret;
    571
    572	if (!box)
    573		return -ENODEV;
    574
    575	/*
    576	 * The free running counter is assigned in event_init().
    577	 * The free running counter event and free running counter
    578	 * are 1:1 mapped. It doesn't need to be tracked in event_list.
    579	 */
    580	if (uncore_pmc_freerunning(hwc->idx)) {
    581		if (flags & PERF_EF_START)
    582			uncore_pmu_event_start(event, 0);
    583		return 0;
    584	}
    585
    586	ret = n = uncore_collect_events(box, event, false);
    587	if (ret < 0)
    588		return ret;
    589
    590	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
    591	if (!(flags & PERF_EF_START))
    592		hwc->state |= PERF_HES_ARCH;
    593
    594	ret = uncore_assign_events(box, assign, n);
    595	if (ret)
    596		return ret;
    597
    598	/* save events moving to new counters */
    599	for (i = 0; i < box->n_events; i++) {
    600		event = box->event_list[i];
    601		hwc = &event->hw;
    602
    603		if (hwc->idx == assign[i] &&
    604			hwc->last_tag == box->tags[assign[i]])
    605			continue;
    606		/*
    607		 * Ensure we don't accidentally enable a stopped
    608		 * counter simply because we rescheduled.
    609		 */
    610		if (hwc->state & PERF_HES_STOPPED)
    611			hwc->state |= PERF_HES_ARCH;
    612
    613		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
    614	}
    615
    616	/* reprogram moved events into new counters */
    617	for (i = 0; i < n; i++) {
    618		event = box->event_list[i];
    619		hwc = &event->hw;
    620
    621		if (hwc->idx != assign[i] ||
    622			hwc->last_tag != box->tags[assign[i]])
    623			uncore_assign_hw_event(box, event, assign[i]);
    624		else if (i < box->n_events)
    625			continue;
    626
    627		if (hwc->state & PERF_HES_ARCH)
    628			continue;
    629
    630		uncore_pmu_event_start(event, 0);
    631	}
    632	box->n_events = n;
    633
    634	return 0;
    635}
    636
    637void uncore_pmu_event_del(struct perf_event *event, int flags)
    638{
    639	struct intel_uncore_box *box = uncore_event_to_box(event);
    640	int i;
    641
    642	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
    643
    644	/*
    645	 * The event for a free running counter is not tracked by event_list.
    646	 * There is no need to force event->hw.idx = -1 to reassign the counter,
    647	 * because the event and the free running counter are 1:1 mapped.
    648	 */
    649	if (uncore_pmc_freerunning(event->hw.idx))
    650		return;
    651
    652	for (i = 0; i < box->n_events; i++) {
    653		if (event == box->event_list[i]) {
    654			uncore_put_event_constraint(box, event);
    655
    656			for (++i; i < box->n_events; i++)
    657				box->event_list[i - 1] = box->event_list[i];
    658
    659			--box->n_events;
    660			break;
    661		}
    662	}
    663
    664	event->hw.idx = -1;
    665	event->hw.last_tag = ~0ULL;
    666}
    667
    668void uncore_pmu_event_read(struct perf_event *event)
    669{
    670	struct intel_uncore_box *box = uncore_event_to_box(event);
    671	uncore_perf_event_update(box, event);
    672}
    673
    674/*
    675 * validation ensures the group can be loaded onto the
    676 * PMU if it was the only group available.
    677 */
    678static int uncore_validate_group(struct intel_uncore_pmu *pmu,
    679				struct perf_event *event)
    680{
    681	struct perf_event *leader = event->group_leader;
    682	struct intel_uncore_box *fake_box;
    683	int ret = -EINVAL, n;
    684
    685	/* The free running counter is always active. */
    686	if (uncore_pmc_freerunning(event->hw.idx))
    687		return 0;
    688
    689	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
    690	if (!fake_box)
    691		return -ENOMEM;
    692
    693	fake_box->pmu = pmu;
    694	/*
    695	 * the event is not yet connected with its
    696	 * siblings therefore we must first collect
    697	 * existing siblings, then add the new event
    698	 * before we can simulate the scheduling
    699	 */
    700	n = uncore_collect_events(fake_box, leader, true);
    701	if (n < 0)
    702		goto out;
    703
    704	fake_box->n_events = n;
    705	n = uncore_collect_events(fake_box, event, false);
    706	if (n < 0)
    707		goto out;
    708
    709	fake_box->n_events = n;
    710
    711	ret = uncore_assign_events(fake_box, NULL, n);
    712out:
    713	kfree(fake_box);
    714	return ret;
    715}
    716
    717static int uncore_pmu_event_init(struct perf_event *event)
    718{
    719	struct intel_uncore_pmu *pmu;
    720	struct intel_uncore_box *box;
    721	struct hw_perf_event *hwc = &event->hw;
    722	int ret;
    723
    724	if (event->attr.type != event->pmu->type)
    725		return -ENOENT;
    726
    727	pmu = uncore_event_to_pmu(event);
    728	/* no device found for this pmu */
    729	if (pmu->func_id < 0)
    730		return -ENOENT;
    731
    732	/* Sampling not supported yet */
    733	if (hwc->sample_period)
    734		return -EINVAL;
    735
    736	/*
    737	 * Place all uncore events for a particular physical package
    738	 * onto a single cpu
    739	 */
    740	if (event->cpu < 0)
    741		return -EINVAL;
    742	box = uncore_pmu_to_box(pmu, event->cpu);
    743	if (!box || box->cpu < 0)
    744		return -EINVAL;
    745	event->cpu = box->cpu;
    746	event->pmu_private = box;
    747
    748	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
    749
    750	event->hw.idx = -1;
    751	event->hw.last_tag = ~0ULL;
    752	event->hw.extra_reg.idx = EXTRA_REG_NONE;
    753	event->hw.branch_reg.idx = EXTRA_REG_NONE;
    754
    755	if (event->attr.config == UNCORE_FIXED_EVENT) {
    756		/* no fixed counter */
    757		if (!pmu->type->fixed_ctl)
    758			return -EINVAL;
    759		/*
    760		 * if there is only one fixed counter, only the first pmu
    761		 * can access the fixed counter
    762		 */
    763		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
    764			return -EINVAL;
    765
    766		/* fixed counters have event field hardcoded to zero */
    767		hwc->config = 0ULL;
    768	} else if (is_freerunning_event(event)) {
    769		hwc->config = event->attr.config;
    770		if (!check_valid_freerunning_event(box, event))
    771			return -EINVAL;
    772		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
    773		/*
    774		 * The free running counter event and free running counter
    775		 * are always 1:1 mapped.
    776		 * The free running counter is always active.
    777		 * Assign the free running counter here.
    778		 */
    779		event->hw.event_base = uncore_freerunning_counter(box, event);
    780	} else {
    781		hwc->config = event->attr.config &
    782			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
    783		if (pmu->type->ops->hw_config) {
    784			ret = pmu->type->ops->hw_config(box, event);
    785			if (ret)
    786				return ret;
    787		}
    788	}
    789
    790	if (event->group_leader != event)
    791		ret = uncore_validate_group(pmu, event);
    792	else
    793		ret = 0;
    794
    795	return ret;
    796}
    797
    798static void uncore_pmu_enable(struct pmu *pmu)
    799{
    800	struct intel_uncore_pmu *uncore_pmu;
    801	struct intel_uncore_box *box;
    802
    803	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
    804
    805	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
    806	if (!box)
    807		return;
    808
    809	if (uncore_pmu->type->ops->enable_box)
    810		uncore_pmu->type->ops->enable_box(box);
    811}
    812
    813static void uncore_pmu_disable(struct pmu *pmu)
    814{
    815	struct intel_uncore_pmu *uncore_pmu;
    816	struct intel_uncore_box *box;
    817
    818	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
    819
    820	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
    821	if (!box)
    822		return;
    823
    824	if (uncore_pmu->type->ops->disable_box)
    825		uncore_pmu->type->ops->disable_box(box);
    826}
    827
    828static ssize_t uncore_get_attr_cpumask(struct device *dev,
    829				struct device_attribute *attr, char *buf)
    830{
    831	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
    832}
    833
    834static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
    835
    836static struct attribute *uncore_pmu_attrs[] = {
    837	&dev_attr_cpumask.attr,
    838	NULL,
    839};
    840
    841static const struct attribute_group uncore_pmu_attr_group = {
    842	.attrs = uncore_pmu_attrs,
    843};
    844
    845void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
    846{
    847	struct intel_uncore_type *type = pmu->type;
    848
    849	if (type->num_boxes == 1)
    850		sprintf(pmu_name, "uncore_type_%u", type->type_id);
    851	else {
    852		sprintf(pmu_name, "uncore_type_%u_%d",
    853			type->type_id, type->box_ids[pmu->pmu_idx]);
    854	}
    855}
    856
    857static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
    858{
    859	struct intel_uncore_type *type = pmu->type;
    860
    861	/*
    862	 * No uncore block name in the discovery table.
    863	 * Use uncore_type_<typeid>_<boxid> as the name.
    864	 */
    865	if (!type->name) {
    866		uncore_get_alias_name(pmu->name, pmu);
    867		return;
    868	}
    869
    870	if (type->num_boxes == 1) {
    871		if (strlen(type->name) > 0)
    872			sprintf(pmu->name, "uncore_%s", type->name);
    873		else
    874			sprintf(pmu->name, "uncore");
    875	} else {
    876		/*
    877		 * Use the box ID from the discovery table if applicable.
    878		 */
    879		sprintf(pmu->name, "uncore_%s_%d", type->name,
    880			type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
    881	}
    882}
    883
    884static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
    885{
    886	int ret;
    887
    888	if (!pmu->type->pmu) {
    889		pmu->pmu = (struct pmu) {
    890			.attr_groups	= pmu->type->attr_groups,
    891			.task_ctx_nr	= perf_invalid_context,
    892			.pmu_enable	= uncore_pmu_enable,
    893			.pmu_disable	= uncore_pmu_disable,
    894			.event_init	= uncore_pmu_event_init,
    895			.add		= uncore_pmu_event_add,
    896			.del		= uncore_pmu_event_del,
    897			.start		= uncore_pmu_event_start,
    898			.stop		= uncore_pmu_event_stop,
    899			.read		= uncore_pmu_event_read,
    900			.module		= THIS_MODULE,
    901			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
    902			.attr_update	= pmu->type->attr_update,
    903		};
    904	} else {
    905		pmu->pmu = *pmu->type->pmu;
    906		pmu->pmu.attr_groups = pmu->type->attr_groups;
    907		pmu->pmu.attr_update = pmu->type->attr_update;
    908	}
    909
    910	uncore_get_pmu_name(pmu);
    911
    912	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
    913	if (!ret)
    914		pmu->registered = true;
    915	return ret;
    916}
    917
    918static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
    919{
    920	if (!pmu->registered)
    921		return;
    922	perf_pmu_unregister(&pmu->pmu);
    923	pmu->registered = false;
    924}
    925
    926static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
    927{
    928	int die;
    929
    930	for (die = 0; die < uncore_max_dies(); die++)
    931		kfree(pmu->boxes[die]);
    932	kfree(pmu->boxes);
    933}
    934
    935static void uncore_type_exit(struct intel_uncore_type *type)
    936{
    937	struct intel_uncore_pmu *pmu = type->pmus;
    938	int i;
    939
    940	if (type->cleanup_mapping)
    941		type->cleanup_mapping(type);
    942
    943	if (pmu) {
    944		for (i = 0; i < type->num_boxes; i++, pmu++) {
    945			uncore_pmu_unregister(pmu);
    946			uncore_free_boxes(pmu);
    947		}
    948		kfree(type->pmus);
    949		type->pmus = NULL;
    950	}
    951	if (type->box_ids) {
    952		kfree(type->box_ids);
    953		type->box_ids = NULL;
    954	}
    955	kfree(type->events_group);
    956	type->events_group = NULL;
    957}
    958
    959static void uncore_types_exit(struct intel_uncore_type **types)
    960{
    961	for (; *types; types++)
    962		uncore_type_exit(*types);
    963}
    964
    965static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
    966{
    967	struct intel_uncore_pmu *pmus;
    968	size_t size;
    969	int i, j;
    970
    971	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
    972	if (!pmus)
    973		return -ENOMEM;
    974
    975	size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
    976
    977	for (i = 0; i < type->num_boxes; i++) {
    978		pmus[i].func_id	= setid ? i : -1;
    979		pmus[i].pmu_idx	= i;
    980		pmus[i].type	= type;
    981		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
    982		if (!pmus[i].boxes)
    983			goto err;
    984	}
    985
    986	type->pmus = pmus;
    987	type->unconstrainted = (struct event_constraint)
    988		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
    989				0, type->num_counters, 0, 0);
    990
    991	if (type->event_descs) {
    992		struct {
    993			struct attribute_group group;
    994			struct attribute *attrs[];
    995		} *attr_group;
    996		for (i = 0; type->event_descs[i].attr.attr.name; i++);
    997
    998		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
    999								GFP_KERNEL);
   1000		if (!attr_group)
   1001			goto err;
   1002
   1003		attr_group->group.name = "events";
   1004		attr_group->group.attrs = attr_group->attrs;
   1005
   1006		for (j = 0; j < i; j++)
   1007			attr_group->attrs[j] = &type->event_descs[j].attr.attr;
   1008
   1009		type->events_group = &attr_group->group;
   1010	}
   1011
   1012	type->pmu_group = &uncore_pmu_attr_group;
   1013
   1014	if (type->set_mapping)
   1015		type->set_mapping(type);
   1016
   1017	return 0;
   1018
   1019err:
   1020	for (i = 0; i < type->num_boxes; i++)
   1021		kfree(pmus[i].boxes);
   1022	kfree(pmus);
   1023
   1024	return -ENOMEM;
   1025}
   1026
   1027static int __init
   1028uncore_types_init(struct intel_uncore_type **types, bool setid)
   1029{
   1030	int ret;
   1031
   1032	for (; *types; types++) {
   1033		ret = uncore_type_init(*types, setid);
   1034		if (ret)
   1035			return ret;
   1036	}
   1037	return 0;
   1038}
   1039
   1040/*
   1041 * Get the die information of a PCI device.
   1042 * @pdev: The PCI device.
   1043 * @die: The die id which the device maps to.
   1044 */
   1045static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
   1046{
   1047	*die = uncore_pcibus_to_dieid(pdev->bus);
   1048	if (*die < 0)
   1049		return -EINVAL;
   1050
   1051	return 0;
   1052}
   1053
   1054static struct intel_uncore_pmu *
   1055uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
   1056{
   1057	struct intel_uncore_type **types = uncore_pci_uncores;
   1058	struct intel_uncore_type *type;
   1059	u64 box_ctl;
   1060	int i, die;
   1061
   1062	for (; *types; types++) {
   1063		type = *types;
   1064		for (die = 0; die < __uncore_max_dies; die++) {
   1065			for (i = 0; i < type->num_boxes; i++) {
   1066				if (!type->box_ctls[die])
   1067					continue;
   1068				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
   1069				if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
   1070				    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
   1071				    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
   1072					return &type->pmus[i];
   1073			}
   1074		}
   1075	}
   1076
   1077	return NULL;
   1078}
   1079
   1080/*
   1081 * Find the PMU of a PCI device.
   1082 * @pdev: The PCI device.
   1083 * @ids: The ID table of the available PCI devices with a PMU.
   1084 *       If NULL, search the whole uncore_pci_uncores.
   1085 */
   1086static struct intel_uncore_pmu *
   1087uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
   1088{
   1089	struct intel_uncore_pmu *pmu = NULL;
   1090	struct intel_uncore_type *type;
   1091	kernel_ulong_t data;
   1092	unsigned int devfn;
   1093
   1094	if (!ids)
   1095		return uncore_pci_find_dev_pmu_from_types(pdev);
   1096
   1097	while (ids && ids->vendor) {
   1098		if ((ids->vendor == pdev->vendor) &&
   1099		    (ids->device == pdev->device)) {
   1100			data = ids->driver_data;
   1101			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
   1102					  UNCORE_PCI_DEV_FUNC(data));
   1103			if (devfn == pdev->devfn) {
   1104				type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
   1105				pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
   1106				break;
   1107			}
   1108		}
   1109		ids++;
   1110	}
   1111	return pmu;
   1112}
   1113
   1114/*
   1115 * Register the PMU for a PCI device
   1116 * @pdev: The PCI device.
   1117 * @type: The corresponding PMU type of the device.
   1118 * @pmu: The corresponding PMU of the device.
   1119 * @die: The die id which the device maps to.
   1120 */
   1121static int uncore_pci_pmu_register(struct pci_dev *pdev,
   1122				   struct intel_uncore_type *type,
   1123				   struct intel_uncore_pmu *pmu,
   1124				   int die)
   1125{
   1126	struct intel_uncore_box *box;
   1127	int ret;
   1128
   1129	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
   1130		return -EINVAL;
   1131
   1132	box = uncore_alloc_box(type, NUMA_NO_NODE);
   1133	if (!box)
   1134		return -ENOMEM;
   1135
   1136	if (pmu->func_id < 0)
   1137		pmu->func_id = pdev->devfn;
   1138	else
   1139		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
   1140
   1141	atomic_inc(&box->refcnt);
   1142	box->dieid = die;
   1143	box->pci_dev = pdev;
   1144	box->pmu = pmu;
   1145	uncore_box_init(box);
   1146
   1147	pmu->boxes[die] = box;
   1148	if (atomic_inc_return(&pmu->activeboxes) > 1)
   1149		return 0;
   1150
   1151	/* First active box registers the pmu */
   1152	ret = uncore_pmu_register(pmu);
   1153	if (ret) {
   1154		pmu->boxes[die] = NULL;
   1155		uncore_box_exit(box);
   1156		kfree(box);
   1157	}
   1158	return ret;
   1159}
   1160
   1161/*
   1162 * add a pci uncore device
   1163 */
   1164static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
   1165{
   1166	struct intel_uncore_type *type;
   1167	struct intel_uncore_pmu *pmu = NULL;
   1168	int die, ret;
   1169
   1170	ret = uncore_pci_get_dev_die_info(pdev, &die);
   1171	if (ret)
   1172		return ret;
   1173
   1174	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
   1175		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
   1176
   1177		uncore_extra_pci_dev[die].dev[idx] = pdev;
   1178		pci_set_drvdata(pdev, NULL);
   1179		return 0;
   1180	}
   1181
   1182	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
   1183
   1184	/*
   1185	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
   1186	 * for multiple instances of an uncore PMU device type, so the PCI
   1187	 * slot and function are needed to identify the uncore box.
   1188	 */
   1189	if (id->driver_data & ~0xffff) {
   1190		struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);
   1191
   1192		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
   1193		if (pmu == NULL)
   1194			return -ENODEV;
   1195	} else {
   1196		/*
   1197		 * For a performance monitoring unit with multiple boxes,
   1198		 * each box has a different function id.
   1199		 */
   1200		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
   1201	}
   1202
   1203	ret = uncore_pci_pmu_register(pdev, type, pmu, die);
   1204
   1205	pci_set_drvdata(pdev, pmu->boxes[die]);
   1206
   1207	return ret;
   1208}
   1209
   1210/*
   1211 * Unregister the PMU of a PCI device
   1212 * @pmu: The corresponding PMU to unregister.
   1213 * @die: The die id which the device maps to.
   1214 */
   1215static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
   1216{
   1217	struct intel_uncore_box *box = pmu->boxes[die];
   1218
   1219	pmu->boxes[die] = NULL;
   1220	if (atomic_dec_return(&pmu->activeboxes) == 0)
   1221		uncore_pmu_unregister(pmu);
   1222	uncore_box_exit(box);
   1223	kfree(box);
   1224}
   1225
   1226static void uncore_pci_remove(struct pci_dev *pdev)
   1227{
   1228	struct intel_uncore_box *box;
   1229	struct intel_uncore_pmu *pmu;
   1230	int i, die;
   1231
   1232	if (uncore_pci_get_dev_die_info(pdev, &die))
   1233		return;
   1234
   1235	box = pci_get_drvdata(pdev);
   1236	if (!box) {
   1237		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
   1238			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
   1239				uncore_extra_pci_dev[die].dev[i] = NULL;
   1240				break;
   1241			}
   1242		}
   1243		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
   1244		return;
   1245	}
   1246
   1247	pmu = box->pmu;
   1248
   1249	pci_set_drvdata(pdev, NULL);
   1250
   1251	uncore_pci_pmu_unregister(pmu, die);
   1252}
   1253
   1254static int uncore_bus_notify(struct notifier_block *nb,
   1255			     unsigned long action, void *data,
   1256			     const struct pci_device_id *ids)
   1257{
   1258	struct device *dev = data;
   1259	struct pci_dev *pdev = to_pci_dev(dev);
   1260	struct intel_uncore_pmu *pmu;
   1261	int die;
   1262
   1263	/* Unregister the PMU when the device is going to be deleted. */
   1264	if (action != BUS_NOTIFY_DEL_DEVICE)
   1265		return NOTIFY_DONE;
   1266
   1267	pmu = uncore_pci_find_dev_pmu(pdev, ids);
   1268	if (!pmu)
   1269		return NOTIFY_DONE;
   1270
   1271	if (uncore_pci_get_dev_die_info(pdev, &die))
   1272		return NOTIFY_DONE;
   1273
   1274	uncore_pci_pmu_unregister(pmu, die);
   1275
   1276	return NOTIFY_OK;
   1277}
   1278
   1279static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
   1280				     unsigned long action, void *data)
   1281{
   1282	return uncore_bus_notify(nb, action, data,
   1283				 uncore_pci_sub_driver->id_table);
   1284}
   1285
   1286static struct notifier_block uncore_pci_sub_notifier = {
   1287	.notifier_call = uncore_pci_sub_bus_notify,
   1288};
   1289
   1290static void uncore_pci_sub_driver_init(void)
   1291{
   1292	const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
   1293	struct intel_uncore_type *type;
   1294	struct intel_uncore_pmu *pmu;
   1295	struct pci_dev *pci_sub_dev;
   1296	bool notify = false;
   1297	unsigned int devfn;
   1298	int die;
   1299
   1300	while (ids && ids->vendor) {
   1301		pci_sub_dev = NULL;
   1302		type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
   1303		/*
   1304		 * Search the available device, and register the
   1305		 * corresponding PMU.
   1306		 */
   1307		while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
   1308						     ids->device, pci_sub_dev))) {
   1309			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
   1310					  UNCORE_PCI_DEV_FUNC(ids->driver_data));
   1311			if (devfn != pci_sub_dev->devfn)
   1312				continue;
   1313
   1314			pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
   1315			if (!pmu)
   1316				continue;
   1317
   1318			if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
   1319				continue;
   1320
   1321			if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
   1322						     die))
   1323				notify = true;
   1324		}
   1325		ids++;
   1326	}
   1327
   1328	if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
   1329		notify = false;
   1330
   1331	if (!notify)
   1332		uncore_pci_sub_driver = NULL;
   1333}
   1334
   1335static int uncore_pci_bus_notify(struct notifier_block *nb,
   1336				     unsigned long action, void *data)
   1337{
   1338	return uncore_bus_notify(nb, action, data, NULL);
   1339}
   1340
   1341static struct notifier_block uncore_pci_notifier = {
   1342	.notifier_call = uncore_pci_bus_notify,
   1343};
   1344
   1345
   1346static void uncore_pci_pmus_register(void)
   1347{
   1348	struct intel_uncore_type **types = uncore_pci_uncores;
   1349	struct intel_uncore_type *type;
   1350	struct intel_uncore_pmu *pmu;
   1351	struct pci_dev *pdev;
   1352	u64 box_ctl;
   1353	int i, die;
   1354
   1355	for (; *types; types++) {
   1356		type = *types;
   1357		for (die = 0; die < __uncore_max_dies; die++) {
   1358			for (i = 0; i < type->num_boxes; i++) {
   1359				if (!type->box_ctls[die])
   1360					continue;
   1361				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
   1362				pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
   1363								   UNCORE_DISCOVERY_PCI_BUS(box_ctl),
   1364								   UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
   1365				if (!pdev)
   1366					continue;
   1367				pmu = &type->pmus[i];
   1368
   1369				uncore_pci_pmu_register(pdev, type, pmu, die);
   1370			}
   1371		}
   1372	}
   1373
   1374	bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
   1375}
   1376
   1377static int __init uncore_pci_init(void)
   1378{
   1379	size_t size;
   1380	int ret;
   1381
   1382	size = uncore_max_dies() * sizeof(struct pci_extra_dev);
   1383	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
   1384	if (!uncore_extra_pci_dev) {
   1385		ret = -ENOMEM;
   1386		goto err;
   1387	}
   1388
   1389	ret = uncore_types_init(uncore_pci_uncores, false);
   1390	if (ret)
   1391		goto errtype;
   1392
   1393	if (uncore_pci_driver) {
   1394		uncore_pci_driver->probe = uncore_pci_probe;
   1395		uncore_pci_driver->remove = uncore_pci_remove;
   1396
   1397		ret = pci_register_driver(uncore_pci_driver);
   1398		if (ret)
   1399			goto errtype;
   1400	} else
   1401		uncore_pci_pmus_register();
   1402
   1403	if (uncore_pci_sub_driver)
   1404		uncore_pci_sub_driver_init();
   1405
   1406	pcidrv_registered = true;
   1407	return 0;
   1408
   1409errtype:
   1410	uncore_types_exit(uncore_pci_uncores);
   1411	kfree(uncore_extra_pci_dev);
   1412	uncore_extra_pci_dev = NULL;
   1413	uncore_free_pcibus_map();
   1414err:
   1415	uncore_pci_uncores = empty_uncore;
   1416	return ret;
   1417}
   1418
   1419static void uncore_pci_exit(void)
   1420{
   1421	if (pcidrv_registered) {
   1422		pcidrv_registered = false;
   1423		if (uncore_pci_sub_driver)
   1424			bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
   1425		if (uncore_pci_driver)
   1426			pci_unregister_driver(uncore_pci_driver);
   1427		else
   1428			bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
   1429		uncore_types_exit(uncore_pci_uncores);
   1430		kfree(uncore_extra_pci_dev);
   1431		uncore_free_pcibus_map();
   1432	}
   1433}
   1434
   1435static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
   1436				   int new_cpu)
   1437{
   1438	struct intel_uncore_pmu *pmu = type->pmus;
   1439	struct intel_uncore_box *box;
   1440	int i, die;
   1441
   1442	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
   1443	for (i = 0; i < type->num_boxes; i++, pmu++) {
   1444		box = pmu->boxes[die];
   1445		if (!box)
   1446			continue;
   1447
   1448		if (old_cpu < 0) {
   1449			WARN_ON_ONCE(box->cpu != -1);
   1450			box->cpu = new_cpu;
   1451			continue;
   1452		}
   1453
   1454		WARN_ON_ONCE(box->cpu != old_cpu);
   1455		box->cpu = -1;
   1456		if (new_cpu < 0)
   1457			continue;
   1458
   1459		uncore_pmu_cancel_hrtimer(box);
   1460		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
   1461		box->cpu = new_cpu;
   1462	}
   1463}
   1464
   1465static void uncore_change_context(struct intel_uncore_type **uncores,
   1466				  int old_cpu, int new_cpu)
   1467{
   1468	for (; *uncores; uncores++)
   1469		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
   1470}
   1471
   1472static void uncore_box_unref(struct intel_uncore_type **types, int id)
   1473{
   1474	struct intel_uncore_type *type;
   1475	struct intel_uncore_pmu *pmu;
   1476	struct intel_uncore_box *box;
   1477	int i;
   1478
   1479	for (; *types; types++) {
   1480		type = *types;
   1481		pmu = type->pmus;
   1482		for (i = 0; i < type->num_boxes; i++, pmu++) {
   1483			box = pmu->boxes[id];
   1484			if (box && atomic_dec_return(&box->refcnt) == 0)
   1485				uncore_box_exit(box);
   1486		}
   1487	}
   1488}
   1489
   1490static int uncore_event_cpu_offline(unsigned int cpu)
   1491{
   1492	int die, target;
   1493
   1494	/* Check if exiting cpu is used for collecting uncore events */
   1495	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
   1496		goto unref;
   1497	/* Find a new cpu to collect uncore events */
   1498	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
   1499
   1500	/* Migrate uncore events to the new target */
   1501	if (target < nr_cpu_ids)
   1502		cpumask_set_cpu(target, &uncore_cpu_mask);
   1503	else
   1504		target = -1;
   1505
   1506	uncore_change_context(uncore_msr_uncores, cpu, target);
   1507	uncore_change_context(uncore_mmio_uncores, cpu, target);
   1508	uncore_change_context(uncore_pci_uncores, cpu, target);
   1509
   1510unref:
   1511	/* Clear the references */
   1512	die = topology_logical_die_id(cpu);
   1513	uncore_box_unref(uncore_msr_uncores, die);
   1514	uncore_box_unref(uncore_mmio_uncores, die);
   1515	return 0;
   1516}
   1517
   1518static int allocate_boxes(struct intel_uncore_type **types,
   1519			 unsigned int die, unsigned int cpu)
   1520{
   1521	struct intel_uncore_box *box, *tmp;
   1522	struct intel_uncore_type *type;
   1523	struct intel_uncore_pmu *pmu;
   1524	LIST_HEAD(allocated);
   1525	int i;
   1526
   1527	/* Try to allocate all required boxes */
   1528	for (; *types; types++) {
   1529		type = *types;
   1530		pmu = type->pmus;
   1531		for (i = 0; i < type->num_boxes; i++, pmu++) {
   1532			if (pmu->boxes[die])
   1533				continue;
   1534			box = uncore_alloc_box(type, cpu_to_node(cpu));
   1535			if (!box)
   1536				goto cleanup;
   1537			box->pmu = pmu;
   1538			box->dieid = die;
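			/*
			 * The box is not live yet, so its active_list can be
			 * borrowed to queue it on the temporary 'allocated'
			 * list until all allocations have succeeded.
			 */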
   1539			list_add(&box->active_list, &allocated);
   1540		}
   1541	}
   1542	/* Install them in the pmus */
   1543	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
   1544		list_del_init(&box->active_list);
   1545		box->pmu->boxes[die] = box;
   1546	}
   1547	return 0;
   1548
   1549cleanup:
   1550	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
   1551		list_del_init(&box->active_list);
   1552		kfree(box);
   1553	}
   1554	return -ENOMEM;
   1555}
   1556
   1557static int uncore_box_ref(struct intel_uncore_type **types,
   1558			  int id, unsigned int cpu)
   1559{
   1560	struct intel_uncore_type *type;
   1561	struct intel_uncore_pmu *pmu;
   1562	struct intel_uncore_box *box;
   1563	int i, ret;
   1564
   1565	ret = allocate_boxes(types, id, cpu);
   1566	if (ret)
   1567		return ret;
   1568
   1569	for (; *types; types++) {
   1570		type = *types;
   1571		pmu = type->pmus;
   1572		for (i = 0; i < type->num_boxes; i++, pmu++) {
   1573			box = pmu->boxes[id];
   1574			if (box && atomic_inc_return(&box->refcnt) == 1)
   1575				uncore_box_init(box);
   1576		}
   1577	}
   1578	return 0;
   1579}
   1580
   1581static int uncore_event_cpu_online(unsigned int cpu)
   1582{
   1583	int die, target, msr_ret, mmio_ret;
   1584
   1585	die = topology_logical_die_id(cpu);
   1586	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
   1587	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
   1588	if (msr_ret && mmio_ret)
   1589		return -ENOMEM;
   1590
   1591	/*
   1592	 * Check if there is an online cpu in the package
   1593	 * which collects uncore events already.
   1594	 */
   1595	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
   1596	if (target < nr_cpu_ids)
   1597		return 0;
   1598
   1599	cpumask_set_cpu(cpu, &uncore_cpu_mask);
   1600
   1601	if (!msr_ret)
   1602		uncore_change_context(uncore_msr_uncores, -1, cpu);
   1603	if (!mmio_ret)
   1604		uncore_change_context(uncore_mmio_uncores, -1, cpu);
   1605	uncore_change_context(uncore_pci_uncores, -1, cpu);
   1606	return 0;
   1607}
   1608
   1609static int __init type_pmu_register(struct intel_uncore_type *type)
   1610{
   1611	int i, ret;
   1612
   1613	for (i = 0; i < type->num_boxes; i++) {
   1614		ret = uncore_pmu_register(&type->pmus[i]);
   1615		if (ret)
   1616			return ret;
   1617	}
   1618	return 0;
   1619}
   1620
   1621static int __init uncore_msr_pmus_register(void)
   1622{
   1623	struct intel_uncore_type **types = uncore_msr_uncores;
   1624	int ret;
   1625
   1626	for (; *types; types++) {
   1627		ret = type_pmu_register(*types);
   1628		if (ret)
   1629			return ret;
   1630	}
   1631	return 0;
   1632}
   1633
   1634static int __init uncore_cpu_init(void)
   1635{
   1636	int ret;
   1637
   1638	ret = uncore_types_init(uncore_msr_uncores, true);
   1639	if (ret)
   1640		goto err;
   1641
   1642	ret = uncore_msr_pmus_register();
   1643	if (ret)
   1644		goto err;
   1645	return 0;
   1646err:
   1647	uncore_types_exit(uncore_msr_uncores);
   1648	uncore_msr_uncores = empty_uncore;
   1649	return ret;
   1650}
   1651
   1652static int __init uncore_mmio_init(void)
   1653{
   1654	struct intel_uncore_type **types = uncore_mmio_uncores;
   1655	int ret;
   1656
   1657	ret = uncore_types_init(types, true);
   1658	if (ret)
   1659		goto err;
   1660
   1661	for (; *types; types++) {
   1662		ret = type_pmu_register(*types);
   1663		if (ret)
   1664			goto err;
   1665	}
   1666	return 0;
   1667err:
   1668	uncore_types_exit(uncore_mmio_uncores);
   1669	uncore_mmio_uncores = empty_uncore;
   1670	return ret;
   1671}
   1672
   1673struct intel_uncore_init_fun {
   1674	void	(*cpu_init)(void);
   1675	int	(*pci_init)(void);
   1676	void	(*mmio_init)(void);
   1677	bool	use_discovery;
   1678};
   1679
   1680static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
   1681	.cpu_init = nhm_uncore_cpu_init,
   1682};
   1683
   1684static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
   1685	.cpu_init = snb_uncore_cpu_init,
   1686	.pci_init = snb_uncore_pci_init,
   1687};
   1688
   1689static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
   1690	.cpu_init = snb_uncore_cpu_init,
   1691	.pci_init = ivb_uncore_pci_init,
   1692};
   1693
   1694static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
   1695	.cpu_init = snb_uncore_cpu_init,
   1696	.pci_init = hsw_uncore_pci_init,
   1697};
   1698
   1699static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
   1700	.cpu_init = snb_uncore_cpu_init,
   1701	.pci_init = bdw_uncore_pci_init,
   1702};
   1703
   1704static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
   1705	.cpu_init = snbep_uncore_cpu_init,
   1706	.pci_init = snbep_uncore_pci_init,
   1707};
   1708
   1709static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
   1710	.cpu_init = nhmex_uncore_cpu_init,
   1711};
   1712
   1713static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
   1714	.cpu_init = ivbep_uncore_cpu_init,
   1715	.pci_init = ivbep_uncore_pci_init,
   1716};
   1717
   1718static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
   1719	.cpu_init = hswep_uncore_cpu_init,
   1720	.pci_init = hswep_uncore_pci_init,
   1721};
   1722
   1723static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
   1724	.cpu_init = bdx_uncore_cpu_init,
   1725	.pci_init = bdx_uncore_pci_init,
   1726};
   1727
   1728static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
   1729	.cpu_init = knl_uncore_cpu_init,
   1730	.pci_init = knl_uncore_pci_init,
   1731};
   1732
   1733static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
   1734	.cpu_init = skl_uncore_cpu_init,
   1735	.pci_init = skl_uncore_pci_init,
   1736};
   1737
   1738static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
   1739	.cpu_init = skx_uncore_cpu_init,
   1740	.pci_init = skx_uncore_pci_init,
   1741};
   1742
   1743static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
   1744	.cpu_init = icl_uncore_cpu_init,
   1745	.pci_init = skl_uncore_pci_init,
   1746};
   1747
   1748static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
   1749	.cpu_init = tgl_uncore_cpu_init,
   1750	.mmio_init = tgl_uncore_mmio_init,
   1751};
   1752
   1753static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
   1754	.cpu_init = tgl_uncore_cpu_init,
   1755	.mmio_init = tgl_l_uncore_mmio_init,
   1756};
   1757
   1758static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
   1759	.cpu_init = tgl_uncore_cpu_init,
   1760	.pci_init = skl_uncore_pci_init,
   1761};
   1762
   1763static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
   1764	.cpu_init = adl_uncore_cpu_init,
   1765	.mmio_init = adl_uncore_mmio_init,
   1766};
   1767
   1768static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
   1769	.cpu_init = icx_uncore_cpu_init,
   1770	.pci_init = icx_uncore_pci_init,
   1771	.mmio_init = icx_uncore_mmio_init,
   1772};
   1773
   1774static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
   1775	.cpu_init = snr_uncore_cpu_init,
   1776	.pci_init = snr_uncore_pci_init,
   1777	.mmio_init = snr_uncore_mmio_init,
   1778};
   1779
   1780static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
   1781	.cpu_init = spr_uncore_cpu_init,
   1782	.pci_init = spr_uncore_pci_init,
   1783	.mmio_init = spr_uncore_mmio_init,
   1784	.use_discovery = true,
   1785};
   1786
   1787static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
   1788	.cpu_init = intel_uncore_generic_uncore_cpu_init,
   1789	.pci_init = intel_uncore_generic_uncore_pci_init,
   1790	.mmio_init = intel_uncore_generic_uncore_mmio_init,
   1791};
   1792
   1793static const struct x86_cpu_id intel_uncore_match[] __initconst = {
   1794	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&nhm_uncore_init),
   1795	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_uncore_init),
   1796	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&nhm_uncore_init),
   1797	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&nhm_uncore_init),
   1798	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&snb_uncore_init),
   1799	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&ivb_uncore_init),
   1800	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&hsw_uncore_init),
   1801	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&hsw_uncore_init),
   1802	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&hsw_uncore_init),
   1803	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&bdw_uncore_init),
   1804	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&bdw_uncore_init),
   1805	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&snbep_uncore_init),
   1806	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&nhmex_uncore_init),
   1807	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&nhmex_uncore_init),
   1808	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&ivbep_uncore_init),
   1809	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&hswep_uncore_init),
   1810	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&bdx_uncore_init),
   1811	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&bdx_uncore_init),
   1812	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&knl_uncore_init),
   1813	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&knl_uncore_init),
   1814	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&skl_uncore_init),
   1815	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&skl_uncore_init),
   1816	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&skx_uncore_init),
   1817	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&skl_uncore_init),
   1818	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&skl_uncore_init),
   1819	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&skl_uncore_init),
   1820	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&skl_uncore_init),
   1821	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_uncore_init),
   1822	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,	&icl_uncore_init),
   1823	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_uncore_init),
   1824	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_uncore_init),
   1825	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&icx_uncore_init),
   1826	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&tgl_l_uncore_init),
   1827	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&tgl_uncore_init),
   1828	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rkl_uncore_init),
   1829	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_uncore_init),
   1830	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_uncore_init),
   1831	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&adl_uncore_init),
   1832	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_uncore_init),
   1833	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_uncore_init),
   1834	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
   1835	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
   1836	{},
   1837};
   1838MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
   1839
   1840static int __init intel_uncore_init(void)
   1841{
   1842	const struct x86_cpu_id *id;
   1843	struct intel_uncore_init_fun *uncore_init;
   1844	int pret = 0, cret = 0, mret = 0, ret;
   1845
   1846	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
   1847		return -ENODEV;
   1848
   1849	__uncore_max_dies =
   1850		topology_max_packages() * topology_max_die_per_package();
   1851
   1852	id = x86_match_cpu(intel_uncore_match);
   1853	if (!id) {
   1854		if (!uncore_no_discover && intel_uncore_has_discovery_tables())
   1855			uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
   1856		else
   1857			return -ENODEV;
   1858	} else {
   1859		uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
   1860		if (uncore_no_discover && uncore_init->use_discovery)
   1861			return -ENODEV;
   1862		if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
   1863			return -ENODEV;
   1864	}
   1865
   1866	if (uncore_init->pci_init) {
   1867		pret = uncore_init->pci_init();
   1868		if (!pret)
   1869			pret = uncore_pci_init();
   1870	}
   1871
   1872	if (uncore_init->cpu_init) {
   1873		uncore_init->cpu_init();
   1874		cret = uncore_cpu_init();
   1875	}
   1876
   1877	if (uncore_init->mmio_init) {
   1878		uncore_init->mmio_init();
   1879		mret = uncore_mmio_init();
   1880	}
   1881
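	/* Give up only if MSR, PCI and MMIO initialization all failed. */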
   1882	if (cret && pret && mret) {
   1883		ret = -ENODEV;
   1884		goto free_discovery;
   1885	}
   1886
   1887	/* Install hotplug callbacks to setup the targets for each package */
   1888	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
   1889				"perf/x86/intel/uncore:online",
   1890				uncore_event_cpu_online,
   1891				uncore_event_cpu_offline);
   1892	if (ret)
   1893		goto err;
   1894	return 0;
   1895
   1896err:
   1897	uncore_types_exit(uncore_msr_uncores);
   1898	uncore_types_exit(uncore_mmio_uncores);
   1899	uncore_pci_exit();
   1900free_discovery:
   1901	intel_uncore_clear_discovery_tables();
   1902	return ret;
   1903}
   1904module_init(intel_uncore_init);
   1905
   1906static void __exit intel_uncore_exit(void)
   1907{
   1908	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
   1909	uncore_types_exit(uncore_msr_uncores);
   1910	uncore_types_exit(uncore_mmio_uncores);
   1911	uncore_pci_exit();
   1912	intel_uncore_clear_discovery_tables();
   1913}
   1914module_exit(intel_uncore_exit);