cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

affinity.c (13255B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 Thomas Gleixner.
 * Copyright (C) 2016-2017 Christoph Hellwig.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/sort.h>

static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
				unsigned int cpus_per_vec)
{
	const struct cpumask *siblmsk;
	int cpu, sibl;

	for ( ; cpus_per_vec > 0; ) {
		cpu = cpumask_first(nmsk);

		/* Should not happen, but I'm too lazy to think about it */
		if (cpu >= nr_cpu_ids)
			return;

		cpumask_clear_cpu(cpu, nmsk);
		cpumask_set_cpu(cpu, irqmsk);
		cpus_per_vec--;

		/* If the cpu has siblings, use them first */
		siblmsk = topology_sibling_cpumask(cpu);
		for (sibl = -1; cpus_per_vec > 0; ) {
			sibl = cpumask_next(sibl, siblmsk);
			if (sibl >= nr_cpu_ids)
				break;
			if (!cpumask_test_and_clear_cpu(sibl, nmsk))
				continue;
			cpumask_set_cpu(sibl, irqmsk);
			cpus_per_vec--;
		}
	}
}

static cpumask_var_t *alloc_node_to_cpumask(void)
{
	cpumask_var_t *masks;
	int node;

	masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
	if (!masks)
		return NULL;

	for (node = 0; node < nr_node_ids; node++) {
		if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
			goto out_unwind;
	}

	return masks;

out_unwind:
	while (--node >= 0)
		free_cpumask_var(masks[node]);
	kfree(masks);
	return NULL;
}

static void free_node_to_cpumask(cpumask_var_t *masks)
{
	int node;

	for (node = 0; node < nr_node_ids; node++)
		free_cpumask_var(masks[node]);
	kfree(masks);
}

static void build_node_to_cpumask(cpumask_var_t *masks)
{
	int cpu;

	for_each_possible_cpu(cpu)
		cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}

static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
				const struct cpumask *mask, nodemask_t *nodemsk)
{
	int n, nodes = 0;

	/* Calculate the number of nodes in the supplied affinity mask */
	for_each_node(n) {
		if (cpumask_intersects(mask, node_to_cpumask[n])) {
			node_set(n, *nodemsk);
			nodes++;
		}
	}
	return nodes;
}

struct node_vectors {
	unsigned id;

	union {
		unsigned nvectors;
		unsigned ncpus;
	};
};

static int ncpus_cmp_func(const void *l, const void *r)
{
	const struct node_vectors *ln = l;
	const struct node_vectors *rn = r;

	return ln->ncpus - rn->ncpus;
}

/*
 * Allocate a vector count for each node, so that for each node:
 *
 * 1) the allocated number is >= 1
 *
 * 2) the allocated number is <= the number of active CPUs of this node
 *
 * The total number of allocated vectors may be less than @numvecs when
 * the total number of active CPUs is less than @numvecs.
 *
 * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
 * for each node.
 */
static void alloc_nodes_vectors(unsigned int numvecs,
				cpumask_var_t *node_to_cpumask,
				const struct cpumask *cpu_mask,
				const nodemask_t nodemsk,
				struct cpumask *nmsk,
				struct node_vectors *node_vectors)
{
	unsigned n, remaining_ncpus = 0;

	for (n = 0; n < nr_node_ids; n++) {
		node_vectors[n].id = n;
		node_vectors[n].ncpus = UINT_MAX;
	}

	for_each_node_mask(n, nodemsk) {
		unsigned ncpus;

		cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
		ncpus = cpumask_weight(nmsk);

		if (!ncpus)
			continue;
		remaining_ncpus += ncpus;
		node_vectors[n].ncpus = ncpus;
	}

	numvecs = min_t(unsigned, remaining_ncpus, numvecs);

	sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]),
	     ncpus_cmp_func, NULL);

	/*
	 * Allocate vectors for each node according to the ratio of this
	 * node's nr_cpus to the remaining un-assigned ncpus. 'numvecs' is
	 * bigger than the number of active numa nodes. Always start the
	 * allocation from the node with the smallest nr_cpus.
	 *
	 * This guarantees that each active node gets allocated at
	 * least one vector, and the theory is simple: over-allocation
	 * is only done when a node is assigned one vector, so
	 * other nodes will be allocated >= 1 vector, since 'numvecs' is
	 * bigger than the number of numa nodes.
	 *
	 * One key invariant is that the number of allocated vectors for
	 * each node is <= the CPU count of this node:
	 *
	 * 1) suppose there are two nodes: A and B
	 * 	ncpu(X) is the CPU count of node X
	 * 	vecs(X) is the vector count allocated to node X via this
	 * 	algorithm
	 *
	 * 	ncpu(A) <= ncpu(B)
	 * 	ncpu(A) + ncpu(B) = N
	 * 	vecs(A) + vecs(B) = V
	 *
	 * 	vecs(A) = max(1, round_down(V * ncpu(A) / N))
	 * 	vecs(B) = V - vecs(A)
	 *
	 * 	both N and V are integers, and 2 <= V <= N, suppose
	 * 	V = N - delta, and 0 <= delta <= N - 2
	 *
	 * 2) obviously vecs(A) <= ncpu(A) because:
	 *
	 * 	if vecs(A) is 1, then vecs(A) <= ncpu(A) given
	 * 	ncpu(A) >= 1
	 *
	 * 	otherwise,
	 * 		vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N
	 *
	 * 3) prove that vecs(B) <= ncpu(B):
	 *
	 * 	if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be
	 * 	over-allocated, so vecs(B) <= ncpu(B),
	 *
	 * 	otherwise:
	 *
	 * 	vecs(A) =
	 * 		round_down(V * ncpu(A) / N) =
	 * 		round_down((N - delta) * ncpu(A) / N) =
	 * 		round_down((N * ncpu(A) - delta * ncpu(A)) / N)	 >=
	 * 		round_down((N * ncpu(A) - delta * N) / N)	 =
	 * 		ncpu(A) - delta
	 *
	 * 	then:
	 *
	 * 	vecs(A) - V >= ncpu(A) - delta - V
	 * 	=>
	 * 	V - vecs(A) <= V + delta - ncpu(A)
	 * 	=>
	 * 	vecs(B) <= N - ncpu(A)
	 * 	=>
	 * 	vecs(B) <= ncpu(B)
	 *
	 * For three or more nodes, the proof generalizes by treating one
	 * node against all remaining nodes as one big node; that is exactly
	 * how this algorithm proceeds, re-calculating 'remaining_ncpus' &
	 * 'numvecs' at each step, so finally for each node X:
	 * vecs(X) <= ncpu(X).
	 */
	for (n = 0; n < nr_node_ids; n++) {
		unsigned nvectors, ncpus;

		if (node_vectors[n].ncpus == UINT_MAX)
			continue;

		WARN_ON_ONCE(numvecs == 0);

		ncpus = node_vectors[n].ncpus;
		nvectors = max_t(unsigned, 1,
				 numvecs * ncpus / remaining_ncpus);
		WARN_ON_ONCE(nvectors > ncpus);

		node_vectors[n].nvectors = nvectors;

		remaining_ncpus -= ncpus;
		numvecs -= nvectors;
	}
}
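
/*
 * Illustrative worked example (not part of the upstream file): how the
 * allocation above plays out for two active nodes, assuming ncpu(A) = 2,
 * ncpu(B) = 6 and numvecs = 5 (so N = 8, V = 5).
 *
 *	After sorting, node A (2 CPUs) is handled first:
 *		vecs(A) = max(1, 5 * 2 / 8) = max(1, 1) = 1
 *		remaining_ncpus = 8 - 2 = 6, numvecs = 5 - 1 = 4
 *	Then node B (6 CPUs):
 *		vecs(B) = max(1, 4 * 6 / 6) = 4
 *
 * Each node receives at least one vector and no node receives more
 * vectors than it has CPUs, matching the invariant proven above.
 */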

static int __irq_build_affinity_masks(unsigned int startvec,
				      unsigned int numvecs,
				      unsigned int firstvec,
				      cpumask_var_t *node_to_cpumask,
				      const struct cpumask *cpu_mask,
				      struct cpumask *nmsk,
				      struct irq_affinity_desc *masks)
{
	unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
	unsigned int last_affv = firstvec + numvecs;
	unsigned int curvec = startvec;
	nodemask_t nodemsk = NODE_MASK_NONE;
	struct node_vectors *node_vectors;

	if (cpumask_empty(cpu_mask))
		return 0;

	nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);

	/*
	 * If the number of nodes in the mask is greater than or equal to the
	 * number of vectors, we just spread the vectors across the nodes.
	 */
	if (numvecs <= nodes) {
		for_each_node_mask(n, nodemsk) {
			/* Ensure that only CPUs which are in both masks are set */
			cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
			cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
			if (++curvec == last_affv)
				curvec = firstvec;
		}
		return numvecs;
	}

	node_vectors = kcalloc(nr_node_ids,
			       sizeof(struct node_vectors),
			       GFP_KERNEL);
	if (!node_vectors)
		return -ENOMEM;

	/* allocate vector number for each node */
	alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask,
			    nodemsk, nmsk, node_vectors);

	for (i = 0; i < nr_node_ids; i++) {
		unsigned int ncpus, v;
		struct node_vectors *nv = &node_vectors[i];

		if (nv->nvectors == UINT_MAX)
			continue;

		/* Get the cpus on this node which are in the mask */
		cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
		ncpus = cpumask_weight(nmsk);
		if (!ncpus)
			continue;

		WARN_ON_ONCE(nv->nvectors > ncpus);

		/* Account for rounding errors */
		extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors);

		/* Spread allocated vectors on CPUs of the current node */
		for (v = 0; v < nv->nvectors; v++, curvec++) {
			cpus_per_vec = ncpus / nv->nvectors;

			/* Account for extra vectors to compensate rounding errors */
			if (extra_vecs) {
				cpus_per_vec++;
				--extra_vecs;
			}

			/*
			 * wrapping has to be considered given 'startvec'
			 * may start anywhere
			 */
			if (curvec >= last_affv)
				curvec = firstvec;
			irq_spread_init_one(&masks[curvec].mask, nmsk,
						cpus_per_vec);
		}
		done += nv->nvectors;
	}
	kfree(node_vectors);
	return done;
}
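
/*
 * Illustrative worked example (not part of the upstream file): the rounding
 * compensation above for an assumed node with ncpus = 5 and nv->nvectors = 2.
 *
 *	extra_vecs = 5 - 2 * (5 / 2) = 1
 *	vector 0: cpus_per_vec = 5 / 2 = 2, plus one extra CPU -> 3 CPUs
 *	vector 1: cpus_per_vec = 5 / 2 = 2 -> 2 CPUs
 *
 * All five CPUs of the node end up assigned; the first vectors absorb the
 * remainder left over by the integer division.
 */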

/*
 * build affinity in two stages:
 *	1) spread present CPUs on these vectors
 *	2) spread other possible CPUs on these vectors
 */
static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
				    unsigned int firstvec,
				    struct irq_affinity_desc *masks)
{
	unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
	cpumask_var_t *node_to_cpumask;
	cpumask_var_t nmsk, npresmsk;
	int ret = -ENOMEM;

	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
		return ret;

	if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
		goto fail_nmsk;

	node_to_cpumask = alloc_node_to_cpumask();
	if (!node_to_cpumask)
		goto fail_npresmsk;

	/* Stabilize the cpumasks */
	cpus_read_lock();
	build_node_to_cpumask(node_to_cpumask);

	/* Spread on present CPUs starting from affd->pre_vectors */
	ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
					 node_to_cpumask, cpu_present_mask,
					 nmsk, masks);
	if (ret < 0)
		goto fail_build_affinity;
	nr_present = ret;

	/*
	 * Spread on non present CPUs starting from the next vector to be
	 * handled. If the spreading of present CPUs already exhausted the
	 * vector space, assign the non present CPUs to the already spread
	 * out vectors.
	 */
	if (nr_present >= numvecs)
		curvec = firstvec;
	else
		curvec = firstvec + nr_present;
	cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
	ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
					 node_to_cpumask, npresmsk, nmsk,
					 masks);
	if (ret >= 0)
		nr_others = ret;

 fail_build_affinity:
	cpus_read_unlock();

	if (ret >= 0)
		WARN_ON(nr_present + nr_others < numvecs);

	free_node_to_cpumask(node_to_cpumask);

 fail_npresmsk:
	free_cpumask_var(npresmsk);

 fail_nmsk:
	free_cpumask_var(nmsk);
	return ret < 0 ? ret : 0;
}

static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
{
	affd->nr_sets = 1;
	affd->set_size[0] = affvecs;
}

/**
 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 * @nvecs:	The total number of vectors
 * @affd:	Description of the affinity requirements
 *
 * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 */
struct irq_affinity_desc *
irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
{
	unsigned int affvecs, curvec, usedvecs, i;
	struct irq_affinity_desc *masks = NULL;

	/*
	 * Determine the number of vectors which need interrupt affinities
	 * assigned. If the pre/post request exhausts the available vectors
	 * then there is nothing to do here except invoke the calc_sets()
	 * callback so the device driver can adjust to the situation.
	 */
	if (nvecs > affd->pre_vectors + affd->post_vectors)
		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
	else
		affvecs = 0;

	/*
	 * Simple invocations do not provide a calc_sets() callback. Install
	 * the generic one.
	 */
	if (!affd->calc_sets)
		affd->calc_sets = default_calc_sets;

	/* Recalculate the sets */
	affd->calc_sets(affd, affvecs);

	if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
		return NULL;

	/* Nothing to assign? */
	if (!affvecs)
		return NULL;

	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return NULL;

	/* Fill out vectors at the beginning that don't need affinity */
	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

	/*
	 * Spread on present CPUs starting from affd->pre_vectors. If we
	 * have multiple sets, build each set's affinity mask separately.
	 */
	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
		unsigned int this_vecs = affd->set_size[i];
		int ret;

		ret = irq_build_affinity_masks(curvec, this_vecs,
					       curvec, masks);
		if (ret) {
			kfree(masks);
			return NULL;
		}
		curvec += this_vecs;
		usedvecs += this_vecs;
	}

	/* Fill out vectors at the end that don't need affinity */
	if (usedvecs >= affvecs)
		curvec = affd->pre_vectors + affvecs;
	else
		curvec = affd->pre_vectors + usedvecs;
	for (; curvec < nvecs; curvec++)
		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

	/* Mark the managed interrupts */
	for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
		masks[i].is_managed = 1;

	return masks;
}

/**
 * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 * @minvec:	The minimum number of vectors available
 * @maxvec:	The maximum number of vectors available
 * @affd:	Description of the affinity requirements
 */
unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
				       const struct irq_affinity *affd)
{
	unsigned int resv = affd->pre_vectors + affd->post_vectors;
	unsigned int set_vecs;

	if (resv > minvec)
		return 0;

	if (affd->calc_sets) {
		set_vecs = maxvec - resv;
	} else {
		cpus_read_lock();
		set_vecs = cpumask_weight(cpu_possible_mask);
		cpus_read_unlock();
	}

	return resv + min(set_vecs, maxvec - resv);
}
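
The two exported helpers above are normally used together by a driver: irq_calc_affinity_vectors() bounds the number of vectors worth requesting, and irq_create_affinity_masks() produces one irq_affinity_desc per vector, with the pre/post vectors kept on the default affinity. The sketch below is illustrative only and is not part of affinity.c; the function name and vector counts are assumptions, and in practice PCI drivers reach these helpers indirectly via pci_alloc_irq_vectors_affinity().

/* Hypothetical driver-side sketch, not part of this file. */
#include <linux/interrupt.h>
#include <linux/slab.h>

static int example_setup_vectors(unsigned int maxvec)
{
	/* Reserve one pre-vector (e.g. for an admin queue), no post-vectors. */
	struct irq_affinity affd = {
		.pre_vectors	= 1,
		.post_vectors	= 0,
	};
	struct irq_affinity_desc *masks;
	unsigned int nvecs;

	/* Clamp the request between 2 and maxvec vectors. */
	nvecs = irq_calc_affinity_vectors(2, maxvec, &affd);
	if (!nvecs)
		return -ENOSPC;

	/* One irq_affinity_desc per vector; managed vectors get spread masks. */
	masks = irq_create_affinity_masks(nvecs, &affd);
	if (!masks)
		return -ENOMEM;

	/* ... hand masks[i].mask to the interrupt allocation path ... */

	kfree(masks);
	return 0;
}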