cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cpumap.c (11103B)


// SPDX-License-Identifier: GPL-2.0
/* cpumap.c: used for optimizing CPU assignment
 *
 * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <asm/cpudata.h>
#include "cpumap.h"


enum {
	CPUINFO_LVL_ROOT = 0,
	CPUINFO_LVL_NODE,
	CPUINFO_LVL_CORE,
	CPUINFO_LVL_PROC,
	CPUINFO_LVL_MAX,
};

enum {
	ROVER_NO_OP              = 0,
	/* Increment rover every time level is visited */
	ROVER_INC_ON_VISIT       = 1 << 0,
	/* Increment parent's rover every time rover wraps around */
	ROVER_INC_PARENT_ON_LOOP = 1 << 1,
};

struct cpuinfo_node {
	int id;
	int level;
	int num_cpus;    /* Number of CPUs in this hierarchy */
	int parent_index;
	int child_start; /* Array index of the first child node */
	int child_end;   /* Array index of the last child node */
	int rover;       /* Child node iterator */
};

struct cpuinfo_level {
	int start_index; /* Index of first node of a level in a cpuinfo tree */
	int end_index;   /* Index of last node of a level in a cpuinfo tree */
	int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
};

struct cpuinfo_tree {
	int total_nodes;

	/* Offsets into nodes[] for each level of the tree */
	struct cpuinfo_level level[CPUINFO_LVL_MAX];
	struct cpuinfo_node  nodes[];
};

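/* The cpuinfo tree and the CPU distribution map below are only accessed
 * under cpu_map_lock (see map_to_cpu() and cpu_map_rebuild()).
 */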
static struct cpuinfo_tree *cpuinfo_tree;

static u16 cpu_distribution_map[NR_CPUS];
static DEFINE_SPINLOCK(cpu_map_lock);


/* Niagara optimized cpuinfo tree traversal. */
static const int niagara_iterate_method[] = {
	[CPUINFO_LVL_ROOT] = ROVER_NO_OP,

	/* Strands (or virtual CPUs) within a core may not run concurrently
	 * on the Niagara, as instruction pipeline(s) are shared.  Distribute
	 * work to strands in different cores first for better concurrency.
	 * Go to next NUMA node when all cores are used.
	 */
	[CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,

	/* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
	 * a proc_id represents an instruction pipeline.  Distribute work to
	 * strands in different proc_id groups if the core has multiple
	 * instruction pipelines (e.g. the Niagara 2/2+ has two).
	 */
	[CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,

	/* Pick the next strand in the proc_id group. */
	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
};

/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
 * nodes.
 */
static const int generic_iterate_method[] = {
	[CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
	[CPUINFO_LVL_NODE] = ROVER_NO_OP,
	[CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
};

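/* Return the identifier of @cpu at the given level of the hierarchy:
 * 0 for the root, the NUMA node id, the core id, or the proc (pipeline)
 * id.  Returns -EINVAL for an unknown level.
 */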
static int cpuinfo_id(int cpu, int level)
{
	int id;

	switch (level) {
	case CPUINFO_LVL_ROOT:
		id = 0;
		break;
	case CPUINFO_LVL_NODE:
		id = cpu_to_node(cpu);
		break;
	case CPUINFO_LVL_CORE:
		id = cpu_data(cpu).core_id;
		break;
	case CPUINFO_LVL_PROC:
		id = cpu_data(cpu).proc_id;
		break;
	default:
		id = -EINVAL;
	}
	return id;
}

/*
 * Enumerate the CPU information in __cpu_data to determine the start index,
 * end index, and number of nodes for each level in the cpuinfo tree.  The
 * total number of cpuinfo nodes required to build the tree is returned.
 */
static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
{
	int prev_id[CPUINFO_LVL_MAX];
	int i, n, num_nodes;

	for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
		struct cpuinfo_level *lv = &tree_level[i];

		prev_id[i] = -1;
		lv->start_index = lv->end_index = lv->num_nodes = 0;
	}

	num_nodes = 1; /* Include the root node */

	for (i = 0; i < num_possible_cpus(); i++) {
		if (!cpu_online(i))
			continue;

		n = cpuinfo_id(i, CPUINFO_LVL_NODE);
		if (n > prev_id[CPUINFO_LVL_NODE]) {
			tree_level[CPUINFO_LVL_NODE].num_nodes++;
			prev_id[CPUINFO_LVL_NODE] = n;
			num_nodes++;
		}
		n = cpuinfo_id(i, CPUINFO_LVL_CORE);
		if (n > prev_id[CPUINFO_LVL_CORE]) {
			tree_level[CPUINFO_LVL_CORE].num_nodes++;
			prev_id[CPUINFO_LVL_CORE] = n;
			num_nodes++;
		}
		n = cpuinfo_id(i, CPUINFO_LVL_PROC);
		if (n > prev_id[CPUINFO_LVL_PROC]) {
			tree_level[CPUINFO_LVL_PROC].num_nodes++;
			prev_id[CPUINFO_LVL_PROC] = n;
			num_nodes++;
		}
	}

	tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;

	n = tree_level[CPUINFO_LVL_NODE].num_nodes;
	tree_level[CPUINFO_LVL_NODE].start_index = 1;
	tree_level[CPUINFO_LVL_NODE].end_index   = n;

	n++;
	tree_level[CPUINFO_LVL_CORE].start_index = n;
	n += tree_level[CPUINFO_LVL_CORE].num_nodes;
	tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;

	tree_level[CPUINFO_LVL_PROC].start_index = n;
	n += tree_level[CPUINFO_LVL_PROC].num_nodes;
	tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;

	return num_nodes;
}

/* Build a tree representation of the CPU hierarchy using the per CPU
 * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS] are
 * assumed to be sorted in ascending order based on node, core_id, and
 * proc_id (in order of significance).
 */
static struct cpuinfo_tree *build_cpuinfo_tree(void)
{
	struct cpuinfo_tree *new_tree;
	struct cpuinfo_node *node;
	struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
	int num_cpus[CPUINFO_LVL_MAX];
	int level_rover[CPUINFO_LVL_MAX];
	int prev_id[CPUINFO_LVL_MAX];
	int n, id, cpu, prev_cpu, last_cpu, level;

	n = enumerate_cpuinfo_nodes(tmp_level);

	new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC);
	if (!new_tree)
		return NULL;

	new_tree->total_nodes = n;
	memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));

	prev_cpu = cpu = cpumask_first(cpu_online_mask);

	/* Initialize all levels in the tree with the first CPU */
	for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
		n = new_tree->level[level].start_index;

		level_rover[level] = n;
		node = &new_tree->nodes[n];

		id = cpuinfo_id(cpu, level);
		if (unlikely(id < 0)) {
			kfree(new_tree);
			return NULL;
		}
		node->id = id;
		node->level = level;
		node->num_cpus = 1;

		node->parent_index = (level > CPUINFO_LVL_ROOT)
		    ? new_tree->level[level - 1].start_index : -1;

		node->child_start = node->child_end = node->rover =
		    (level == CPUINFO_LVL_PROC)
		    ? cpu : new_tree->level[level + 1].start_index;

		prev_id[level] = node->id;
		num_cpus[level] = 1;
	}

	for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
		if (cpu_online(last_cpu))
			break;
	}

	while (++cpu <= last_cpu) {
		if (!cpu_online(cpu))
			continue;

		for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
		     level--) {
			id = cpuinfo_id(cpu, level);
			if (unlikely(id < 0)) {
				kfree(new_tree);
				return NULL;
			}

			if ((id != prev_id[level]) || (cpu == last_cpu)) {
				prev_id[level] = id;
				node = &new_tree->nodes[level_rover[level]];
				node->num_cpus = num_cpus[level];
				num_cpus[level] = 1;

				if (cpu == last_cpu)
					node->num_cpus++;

				/* Connect tree node to parent */
				if (level == CPUINFO_LVL_ROOT)
					node->parent_index = -1;
				else
					node->parent_index =
					    level_rover[level - 1];

				if (level == CPUINFO_LVL_PROC) {
					node->child_end =
					    (cpu == last_cpu) ? cpu : prev_cpu;
				} else {
					node->child_end =
					    level_rover[level + 1] - 1;
				}

				/* Initialize the next node in the same level */
				n = ++level_rover[level];
				if (n <= new_tree->level[level].end_index) {
					node = &new_tree->nodes[n];
					node->id = id;
					node->level = level;

					/* Connect node to child */
					node->child_start = node->child_end =
					node->rover =
					    (level == CPUINFO_LVL_PROC)
					    ? cpu : level_rover[level + 1];
				}
			} else
				num_cpus[level]++;
		}
		prev_cpu = cpu;
	}

	return new_tree;
}

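/* Advance the rover of the node at @node_index to its next child.  When
 * the rover wraps around to child_start and the level's
 * ROVER_INC_PARENT_ON_LOOP flag is set in @rover_inc_table, propagate the
 * increment to the parent node, up to @root_index.
 */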
static void increment_rover(struct cpuinfo_tree *t, int node_index,
                            int root_index, const int *rover_inc_table)
{
	struct cpuinfo_node *node = &t->nodes[node_index];
	int top_level, level;

	top_level = t->nodes[root_index].level;
	for (level = node->level; level >= top_level; level--) {
		node->rover++;
		if (node->rover <= node->child_end)
			return;

		node->rover = node->child_start;
		/* If parent's rover does not need to be adjusted, stop here. */
		if ((level == top_level) ||
		    !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
			return;

		node = &t->nodes[node->parent_index];
	}
}

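/* Return the next CPU for the subtree rooted at @root_index.  The walk
 * follows each node's rover down to a leaf (a CPU number), advancing
 * rovers along the way as dictated by the chip's iteration table.
 */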
static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
{
	const int *rover_inc_table;
	int level, new_index, index = root_index;

	switch (sun4v_chip_type) {
	case SUN4V_CHIP_NIAGARA1:
	case SUN4V_CHIP_NIAGARA2:
	case SUN4V_CHIP_NIAGARA3:
	case SUN4V_CHIP_NIAGARA4:
	case SUN4V_CHIP_NIAGARA5:
	case SUN4V_CHIP_SPARC_M6:
	case SUN4V_CHIP_SPARC_M7:
	case SUN4V_CHIP_SPARC_M8:
	case SUN4V_CHIP_SPARC_SN:
	case SUN4V_CHIP_SPARC64X:
		rover_inc_table = niagara_iterate_method;
		break;
	default:
		rover_inc_table = generic_iterate_method;
	}

	for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
	     level++) {
		new_index = t->nodes[index].rover;
		if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
			increment_rover(t, index, root_index, rover_inc_table);

		index = new_index;
	}
	return index;
}

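/* Rebuild the cpuinfo tree and the CPU distribution map.  Must be called
 * with cpu_map_lock held.
 */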
static void _cpu_map_rebuild(void)
{
	int i;

	if (cpuinfo_tree) {
		kfree(cpuinfo_tree);
		cpuinfo_tree = NULL;
	}

	cpuinfo_tree = build_cpuinfo_tree();
	if (!cpuinfo_tree)
		return;

	/* Build CPU distribution map that spans all online CPUs.  No need
	 * to check if the CPU is online, as that is done when the cpuinfo
	 * tree is being built.
	 */
	for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
		cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
}

/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
 * round robin.
 */
static int simple_map_to_cpu(unsigned int index)
{
	int i, end, cpu_rover;

	cpu_rover = 0;
	end = index % num_online_cpus();
	for (i = 0; i < num_possible_cpus(); i++) {
		if (cpu_online(cpu_rover)) {
			if (cpu_rover >= end)
				return cpu_rover;

			cpu_rover++;
		}
	}

	/* Impossible, since num_online_cpus() <= num_possible_cpus() */
	return cpumask_first(cpu_online_mask);
}

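/* Map @index to a CPU using the distribution map, (re)building the
 * cpuinfo tree if it does not exist yet or no longer matches the set of
 * online CPUs.  Falls back to simple_map_to_cpu() if the tree cannot be
 * built.  Must be called with cpu_map_lock held.
 */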
static int _map_to_cpu(unsigned int index)
{
	struct cpuinfo_node *root_node;

	if (unlikely(!cpuinfo_tree)) {
		_cpu_map_rebuild();
		if (!cpuinfo_tree)
			return simple_map_to_cpu(index);
	}

	root_node = &cpuinfo_tree->nodes[0];
#ifdef CONFIG_HOTPLUG_CPU
	if (unlikely(root_node->num_cpus != num_online_cpus())) {
		_cpu_map_rebuild();
		if (!cpuinfo_tree)
			return simple_map_to_cpu(index);
	}
#endif
	return cpu_distribution_map[index % root_node->num_cpus];
}

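/* Map @index to an online CPU, spreading consecutive indices across the
 * CPU hierarchy.  Serialized by cpu_map_lock.
 */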
int map_to_cpu(unsigned int index)
{
	int mapped_cpu;
	unsigned long flag;

	spin_lock_irqsave(&cpu_map_lock, flag);
	mapped_cpu = _map_to_cpu(index);

#ifdef CONFIG_HOTPLUG_CPU
	while (unlikely(!cpu_online(mapped_cpu)))
		mapped_cpu = _map_to_cpu(index);
#endif
	spin_unlock_irqrestore(&cpu_map_lock, flag);
	return mapped_cpu;
}
EXPORT_SYMBOL(map_to_cpu);

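/* Force a rebuild of the cpuinfo tree and distribution map, e.g. when the
 * set of online CPUs has changed.
 */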
void cpu_map_rebuild(void)
{
	unsigned long flag;

	spin_lock_irqsave(&cpu_map_lock, flag);
	_cpu_map_rebuild();
	spin_unlock_irqrestore(&cpu_map_lock, flag);
}
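
A minimal usage sketch of the interface this file exports (map_to_cpu() and
cpu_map_rebuild(), declared in cpumap.h). The workqueue and the work-item
array here are illustrative assumptions, not code from this repository; the
point is only that consecutive indices get spread across NUMA nodes, cores
and pipelines according to the iteration tables above.

#include <linux/workqueue.h>
#include "cpumap.h"

/* Hypothetical caller: spread n work items across the CPU hierarchy. */
static void spread_work(struct workqueue_struct *wq,
			struct work_struct *items, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		queue_work_on(map_to_cpu(i), wq, &items[i]);
}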