cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

arch_numa.c (11096B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * NUMA support, based on the x86 implementation.
 *
 * Copyright (C) 2015 Cavium Inc.
 * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
 */

#define pr_fmt(fmt) "NUMA: " fmt

#include <linux/acpi.h>
#include <linux/memblock.h>
#include <linux/module.h>
#include <linux/of.h>

#include <asm/sections.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
nodemask_t numa_nodes_parsed __initdata;
static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };

static int numa_distance_cnt;
static u8 *numa_distance;
bool numa_off;

static __init int numa_parse_early_param(char *opt)
{
	if (!opt)
		return -EINVAL;
	if (str_has_prefix(opt, "off"))
		numa_off = true;

	return 0;
}
early_param("numa", numa_parse_early_param);
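
/*
 * Example: booting with "numa=off" on the kernel command line sets numa_off,
 * which short-circuits firmware table parsing in arch_numa_init() below and
 * forces the dummy single-node setup.
 */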

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

#ifdef CONFIG_DEBUG_PER_CPU_MAPS

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const struct cpumask *cpumask_of_node(int node)
{
	if (node == NUMA_NO_NODE)
		return cpu_all_mask;

	if (WARN_ON(node < 0 || node >= nr_node_ids))
		return cpu_none_mask;

	if (WARN_ON(node_to_cpumask_map[node] == NULL))
		return cpu_online_mask;

	return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

#endif
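
/*
 * Usage sketch (hypothetical caller, not part of this file): bind a task to
 * the CPUs of the node that owns its data,
 *
 *	set_cpus_allowed_ptr(tsk, cpumask_of_node(nid));
 *
 * Note cpumask_of_node() falls back to permissive masks above rather than
 * returning NULL, so callers need not check for errors.
 */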

static void numa_update_cpu(unsigned int cpu, bool remove)
{
	int nid = cpu_to_node(cpu);

	if (nid == NUMA_NO_NODE)
		return;

	if (remove)
		cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]);
	else
		cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
}

void numa_add_cpu(unsigned int cpu)
{
	numa_update_cpu(cpu, false);
}

void numa_remove_cpu(unsigned int cpu)
{
	numa_update_cpu(cpu, true);
}

void numa_clear_node(unsigned int cpu)
{
	numa_remove_cpu(cpu);
	set_cpu_numa_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on the number of available nodes.
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
static void __init setup_node_to_cpumask_map(void)
{
	int node;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES)
		setup_nr_node_ids();

	/* allocate and clear the mapping */
	for (node = 0; node < nr_node_ids; node++) {
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
		cpumask_clear(node_to_cpumask_map[node]);
	}

	/* cpumask_of_node() will now work */
	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}

/*
 * Set the cpu to node and mem mapping
 */
void numa_store_cpu_info(unsigned int cpu)
{
	set_cpu_numa_node(cpu, cpu_to_node_map[cpu]);
}

void __init early_map_cpu_to_node(unsigned int cpu, int nid)
{
	/* fallback to node 0 */
	if (nid < 0 || nid >= MAX_NUMNODES || numa_off)
		nid = 0;

	cpu_to_node_map[cpu] = nid;

	/*
	 * Set the NUMA node of cpu0 as early as possible: it has already
	 * been brought online, and cpu_to_node(0) will soon be called.
	 */
	if (!cpu)
		set_cpu_numa_node(cpu, nid);
}
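
/*
 * Example (sketch): firmware parsers populate the map above while walking
 * their tables, calling
 *
 *	early_map_cpu_to_node(cpu, nid);
 *
 * once per CPU entry; the exact call sites live in the architecture's CPU
 * enumeration code. numa_store_cpu_info() then propagates the mapping
 * during CPU bringup.
 */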

#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

static int __init early_cpu_to_node(int cpu)
{
	return cpu_to_node_map[cpu];
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		/*
		 * Always reserve area for module percpu variables.  That's
		 * what the legacy allocator did.
		 */
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
					    pcpu_cpu_distance,
					    early_cpu_to_node);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		if (rc < 0)
			pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
#endif
	}

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
#endif
	if (rc < 0)
		panic("Failed to initialize percpu areas (err=%d).", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif
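
/*
 * Sketch of what the offsets mean: once __per_cpu_offset[] is populated, an
 * access such as per_cpu(var, cpu) conceptually resolves to
 *
 *	*(typeof(var) *)((unsigned long)&var + __per_cpu_offset[cpu]);
 *
 * i.e. each CPU sees the static percpu image shifted by delta plus its
 * pcpu unit offset.
 */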

/**
 * numa_add_memblk() - Set the node ID for a memory block
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	int ret;

	ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
	if (ret < 0) {
		pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n",
			start, (end - 1), nid);
		return ret;
	}

	node_set(nid, numa_nodes_parsed);
	return ret;
}
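
/*
 * Example (sketch, hypothetical base/size): firmware parsers call this once
 * per memory affinity entry, with an exclusive end address:
 *
 *	numa_add_memblk(nid, base, base + size);
 *
 * dummy_numa_init() below does exactly this for all of DRAM on node 0.
 */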

/*
 * Initialize NODE_DATA for a node, allocated from that node's memory where
 * possible.
 */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
	u64 nd_pa;
	void *nd;
	int tnid;

	if (start_pfn >= end_pfn)
		pr_info("Initmem setup node %d [<memory-less node>]\n", nid);

	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
	if (!nd_pa)
		panic("Cannot allocate %zu bytes for node %d data\n",
		      nd_size, nid);

	nd = __va(nd_pa);

	/* report and initialize */
	pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n",
		nd_pa, nd_pa + nd_size - 1);
	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
	if (tnid != nid)
		pr_info("NODE_DATA(%d) on node %d\n", nid, tnid);

	node_data[nid] = nd;
	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
	NODE_DATA(nid)->node_id = nid;
	NODE_DATA(nid)->node_start_pfn = start_pfn;
	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
}

/*
 * numa_free_distance - free the current NUMA distance table.
 */
void __init numa_free_distance(void)
{
	size_t size;

	if (!numa_distance)
		return;

	size = numa_distance_cnt * numa_distance_cnt *
		sizeof(numa_distance[0]);

	memblock_free(numa_distance, size);
	numa_distance_cnt = 0;
	numa_distance = NULL;
}

/*
 * Create a new NUMA distance table.
 */
static int __init numa_alloc_distance(void)
{
	size_t size;
	int i, j;

	size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
	numa_distance = memblock_alloc(size, PAGE_SIZE);
	if (WARN_ON(!numa_distance))
		return -ENOMEM;

	numa_distance_cnt = nr_node_ids;

	/* fill with the default distances */
	for (i = 0; i < numa_distance_cnt; i++)
		for (j = 0; j < numa_distance_cnt; j++)
			numa_distance[i * numa_distance_cnt + j] = i == j ?
				LOCAL_DISTANCE : REMOTE_DISTANCE;

	pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt);

	return 0;
}

/**
 * numa_set_distance() - Set inter node NUMA distance from node to node.
 * @from: the 'from' node to set distance
 * @to: the 'to' node to set distance
 * @distance: NUMA distance
 *
 * Set the distance from node @from to @to to @distance.
 * If the distance table doesn't exist, a warning is printed once.
 *
 * If @from or @to is higher than the highest known node or lower than zero,
 * or if @distance doesn't make sense, the call is ignored.
 */
void __init numa_set_distance(int from, int to, int distance)
{
	if (!numa_distance) {
		pr_warn_once("Warning: distance table not allocated yet\n");
		return;
	}

	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
			from < 0 || to < 0) {
		pr_warn_once("Warning: node ids are out of bounds, from=%d to=%d distance=%d\n",
			    from, to, distance);
		return;
	}

	if ((u8)distance != distance ||
	    (from == to && distance != LOCAL_DISTANCE)) {
		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	numa_distance[from * numa_distance_cnt + to] = distance;
}
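
/*
 * Example (sketch): for a hypothetical two-node machine whose firmware
 * reports a remote distance of 20, the table would be filled as
 *
 *	numa_set_distance(0, 0, LOCAL_DISTANCE);	/* = 10 */
 *	numa_set_distance(0, 1, 20);
 *	numa_set_distance(1, 0, 20);
 *	numa_set_distance(1, 1, LOCAL_DISTANCE);
 *
 * each value landing at numa_distance[from * numa_distance_cnt + to].
 */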

/*
 * Return the NUMA distance from node @from to node @to.
 */
int __node_distance(int from, int to)
{
	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
	return numa_distance[from * numa_distance_cnt + to];
}
EXPORT_SYMBOL(__node_distance);

static int __init numa_register_nodes(void)
{
	int nid;
	struct memblock_region *mblk;

	/* Check that a valid nid is set for each memblk */
	for_each_mem_region(mblk) {
		int mblk_nid = memblock_get_region_node(mblk);
		phys_addr_t start = mblk->base;
		phys_addr_t end = mblk->base + mblk->size - 1;

		if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
			pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n",
				mblk_nid, &start, &end);
			return -EINVAL;
		}
	}

	/* Finally register nodes. */
	for_each_node_mask(nid, numa_nodes_parsed) {
		unsigned long start_pfn, end_pfn;

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
		setup_node_data(nid, start_pfn, end_pfn);
		node_set_online(nid);
	}

	/* Derive the possible-node map from the parsed nodes. */
	node_possible_map = numa_nodes_parsed;

	return 0;
}

static int __init numa_init(int (*init_func)(void))
{
	int ret;

	nodes_clear(numa_nodes_parsed);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);

	ret = numa_alloc_distance();
	if (ret < 0)
		return ret;

	ret = init_func();
	if (ret < 0)
		goto out_free_distance;

	if (nodes_empty(numa_nodes_parsed)) {
		pr_info("No NUMA configuration found\n");
		ret = -EINVAL;
		goto out_free_distance;
	}

	ret = numa_register_nodes();
	if (ret < 0)
		goto out_free_distance;

	setup_node_to_cpumask_map();

	return 0;
out_free_distance:
	numa_free_distance();
	return ret;
}

/**
 * dummy_numa_init() - Fallback dummy NUMA init
 *
 * Used if there's no underlying NUMA architecture, NUMA initialization
 * fails, or NUMA is disabled on the command line.
 *
 * Must online at least one node (node 0) and add memory blocks that cover all
 * allowed memory. It is unlikely that this function fails.
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init dummy_numa_init(void)
{
	phys_addr_t start = memblock_start_of_DRAM();
	phys_addr_t end = memblock_end_of_DRAM() - 1;
	int ret;

	if (numa_off)
		pr_info("NUMA disabled\n"); /* Forced off on command line. */
	pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);

	ret = numa_add_memblk(0, start, end + 1);
	if (ret) {
		pr_err("NUMA init failed\n");
		return ret;
	}

	numa_off = true;
	return 0;
}

#ifdef CONFIG_ACPI_NUMA
static int __init arch_acpi_numa_init(void)
{
	int ret;

	ret = acpi_numa_init();
	if (ret) {
		pr_info("Failed to initialise from firmware\n");
		return ret;
	}

	return srat_disabled() ? -EINVAL : 0;
}
#else
static int __init arch_acpi_numa_init(void)
{
	return -EOPNOTSUPP;
}
#endif

/**
 * arch_numa_init() - Initialize NUMA
 *
 * Try each configured NUMA initialization method until one succeeds. The
 * last fallback is a dummy single-node configuration encompassing the
 * whole of memory.
 */
void __init arch_numa_init(void)
{
	if (!numa_off) {
		if (!acpi_disabled && !numa_init(arch_acpi_numa_init))
			return;
		if (acpi_disabled && !numa_init(of_numa_init))
			return;
	}

	numa_init(dummy_numa_init);
}