cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cleanup.c (25543B)


/*
 * MTRR (Memory Type Range Register) cleanup
 *
 *  Copyright (C) 2009 Yinghai Lu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/kvm_para.h>
#include <linux/range.h>

#include <asm/processor.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>

#include "mtrr.h"

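/*
 * Note on units, as used by the conversions below: names suffixed
 * "basek"/"sizek" are in KiB (bytes >> 10), "_pfn" fields are page
 * frame numbers (bytes >> PAGE_SHIFT), so KiB = pfn << (PAGE_SHIFT - 10).
 */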
struct var_mtrr_range_state {
	unsigned long	base_pfn;
	unsigned long	size_pfn;
	mtrr_type	type;
};

struct var_mtrr_state {
	unsigned long	range_startk;
	unsigned long	range_sizek;
	unsigned long	chunk_sizek;
	unsigned long	gran_sizek;
	unsigned int	reg;
};

/* Should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM				256

static struct range __initdata		range[RANGE_NUM];
static int __initdata				nr_range;

static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];

static int __initdata debug_print;
#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)

#define BIOS_BUG_MSG \
	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"

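/*
 * Build the list of RAM ranges that are actually covered write-back:
 * merge all WB variable MTRRs into "range", subtract every UC/WP entry
 * plus an optional extra region the caller wants excluded (used for the
 * 4G-to-TOM2 area), then sort the result.
 */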
static int __init
x86_get_mtrr_mem_range(struct range *range, int nr_range,
		       unsigned long extra_remove_base,
		       unsigned long extra_remove_size)
{
	unsigned long base, size;
	mtrr_type type;
	int i;

	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
						base, base + size);
	}
	if (debug_print) {
		pr_debug("After WB checking\n");
		for (i = 0; i < nr_range; i++)
			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
				 range[i].start, range[i].end);
	}

	/* Take out UC ranges: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_UNCACHABLE &&
		    type != MTRR_TYPE_WRPROT)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			continue;
		base = range_state[i].base_pfn;
		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
		    (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
		    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
			/* Var MTRR contains UC entry below 1M? Skip it: */
			pr_warn(BIOS_BUG_MSG, i);
			if (base + size <= (1<<(20-PAGE_SHIFT)))
				continue;
			size -= (1<<(20-PAGE_SHIFT)) - base;
			base = 1<<(20-PAGE_SHIFT);
		}
		subtract_range(range, RANGE_NUM, base, base + size);
	}
	if (extra_remove_size)
		subtract_range(range, RANGE_NUM, extra_remove_base,
				 extra_remove_base + extra_remove_size);

	if (debug_print) {
		pr_debug("After UC checking\n");
		for (i = 0; i < RANGE_NUM; i++) {
			if (!range[i].end)
				continue;
			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
				 range[i].start, range[i].end);
		}
	}

	/* sort the ranges */
	nr_range = clean_sort_range(range, RANGE_NUM);
	if (debug_print) {
		pr_debug("After sorting\n");
		for (i = 0; i < nr_range; i++)
			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
				 range[i].start, range[i].end);
	}

	return nr_range;
}

#ifdef CONFIG_MTRR_SANITIZER

static unsigned long __init sum_ranges(struct range *range, int nr_range)
{
	unsigned long sum = 0;
	int i;

	for (i = 0; i < nr_range; i++)
		sum += range[i].end - range[i].start;

	return sum;
}

static int enable_mtrr_cleanup __initdata =
	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

static int __init disable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 0;
	return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

static int __init enable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 1;
	return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

static int __init mtrr_cleanup_debug_setup(char *str)
{
	debug_print = 1;
	return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);

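/*
 * Encode one variable MTRR pair: the base register holds the physical
 * base address ORed with the memory type in bits 7:0; the mask register
 * holds ~(size - 1) within the CPU's physical address width, with bit 11
 * (0x800) set as the "valid" flag. A zero size clears the register pair.
 */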
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	     unsigned char type, unsigned int address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	u64 base, mask;

	if (!sizek) {
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	mask = (1ULL << address_bits) - 1;
	mask &= ~((((u64)sizek) << 10) - 1);

	base = ((u64)basek) << 10;

	base |= type;
	mask |= 0x800;

	base_lo = base & ((1ULL<<32) - 1);
	base_hi = base >> 32;

	mask_lo = mask & ((1ULL<<32) - 1);
	mask_hi = mask >> 32;

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}

static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	      unsigned char type)
{
	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
	range_state[reg].type = type;
}

static void __init set_var_mtrr_all(unsigned int address_bits)
{
	unsigned long basek, sizek;
	unsigned char type;
	unsigned int reg;

	for (reg = 0; reg < num_var_ranges; reg++) {
		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
		type = range_state[reg].type;

		set_var_mtrr(reg, basek, sizek, type, address_bits);
	}
}

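/*
 * Reduce a size in KiB to the largest unit that divides it evenly,
 * e.g. to_size_factor(2048, &c) returns 2 with c == 'M', while
 * to_size_factor(1536, &c) returns 1536 with c == 'K'.
 */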
static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
	unsigned long base = sizek;
	char factor;

	if (base & ((1<<10) - 1)) {
		/* Not MB-aligned: */
		factor = 'K';
	} else if (base & ((1<<20) - 1)) {
		factor = 'M';
		base >>= 10;
	} else {
		factor = 'G';
		base >>= 20;
	}

	*factorp = factor;

	return base;
}

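/*
 * Greedily cover [range_startk, range_startk + range_sizek) with
 * power-of-two chunks that satisfy the MTRR alignment rule (base must
 * be a multiple of size). E.g. a 3M range at 1M is emitted as 1M@1M
 * followed by 2M@2M, consuming two registers.
 */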
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
	      unsigned long range_sizek, unsigned char type)
{
	if (!range_sizek || (reg >= num_var_ranges))
		return reg;

	while (range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;

		/* Compute the maximum size with which we can make a range: */
		if (range_startk)
			max_align = __ffs(range_startk);
		else
			max_align = BITS_PER_LONG - 1;

		align = __fls(range_sizek);
		if (align > max_align)
			align = max_align;

		sizek = 1UL << align;
		if (debug_print) {
			char start_factor = 'K', size_factor = 'K';
			unsigned long start_base, size_base;

			start_base = to_size_factor(range_startk, &start_factor);
			size_base = to_size_factor(sizek, &size_factor);

			Dprintk("Setting variable MTRR %d, "
				"base: %ld%cB, range: %ld%cB, type %s\n",
				reg, start_base, start_factor,
				size_base, size_factor,
				(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
				   ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
				);
		}
		save_var_mtrr(reg++, range_startk, sizek, type);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= num_var_ranges)
			break;
	}
	return reg;
}

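/*
 * Lay out one contiguous WB range using chunk-sized WB MTRRs plus, when
 * rounding up to a chunk wastes less than half of it, a trailing UC
 * "hole" MTRR that carves the excess back out. Returns second_sizek,
 * the amount of the following range [basek, basek + sizek) already
 * absorbed by this layout, which the caller must skip.
 */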
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
			unsigned long sizek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long second_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	second_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* Align to gran size, to prevent small blocks from using up MTRRs: */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	if ((range_basek > basek) && basek)
		return second_sizek;

	state->range_sizek -= (range_basek - state->range_startk);
	range_sizek = ALIGN(state->range_sizek, gran_sizek);

	while (range_sizek > state->range_sizek) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return 0;
	}
	state->range_sizek = range_sizek;

	/* Try to append some small hole: */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

	/* No increase: */
	if (range0_sizek == state->range_sizek) {
		Dprintk("rangeX: %016lx - %016lx\n",
			range0_basek<<10,
			(range0_basek + state->range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK);
		return 0;
	}

	/* Only cut back when it is not the last: */
	if (sizek) {
		while (range0_basek + range0_sizek > (basek + sizek)) {
			if (range0_sizek >= chunk_sizek)
				range0_sizek -= chunk_sizek;
			else
				range0_sizek = 0;

			if (!range0_sizek)
				break;
		}
	}

second_try:
	range_basek = range0_basek + range0_sizek;

	/* One hole in the middle: */
	if (range_basek > basek && range_basek <= (basek + sizek))
		second_sizek = range_basek - basek;

	if (range0_sizek > state->range_sizek) {

		/* One hole in middle or at the end: */
		hole_sizek = range0_sizek - state->range_sizek - second_sizek;

		/* Hole size should be less than half of range0 size: */
		if (hole_sizek >= (range0_sizek >> 1) &&
		    range0_sizek >= chunk_sizek) {
			range0_sizek -= chunk_sizek;
			second_sizek = 0;
			hole_sizek = 0;

			goto second_try;
		}
	}

	if (range0_sizek) {
		Dprintk("range0: %016lx - %016lx\n",
			range0_basek<<10,
			(range0_basek + range0_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				range0_sizek, MTRR_TYPE_WRBACK);
	}

	if (range0_sizek < state->range_sizek) {
		/* Need to handle left over range: */
		range_sizek = state->range_sizek - range0_sizek;

		Dprintk("range: %016lx - %016lx\n",
			 range_basek<<10,
			 (range_basek + range_sizek)<<10);

		state->reg = range_to_mtrr(state->reg, range_basek,
				 range_sizek, MTRR_TYPE_WRBACK);
	}

	if (hole_sizek) {
		hole_basek = range_basek - hole_sizek - second_sizek;
		Dprintk("hole: %016lx - %016lx\n",
			 hole_basek<<10,
			 (hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				 hole_sizek, MTRR_TYPE_UNCACHABLE);
	}

	return second_sizek;
}

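/*
 * Accumulate contiguous (or below-1M) ranges into the running window;
 * once a gap is seen, flush the window to MTRRs and start a new window
 * at the incoming range, minus whatever the flush already covered.
 */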
static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
		   unsigned long size_pfn)
{
	unsigned long basek, sizek;
	unsigned long second_sizek = 0;

	if (state->reg >= num_var_ranges)
		return;

	basek = base_pfn << (PAGE_SHIFT - 10);
	sizek = size_pfn << (PAGE_SHIFT - 10);

	/* See if I can merge with the last range: */
	if ((basek <= 1024) ||
	    (state->range_startk + state->range_sizek == basek)) {
		unsigned long endk = basek + sizek;
		state->range_sizek = endk - state->range_startk;
		return;
	}
	/* Write the range mtrrs: */
	if (state->range_sizek != 0)
		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

	/* Allocate an msr: */
	state->range_startk = basek + second_sizek;
	state->range_sizek  = sizek - second_sizek;
}

/* Minimum size of an MTRR block that can take a hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

static int __init parse_mtrr_chunk_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_chunk_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* Granularity of MTRR blocks: */
static u64 mtrr_gran_size __initdata;

static int __init parse_mtrr_gran_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_gran_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

static unsigned long nr_mtrr_spare_reg __initdata =
				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

static int __init parse_mtrr_spare_reg(char *arg)
{
	if (arg)
		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
	return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);

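/*
 * Convert the sorted RAM range list into variable-MTRR settings in
 * range_state[] (not yet written to hardware) and return the number of
 * registers used; any remaining registers are saved as cleared.
 */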
static int __init
x86_setup_var_mtrrs(struct range *range, int nr_range,
		    u64 chunk_size, u64 gran_size)
{
	struct var_mtrr_state var_state;
	int num_reg;
	int i;

	var_state.range_startk	= 0;
	var_state.range_sizek	= 0;
	var_state.reg		= 0;
	var_state.chunk_sizek	= chunk_size >> 10;
	var_state.gran_sizek	= gran_size >> 10;

	memset(range_state, 0, sizeof(range_state));

	/* Write the range: */
	for (i = 0; i < nr_range; i++) {
		set_var_mtrr_range(&var_state, range[i].start,
				   range[i].end - range[i].start);
	}

	/* Write the last range: */
	if (var_state.range_sizek != 0)
		range_to_mtrr_with_hole(&var_state, 0, 0);

	num_reg = var_state.reg;
	/* Clear out the extra MTRRs: */
	while (var_state.reg < num_var_ranges) {
		save_var_mtrr(var_state.reg, 0, 0, 0);
		var_state.reg++;
	}

	return num_reg;
}

struct mtrr_cleanup_result {
	unsigned long	gran_sizek;
	unsigned long	chunk_sizek;
	unsigned long	lose_cover_sizek;
	unsigned int	num_reg;
	int		bad;
};

/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G -- 16 power-of-two
 * values; for each, chunk_size runs from gran_size up to 2G, giving
 * 16 + 15 + ... + 1 = (1 + 16) * 16 / 2 = 136 combinations.
 */
#define NUM_RESULT	136
#define PSHIFT		(PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];

static void __init print_out_mtrr_range_state(void)
{
	char start_factor = 'K', size_factor = 'K';
	unsigned long start_base, size_base;
	mtrr_type type;
	int i;

	for (i = 0; i < num_var_ranges; i++) {

		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
		if (!size_base)
			continue;

		size_base = to_size_factor(size_base, &size_factor);
		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
		start_base = to_size_factor(start_base, &start_factor);
		type = range_state[i].type;

		pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
			i, start_base, start_factor,
			size_base, size_factor,
			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
			);
	}
}

static int __init mtrr_need_cleanup(void)
{
	int i;
	mtrr_type type;
	unsigned long size;
	/* Extra slot counts the empty (size 0) entries: */
	int num[MTRR_NUM_TYPES + 1];

	/* Check entries number: */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		size = range_state[i].size_pfn;
		if (type >= MTRR_NUM_TYPES)
			continue;
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* Check if we got UC entries: */
	if (!num[MTRR_TYPE_UNCACHABLE])
		return 0;

	/* Check if we only had WB and UC: */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	return 1;
}

static unsigned long __initdata range_sums;

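/*
 * Simulate one (chunk_size, gran_size) candidate: build the would-be
 * MTRR layout, recompute the covered-RAM ranges from it, and record in
 * result[i] how many registers it needs and how much WB coverage it
 * loses; a candidate is "bad" if it gains coverage or, while nominally
 * lossless, changes the range map.
 */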
static void __init
mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
		      unsigned long x_remove_base,
		      unsigned long x_remove_size, int i)
{
	/*
	 * range_new should really be an automatic variable, but
	 * putting 4096 bytes on the stack is frowned upon, to put it
	 * mildly. It is safe to make it a static __initdata variable,
	 * since mtrr_calc_range_state is only called during init and
	 * there's no way it will call itself recursively.
	 */
	static struct range range_new[RANGE_NUM] __initdata;
	unsigned long range_sums_new;
	int nr_range_new;
	int num_reg;

	/* Convert ranges to var ranges state: */
	num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);

	/* We got new setting in range_state, check it: */
	memset(range_new, 0, sizeof(range_new));
	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
				x_remove_base, x_remove_size);
	range_sums_new = sum_ranges(range_new, nr_range_new);

	result[i].chunk_sizek = chunk_size >> 10;
	result[i].gran_sizek = gran_size >> 10;
	result[i].num_reg = num_reg;

	if (range_sums < range_sums_new) {
		result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
		result[i].bad = 1;
	} else {
		result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
	}

	/* Double check it: */
	if (!result[i].bad && !result[i].lose_cover_sizek) {
		if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
			result[i].bad = 1;
	}

	if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
		min_loss_pfn[num_reg] = range_sums - range_sums_new;
}

static void __init mtrr_print_out_one_result(int i)
{
	unsigned long gran_base, chunk_base, lose_base;
	char gran_factor, chunk_factor, lose_factor;

	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);

	pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
		result[i].bad ? "*BAD*" : " ",
		gran_base, gran_factor, chunk_base, chunk_factor);
	pr_cont("num_reg: %d  \tlose cover RAM: %s%ld%c\n",
		result[i].num_reg, result[i].bad ? "-" : "",
		lose_base, lose_factor);
}

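/*
 * Among all lossless candidates, prefer the smallest register count
 * that still leaves nr_mtrr_spare_reg registers unused; return the
 * first matching result index, or -1 if none qualifies.
 */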
static int __init mtrr_search_optimal_index(void)
{
	int num_reg_good;
	int index_good;
	int i;

	if (nr_mtrr_spare_reg >= num_var_ranges)
		nr_mtrr_spare_reg = num_var_ranges - 1;

	num_reg_good = -1;
	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
		if (!min_loss_pfn[i])
			num_reg_good = i;
	}

	index_good = -1;
	if (num_reg_good != -1) {
		for (i = 0; i < NUM_RESULT; i++) {
			if (!result[i].bad &&
			    result[i].num_reg == num_reg_good &&
			    !result[i].lose_cover_sizek) {
				index_good = i;
				break;
			}
		}
	}

	return index_good;
}

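/*
 * Main entry point: read the current variable MTRRs and, if the layout
 * is WB+UC only, replace it with an equivalent layout that uses fewer
 * registers. Either the user-supplied mtrr_chunk_size/mtrr_gran_size
 * pair is used, or all 136 combinations are scored and the optimal
 * lossless one is chosen. Returns 1 if the MTRRs were rewritten.
 */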
int __init mtrr_cleanup(unsigned address_bits)
{
	unsigned long x_remove_base, x_remove_size;
	unsigned long base, size, def, dummy;
	u64 chunk_size, gran_size;
	mtrr_type type;
	int index_good;
	int i;

	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
		return 0;

	rdmsr(MSR_MTRRdefType, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* Get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Check if we need to handle it and can handle it: */
	if (!mtrr_need_cleanup())
		return 0;

	/* Print original var MTRRs first, for debugging: */
	pr_debug("original variable MTRRs\n");
	print_out_mtrr_range_state();

	memset(range, 0, sizeof(range));
	x_remove_size = 0;
	x_remove_base = 1 << (32 - PAGE_SHIFT);
	if (mtrr_tom2)
		x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;

	/*
	 * [0, 1M) should always be covered by var mtrr with WB
	 * and fixed mtrrs should take effect before var mtrr for it:
	 */
	nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0,
					1ULL<<(20 - PAGE_SHIFT));
	/* add from var mtrr at last */
	nr_range = x86_get_mtrr_mem_range(range, nr_range,
					  x_remove_base, x_remove_size);

	range_sums = sum_ranges(range, nr_range);
	pr_info("total RAM covered: %ldM\n",
	       range_sums >> (20 - PAGE_SHIFT));

	if (mtrr_chunk_size && mtrr_gran_size) {
		i = 0;
		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
				      x_remove_base, x_remove_size, i);

		mtrr_print_out_one_result(i);

		if (!result[i].bad) {
			set_var_mtrr_all(address_bits);
			pr_debug("New variable MTRRs\n");
			print_out_mtrr_range_state();
			return 1;
		}
		pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
	}

	i = 0;
	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
	memset(result, 0, sizeof(result));
	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
		     chunk_size <<= 1) {

			if (i >= NUM_RESULT)
				continue;

			mtrr_calc_range_state(chunk_size, gran_size,
				      x_remove_base, x_remove_size, i);
			if (debug_print) {
				mtrr_print_out_one_result(i);
				pr_info("\n");
			}

			i++;
		}
	}

	/* Try to find the optimal index: */
	index_good = mtrr_search_optimal_index();

	if (index_good != -1) {
		pr_info("Found optimal setting for mtrr clean up\n");
		i = index_good;
		mtrr_print_out_one_result(i);

		/* Convert ranges to var ranges state: */
		chunk_size = result[i].chunk_sizek;
		chunk_size <<= 10;
		gran_size = result[i].gran_sizek;
		gran_size <<= 10;
		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
		set_var_mtrr_all(address_bits);
		pr_debug("New variable MTRRs\n");
		print_out_mtrr_range_state();
		return 1;
	} else {
		/* print out all */
		for (i = 0; i < NUM_RESULT; i++)
			mtrr_print_out_one_result(i);
	}

	pr_info("mtrr_cleanup: can not find optimal value\n");
	pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");

	return 0;
}
#else
int __init mtrr_cleanup(unsigned address_bits)
{
	return 0;
}
#endif

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
	disable_mtrr_trim = 1;
	return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);

/*
 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 * for memory >4GB. Check for that here.
 * Note this won't check whether the MTRRs below 4GB (where the magic bit
 * doesn't apply) are wrong, but so far we don't know of any such case in
 * the wild.
 */
#define Tom2Enabled		(1U << 21)
#define Tom2ForceMemTypeWB	(1U << 22)

int __init amd_special_default_mtrr(void)
{
	u32 l, h;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return 0;
	if (boot_cpu_data.x86 < 0xf)
		return 0;
	/* In case some hypervisor doesn't pass SYSCFG through: */
	if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0)
		return 0;
	/*
	 * Memory between 4GB and top of mem is forced WB by this magic bit.
	 * Reserved before K8RevF, but should be zero there.
	 */
	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
		 (Tom2Enabled | Tom2ForceMemTypeWB))
		return 1;
	return 0;
}

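/* Convert a pfn range to bytes and reserve it away from E820 RAM: */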
static u64 __init
real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
{
	u64 trim_start, trim_size;

	trim_start = start_pfn;
	trim_start <<= PAGE_SHIFT;

	trim_size = limit_pfn;
	trim_size <<= PAGE_SHIFT;
	trim_size -= trim_start;

	return e820__range_update(trim_start, trim_size, E820_TYPE_RAM, E820_TYPE_RESERVED);
}

/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 * memory configurations.  This routine checks that the highest MTRR matches
 * the end of memory, to make sure the MTRRs having a write back type cover
 * all of the memory the kernel is intending to use.  If not, it'll trim any
 * memory off the end by adjusting end_pfn, removing it from the kernel's
 * allocation pools, and warning the user with an obnoxious message.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
	unsigned long i, base, size, highest_pfn = 0, def, dummy;
	mtrr_type type;
	u64 total_trim_size;
	/* extra slot counts the empty (size 0) entries */
	int num[MTRR_NUM_TYPES + 1];

	/*
	 * Make sure we only trim uncachable memory on machines that
	 * support the Intel MTRR architecture:
	 */
	if (!is_cpu(INTEL) || disable_mtrr_trim)
		return 0;

	rdmsr(MSR_MTRRdefType, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* Get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Find highest cached pfn: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		if (highest_pfn < base + size)
			highest_pfn = base + size;
	}

	/* kvm/qemu doesn't set the MTRRs right; don't trim anything: */
	if (!highest_pfn) {
		pr_info("CPU MTRRs all blank - virtualized system.\n");
		return 0;
	}

	/* Check entries number: */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type >= MTRR_NUM_TYPES)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* No entry for WB? */
	if (!num[MTRR_TYPE_WRBACK])
		return 0;

	/* Check if we only had WB and UC: */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
		num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	memset(range, 0, sizeof(range));
	nr_range = 0;
	if (mtrr_tom2) {
		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
		range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
		if (highest_pfn < range[nr_range].end)
			highest_pfn = range[nr_range].end;
		nr_range++;
	}
	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

	/* Check the head: */
	total_trim_size = 0;
	if (range[0].start)
		total_trim_size += real_trim_memory(0, range[0].start);

	/* Check the holes: */
	for (i = 0; i < nr_range - 1; i++) {
		if (range[i].end < range[i+1].start)
			total_trim_size += real_trim_memory(range[i].end,
							    range[i+1].start);
	}

	/* Check the top: */
	i = nr_range - 1;
	if (range[i].end < end_pfn)
		total_trim_size += real_trim_memory(range[i].end,
							 end_pfn);

	if (total_trim_size) {
		pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
			total_trim_size >> 20);

		if (!changed_by_mtrr_cleanup)
			WARN_ON(1);

		pr_info("update e820 for mtrr\n");
		e820__update_table_print();

		return 1;
	}

	return 0;
}