cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

calibrate.c (8756B)


// SPDX-License-Identifier: GPL-2.0
/* calibrate.c: default delay calibration
 *
 * Excised from init/main.c
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/timex.h>
#include <linux/smp.h>
#include <linux/percpu.h>

unsigned long lpj_fine;
unsigned long preset_lpj;
static int __init lpj_setup(char *str)
{
	preset_lpj = simple_strtoul(str, NULL, 0);
	return 1;
}

__setup("lpj=", lpj_setup);

#ifdef ARCH_HAS_READ_CURRENT_TIMER

/* This routine uses the read_current_timer() routine and gets the
 * loops per jiffy directly, instead of guessing it using delay().
 * Also, this code tries to handle non-maskable asynchronous events
 * (like SMIs)
 */
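/* DELAY_CALIBRATION_TICKS is the number of jiffies in roughly 10ms, but never less than one. */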
#define DELAY_CALIBRATION_TICKS			((HZ < 100) ? 1 : (HZ/100))
#define MAX_DIRECT_CALIBRATION_RETRIES		5

static unsigned long calibrate_delay_direct(void)
{
	unsigned long pre_start, start, post_start;
	unsigned long pre_end, end, post_end;
	unsigned long start_jiffies;
	unsigned long timer_rate_min, timer_rate_max;
	unsigned long good_timer_sum = 0;
	unsigned long good_timer_count = 0;
	unsigned long measured_times[MAX_DIRECT_CALIBRATION_RETRIES];
	int max = -1; /* index of measured_times with max/min values or not set */
	int min = -1;
	int i;

	if (read_current_timer(&pre_start) < 0)
		return 0;

	/*
	 * A simple loop like
	 *	while (jiffies < start_jiffies + 1)
	 *		start = read_current_timer();
	 * will not do, as we don't really know whether the jiffy switch
	 * happened first or the timer value was read first, and some
	 * asynchronous event can happen between the two, introducing
	 * errors in lpj.
	 *
	 * So, we do:
	 * 1. pre_start <- when we are sure that the jiffy switch hasn't happened
	 * 2. check for the jiffy switch
	 * 3. start <- timer value before or after the jiffy switch
	 * 4. post_start <- when we are sure that the jiffy switch has happened
	 *
	 * Note that we don't know anything about the order of 2 and 3.
	 * By looking at the difference between post_start and pre_start,
	 * we can check whether any asynchronous event happened in between.
	 */

	for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
		pre_start = 0;
		read_current_timer(&start);
		start_jiffies = jiffies;
		while (time_before_eq(jiffies, start_jiffies + 1)) {
			pre_start = start;
			read_current_timer(&start);
		}
		read_current_timer(&post_start);

		pre_end = 0;
		end = post_start;
		while (time_before_eq(jiffies, start_jiffies + 1 +
					       DELAY_CALIBRATION_TICKS)) {
			pre_end = end;
			read_current_timer(&end);
		}
		read_current_timer(&post_end);

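		/*
		 * (post_end - pre_start) spans the widest possible window
		 * around the calibration interval and so gives an upper
		 * bound on the timer rate; (pre_end - post_start) spans the
		 * narrowest window and gives a lower bound.
		 */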
		timer_rate_max = (post_end - pre_start) /
					DELAY_CALIBRATION_TICKS;
		timer_rate_min = (pre_end - post_start) /
					DELAY_CALIBRATION_TICKS;

		/*
		 * If the upper and lower limits of the timer_rate are
		 * >= 12.5% apart, redo the calibration.
		 */
		if (start >= post_end)
			printk(KERN_NOTICE "calibrate_delay_direct() ignoring "
					"timer_rate as we had a TSC wrap around"
					" start=%lu >=post_end=%lu\n",
				start, post_end);
		if (start < post_end && pre_start != 0 && pre_end != 0 &&
		    (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
			good_timer_count++;
			good_timer_sum += timer_rate_max;
			measured_times[i] = timer_rate_max;
			if (max < 0 || timer_rate_max > measured_times[max])
				max = i;
			if (min < 0 || timer_rate_max < measured_times[min])
				min = i;
		} else
			measured_times[i] = 0;

	}

	/*
	 * Find the maximum & minimum - if they differ too much throw out the
	 * one with the largest difference from the mean and try again...
	 */
	while (good_timer_count > 1) {
		unsigned long estimate;
		unsigned long maxdiff;

		/* compute the estimate */
		estimate = (good_timer_sum/good_timer_count);
		maxdiff = estimate >> 3;

		/* if range is within 12.5% let's take it */
		if ((measured_times[max] - measured_times[min]) < maxdiff)
			return estimate;

		/* ok - drop the worse value and try again... */
		good_timer_sum = 0;
		good_timer_count = 0;
		if ((measured_times[max] - estimate) <
				(estimate - measured_times[min])) {
			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
					"min bogoMips estimate %d = %lu\n",
				min, measured_times[min]);
			measured_times[min] = 0;
			min = max;
		} else {
			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
					"max bogoMips estimate %d = %lu\n",
				max, measured_times[max]);
			measured_times[max] = 0;
			max = min;
		}

		for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
			if (measured_times[i] == 0)
				continue;
			good_timer_count++;
			good_timer_sum += measured_times[i];
			if (measured_times[i] < measured_times[min])
				min = i;
			if (measured_times[i] > measured_times[max])
				max = i;
		}

	}

	printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good "
	       "estimate for loops_per_jiffy.\nProbably due to long platform "
		"interrupts. Consider using \"lpj=\" boot option.\n");
	return 0;
}
#else
static unsigned long calibrate_delay_direct(void)
{
	return 0;
}
#endif

/*
 * This is the number of bits of precision for loops_per_jiffy.  Each
 * refinement of the estimate after the first one takes 1.5/HZ seconds, so
 * try to start with a good estimate.
 * For the boot CPU we can skip the delay calibration and assign it a value
 * calculated based on the timer frequency.
 * For the rest of the CPUs we cannot assume that the timer frequency is the
 * same as the CPU frequency, hence do the calibration for those.
 */
#define LPS_PREC 8

static unsigned long calibrate_delay_converge(void)
{
	/* First stage - slowly accelerate to find initial bounds */
	unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
	int trials = 0, band = 0, trial_in_band = 0;

	lpj = (1<<12);

	/* wait for "start of" clock tick */
	ticks = jiffies;
	while (ticks == jiffies)
		; /* nothing */
	/* Go .. */
	ticks = jiffies;
	do {
		if (++trial_in_band == (1<<band)) {
			++band;
			trial_in_band = 0;
		}
		__delay(lpj * band);
		trials += band;
	} while (ticks == jiffies);
	/*
	 * We overshot, so retreat to a clear underestimate. Then estimate
	 * the largest likely undershoot. This defines our chop bounds.
	 */
	trials -= band;
	loopadd_base = lpj * band;
	lpj_base = lpj * trials;

recalibrate:
	lpj = lpj_base;
	loopadd = loopadd_base;

	/*
	 * Do a binary approximation to get lpj set to
	 * equal one clock (up to LPS_PREC bits)
	 */
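	/*
	 * Each pass tries lpj + loopadd against one tick and keeps the
	 * addition only if the delay still fits within the tick; loopadd is
	 * then halved, so the search stops once the step falls below
	 * chop_limit (lpj >> LPS_PREC, about 0.4% of lpj for LPS_PREC == 8).
	 */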
	chop_limit = lpj >> LPS_PREC;
	while (loopadd > chop_limit) {
		lpj += loopadd;
		ticks = jiffies;
		while (ticks == jiffies)
			; /* nothing */
		ticks = jiffies;
		__delay(lpj);
		if (jiffies != ticks)	/* longer than 1 tick */
			lpj -= loopadd;
		loopadd >>= 1;
	}
	/*
	 * If we incremented every single time possible, presume we've
	 * massively underestimated initially, and retry with a higher
	 * start, and larger range. (Only seen on x86_64, due to SMIs)
	 */
	if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
		lpj_base = lpj;
		loopadd_base <<= 2;
		goto recalibrate;
	}

	return lpj;
}

static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };

/*
 * Check if cpu calibration delay is already known. For example,
 * some processors with multi-core sockets may have all cores
 * with the same calibration delay.
 *
 * Architectures should override this function if a faster calibration
 * method is available.
 */
unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
{
	return 0;
}

/*
 * Indicate the cpu delay calibration is done. This can be used by
 * architectures to stop accepting delay timer registrations after this point.
 */

void __attribute__((weak)) calibration_delay_done(void)
{
}

void calibrate_delay(void)
{
	unsigned long lpj;
	static bool printed;
	int this_cpu = smp_processor_id();

	if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
		lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
		if (!printed)
			pr_info("Calibrating delay loop (skipped) "
				"already calibrated this CPU");
	} else if (preset_lpj) {
		lpj = preset_lpj;
		if (!printed)
			pr_info("Calibrating delay loop (skipped) "
				"preset value.. ");
	} else if ((!printed) && lpj_fine) {
		lpj = lpj_fine;
		pr_info("Calibrating delay loop (skipped), "
			"value calculated using timer frequency.. ");
	} else if ((lpj = calibrate_delay_is_known())) {
		;
	} else if ((lpj = calibrate_delay_direct()) != 0) {
		if (!printed)
			pr_info("Calibrating delay using timer "
				"specific routine.. ");
	} else {
		if (!printed)
			pr_info("Calibrating delay loop... ");
		lpj = calibrate_delay_converge();
	}
	per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
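	/*
	 * BogoMIPS = delay-loop iterations per second / 500000, i.e.
	 * lpj * HZ / 500000: the first argument below is the integer part,
	 * the second the two fractional digits.
	 */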
	if (!printed)
		pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
			lpj/(500000/HZ),
			(lpj/(5000/HZ)) % 100, lpj);

	loops_per_jiffy = lpj;
	printed = true;

	calibration_delay_done();
}
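
For reference, calibrate_delay_converge() above boils down to a binary search for the largest loop count that still completes inside one clock tick. Below is a rough userspace sketch of the same idea, not part of calibrate.c: it substitutes clock_gettime() and an arbitrary 10ms "tick" for jiffies, and a trivial counting loop (spin(), a hypothetical helper) for the kernel's __delay(); compiler optimizations and scheduling noise make it far less precise than the kernel routine.

#include <stdio.h>
#include <time.h>

#define TICK_NS   10000000L	/* pretend one "tick" is 10 ms */
#define LPS_PREC  8		/* same precision as the kernel code */

/* Stand-in for the kernel's __delay(): spin for `loops` iterations. */
static void spin(unsigned long loops)
{
	volatile unsigned long i;

	for (i = 0; i < loops; i++)
		;
}

static long elapsed_ns(struct timespec a, struct timespec b)
{
	return (b.tv_sec - a.tv_sec) * 1000000000L + (b.tv_nsec - a.tv_nsec);
}

/* Does spin(loops) finish within one fake tick? */
static int fits_in_tick(unsigned long loops)
{
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	spin(loops);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	return elapsed_ns(t0, t1) <= TICK_NS;
}

int main(void)
{
	unsigned long lpj = 1UL << 12;	/* initial guess, as in the kernel */
	unsigned long step;

	/* First stage: double until one more doubling would overshoot a tick. */
	while (fits_in_tick(lpj * 2))
		lpj *= 2;

	/* Second stage: binary refinement, mirroring the chop_limit loop. */
	for (step = lpj; step > (lpj >> LPS_PREC); step >>= 1) {
		if (fits_in_tick(lpj + step))
			lpj += step;
	}

	printf("~%lu loops per 10ms tick\n", lpj);
	return 0;
}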