cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

runtest.c (7562B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Copyright(c) 2022 Intel Corporation. */
      3
      4#include <linux/cpu.h>
      5#include <linux/delay.h>
      6#include <linux/fs.h>
      7#include <linux/nmi.h>
      8#include <linux/slab.h>
      9#include <linux/stop_machine.h>
     10
     11#include "ifs.h"
     12
     13/*
     14 * Note all code and data in this file is protected by
     15 * ifs_sem. On HT systems all threads on a core will
     16 * execute together, but only the first thread on the
     17 * core will update results of the test.
     18 */
     19
     20#define CREATE_TRACE_POINTS
     21#include <trace/events/intel_ifs.h>
     22
     23/* Max retries on the same chunk */
     24#define MAX_IFS_RETRIES  5
     25
     26/*
     27 * Number of TSC cycles that a logical CPU will wait for the other
     28 * logical CPU on the core in the WRMSR(ACTIVATE_SCAN).
     29 */
     30#define IFS_THREAD_WAIT 100000
     31
     32enum ifs_status_err_code {
     33	IFS_NO_ERROR				= 0,
     34	IFS_OTHER_THREAD_COULD_NOT_JOIN		= 1,
     35	IFS_INTERRUPTED_BEFORE_RENDEZVOUS	= 2,
     36	IFS_POWER_MGMT_INADEQUATE_FOR_SCAN	= 3,
     37	IFS_INVALID_CHUNK_RANGE			= 4,
     38	IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS	= 5,
     39	IFS_CORE_NOT_CAPABLE_CURRENTLY		= 6,
     40	IFS_UNASSIGNED_ERROR_CODE		= 7,
     41	IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 8,
     42	IFS_INTERRUPTED_DURING_EXECUTION	= 9,
     43};
     44
     45static const char * const scan_test_status[] = {
     46	[IFS_NO_ERROR] = "SCAN no error",
     47	[IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
     48	[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
     49	[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
     50	"Core Abort SCAN Response due to power management condition.",
     51	[IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
     52	[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
     53	[IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
     54	[IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
     55	[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
     56	"Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
     57	[IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
     58};
     59
     60static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
     61{
     62	if (status.error_code < ARRAY_SIZE(scan_test_status)) {
     63		dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
     64			 cpumask_pr_args(cpu_smt_mask(cpu)),
     65			 scan_test_status[status.error_code]);
     66	} else if (status.error_code == IFS_SW_TIMEOUT) {
     67		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
     68			 cpumask_pr_args(cpu_smt_mask(cpu)));
     69	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
     70		dev_info(dev, "CPU(s) %*pbl: %s\n",
     71			 cpumask_pr_args(cpu_smt_mask(cpu)),
     72			 "Not all scan chunks were executed. Maximum forward progress retries exceeded");
     73	} else {
     74		dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
     75			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
     76	}
     77}
     78
     79static void message_fail(struct device *dev, int cpu, union ifs_status status)
     80{
     81	/*
     82	 * control_error is set when the microcode runs into a problem
     83	 * loading the image from the reserved BIOS memory, or it has
     84	 * been corrupted. Reloading the image may fix this issue.
     85	 */
     86	if (status.control_error) {
     87		dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image\n",
     88			cpumask_pr_args(cpu_smt_mask(cpu)));
     89	}
     90
     91	/*
     92	 * signature_error is set when the output from the scan chains does not
     93	 * match the expected signature. This might be a transient problem (e.g.
     94	 * due to a bit flip from an alpha particle or neutron). If the problem
     95	 * repeats on a subsequent test, then it indicates an actual problem in
     96	 * the core being tested.
     97	 */
     98	if (status.signature_error) {
     99		dev_err(dev, "CPU(s) %*pbl: test signature incorrect.\n",
    100			cpumask_pr_args(cpu_smt_mask(cpu)));
    101	}
    102}
    103
    104static bool can_restart(union ifs_status status)
    105{
    106	enum ifs_status_err_code err_code = status.error_code;
    107
    108	/* Signature for chunk is bad, or scan test failed */
    109	if (status.signature_error || status.control_error)
    110		return false;
    111
    112	switch (err_code) {
    113	case IFS_NO_ERROR:
    114	case IFS_OTHER_THREAD_COULD_NOT_JOIN:
    115	case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
    116	case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
    117	case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
    118	case IFS_INTERRUPTED_DURING_EXECUTION:
    119		return true;
    120	case IFS_INVALID_CHUNK_RANGE:
    121	case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
    122	case IFS_CORE_NOT_CAPABLE_CURRENTLY:
    123	case IFS_UNASSIGNED_ERROR_CODE:
    124		break;
    125	}
    126	return false;
    127}
    128
    129/*
    130 * Execute the scan. Called "simultaneously" on all threads of a core
    131 * at high priority using the stop_cpus mechanism.
    132 */
    133static int doscan(void *data)
    134{
    135	int cpu = smp_processor_id();
    136	u64 *msrs = data;
    137	int first;
    138
    139	/* Only the first logical CPU on a core reports result */
    140	first = cpumask_first(cpu_smt_mask(cpu));
    141
    142	/*
    143	 * This WRMSR will wait for other HT threads to also write
    144	 * to this MSR (at most for activate.delay cycles). Then it
    145	 * starts scan of each requested chunk. The core scan happens
    146	 * during the "execution" of the WRMSR. This instruction can
    147	 * take up to 200 milliseconds (in the case where all chunks
    148	 * are processed in a single pass) before it retires.
    149	 */
    150	wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]);
    151
    152	if (cpu == first) {
    153		/* Pass back the result of the scan */
    154		rdmsrl(MSR_SCAN_STATUS, msrs[1]);
    155	}
    156
    157	return 0;
    158}
    159
    160/*
    161 * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN
    162 * on all threads of the core to be tested. Loop if necessary to complete
    163 * run of all chunks. Include some defensive tests to make sure forward
    164 * progress is made, and that the whole test completes in a reasonable time.
    165 */
    166static void ifs_test_core(int cpu, struct device *dev)
    167{
    168	union ifs_scan activate;
    169	union ifs_status status;
    170	unsigned long timeout;
    171	struct ifs_data *ifsd;
    172	u64 msrvals[2];
    173	int retries;
    174
    175	ifsd = ifs_get_data(dev);
    176
    177	activate.rsvd = 0;
    178	activate.delay = IFS_THREAD_WAIT;
    179	activate.sigmce = 0;
    180	activate.start = 0;
    181	activate.stop = ifsd->valid_chunks - 1;
    182
    183	timeout = jiffies + HZ / 2;
    184	retries = MAX_IFS_RETRIES;
    185
    186	while (activate.start <= activate.stop) {
    187		if (time_after(jiffies, timeout)) {
    188			status.error_code = IFS_SW_TIMEOUT;
    189			break;
    190		}
    191
    192		msrvals[0] = activate.data;
    193		stop_core_cpuslocked(cpu, doscan, msrvals);
    194
    195		status.data = msrvals[1];
    196
    197		trace_ifs_status(cpu, activate, status);
    198
    199		/* Some cases can be retried, give up for others */
    200		if (!can_restart(status))
    201			break;
    202
    203		if (status.chunk_num == activate.start) {
    204			/* Check for forward progress */
    205			if (--retries == 0) {
    206				if (status.error_code == IFS_NO_ERROR)
    207					status.error_code = IFS_SW_PARTIAL_COMPLETION;
    208				break;
    209			}
    210		} else {
    211			retries = MAX_IFS_RETRIES;
    212			activate.start = status.chunk_num;
    213		}
    214	}
    215
    216	/* Update status for this core */
    217	ifsd->scan_details = status.data;
    218
    219	if (status.control_error || status.signature_error) {
    220		ifsd->status = SCAN_TEST_FAIL;
    221		message_fail(dev, cpu, status);
    222	} else if (status.error_code) {
    223		ifsd->status = SCAN_NOT_TESTED;
    224		message_not_tested(dev, cpu, status);
    225	} else {
    226		ifsd->status = SCAN_TEST_PASS;
    227	}
    228}
    229
    230/*
    231 * Initiate per core test. It wakes up work queue threads on the target cpu and
    232 * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and
    233 * wait for all sibling threads to finish the scan test.
    234 */
    235int do_core_test(int cpu, struct device *dev)
    236{
    237	int ret = 0;
    238
    239	/* Prevent CPUs from being taken offline during the scan test */
    240	cpus_read_lock();
    241
    242	if (!cpu_online(cpu)) {
    243		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
    244		ret = -EINVAL;
    245		goto out;
    246	}
    247
    248	ifs_test_core(cpu, dev);
    249out:
    250	cpus_read_unlock();
    251	return ret;
    252}