cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

cpustat_kern.c (7139B)


      1// SPDX-License-Identifier: GPL-2.0
      2
      3#include <linux/version.h>
      4#include <linux/ptrace.h>
      5#include <uapi/linux/bpf.h>
      6#include <bpf/bpf_helpers.h>
      7
      8/*
      9 * The CPU number, cstate number and pstate number are based
     10 * on 96boards Hikey with octa CA53 CPUs.
     11 *
     12 * Every CPU has three idle states for cstate:
     13 *   WFI, CPU_OFF, CLUSTER_OFF
     14 *
     15 * Every CPU has 5 operating points:
     16 *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
     17 *
     18 * This code is based on these assumptions; other platforms
     19 * need to adjust these definitions.
     20 */
     21#define MAX_CPU			8	/* number of CPUs the maps are sized for */
     22#define MAX_PSTATE_ENTRIES	5	/* pstate_duration slots per CPU */
     23#define MAX_CSTATE_ENTRIES	3	/* cstate_duration slots per CPU */
     24
     25static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
     26
     27/*
     28 * The my_map structure is used to record the cstate and pstate index and
     29 * timestamp (Idx, Ts); when a new event arrives we need to update the
     30 * record with the new state index and timestamp (Idx`, Ts`).
     31 *
     32 * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
     33 * interval for the previous state: Duration(Idx) = Ts` - Ts.
     34 *
     35 * Every CPU has one array, laid out as below, for recording the state
     36 * index and timestamp; cstate and pstate are recorded separately:
     37 *
     38 * +--------------------------+
     39 * | cstate timestamp         |
     40 * +--------------------------+
     41 * | cstate index             |
     42 * +--------------------------+
     43 * | pstate timestamp         |
     44 * +--------------------------+
     45 * | pstate index             |
     46 * +--------------------------+
     47 */
     48#define MAP_OFF_CSTATE_TIME	0	/* timestamp of the last cpu_idle event */
     49#define MAP_OFF_CSTATE_IDX	1	/* state value of the last cpu_idle event */
     50#define MAP_OFF_PSTATE_TIME	2	/* start time of the current pstate interval */
     51#define MAP_OFF_PSTATE_IDX	3	/* state value of the last cpu_frequency event */
     52#define MAP_OFF_NUM		4	/* number of my_map slots per CPU */
     53
     54struct {
     55	__uint(type, BPF_MAP_TYPE_ARRAY);
     56	__type(key, u32);
     57	__type(value, u64);
     58	__uint(max_entries, MAX_CPU * MAP_OFF_NUM);
     59} my_map SEC(".maps");
     60
     61/* cstate_duration records duration time for every idle state per CPU */
     62struct {
     63	__uint(type, BPF_MAP_TYPE_ARRAY);
     64	__type(key, u32);
     65	__type(value, u64);
     66	__uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
     67} cstate_duration SEC(".maps");
     68
     69/* pstate_duration records duration time for every operating point per CPU */
     70struct {
     71	__uint(type, BPF_MAP_TYPE_ARRAY);
     72	__type(key, u32);
     73	__type(value, u64);
     74	__uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
     75} pstate_duration SEC(".maps");
     76
     77/*
     78 * The trace events for cpu_idle and cpu_frequency are taken from:
     79 * /sys/kernel/debug/tracing/events/power/cpu_idle/format
     80 * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
     81 *
     82 * These two events have the same format, so one common structure is defined.
     83 */
     84struct cpu_args {
     85	u64 pad;	/* NOTE(review): presumably the common tracepoint header - confirm */
     86	u32 state;	/* cpu_idle: idle state, (u32)-1 on exit; cpu_frequency: frequency */
     87	u32 cpu_id;	/* CPU the event refers to */
     88};
     89
     90/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
     91static u32 find_cpu_pstate_idx(u32 frequency)
     92{
     93	u32 i;
     94
     95	for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
     96		if (frequency == cpu_opps[i])
     97			return i;
     98	}
     99
    100	return i;
    101}
    102
    103SEC("tracepoint/power/cpu_idle")
    104int bpf_prog1(struct cpu_args *ctx)
    105{
    106	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
    107	u32 key, cpu, pstate_idx;
    108	u64 *val;
    109
    110	if (ctx->cpu_id > MAX_CPU)
    111		return 0;
    112
    113	cpu = ctx->cpu_id;
    114
    115	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
    116	cts = bpf_map_lookup_elem(&my_map, &key);
    117	if (!cts)
    118		return 0;
    119
    120	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
    121	cstate = bpf_map_lookup_elem(&my_map, &key);
    122	if (!cstate)
    123		return 0;
    124
    125	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
    126	pts = bpf_map_lookup_elem(&my_map, &key);
    127	if (!pts)
    128		return 0;
    129
    130	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
    131	pstate = bpf_map_lookup_elem(&my_map, &key);
    132	if (!pstate)
    133		return 0;
    134
    135	prev_state = *cstate;
    136	*cstate = ctx->state;
    137
    138	if (!*cts) {
    139		*cts = bpf_ktime_get_ns();
    140		return 0;
    141	}
    142
    143	cur_ts = bpf_ktime_get_ns();
    144	delta = cur_ts - *cts;
    145	*cts = cur_ts;
    146
    147	/*
    148	 * When state doesn't equal to (u32)-1, the cpu will enter
    149	 * one idle state; for this case we need to record interval
    150	 * for the pstate.
    151	 *
    152	 *                 OPP2
    153	 *            +---------------------+
    154	 *     OPP1   |                     |
    155	 *   ---------+                     |
    156	 *                                  |  Idle state
    157	 *                                  +---------------
    158	 *
    159	 *            |<- pstate duration ->|
    160	 *            ^                     ^
    161	 *           pts                  cur_ts
    162	 */
    163	if (ctx->state != (u32)-1) {
    164
    165		/* record pstate after have first cpu_frequency event */
    166		if (!*pts)
    167			return 0;
    168
    169		delta = cur_ts - *pts;
    170
    171		pstate_idx = find_cpu_pstate_idx(*pstate);
    172		if (pstate_idx >= MAX_PSTATE_ENTRIES)
    173			return 0;
    174
    175		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
    176		val = bpf_map_lookup_elem(&pstate_duration, &key);
    177		if (val)
    178			__sync_fetch_and_add((long *)val, delta);
    179
    180	/*
    181	 * When state equal to (u32)-1, the cpu just exits from one
    182	 * specific idle state; for this case we need to record
    183	 * interval for the pstate.
    184	 *
    185	 *       OPP2
    186	 *   -----------+
    187	 *              |                          OPP1
    188	 *              |                     +-----------
    189	 *              |     Idle state      |
    190	 *              +---------------------+
    191	 *
    192	 *              |<- cstate duration ->|
    193	 *              ^                     ^
    194	 *             cts                  cur_ts
    195	 */
    196	} else {
    197
    198		key = cpu * MAX_CSTATE_ENTRIES + prev_state;
    199		val = bpf_map_lookup_elem(&cstate_duration, &key);
    200		if (val)
    201			__sync_fetch_and_add((long *)val, delta);
    202	}
    203
    204	/* Update timestamp for pstate as new start time */
    205	if (*pts)
    206		*pts = cur_ts;
    207
    208	return 0;
    209}
    210
    211SEC("tracepoint/power/cpu_frequency")
    212int bpf_prog2(struct cpu_args *ctx)
    213{
    214	u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
    215	u32 key, cpu, pstate_idx;
    216	u64 *val;
    217
    218	cpu = ctx->cpu_id;
    219
    220	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
    221	pts = bpf_map_lookup_elem(&my_map, &key);
    222	if (!pts)
    223		return 0;
    224
    225	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
    226	pstate = bpf_map_lookup_elem(&my_map, &key);
    227	if (!pstate)
    228		return 0;
    229
    230	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
    231	cstate = bpf_map_lookup_elem(&my_map, &key);
    232	if (!cstate)
    233		return 0;
    234
    235	prev_state = *pstate;
    236	*pstate = ctx->state;
    237
    238	if (!*pts) {
    239		*pts = bpf_ktime_get_ns();
    240		return 0;
    241	}
    242
    243	cur_ts = bpf_ktime_get_ns();
    244	delta = cur_ts - *pts;
    245	*pts = cur_ts;
    246
    247	/* When CPU is in idle, bail out to skip pstate statistics */
    248	if (*cstate != (u32)(-1))
    249		return 0;
    250
    251	/*
    252	 * The cpu changes to another different OPP (in below diagram
    253	 * change frequency from OPP3 to OPP1), need recording interval
    254	 * for previous frequency OPP3 and update timestamp as start
    255	 * time for new frequency OPP1.
    256	 *
    257	 *                 OPP3
    258	 *            +---------------------+
    259	 *     OPP2   |                     |
    260	 *   ---------+                     |
    261	 *                                  |    OPP1
    262	 *                                  +---------------
    263	 *
    264	 *            |<- pstate duration ->|
    265	 *            ^                     ^
    266	 *           pts                  cur_ts
    267	 */
    268	pstate_idx = find_cpu_pstate_idx(*pstate);
    269	if (pstate_idx >= MAX_PSTATE_ENTRIES)
    270		return 0;
    271
    272	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
    273	val = bpf_map_lookup_elem(&pstate_duration, &key);
    274	if (val)
    275		__sync_fetch_and_add((long *)val, delta);
    276
    277	return 0;
    278}
    279
    280char _license[] SEC("license") = "GPL";	/* license string, emitted in the "license" section */
    281u32 _version SEC("version") = LINUX_VERSION_CODE;	/* kernel version code, emitted in the "version" section */