knc.c (8421B)
1// SPDX-License-Identifier: GPL-2.0 2/* Driver for Intel Xeon Phi "Knights Corner" PMU */ 3 4#include <linux/perf_event.h> 5#include <linux/types.h> 6 7#include <asm/hardirq.h> 8 9#include "../perf_event.h" 10 11static const u64 knc_perfmon_event_map[] = 12{ 13 [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, 14 [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, 15 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, 16 [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, 17 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, 18 [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, 19}; 20 21static const u64 __initconst knc_hw_cache_event_ids 22 [PERF_COUNT_HW_CACHE_MAX] 23 [PERF_COUNT_HW_CACHE_OP_MAX] 24 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 25{ 26 [ C(L1D) ] = { 27 [ C(OP_READ) ] = { 28 /* On Xeon Phi event "0" is a valid DATA_READ */ 29 /* (L1 Data Cache Reads) Instruction. */ 30 /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ 31 /* bit will always be set in x86_pmu_hw_config(). */ 32 [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, 33 /* DATA_READ */ 34 [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ 35 }, 36 [ C(OP_WRITE) ] = { 37 [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ 38 [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ 39 }, 40 [ C(OP_PREFETCH) ] = { 41 [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ 42 [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ 43 }, 44 }, 45 [ C(L1I ) ] = { 46 [ C(OP_READ) ] = { 47 [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ 48 [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ 49 }, 50 [ C(OP_WRITE) ] = { 51 [ C(RESULT_ACCESS) ] = -1, 52 [ C(RESULT_MISS) ] = -1, 53 }, 54 [ C(OP_PREFETCH) ] = { 55 [ C(RESULT_ACCESS) ] = 0x0, 56 [ C(RESULT_MISS) ] = 0x0, 57 }, 58 }, 59 [ C(LL ) ] = { 60 [ C(OP_READ) ] = { 61 [ C(RESULT_ACCESS) ] = 0, 62 [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ 63 }, 64 [ C(OP_WRITE) ] = { 65 [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ 66 [ C(RESULT_MISS) ] = 0, 67 }, 68 [ C(OP_PREFETCH) ] = { 69 [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ 70 [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ 71 }, 72 }, 73 [ C(DTLB) ] = { 74 [ C(OP_READ) ] = { 75 [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, 76 /* DATA_READ */ 77 /* see note on L1 OP_READ */ 78 [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ 79 }, 80 [ C(OP_WRITE) ] = { 81 [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ 82 [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ 83 }, 84 [ C(OP_PREFETCH) ] = { 85 [ C(RESULT_ACCESS) ] = 0x0, 86 [ C(RESULT_MISS) ] = 0x0, 87 }, 88 }, 89 [ C(ITLB) ] = { 90 [ C(OP_READ) ] = { 91 [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ 92 [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ 93 }, 94 [ C(OP_WRITE) ] = { 95 [ C(RESULT_ACCESS) ] = -1, 96 [ C(RESULT_MISS) ] = -1, 97 }, 98 [ C(OP_PREFETCH) ] = { 99 [ C(RESULT_ACCESS) ] = -1, 100 [ C(RESULT_MISS) ] = -1, 101 }, 102 }, 103 [ C(BPU ) ] = { 104 [ C(OP_READ) ] = { 105 [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ 106 [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ 107 }, 108 [ C(OP_WRITE) ] = { 109 [ C(RESULT_ACCESS) ] = -1, 110 [ C(RESULT_MISS) ] = -1, 111 }, 112 [ C(OP_PREFETCH) ] = { 113 [ C(RESULT_ACCESS) ] = -1, 114 [ C(RESULT_MISS) ] = -1, 115 }, 116 }, 117}; 118 119 120static u64 knc_pmu_event_map(int hw_event) 121{ 122 return knc_perfmon_event_map[hw_event]; 123} 124 125static struct event_constraint knc_event_constraints[] = 126{ 127 INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ 128 INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ 129 INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ 130 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ 131 INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ 132 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ 133 INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ 134 INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ 135 INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ 136 INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ 137 INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ 138 INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ 139 INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ 140 INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ 141 INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ 142 INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ 143 INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ 144 INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ 145 INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ 146 INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ 147 INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ 148 EVENT_CONSTRAINT_END 149}; 150 151#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d 152#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e 153#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f 154 155#define KNC_ENABLE_COUNTER0 0x00000001 156#define KNC_ENABLE_COUNTER1 0x00000002 157 158static void knc_pmu_disable_all(void) 159{ 160 u64 val; 161 162 rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); 163 val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); 164 wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); 165} 166 167static void knc_pmu_enable_all(int added) 168{ 169 u64 val; 170 171 rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); 172 val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); 173 wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); 174} 175 176static inline void 177knc_pmu_disable_event(struct perf_event *event) 178{ 179 struct hw_perf_event *hwc = &event->hw; 180 u64 val; 181 182 val = hwc->config; 183 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 184 185 (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); 186} 187 188static void knc_pmu_enable_event(struct perf_event *event) 189{ 190 struct hw_perf_event *hwc = &event->hw; 191 u64 val; 192 193 val = hwc->config; 194 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 195 196 (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); 197} 198 199static inline u64 knc_pmu_get_status(void) 200{ 201 u64 status; 202 203 rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); 204 205 return status; 206} 207 208static inline void knc_pmu_ack_status(u64 ack) 209{ 210 wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); 211} 212 213static int knc_pmu_handle_irq(struct pt_regs *regs) 214{ 215 struct perf_sample_data data; 216 struct cpu_hw_events *cpuc; 217 int handled = 0; 218 int bit, loops; 219 u64 status; 220 221 cpuc = this_cpu_ptr(&cpu_hw_events); 222 223 knc_pmu_disable_all(); 224 225 status = knc_pmu_get_status(); 226 if (!status) { 227 knc_pmu_enable_all(0); 228 return handled; 229 } 230 231 loops = 0; 232again: 233 knc_pmu_ack_status(status); 234 if (++loops > 100) { 235 WARN_ONCE(1, "perf: irq loop stuck!\n"); 236 perf_event_print_debug(); 237 goto done; 238 } 239 240 inc_irq_stat(apic_perf_irqs); 241 242 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 243 struct perf_event *event = cpuc->events[bit]; 244 245 handled++; 246 247 if (!test_bit(bit, cpuc->active_mask)) 248 continue; 249 250 if (!intel_pmu_save_and_restart(event)) 251 continue; 252 253 perf_sample_data_init(&data, 0, event->hw.last_period); 254 255 if (perf_event_overflow(event, &data, regs)) 256 x86_pmu_stop(event, 0); 257 } 258 259 /* 260 * Repeat if there is more work to be done: 261 */ 262 status = knc_pmu_get_status(); 263 if (status) 264 goto again; 265 266done: 267 /* Only restore PMU state when it's active. See x86_pmu_disable(). */ 268 if (cpuc->enabled) 269 knc_pmu_enable_all(0); 270 271 return handled; 272} 273 274 275PMU_FORMAT_ATTR(event, "config:0-7" ); 276PMU_FORMAT_ATTR(umask, "config:8-15" ); 277PMU_FORMAT_ATTR(edge, "config:18" ); 278PMU_FORMAT_ATTR(inv, "config:23" ); 279PMU_FORMAT_ATTR(cmask, "config:24-31" ); 280 281static struct attribute *intel_knc_formats_attr[] = { 282 &format_attr_event.attr, 283 &format_attr_umask.attr, 284 &format_attr_edge.attr, 285 &format_attr_inv.attr, 286 &format_attr_cmask.attr, 287 NULL, 288}; 289 290static const struct x86_pmu knc_pmu __initconst = { 291 .name = "knc", 292 .handle_irq = knc_pmu_handle_irq, 293 .disable_all = knc_pmu_disable_all, 294 .enable_all = knc_pmu_enable_all, 295 .enable = knc_pmu_enable_event, 296 .disable = knc_pmu_disable_event, 297 .hw_config = x86_pmu_hw_config, 298 .schedule_events = x86_schedule_events, 299 .eventsel = MSR_KNC_EVNTSEL0, 300 .perfctr = MSR_KNC_PERFCTR0, 301 .event_map = knc_pmu_event_map, 302 .max_events = ARRAY_SIZE(knc_perfmon_event_map), 303 .apic = 1, 304 .max_period = (1ULL << 39) - 1, 305 .version = 0, 306 .num_counters = 2, 307 .cntval_bits = 40, 308 .cntval_mask = (1ULL << 40) - 1, 309 .get_event_constraints = x86_get_event_constraints, 310 .event_constraints = knc_event_constraints, 311 .format_attrs = intel_knc_formats_attr, 312}; 313 314__init int knc_pmu_init(void) 315{ 316 x86_pmu = knc_pmu; 317 318 memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 319 sizeof(hw_cache_event_ids)); 320 321 return 0; 322}