cache-l2x0-pmu.c (12906B)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * L220/L310 cache controller support
 *
 * Copyright (C) 2016 ARM Limited
 */
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/hardware/cache-l2x0.h>

/* Number of hardware event counters managed by this driver. */
#define PMU_NR_COUNTERS 2

/* MMIO base of the cache controller; stashed by l2x0_pmu_register(). */
static void __iomem *l2x0_base;
/* Allocated in l2x0_pmu_init(); NULL until then and again if init fails. */
static struct pmu *l2x0_pmu;
/*
 * CPU reported through the "cpumask" attribute and forced onto every event in
 * l2x0_pmu_event_init(). Seeded with CPU 0, updated by l2x0_pmu_offline_cpu().
 */
static cpumask_t pmu_cpu;

/* PMU name ("l2c_220" or "l2c_310"); chosen by l2x0_pmu_register(). */
static const char *l2x0_name;

/* Interval between polls of the counters; set in l2x0_pmu_init(). */
static ktime_t l2x0_pmu_poll_period;
/* Timer that drives l2x0_pmu_poll() while at least one counter is allocated. */
static struct hrtimer l2x0_pmu_hrtimer;

/*
 * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
 * Registers controlling these are laid out in pairs, in descending order, i.e.
 * the register for Counter1 comes first, followed by the register for
 * Counter0.
 * We ensure that idx 0 -> Counter0, and idx 1 -> Counter1.
 */
static struct perf_event *events[PMU_NR_COUNTERS];

/*
 * Find an unused counter.
 *
 * Returns the lowest index whose slot in events[] is NULL, or -1 if every
 * slot is occupied.
 */
static int l2x0_pmu_find_idx(void)
{
	int i;

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (!events[i])
			return i;
	}

	return -1;
}

/*
 * How many counters are allocated?
 *
 * Returns the number of non-NULL slots in events[].
 */
static int l2x0_pmu_num_active_counters(void)
{
	int i, cnt = 0;

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (events[i])
			cnt++;
	}

	return cnt;
}

/*
 * Write @val to the configuration register of counter @idx.
 *
 * The offset is computed downwards from the Counter0 register, 4 bytes per
 * index, matching the descending register layout described above events[].
 */
static void l2x0_pmu_counter_config_write(int idx, u32 val)
{
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
}

/* Return the current 32-bit value register of counter @idx. */
static u32 l2x0_pmu_counter_read(int idx)
{
	return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

/* Write @val to the value register of counter @idx. */
static void l2x0_pmu_counter_write(int idx, u32 val)
{
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

/*
 * Set the enable bit in the event counter control register, preserving the
 * other bits (read-modify-write).
 */
static void __l2x0_pmu_enable(void)
{
	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
	val |= L2X0_EVENT_CNT_CTRL_ENABLE;
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

/*
 * Clear the enable bit in the event counter control register, preserving the
 * other bits (read-modify-write).
 */
static void __l2x0_pmu_disable(void)
{
	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
	val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

/*
 * pmu::pmu_enable callback. Does nothing when no counter is allocated;
 * otherwise sets the global enable bit. @pmu is not used.
 */
static void l2x0_pmu_enable(struct pmu *pmu)
{
	if (l2x0_pmu_num_active_counters() == 0)
		return;

	__l2x0_pmu_enable();
}

/*
 * pmu::pmu_disable callback. Does nothing when no counter is allocated;
 * otherwise clears the global enable bit. @pmu is not used.
 */
static void l2x0_pmu_disable(struct pmu *pmu)
{
	if (l2x0_pmu_num_active_counters() == 0)
		return;

	__l2x0_pmu_disable();
}

/*
 * Emit a rate-limited warning if @count is 0xffffffff, the value at which the
 * counters saturate (see l2x0_pmu_event_configure()).
 */
static void warn_if_saturated(u32 count)
{
	if (count != 0xffffffff)
		return;

	pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
}

/*
 * pmu::read callback: fold the hardware counter of @event into event->count.
 *
 * Also called from l2x0_pmu_poll() and l2x0_pmu_event_stop().
 */
static void l2x0_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hw = &event->hw;
	u64 prev_count, new_count, mask;

	/*
	 * Sample prev_count and the hardware counter, then publish the new
	 * value as prev_count. Retry if prev_count was changed by someone else
	 * between the sample and the exchange.
	 */
	do {
		 prev_count = local64_read(&hw->prev_count);
		 new_count = l2x0_pmu_counter_read(hw->idx);
	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);

	/* The hardware counter is 32 bits wide; keep only that much delta. */
	mask = GENMASK_ULL(31, 0);
	local64_add((new_count - prev_count) & mask, &event->count);

	warn_if_saturated(new_count);
}

/*
 * Reset both the software view (hw->prev_count) and the hardware counter of
 * @event to zero.
 */
static void l2x0_pmu_event_configure(struct perf_event *event)
{
	struct hw_perf_event *hw = &event->hw;

	/*
	 * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
	 * will *always* lose some number of events when a counter saturates,
	 * and have no way of detecting how many were lost.
	 *
	 * To minimize the impact of this, we try to maximize the period by
	 * always starting counters at zero. To ensure that group ratios are
	 * representative, we poll periodically to avoid counters saturating.
	 * See l2x0_pmu_poll().
	 */
	local64_set(&hw->prev_count, 0);
	l2x0_pmu_counter_write(hw->idx, 0);
}

/*
 * hrtimer callback: harvest and reset every allocated counter.
 *
 * Runs with local interrupts disabled and the global enable bit cleared while
 * the counters are read and zeroed, then re-enables counting and re-arms the
 * timer for another l2x0_pmu_poll_period.
 */
static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
{
	unsigned long flags;
	int i;

	local_irq_save(flags);
	__l2x0_pmu_disable();

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		struct perf_event *event = events[i];

		if (!event)
			continue;

		/* Accumulate what was counted, then restart from zero. */
		l2x0_pmu_event_read(event);
		l2x0_pmu_event_configure(event);
	}

	__l2x0_pmu_enable();
	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
	return HRTIMER_RESTART;
}


/*
 * Program counter @idx to count event source @event, with the
 * L2X0_EVENT_CNT_CFG_INT_DISABLED flag set (this driver polls instead of
 * using the counter interrupt; see l2x0_pmu_init()).
 */
static void __l2x0_pmu_event_enable(int idx, u32 event)
{
	u32 val;

	val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
	l2x0_pmu_counter_config_write(idx, val);
}

/*
 * pmu::start callback.
 *
 * Warns (once) and returns if @event is not currently marked
 * PERF_HES_STOPPED. With PERF_EF_RELOAD in @flags the counter is reset to
 * zero first. Clears hw->state and programs the event source.
 */
static void l2x0_pmu_event_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD) {
		/* A reload is only expected once the count has been folded. */
		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
		l2x0_pmu_event_configure(event);
	}

	hw->state = 0;

	__l2x0_pmu_event_enable(hw->idx, hw->config_base);
}

/*
 * Program counter @idx with the "disabled" event source, keeping the
 * L2X0_EVENT_CNT_CFG_INT_DISABLED flag set.
 */
static void __l2x0_pmu_event_disable(int idx)
{
	u32 val;

	val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
	l2x0_pmu_counter_config_write(idx, val);
}

/*
 * pmu::stop callback.
 *
 * Warns (once) and returns if @event is already marked PERF_HES_STOPPED.
 * Otherwise disables the counter's event source and marks the event stopped.
 * With PERF_EF_UPDATE in @flags the final count is folded into event->count
 * and the event is marked PERF_HES_UPTODATE.
 */
static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
		return;

	__l2x0_pmu_event_disable(hw->idx);

	hw->state |= PERF_HES_STOPPED;

	if (flags & PERF_EF_UPDATE) {
		l2x0_pmu_event_read(event);
		hw->state |= PERF_HES_UPTODATE;
	}
}

/*
 * pmu::add callback: allocate a counter for @event.
 *
 * Returns -EAGAIN when both counters are in use, 0 otherwise. The event is
 * left stopped and up to date unless PERF_EF_START is set in @flags, in which
 * case it is started immediately.
 */
static int l2x0_pmu_event_add(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	int idx = l2x0_pmu_find_idx();

	if (idx == -1)
		return -EAGAIN;

	/*
	 * Pin the timer, so that the overflows are handled by the chosen
	 * event->cpu (this is the same one as presented in "cpumask"
	 * attribute).
	 */
	/* Only the first allocated counter needs to arm the poll timer. */
	if (l2x0_pmu_num_active_counters() == 0)
		hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
			      HRTIMER_MODE_REL_PINNED);

	events[idx] = event;
	hw->idx = idx;

	l2x0_pmu_event_configure(event);

	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	/* The counter was just zeroed above, so no PERF_EF_RELOAD is needed. */
	if (flags & PERF_EF_START)
		l2x0_pmu_event_start(event, 0);

	return 0;
}

/*
 * pmu::del callback: stop @event (folding in its final count), release its
 * counter slot, and cancel the poll timer once no counter remains allocated.
 * @flags is not used.
 */
static void l2x0_pmu_event_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	l2x0_pmu_event_stop(event, PERF_EF_UPDATE);

	events[hw->idx] = NULL;
	hw->idx = -1;

	if (l2x0_pmu_num_active_counters() == 0)
		hrtimer_cancel(&l2x0_pmu_hrtimer);
}

/*
 * Check whether the group that @event belongs to could be scheduled.
 *
 * Every member (leader and siblings) must either belong to this PMU or be a
 * software event, and the number of members belonging to this PMU must not
 * exceed PMU_NR_COUNTERS.
 */
static bool l2x0_pmu_group_is_valid(struct perf_event *event)
{
	struct pmu *pmu = event->pmu;
	struct perf_event *leader = event->group_leader;
	struct perf_event *sibling;
	int num_hw = 0;

	if (leader->pmu == pmu)
		num_hw++;
	else if (!is_software_event(leader))
		return false;

	for_each_sibling_event(sibling, leader) {
		if (sibling->pmu == pmu)
			num_hw++;
		else if (!is_software_event(sibling))
			return false;
	}

	return num_hw <= PMU_NR_COUNTERS;
}

/*
 * pmu::event_init callback: validate a new event.
 *
 * Returns -ENOENT if the event is not of this PMU's type, and -EINVAL for
 * sampling events, task-attached events, events with a negative cpu, config
 * values with bits outside L2X0_EVENT_CNT_CFG_SRC_MASK, or groups rejected by
 * l2x0_pmu_group_is_valid(). On success stores the config in hw->config_base,
 * overrides event->cpu with the first CPU in pmu_cpu, and returns 0.
 */
static int l2x0_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hw = &event->hw;

	if (event->attr.type != l2x0_pmu->type)
		return -ENOENT;

	if (is_sampling_event(event) ||
	    event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
		return -EINVAL;

	hw->config_base = event->attr.config;

	if (!l2x0_pmu_group_is_valid(event))
		return -EINVAL;

	event->cpu = cpumask_first(&pmu_cpu);

	return 0;
}

/*
 * A sysfs event attribute: the device attribute itself, the event config
 * value it advertises, and whether it should only be shown on the PL310.
 */
struct l2x0_event_attribute {
	struct device_attribute attr;
	unsigned int config;
	bool pl310_only;
};

/*
 * Build an anonymous l2x0_event_attribute (as a one-element array compound
 * literal) and evaluate to a pointer to its embedded struct attribute, so the
 * result can be placed directly in a struct attribute * table.
 */
#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)				\
	(&((struct l2x0_event_attribute[]) {{					\
		.attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),	\
		.config = _config,						\
		.pl310_only = _pl310_only,					\
	}})[0].attr.attr)

/* Event available on both supported controllers (pl310_only = false). */
#define L220_PLUS_EVENT_ATTR(_name, _config)					\
	L2X0_EVENT_ATTR(_name, _config, false)

/* Event only shown when the PMU is named "l2c_310" (pl310_only = true). */
#define PL310_EVENT_ATTR(_name, _config)					\
	L2X0_EVENT_ATTR(_name, _config, true)

/*
 * sysfs show handler for an event attribute: writes "config=0x<config>\n"
 * into @buf (bounded by PAGE_SIZE) and returns snprintf()'s result.
 */
static ssize_t l2x0_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct l2x0_event_attribute *lattr;

	lattr = container_of(attr, typeof(*lattr), attr);
	return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
}

/*
 * attribute_group::is_visible handler for the "events" group.
 *
 * Returns the attribute's mode unless the attribute is marked pl310_only and
 * the PMU is not named "l2c_310", in which case it returns 0.
 *
 * NOTE(review): relies on the device's drvdata pointing at the struct pmu;
 * that is set outside this file - confirm against the perf core.
 */
static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
					      struct attribute *attr,
					      int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pmu *pmu = dev_get_drvdata(dev);
	struct l2x0_event_attribute *lattr;

	lattr = container_of(attr, typeof(*lattr), attr.attr);

	if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
		return attr->mode;

	return 0;
}

/*
 * Named events exposed in the "events" group. Config values 0x1-0x9 are
 * offered on every supported part, 0xa-0xf only on the PL310.
 */
static struct attribute *l2x0_pmu_event_attrs[] = {
	L220_PLUS_EVENT_ATTR(co, 0x1),
	L220_PLUS_EVENT_ATTR(drhit, 0x2),
	L220_PLUS_EVENT_ATTR(drreq, 0x3),
	L220_PLUS_EVENT_ATTR(dwhit, 0x4),
	L220_PLUS_EVENT_ATTR(dwreq, 0x5),
	L220_PLUS_EVENT_ATTR(dwtreq, 0x6),
	L220_PLUS_EVENT_ATTR(irhit, 0x7),
	L220_PLUS_EVENT_ATTR(irreq, 0x8),
	L220_PLUS_EVENT_ATTR(wa, 0x9),
	PL310_EVENT_ATTR(ipfalloc, 0xa),
	PL310_EVENT_ATTR(epfhit, 0xb),
	PL310_EVENT_ATTR(epfalloc, 0xc),
	PL310_EVENT_ATTR(srrcvd, 0xd),
	PL310_EVENT_ATTR(srconf, 0xe),
	PL310_EVENT_ATTR(epfrcvd, 0xf),
	NULL
};

static struct attribute_group l2x0_pmu_event_attrs_group = {
	.name = "events",
	.attrs = l2x0_pmu_event_attrs,
	.is_visible = l2x0_pmu_event_attr_is_visible,
};

/* sysfs show handler for "cpumask": formats pmu_cpu into @buf. */
static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
}

static struct device_attribute l2x0_pmu_cpumask_attr =
		__ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);

static struct attribute *l2x0_pmu_cpumask_attrs[] = {
	&l2x0_pmu_cpumask_attr.attr,
	NULL,
};

static struct attribute_group l2x0_pmu_cpumask_attr_group = {
	.attrs = l2x0_pmu_cpumask_attrs,
};

/* All attribute groups handed to the perf core via pmu::attr_groups. */
static const struct attribute_group *l2x0_pmu_attr_groups[] = {
	&l2x0_pmu_event_attrs_group,
	&l2x0_pmu_cpumask_attr_group,
	NULL,
};

/*
 * Put the hardware in a quiescent state: clear the global enable bit and set
 * every counter's event source to "disabled".
 */
static void l2x0_pmu_reset(void)
{
	int i;

	__l2x0_pmu_disable();

	for (i = 0; i < PMU_NR_COUNTERS; i++)
		__l2x0_pmu_event_disable(i);
}

/*
 * CPU hotplug callback registered in l2x0_pmu_init().
 *
 * If @cpu is the CPU currently recorded in pmu_cpu, remove it from the mask
 * and, when another online CPU exists, migrate the perf context to that CPU
 * and record it in pmu_cpu instead. Always returns 0.
 */
static int l2x0_pmu_offline_cpu(unsigned int cpu)
{
	unsigned int target;

	/* Nothing to do unless the departing CPU was the one we were using. */
	if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);
	/* No other online CPU: pmu_cpu is left empty. */
	if (target >= nr_cpu_ids)
		return 0;

	perf_pmu_migrate_context(l2x0_pmu, cpu, target);
	cpumask_set_cpu(target, &pmu_cpu);

	return 0;
}

/*
 * Stop counting ahead of a suspend: disable the PMU and stop every allocated
 * event, folding its count into event->count. Does nothing if the PMU was
 * never set up (l2x0_pmu is NULL).
 */
void l2x0_pmu_suspend(void)
{
	int i;

	if (!l2x0_pmu)
		return;

	l2x0_pmu_disable(l2x0_pmu);

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (events[i])
			l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
	}

}

/*
 * Counterpart of l2x0_pmu_suspend(): reset the hardware, restart every
 * allocated event from a zeroed counter (PERF_EF_RELOAD), and re-enable the
 * PMU. Does nothing if the PMU was never set up (l2x0_pmu is NULL).
 */
void l2x0_pmu_resume(void)
{
	int i;

	if (!l2x0_pmu)
		return;

	l2x0_pmu_reset();

	for (i = 0; i < PMU_NR_COUNTERS; i++) {
		if (events[i])
			l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
	}

	l2x0_pmu_enable(l2x0_pmu);
}

/*
 * Record the controller's MMIO @base and pick the PMU name from the part
 * number in @part. For parts other than the L220 and L310 nothing is
 * recorded, so l2x0_pmu_init() will later skip PMU registration.
 */
void __init l2x0_pmu_register(void __iomem *base, u32 part)
{
	/*
	 * Determine whether we support the PMU, and choose the name for sysfs.
	 * This is also used by l2x0_pmu_event_attr_is_visible to determine
	 * which events to display, as the PL310 PMU supports a superset of
	 * L220 events.
	 *
	 * The L210 PMU has a different programmer's interface, and is not
	 * supported by this driver.
	 *
	 * We must defer registering the PMU until the perf subsystem is up and
	 * running, so just stash the name and base, and leave that to another
	 * initcall.
	 */
	switch (part & L2X0_CACHE_ID_PART_MASK) {
	case L2X0_CACHE_ID_PART_L220:
		l2x0_name = "l2c_220";
		break;
	case L2X0_CACHE_ID_PART_L310:
		l2x0_name = "l2c_310";
		break;
	default:
		return;
	}

	l2x0_base = base;
}

/*
 * device_initcall: allocate and register the PMU with the perf core.
 *
 * Returns 0 without doing anything if l2x0_pmu_register() did not record a
 * base address, -ENOMEM if the struct pmu cannot be allocated, or the error
 * from CPU hotplug / perf registration. On any failure the struct pmu is
 * freed and l2x0_pmu is reset to NULL.
 */
static __init int l2x0_pmu_init(void)
{
	int ret;

	if (!l2x0_base)
		return 0;

	l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
	if (!l2x0_pmu) {
		pr_warn("Unable to allocate L2x0 PMU\n");
		return -ENOMEM;
	}

	*l2x0_pmu = (struct pmu) {
		.task_ctx_nr = perf_invalid_context,
		.pmu_enable = l2x0_pmu_enable,
		.pmu_disable = l2x0_pmu_disable,
		.read = l2x0_pmu_event_read,
		.start = l2x0_pmu_event_start,
		.stop = l2x0_pmu_event_stop,
		.add = l2x0_pmu_event_add,
		.del = l2x0_pmu_event_del,
		.event_init = l2x0_pmu_event_init,
		.attr_groups = l2x0_pmu_attr_groups,
		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
	};

	/* Start from a known state: counting off, no event sources selected. */
	l2x0_pmu_reset();

	/*
	 * We always use a hrtimer rather than an interrupt.
	 * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
	 *
	 * Polling once a second allows the counters to fill up to 1/128th on a
	 * quad-core test chip with cores clocked at 400MHz. Hopefully this
	 * leaves sufficient headroom to avoid overflow on production silicon
	 * at higher frequencies.
	 */
	l2x0_pmu_poll_period = ms_to_ktime(1000);
	hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	l2x0_pmu_hrtimer.function = l2x0_pmu_poll;

	/* CPU 0 is the initial owner; l2x0_pmu_offline_cpu() may move it. */
	cpumask_set_cpu(0, &pmu_cpu);
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
					"perf/arm/l2x0:online", NULL,
					l2x0_pmu_offline_cpu);
	if (ret)
		goto out_pmu;

	ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
	if (ret)
		goto out_cpuhp;

	return 0;

	/* Unwind in reverse order of setup. */
out_cpuhp:
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
out_pmu:
	kfree(l2x0_pmu);
	l2x0_pmu = NULL;
	return ret;
}
device_initcall(l2x0_pmu_init);