iommu.c (13370B)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Steven Kinney <Steven.Kinney@amd.com>
 * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
 *
 * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
 */

#define pr_fmt(fmt)	"perf/amd_iommu: " fmt

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>

#include "../perf_event.h"
#include "iommu.h"

/* iommu pmu conf masks */
#define GET_CSOURCE(x)	((x)->conf & 0xFFULL)
#define GET_DEVID(x)	(((x)->conf >> 8) & 0xFFFFULL)
#define GET_DOMID(x)	(((x)->conf >> 24) & 0xFFFFULL)
#define GET_PASID(x)	(((x)->conf >> 40) & 0xFFFFFULL)

/* iommu pmu conf1 masks */
#define GET_DEVID_MASK(x)	((x)->conf1 & 0xFFFFULL)
#define GET_DOMID_MASK(x)	(((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x)	(((x)->conf1 >> 32) & 0xFFFFFULL)

#define IOMMU_NAME_SIZE 16

struct perf_amd_iommu {
	struct list_head list;
	struct pmu pmu;
	struct amd_iommu *iommu;
	char name[IOMMU_NAME_SIZE];
	u8 max_banks;
	u8 max_counters;
	u64 cntr_assign_mask;
	raw_spinlock_t lock;
};

static LIST_HEAD(perf_amd_iommu_list);

/*---------------------------------------------
 * sysfs format attributes
 *---------------------------------------------*/
PMU_FORMAT_ATTR(csource,    "config:0-7");
PMU_FORMAT_ATTR(devid,      "config:8-23");
PMU_FORMAT_ATTR(domid,      "config:24-39");
PMU_FORMAT_ATTR(pasid,      "config:40-59");
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");

static struct attribute *iommu_format_attrs[] = {
	&format_attr_csource.attr,
	&format_attr_devid.attr,
	&format_attr_pasid.attr,
	&format_attr_domid.attr,
	&format_attr_devid_mask.attr,
	&format_attr_pasid_mask.attr,
	&format_attr_domid_mask.attr,
	NULL,
};

static struct attribute_group amd_iommu_format_group = {
	.name = "format",
	.attrs = iommu_format_attrs,
};

/*---------------------------------------------
 * sysfs events attributes
 *---------------------------------------------*/
static struct attribute_group amd_iommu_events_group = {
	.name = "events",
};

struct amd_iommu_event_desc {
	struct device_attribute attr;
	const char *event;
};

static ssize_t _iommu_event_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct amd_iommu_event_desc *event =
		container_of(attr, struct amd_iommu_event_desc, attr);
	return sprintf(buf, "%s\n", event->event);
}

#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
{								\
	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
	.event = _event,					\
}

static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,      "csource=0x01"),
	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,     "csource=0x02"),
	AMD_IOMMU_EVENT_DESC(mem_pass_excl,         "csource=0x03"),
	AMD_IOMMU_EVENT_DESC(mem_target_abort,      "csource=0x04"),
	AMD_IOMMU_EVENT_DESC(mem_trans_total,       "csource=0x05"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
	AMD_IOMMU_EVENT_DESC(mem_dte_hit,           "csource=0x0a"),
	AMD_IOMMU_EVENT_DESC(mem_dte_mis,           "csource=0x0b"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,     "csource=0x0c"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,     "csource=0x0d"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,     "csource=0x0e"),
	AMD_IOMMU_EVENT_DESC(int_dte_hit,           "csource=0x0f"),
	AMD_IOMMU_EVENT_DESC(int_dte_mis,           "csource=0x10"),
	AMD_IOMMU_EVENT_DESC(cmd_processed,         "csource=0x11"),
	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,     "csource=0x12"),
	AMD_IOMMU_EVENT_DESC(tlb_inv,               "csource=0x13"),
	AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h,  "csource=0x14"),
	AMD_IOMMU_EVENT_DESC(vapic_int_non_guest,   "csource=0x15"),
	AMD_IOMMU_EVENT_DESC(vapic_int_guest,       "csource=0x16"),
	AMD_IOMMU_EVENT_DESC(smi_recv,              "csource=0x17"),
	AMD_IOMMU_EVENT_DESC(smi_blk,               "csource=0x18"),
	{ /* end: all zeroes */ },
};

/*---------------------------------------------
 * sysfs cpumask attributes
 *---------------------------------------------*/
static cpumask_t iommu_cpumask;

static ssize_t _iommu_cpumask_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);

static struct attribute *iommu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_iommu_cpumask_group = {
	.attrs = iommu_cpumask_attrs,
};

/*---------------------------------------------*/

static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
{
	struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
	int max_cntrs = piommu->max_counters;
	int max_banks = piommu->max_banks;
	u32 shift, bank, cntr;
	unsigned long flags;
	int retval;

	raw_spin_lock_irqsave(&piommu->lock, flags);

	for (bank = 0; bank < max_banks; bank++) {
		for (cntr = 0; cntr < max_cntrs; cntr++) {
			/* Each bank/counter pair maps to bit (bank * 4 + cntr). */
			shift = bank + (bank * 3) + cntr;
			if (piommu->cntr_assign_mask & BIT_ULL(shift))
				continue;

			piommu->cntr_assign_mask |= BIT_ULL(shift);
			event->hw.iommu_bank = bank;
			event->hw.iommu_cntr = cntr;
			retval = 0;
			goto out;
		}
	}
	retval = -ENOSPC;
out:
	raw_spin_unlock_irqrestore(&piommu->lock, flags);
	return retval;
}

static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
				      u8 bank, u8 cntr)
{
	unsigned long flags;
	int max_banks, max_cntrs;
	int shift = 0;

	max_banks = perf_iommu->max_banks;
	max_cntrs = perf_iommu->max_counters;

	if ((bank >= max_banks) || (cntr >= max_cntrs))
		return -EINVAL;

	shift = bank + cntr + (bank * 3);

	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
	perf_iommu->cntr_assign_mask &= ~(1ULL << shift);
	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);

	return 0;
}

static int perf_iommu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* test the event attr type for PMU enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * IOMMU counters are shared across all cores.
	 * Therefore, they support neither per-process mode
	 * nor event sampling.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	/* update the hw_perf_event struct with the iommu config data */
	hwc->conf  = event->attr.config;
	hwc->conf1 = event->attr.config1;

	return 0;
}

static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
{
	return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
}

static void perf_iommu_enable_event(struct perf_event *ev)
{
	struct amd_iommu *iommu = perf_event_2_iommu(ev);
	struct hw_perf_event *hwc = &ev->hw;
	u8 bank = hwc->iommu_bank;
	u8 cntr = hwc->iommu_cntr;
	u64 reg = 0ULL;

	reg = GET_CSOURCE(hwc);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);

	reg = GET_DEVID_MASK(hwc);
	reg = GET_DEVID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);

	reg = GET_PASID_MASK(hwc);
	reg = GET_PASID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);

	reg = GET_DOMID_MASK(hwc);
	reg = GET_DOMID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
}

static void perf_iommu_disable_event(struct perf_event *event)
{
	struct amd_iommu *iommu = perf_event_2_iommu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 reg = 0ULL;

	amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
			     IOMMU_PC_COUNTER_SRC_REG, &reg);
}

static void perf_iommu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	/*
	 * To account for power-gating, which prevents writes to
	 * the counter, we need to enable the counter
	 * before setting up the counter register.
	 */
	perf_iommu_enable_event(event);

	if (flags & PERF_EF_RELOAD) {
		u64 count = 0;
		struct amd_iommu *iommu = perf_event_2_iommu(event);

		/*
		 * Since the IOMMU PMU only supports counting mode,
		 * the counter always starts at zero.
		 */
		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				     IOMMU_PC_COUNTER_REG, &count);
	}

	perf_event_update_userpage(event);
}

static void perf_iommu_read(struct perf_event *event)
{
	u64 count;
	struct hw_perf_event *hwc = &event->hw;
	struct amd_iommu *iommu = perf_event_2_iommu(event);

	if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				 IOMMU_PC_COUNTER_REG, &count))
		return;

	/* IOMMU pc counter register is only 48 bits */
	count &= GENMASK_ULL(47, 0);

	/*
	 * Since the counter always starts at zero,
	 * simply accumulate the count for the event.
	 */
	local64_add(count, &event->count);
}

static void perf_iommu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * To account for power-gating, in which reading the counter would
	 * return zero, we need to read the register before disabling.
	 */
	perf_iommu_read(event);
	hwc->state |= PERF_HES_UPTODATE;

	perf_iommu_disable_event(event);
	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
	hwc->state |= PERF_HES_STOPPED;
}

static int perf_iommu_add(struct perf_event *event, int flags)
{
	int retval;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* request an iommu bank/counter */
	retval = get_next_avail_iommu_bnk_cntr(event);
	if (retval)
		return retval;

	if (flags & PERF_EF_START)
		perf_iommu_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_iommu_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_amd_iommu *perf_iommu =
		container_of(event->pmu, struct perf_amd_iommu, pmu);

	perf_iommu_stop(event, PERF_EF_UPDATE);

	/* clear the assigned iommu bank/counter */
	clear_avail_iommu_bnk_cntr(perf_iommu,
				   hwc->iommu_bank, hwc->iommu_cntr);

	perf_event_update_userpage(event);
}

static __init int _init_events_attrs(void)
{
	int i = 0, j;
	struct attribute **attrs;

	while (amd_iommu_v2_event_descs[i].attr.attr.name)
		i++;

	attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	for (j = 0; j < i; j++)
		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;

	amd_iommu_events_group.attrs = attrs;
	return 0;
}

static const struct attribute_group *amd_iommu_attr_groups[] = {
	&amd_iommu_format_group,
	&amd_iommu_cpumask_group,
	&amd_iommu_events_group,
	NULL,
};

static const struct pmu iommu_pmu __initconst = {
	.event_init	= perf_iommu_event_init,
	.add		= perf_iommu_add,
	.del		= perf_iommu_del,
	.start		= perf_iommu_start,
	.stop		= perf_iommu_stop,
	.read		= perf_iommu_read,
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_iommu_attr_groups,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
};

static __init int init_one_iommu(unsigned int idx)
{
	struct perf_amd_iommu *perf_iommu;
	int ret;

	perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
	if (!perf_iommu)
		return -ENOMEM;

	raw_spin_lock_init(&perf_iommu->lock);

	perf_iommu->pmu          = iommu_pmu;
	perf_iommu->iommu        = get_amd_iommu(idx);
	perf_iommu->max_banks    = amd_iommu_pc_get_max_banks(idx);
	perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);

	if (!perf_iommu->iommu ||
	    !perf_iommu->max_banks ||
	    !perf_iommu->max_counters) {
		kfree(perf_iommu);
		return -EINVAL;
	}

	snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);

	ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
	if (!ret) {
		pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
			idx, perf_iommu->max_banks, perf_iommu->max_counters);
		list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
	} else {
		pr_warn("Error initializing IOMMU %d.\n", idx);
		kfree(perf_iommu);
	}
	return ret;
}

static __init int amd_iommu_pc_init(void)
{
	unsigned int i, cnt = 0;
	int ret;

	/* Make sure the IOMMU PC resource is available */
	if (!amd_iommu_pc_supported())
		return -ENODEV;

	ret = _init_events_attrs();
	if (ret)
		return ret;

	/*
	 * An IOMMU PMU is specific to one IOMMU and can function independently,
	 * so go through all IOMMUs and ignore any that fail to initialize,
	 * unless all of them fail.
	 */
	for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
		ret = init_one_iommu(i);
		if (!ret)
			cnt++;
	}

	if (!cnt) {
		kfree(amd_iommu_events_group.attrs);
		return -ENODEV;
	}

	/* Init the cpumask attribute to CPU 0 only */
	cpumask_set_cpu(0, &iommu_cpumask);
	return 0;
}

device_initcall(amd_iommu_pc_init);
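
/*
 * Usage note (illustrative sketch, not part of the driver logic above):
 * each IOMMU instance registered by init_one_iommu() shows up as an
 * "amd_iommu_<idx>" PMU, so its events can be counted system-wide with
 * the perf tool. Assuming the first instance is exposed as amd_iommu_0
 * on the running system:
 *
 *	# count total translations by symbolic event name
 *	perf stat -e amd_iommu_0/mem_trans_total/ -a -- sleep 1
 *
 *	# equivalent raw encoding: csource=0x05 occupies config bits 0-7,
 *	# matching the format attributes defined above
 *	perf stat -e amd_iommu_0/csource=0x05/ -a -- sleep 1
 *
 * Per perf_iommu_event_init(), only counting mode on a specific CPU is
 * supported; sampling and per-task attachment are rejected.
 */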