power.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Performance events - AMD Processor Power Reporting Mechanism
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
#include "../perf_event.h"

/* Event code: LSB 8 bits, passed in attr->config; any other bit is reserved. */
#define AMD_POWER_EVENT_MASK	0xFFULL

/*
 * Accumulated power status counters.
 */
#define AMD_POWER_EVENTSEL_PKG	1

/*
 * The ratio of compute unit power accumulator sample period to the
 * PTSC period.
 */
static unsigned int cpu_pwr_sample_ratio;

/* Maximum accumulated power of a compute unit. */
static u64 max_cu_acc_power;

static struct pmu pmu_class;

/*
 * Accumulated power represents the sum of each compute unit's (CU) power
 * consumption. On any core of each CU we read the total accumulated power
 * from MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask is the bitmap of all cores
 * picked to measure the power for the CUs they belong to.
 */
static cpumask_t cpu_mask;

static void event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
	u64 delta, tdelta;

	prev_pwr_acc = hwc->pwr_acc;
	prev_ptsc = hwc->ptsc;
	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
	rdmsrl(MSR_F15H_PTSC, new_ptsc);

	/*
	 * Calculate the CU power consumption over a time period, the unit of
	 * final value (delta) is micro-Watts. Then add it to the event count.
	 */
	if (new_pwr_acc < prev_pwr_acc) {
		delta = max_cu_acc_power + new_pwr_acc;
		delta -= prev_pwr_acc;
	} else
		delta = new_pwr_acc - prev_pwr_acc;

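	/*
	 * delta now holds the (wrap-corrected) raw accumulator delta.
	 * Per the comment above, the average power over the window is
	 *
	 *	power (uW) = delta_pwr_acc * N * 1000 / delta_ptsc
	 *
	 * where N == cpu_pwr_sample_ratio is the number of PTSC ticks
	 * per accumulator sample. The factor of 1000 keeps precision
	 * through the integer division; the "scale" sysfs attribute
	 * below maps the micro-Watt count back to milli-Watts.
	 */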
	delta *= cpu_pwr_sample_ratio * 1000;
	tdelta = new_ptsc - prev_ptsc;

	do_div(delta, tdelta);
	local64_add(delta, &event->count);
}

static void __pmu_event_start(struct perf_event *event)
{
	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;

	rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
}

static void pmu_event_start(struct perf_event *event, int mode)
{
	__pmu_event_start(event);
}

static void pmu_event_stop(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Mark event as deactivated and stopped. */
	if (!(hwc->state & PERF_HES_STOPPED))
		hwc->state |= PERF_HES_STOPPED;

	/* Check if software counter update is necessary. */
	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int pmu_event_add(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (mode & PERF_EF_START)
		__pmu_event_start(event);

	return 0;
}

static void pmu_event_del(struct perf_event *event, int flags)
{
	pmu_event_stop(event, PERF_EF_UPDATE);
}

static int pmu_event_init(struct perf_event *event)
{
	u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;

	/* Only look at AMD power events. */
	if (event->attr.type != pmu_class.type)
		return -ENOENT;

	/* Unsupported modes and filters. */
	if (event->attr.sample_period)
		return -EINVAL;

	if (cfg != AMD_POWER_EVENTSEL_PKG)
		return -EINVAL;

	return 0;
}

static void pmu_event_read(struct perf_event *event)
{
	event_update(event);
}

static ssize_t
get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);

static struct attribute *pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group pmu_attr_group = {
	.attrs = pmu_attrs,
};

/*
 * Currently only the power of each processor/package can be reported.
 */
EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");

EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");

/* Convert the count from micro-Watts to milli-Watts. */
EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");

static struct attribute *events_attr[] = {
	EVENT_PTR(power_pkg),
	EVENT_PTR(power_pkg_unit),
	EVENT_PTR(power_pkg_scale),
	NULL,
};

static struct attribute_group pmu_events_group = {
	.name = "events",
	.attrs = events_attr,
};

PMU_FORMAT_ATTR(event, "config:0-7");

static struct attribute *formats_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group pmu_format_group = {
	.name = "format",
	.attrs = formats_attr,
};

static const struct attribute_group *attr_groups[] = {
	&pmu_attr_group,
	&pmu_format_group,
	&pmu_events_group,
	NULL,
};

static struct pmu pmu_class = {
	.attr_groups	= attr_groups,
	/* system-wide only */
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= pmu_event_init,
	.add		= pmu_event_add,
	.del		= pmu_event_del,
	.start		= pmu_event_start,
	.stop		= pmu_event_stop,
	.read		= pmu_event_read,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

static int power_cpu_exit(unsigned int cpu)
{
	int target;

	if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
		return 0;

	/*
	 * Find a new CPU on the same compute unit, if this CPU was set
	 * in cpu_mask and other CPUs of its compute unit are still
	 * online. Then migrate the events and context to the new CPU.
	 */
	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
	if (target < nr_cpumask_bits) {
		cpumask_set_cpu(target, &cpu_mask);
		perf_pmu_migrate_context(&pmu_class, cpu, target);
	}
	return 0;
}

static int power_cpu_init(unsigned int cpu)
{
	int target;

	/*
	 * 1) If any CPU of the same compute unit is already set in
	 *    cpu_mask, do nothing.
	 * 2) If no CPU of the same compute unit is set in cpu_mask,
	 *    set the current (newly onlined) CPU.
	 *
	 * Note: if there is a CPU aside of the new one already in the
	 * sibling mask, then it is also in cpu_mask.
	 */
	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
	if (target >= nr_cpumask_bits)
		cpumask_set_cpu(cpu, &cpu_mask);
	return 0;
}

static const struct x86_cpu_id cpu_match[] = {
	X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL),
	{},
};

static int __init amd_power_pmu_init(void)
{
	int ret;

	if (!x86_match_cpu(cpu_match))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
		return -ENODEV;

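	/*
	 * CPUID Fn8000_0007_ECX reports CpuPwrSampleTimeRatio: the ratio
	 * of the accumulator sample period to the PTSC period (see the
	 * AMD family 15h BKDG), used as N in event_update() above.
	 */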
	cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);

	if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
		pr_err("Failed to read max compute unit power accumulator MSR\n");
		return -ENODEV;
	}

	cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
			  "perf/x86/amd/power:online",
			  power_cpu_init, power_cpu_exit);

	ret = perf_pmu_register(&pmu_class, "power", -1);
	if (WARN_ON(ret)) {
		pr_warn("AMD Power PMU registration failed\n");
		return ret;
	}

	pr_info("AMD Power PMU detected\n");
	return ret;
}
module_init(amd_power_pmu_init);

static void __exit amd_power_pmu_exit(void)
{
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE);
	perf_pmu_unregister(&pmu_class);
}
module_exit(amd_power_pmu_exit);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
MODULE_LICENSE("GPL v2");
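
A minimal usage sketch, assuming the module is loaded and the PMU has
registered under the name "power" with the "power-pkg" event defined above
(system-wide mode is required, since task contexts are invalid for this PMU):

    perf stat -a -e power/power-pkg/ sleep 1

perf applies the 1e-3 "scale" attribute to the raw micro-Watt count and
reports the result in mWatts, per the "unit" attribute.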