cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

devfreq_cooling.c (14151B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * devfreq_cooling: Thermal cooling device implementation for devices using
      4 *                  devfreq
      5 *
      6 * Copyright (C) 2014-2015 ARM Limited
      7 *
      8 * TODO:
      9 *    - If OPPs are added or removed after devfreq cooling has
     10 *      registered, the devfreq cooling won't react to it.
     11 */
     12
     13#include <linux/devfreq.h>
     14#include <linux/devfreq_cooling.h>
     15#include <linux/energy_model.h>
     16#include <linux/export.h>
     17#include <linux/slab.h>
     18#include <linux/pm_opp.h>
     19#include <linux/pm_qos.h>
     20#include <linux/thermal.h>
     21#include <linux/units.h>
     22
     23#include <trace/events/thermal.h>
     24
     25#define SCALE_ERROR_MITIGATION	100
     26
/**
 * struct devfreq_cooling_device - Devfreq cooling device
 * @cdev:	Pointer to associated thermal cooling device.
 * @devfreq:	Pointer to associated devfreq device.
 * @cooling_state:	Current cooling state.
 * @freq_table:	Pointer to a table with the frequencies sorted in descending
 *		order.  You can index the table by cooling device state.
 *		It is only generated when no usable Energy Model is found
 *		at registration time.
 * @max_state:	It is the last index, that is, one less than the number of the
 *		OPPs
 * @power_ops:	Pointer to devfreq_cooling_power, a more precise power model.
 * @res_util:	Resource utilization scaling factor for the power.
 *		It is multiplied by 100 to minimize the error. It is used
 *		for estimation of the power budget instead of using
 *		'utilization' (which is	'busy_time' / 'total_time').
 *		The 'res_util' range is from 100 to power * 100	for the
 *		corresponding 'state'.
 * @capped_state:	Index of the cooling state that is within the dynamic
 *			power budget.
 * @req_max_freq:	PM QoS request for limiting the maximum frequency
 *			of the devfreq device.
 * @em_pd:		Energy Model for the associated Devfreq device
 */
struct devfreq_cooling_device {
	struct thermal_cooling_device *cdev;
	struct devfreq *devfreq;
	unsigned long cooling_state;
	u32 *freq_table;
	size_t max_state;
	struct devfreq_cooling_power *power_ops;
	u32 res_util;
	int capped_state;
	struct dev_pm_qos_request req_max_freq;
	struct em_perf_domain *em_pd;
};
     61
     62static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
     63					 unsigned long *state)
     64{
     65	struct devfreq_cooling_device *dfc = cdev->devdata;
     66
     67	*state = dfc->max_state;
     68
     69	return 0;
     70}
     71
     72static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
     73					 unsigned long *state)
     74{
     75	struct devfreq_cooling_device *dfc = cdev->devdata;
     76
     77	*state = dfc->cooling_state;
     78
     79	return 0;
     80}
     81
     82static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
     83					 unsigned long state)
     84{
     85	struct devfreq_cooling_device *dfc = cdev->devdata;
     86	struct devfreq *df = dfc->devfreq;
     87	struct device *dev = df->dev.parent;
     88	unsigned long freq;
     89	int perf_idx;
     90
     91	if (state == dfc->cooling_state)
     92		return 0;
     93
     94	dev_dbg(dev, "Setting cooling state %lu\n", state);
     95
     96	if (state > dfc->max_state)
     97		return -EINVAL;
     98
     99	if (dfc->em_pd) {
    100		perf_idx = dfc->max_state - state;
    101		freq = dfc->em_pd->table[perf_idx].frequency * 1000;
    102	} else {
    103		freq = dfc->freq_table[state];
    104	}
    105
    106	dev_pm_qos_update_request(&dfc->req_max_freq,
    107				  DIV_ROUND_UP(freq, HZ_PER_KHZ));
    108
    109	dfc->cooling_state = state;
    110
    111	return 0;
    112}
    113
    114/**
    115 * get_perf_idx() - get the performance index corresponding to a frequency
    116 * @em_pd:	Pointer to device's Energy Model
    117 * @freq:	frequency in kHz
    118 *
    119 * Return: the performance index associated with the @freq, or
    120 * -EINVAL if it wasn't found.
    121 */
    122static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq)
    123{
    124	int i;
    125
    126	for (i = 0; i < em_pd->nr_perf_states; i++) {
    127		if (em_pd->table[i].frequency == freq)
    128			return i;
    129	}
    130
    131	return -EINVAL;
    132}
    133
    134static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
    135{
    136	struct device *dev = df->dev.parent;
    137	unsigned long voltage;
    138	struct dev_pm_opp *opp;
    139
    140	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
    141	if (PTR_ERR(opp) == -ERANGE)
    142		opp = dev_pm_opp_find_freq_exact(dev, freq, false);
    143
    144	if (IS_ERR(opp)) {
    145		dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
    146				    freq, PTR_ERR(opp));
    147		return 0;
    148	}
    149
    150	voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
    151	dev_pm_opp_put(opp);
    152
    153	if (voltage == 0) {
    154		dev_err_ratelimited(dev,
    155				    "Failed to get voltage for frequency %lu\n",
    156				    freq);
    157	}
    158
    159	return voltage;
    160}
    161
    162static void _normalize_load(struct devfreq_dev_status *status)
    163{
    164	if (status->total_time > 0xfffff) {
    165		status->total_time >>= 10;
    166		status->busy_time >>= 10;
    167	}
    168
    169	status->busy_time <<= 10;
    170	status->busy_time /= status->total_time ? : 1;
    171
    172	status->busy_time = status->busy_time ? : 1;
    173	status->total_time = 1024;
    174}
    175
    176static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
    177					       u32 *power)
    178{
    179	struct devfreq_cooling_device *dfc = cdev->devdata;
    180	struct devfreq *df = dfc->devfreq;
    181	struct devfreq_dev_status status;
    182	unsigned long state;
    183	unsigned long freq;
    184	unsigned long voltage;
    185	int res, perf_idx;
    186
    187	mutex_lock(&df->lock);
    188	status = df->last_status;
    189	mutex_unlock(&df->lock);
    190
    191	freq = status.current_frequency;
    192
    193	if (dfc->power_ops && dfc->power_ops->get_real_power) {
    194		voltage = get_voltage(df, freq);
    195		if (voltage == 0) {
    196			res = -EINVAL;
    197			goto fail;
    198		}
    199
    200		res = dfc->power_ops->get_real_power(df, power, freq, voltage);
    201		if (!res) {
    202			state = dfc->capped_state;
    203			dfc->res_util = dfc->em_pd->table[state].power;
    204			dfc->res_util *= SCALE_ERROR_MITIGATION;
    205
    206			if (*power > 1)
    207				dfc->res_util /= *power;
    208		} else {
    209			goto fail;
    210		}
    211	} else {
    212		/* Energy Model frequencies are in kHz */
    213		perf_idx = get_perf_idx(dfc->em_pd, freq / 1000);
    214		if (perf_idx < 0) {
    215			res = -EAGAIN;
    216			goto fail;
    217		}
    218
    219		_normalize_load(&status);
    220
    221		/* Scale power for utilization */
    222		*power = dfc->em_pd->table[perf_idx].power;
    223		*power *= status.busy_time;
    224		*power >>= 10;
    225	}
    226
    227	trace_thermal_power_devfreq_get_power(cdev, &status, freq, *power);
    228
    229	return 0;
    230fail:
    231	/* It is safe to set max in this case */
    232	dfc->res_util = SCALE_ERROR_MITIGATION;
    233	return res;
    234}
    235
    236static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
    237				       unsigned long state, u32 *power)
    238{
    239	struct devfreq_cooling_device *dfc = cdev->devdata;
    240	int perf_idx;
    241
    242	if (state > dfc->max_state)
    243		return -EINVAL;
    244
    245	perf_idx = dfc->max_state - state;
    246	*power = dfc->em_pd->table[perf_idx].power;
    247
    248	return 0;
    249}
    250
    251static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
    252				       u32 power, unsigned long *state)
    253{
    254	struct devfreq_cooling_device *dfc = cdev->devdata;
    255	struct devfreq *df = dfc->devfreq;
    256	struct devfreq_dev_status status;
    257	unsigned long freq;
    258	s32 est_power;
    259	int i;
    260
    261	mutex_lock(&df->lock);
    262	status = df->last_status;
    263	mutex_unlock(&df->lock);
    264
    265	freq = status.current_frequency;
    266
    267	if (dfc->power_ops && dfc->power_ops->get_real_power) {
    268		/* Scale for resource utilization */
    269		est_power = power * dfc->res_util;
    270		est_power /= SCALE_ERROR_MITIGATION;
    271	} else {
    272		/* Scale dynamic power for utilization */
    273		_normalize_load(&status);
    274		est_power = power << 10;
    275		est_power /= status.busy_time;
    276	}
    277
    278	/*
    279	 * Find the first cooling state that is within the power
    280	 * budget. The EM power table is sorted ascending.
    281	 */
    282	for (i = dfc->max_state; i > 0; i--)
    283		if (est_power >= dfc->em_pd->table[i].power)
    284			break;
    285
    286	*state = dfc->max_state - i;
    287	dfc->capped_state = *state;
    288
    289	trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
    290	return 0;
    291}
    292
/*
 * Template of the thermal cooling device callbacks.  Registration duplicates
 * it per device and, when a usable Energy Model is present, fills in the
 * power-related callbacks on the copy.
 */
static struct thermal_cooling_device_ops devfreq_cooling_ops = {
	.get_max_state = devfreq_cooling_get_max_state,
	.get_cur_state = devfreq_cooling_get_cur_state,
	.set_cur_state = devfreq_cooling_set_cur_state,
};
    298
    299/**
    300 * devfreq_cooling_gen_tables() - Generate frequency table.
    301 * @dfc:	Pointer to devfreq cooling device.
    302 * @num_opps:	Number of OPPs
    303 *
    304 * Generate frequency table which holds the frequencies in descending
    305 * order. That way its indexed by cooling device state. This is for
    306 * compatibility with drivers which do not register Energy Model.
    307 *
    308 * Return: 0 on success, negative error code on failure.
    309 */
    310static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc,
    311				      int num_opps)
    312{
    313	struct devfreq *df = dfc->devfreq;
    314	struct device *dev = df->dev.parent;
    315	unsigned long freq;
    316	int i;
    317
    318	dfc->freq_table = kcalloc(num_opps, sizeof(*dfc->freq_table),
    319			     GFP_KERNEL);
    320	if (!dfc->freq_table)
    321		return -ENOMEM;
    322
    323	for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
    324		struct dev_pm_opp *opp;
    325
    326		opp = dev_pm_opp_find_freq_floor(dev, &freq);
    327		if (IS_ERR(opp)) {
    328			kfree(dfc->freq_table);
    329			return PTR_ERR(opp);
    330		}
    331
    332		dev_pm_opp_put(opp);
    333		dfc->freq_table[i] = freq;
    334	}
    335
    336	return 0;
    337}
    338
    339/**
    340 * of_devfreq_cooling_register_power() - Register devfreq cooling device,
    341 *                                      with OF and power information.
    342 * @np:	Pointer to OF device_node.
    343 * @df:	Pointer to devfreq device.
    344 * @dfc_power:	Pointer to devfreq_cooling_power.
    345 *
    346 * Register a devfreq cooling device.  The available OPPs must be
    347 * registered on the device.
    348 *
    349 * If @dfc_power is provided, the cooling device is registered with the
    350 * power extensions.  For the power extensions to work correctly,
    351 * devfreq should use the simple_ondemand governor, other governors
    352 * are not currently supported.
    353 */
    354struct thermal_cooling_device *
    355of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
    356				  struct devfreq_cooling_power *dfc_power)
    357{
    358	struct thermal_cooling_device *cdev;
    359	struct device *dev = df->dev.parent;
    360	struct devfreq_cooling_device *dfc;
    361	struct em_perf_domain *em;
    362	struct thermal_cooling_device_ops *ops;
    363	char *name;
    364	int err, num_opps;
    365
    366	ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
    367	if (!ops)
    368		return ERR_PTR(-ENOMEM);
    369
    370	dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
    371	if (!dfc) {
    372		err = -ENOMEM;
    373		goto free_ops;
    374	}
    375
    376	dfc->devfreq = df;
    377
    378	em = em_pd_get(dev);
    379	if (em && !em_is_artificial(em)) {
    380		dfc->em_pd = em;
    381		ops->get_requested_power =
    382			devfreq_cooling_get_requested_power;
    383		ops->state2power = devfreq_cooling_state2power;
    384		ops->power2state = devfreq_cooling_power2state;
    385
    386		dfc->power_ops = dfc_power;
    387
    388		num_opps = em_pd_nr_perf_states(dfc->em_pd);
    389	} else {
    390		/* Backward compatibility for drivers which do not use IPA */
    391		dev_dbg(dev, "missing proper EM for cooling device\n");
    392
    393		num_opps = dev_pm_opp_get_opp_count(dev);
    394
    395		err = devfreq_cooling_gen_tables(dfc, num_opps);
    396		if (err)
    397			goto free_dfc;
    398	}
    399
    400	if (num_opps <= 0) {
    401		err = -EINVAL;
    402		goto free_dfc;
    403	}
    404
    405	/* max_state is an index, not a counter */
    406	dfc->max_state = num_opps - 1;
    407
    408	err = dev_pm_qos_add_request(dev, &dfc->req_max_freq,
    409				     DEV_PM_QOS_MAX_FREQUENCY,
    410				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
    411	if (err < 0)
    412		goto free_table;
    413
    414	err = -ENOMEM;
    415	name = kasprintf(GFP_KERNEL, "devfreq-%s", dev_name(dev));
    416	if (!name)
    417		goto remove_qos_req;
    418
    419	cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
    420	kfree(name);
    421
    422	if (IS_ERR(cdev)) {
    423		err = PTR_ERR(cdev);
    424		dev_err(dev,
    425			"Failed to register devfreq cooling device (%d)\n",
    426			err);
    427		goto remove_qos_req;
    428	}
    429
    430	dfc->cdev = cdev;
    431
    432	return cdev;
    433
    434remove_qos_req:
    435	dev_pm_qos_remove_request(&dfc->req_max_freq);
    436free_table:
    437	kfree(dfc->freq_table);
    438free_dfc:
    439	kfree(dfc);
    440free_ops:
    441	kfree(ops);
    442
    443	return ERR_PTR(err);
    444}
    445EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
    446
    447/**
    448 * of_devfreq_cooling_register() - Register devfreq cooling device,
    449 *                                with OF information.
    450 * @np: Pointer to OF device_node.
    451 * @df: Pointer to devfreq device.
    452 */
    453struct thermal_cooling_device *
    454of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
    455{
    456	return of_devfreq_cooling_register_power(np, df, NULL);
    457}
    458EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
    459
    460/**
    461 * devfreq_cooling_register() - Register devfreq cooling device.
    462 * @df: Pointer to devfreq device.
    463 */
    464struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
    465{
    466	return of_devfreq_cooling_register(NULL, df);
    467}
    468EXPORT_SYMBOL_GPL(devfreq_cooling_register);
    469
    470/**
    471 * devfreq_cooling_em_register() - Register devfreq cooling device with
    472 *		power information and automatically register Energy Model (EM)
    473 * @df:		Pointer to devfreq device.
    474 * @dfc_power:	Pointer to devfreq_cooling_power.
    475 *
    476 * Register a devfreq cooling device and automatically register EM. The
    477 * available OPPs must be registered for the device.
    478 *
    479 * If @dfc_power is provided, the cooling device is registered with the
    480 * power extensions. It is using the simple Energy Model which requires
    481 * "dynamic-power-coefficient" a devicetree property. To not break drivers
    482 * which miss that DT property, the function won't bail out when the EM
    483 * registration failed. The cooling device will be registered if everything
    484 * else is OK.
    485 */
    486struct thermal_cooling_device *
    487devfreq_cooling_em_register(struct devfreq *df,
    488			    struct devfreq_cooling_power *dfc_power)
    489{
    490	struct thermal_cooling_device *cdev;
    491	struct device *dev;
    492	int ret;
    493
    494	if (IS_ERR_OR_NULL(df))
    495		return ERR_PTR(-EINVAL);
    496
    497	dev = df->dev.parent;
    498
    499	ret = dev_pm_opp_of_register_em(dev, NULL);
    500	if (ret)
    501		dev_dbg(dev, "Unable to register EM for devfreq cooling device (%d)\n",
    502			ret);
    503
    504	cdev = of_devfreq_cooling_register_power(dev->of_node, df, dfc_power);
    505
    506	if (IS_ERR_OR_NULL(cdev))
    507		em_dev_unregister_perf_domain(dev);
    508
    509	return cdev;
    510}
    511EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
    512
    513/**
    514 * devfreq_cooling_unregister() - Unregister devfreq cooling device.
    515 * @cdev: Pointer to devfreq cooling device to unregister.
    516 *
    517 * Unregisters devfreq cooling device and related Energy Model if it was
    518 * present.
    519 */
    520void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
    521{
    522	struct devfreq_cooling_device *dfc;
    523	const struct thermal_cooling_device_ops *ops;
    524	struct device *dev;
    525
    526	if (IS_ERR_OR_NULL(cdev))
    527		return;
    528
    529	ops = cdev->ops;
    530	dfc = cdev->devdata;
    531	dev = dfc->devfreq->dev.parent;
    532
    533	thermal_cooling_device_unregister(dfc->cdev);
    534	dev_pm_qos_remove_request(&dfc->req_max_freq);
    535
    536	em_dev_unregister_perf_domain(dev);
    537
    538	kfree(dfc->freq_table);
    539	kfree(dfc);
    540	kfree(ops);
    541}
    542EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);