gk20a.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
gk20a.c (6225B)
      1/*
      2 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     20 * DEALINGS IN THE SOFTWARE.
     21 */
     22#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base)
     23#include "priv.h"
     24
     25#include <subdev/clk.h>
     26#include <subdev/timer.h>
     27#include <subdev/volt.h>
     28
     29#define BUSY_SLOT	0
     30#define CLK_SLOT	7
     31
     32struct gk20a_pmu_dvfs_data {
     33	int p_load_target;
     34	int p_load_max;
     35	int p_smooth;
     36	unsigned int avg_load;
     37};
     38
     39struct gk20a_pmu {
     40	struct nvkm_pmu base;
     41	struct nvkm_alarm alarm;
     42	struct gk20a_pmu_dvfs_data *data;
     43};
     44
     45struct gk20a_pmu_dvfs_dev_status {
     46	u32 total;
     47	u32 busy;
     48};
     49
     50static int
     51gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state)
     52{
     53	struct nvkm_clk *clk = pmu->base.subdev.device->clk;
     54
     55	return nvkm_clk_astate(clk, *state, 0, false);
     56}
     57
     58static void
     59gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state)
     60{
     61	struct nvkm_clk *clk = pmu->base.subdev.device->clk;
     62
     63	*state = clk->pstate;
     64}
     65
     66static int
     67gk20a_pmu_dvfs_get_target_state(struct gk20a_pmu *pmu,
     68				int *state, int load)
     69{
     70	struct gk20a_pmu_dvfs_data *data = pmu->data;
     71	struct nvkm_clk *clk = pmu->base.subdev.device->clk;
     72	int cur_level, level;
     73
     74	/* For GK20A, the performance level is directly mapped to pstate */
     75	level = cur_level = clk->pstate;
     76
     77	if (load > data->p_load_max) {
     78		level = min(clk->state_nr - 1, level + (clk->state_nr / 3));
     79	} else {
     80		level += ((load - data->p_load_target) * 10 /
     81				data->p_load_target) / 2;
     82		level = max(0, level);
     83		level = min(clk->state_nr - 1, level);
     84	}
     85
     86	nvkm_trace(&pmu->base.subdev, "cur level = %d, new level = %d\n",
     87		   cur_level, level);
     88
     89	*state = level;
     90
     91	return (level != cur_level);
     92}
     93
     94static void
     95gk20a_pmu_dvfs_get_dev_status(struct gk20a_pmu *pmu,
     96			      struct gk20a_pmu_dvfs_dev_status *status)
     97{
     98	struct nvkm_falcon *falcon = &pmu->base.falcon;
     99
    100	status->busy = nvkm_falcon_rd32(falcon, 0x508 + (BUSY_SLOT * 0x10));
    101	status->total= nvkm_falcon_rd32(falcon, 0x508 + (CLK_SLOT * 0x10));
    102}
    103
    104static void
    105gk20a_pmu_dvfs_reset_dev_status(struct gk20a_pmu *pmu)
    106{
    107	struct nvkm_falcon *falcon = &pmu->base.falcon;
    108
    109	nvkm_falcon_wr32(falcon, 0x508 + (BUSY_SLOT * 0x10), 0x80000000);
    110	nvkm_falcon_wr32(falcon, 0x508 + (CLK_SLOT * 0x10), 0x80000000);
    111}
    112
    113static void
    114gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
    115{
    116	struct gk20a_pmu *pmu =
    117		container_of(alarm, struct gk20a_pmu, alarm);
    118	struct gk20a_pmu_dvfs_data *data = pmu->data;
    119	struct gk20a_pmu_dvfs_dev_status status;
    120	struct nvkm_subdev *subdev = &pmu->base.subdev;
    121	struct nvkm_device *device = subdev->device;
    122	struct nvkm_clk *clk = device->clk;
    123	struct nvkm_timer *tmr = device->timer;
    124	struct nvkm_volt *volt = device->volt;
    125	u32 utilization = 0;
    126	int state;
    127
    128	/*
    129	 * The PMU is initialized before CLK and VOLT, so we have to make sure the
    130	 * CLK and VOLT are ready here.
    131	 */
    132	if (!clk || !volt)
    133		goto resched;
    134
    135	gk20a_pmu_dvfs_get_dev_status(pmu, &status);
    136
    137	if (status.total)
    138		utilization = div_u64((u64)status.busy * 100, status.total);
    139
    140	data->avg_load = (data->p_smooth * data->avg_load) + utilization;
    141	data->avg_load /= data->p_smooth + 1;
    142	nvkm_trace(subdev, "utilization = %d %%, avg_load = %d %%\n",
    143		   utilization, data->avg_load);
    144
    145	gk20a_pmu_dvfs_get_cur_state(pmu, &state);
    146
    147	if (gk20a_pmu_dvfs_get_target_state(pmu, &state, data->avg_load)) {
    148		nvkm_trace(subdev, "set new state to %d\n", state);
    149		gk20a_pmu_dvfs_target(pmu, &state);
    150	}
    151
    152resched:
    153	gk20a_pmu_dvfs_reset_dev_status(pmu);
    154	nvkm_timer_alarm(tmr, 100000000, alarm);
    155}
    156
    157static void
    158gk20a_pmu_fini(struct nvkm_pmu *pmu)
    159{
    160	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
    161	nvkm_timer_alarm(pmu->subdev.device->timer, 0, &gpmu->alarm);
    162
    163	nvkm_falcon_put(&pmu->falcon, &pmu->subdev);
    164}
    165
    166static int
    167gk20a_pmu_init(struct nvkm_pmu *pmu)
    168{
    169	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
    170	struct nvkm_subdev *subdev = &pmu->subdev;
    171	struct nvkm_device *device = pmu->subdev.device;
    172	struct nvkm_falcon *falcon = &pmu->falcon;
    173	int ret;
    174
    175	ret = nvkm_falcon_get(falcon, subdev);
    176	if (ret) {
    177		nvkm_error(subdev, "cannot acquire %s falcon!\n", falcon->name);
    178		return ret;
    179	}
    180
    181	/* init pwr perf counter */
    182	nvkm_falcon_wr32(falcon, 0x504 + (BUSY_SLOT * 0x10), 0x00200001);
    183	nvkm_falcon_wr32(falcon, 0x50c + (BUSY_SLOT * 0x10), 0x00000002);
    184	nvkm_falcon_wr32(falcon, 0x50c + (CLK_SLOT * 0x10), 0x00000003);
    185
    186	nvkm_timer_alarm(device->timer, 2000000000, &gpmu->alarm);
    187	return 0;
    188}
    189
    190static struct gk20a_pmu_dvfs_data
    191gk20a_dvfs_data= {
    192	.p_load_target = 70,
    193	.p_load_max = 90,
    194	.p_smooth = 1,
    195};
    196
    197static const struct nvkm_pmu_func
    198gk20a_pmu = {
    199	.flcn = &gt215_pmu_flcn,
    200	.enabled = gf100_pmu_enabled,
    201	.init = gk20a_pmu_init,
    202	.fini = gk20a_pmu_fini,
    203	.reset = gf100_pmu_reset,
    204};
    205
    206static const struct nvkm_pmu_fwif
    207gk20a_pmu_fwif[] = {
    208	{ -1, gf100_pmu_nofw, &gk20a_pmu },
    209	{}
    210};
    211
    212int
    213gk20a_pmu_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
    214	      struct nvkm_pmu **ppmu)
    215{
    216	struct gk20a_pmu *pmu;
    217	int ret;
    218
    219	if (!(pmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
    220		return -ENOMEM;
    221	*ppmu = &pmu->base;
    222
    223	ret = nvkm_pmu_ctor(gk20a_pmu_fwif, device, type, inst, &pmu->base);
    224	if (ret)
    225		return ret;
    226
    227	pmu->data = &gk20a_dvfs_data;
    228	nvkm_alarm_init(&pmu->alarm, gk20a_pmu_dvfs_work);
    229	return 0;
    230}