cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_rps.c (62711B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2019 Intel Corporation
      4 */
      5
      6#include <linux/string_helpers.h>
      7
      8#include <drm/i915_drm.h>
      9
     10#include "i915_drv.h"
     11#include "i915_irq.h"
     12#include "intel_breadcrumbs.h"
     13#include "intel_gt.h"
     14#include "intel_gt_clock_utils.h"
     15#include "intel_gt_irq.h"
     16#include "intel_gt_pm_irq.h"
     17#include "intel_gt_regs.h"
     18#include "intel_mchbar_regs.h"
     19#include "intel_pcode.h"
     20#include "intel_rps.h"
     21#include "vlv_sideband.h"
     22#include "../../../platform/x86/intel_ips.h"
     23
     24#define BUSY_MAX_EI	20u /* ms */
     25
     26/*
     27 * Lock protecting IPS related data structures
     28 */
     29static DEFINE_SPINLOCK(mchdev_lock);
     30
     31static struct intel_gt *rps_to_gt(struct intel_rps *rps)
     32{
     33	return container_of(rps, struct intel_gt, rps);
     34}
     35
     36static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
     37{
     38	return rps_to_gt(rps)->i915;
     39}
     40
     41static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
     42{
     43	return rps_to_gt(rps)->uncore;
     44}
     45
     46static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
     47{
     48	struct intel_gt *gt = rps_to_gt(rps);
     49
     50	return &gt->uc.guc.slpc;
     51}
     52
     53static bool rps_uses_slpc(struct intel_rps *rps)
     54{
     55	struct intel_gt *gt = rps_to_gt(rps);
     56
     57	return intel_uc_uses_guc_slpc(&gt->uc);
     58}
     59
     60static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
     61{
     62	return mask & ~rps->pm_intrmsk_mbz;
     63}
     64
     65static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
     66{
     67	intel_uncore_write_fw(uncore, reg, val);
     68}
     69
     70static void rps_timer(struct timer_list *t)
     71{
     72	struct intel_rps *rps = from_timer(rps, t, timer);
     73	struct intel_engine_cs *engine;
     74	ktime_t dt, last, timestamp;
     75	enum intel_engine_id id;
     76	s64 max_busy[3] = {};
     77
     78	timestamp = 0;
     79	for_each_engine(engine, rps_to_gt(rps), id) {
     80		s64 busy;
     81		int i;
     82
     83		dt = intel_engine_get_busy_time(engine, &timestamp);
     84		last = engine->stats.rps;
     85		engine->stats.rps = dt;
     86
     87		busy = ktime_to_ns(ktime_sub(dt, last));
     88		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
     89			if (busy > max_busy[i])
     90				swap(busy, max_busy[i]);
     91		}
     92	}
     93	last = rps->pm_timestamp;
     94	rps->pm_timestamp = timestamp;
     95
     96	if (intel_rps_is_active(rps)) {
     97		s64 busy;
     98		int i;
     99
    100		dt = ktime_sub(timestamp, last);
    101
    102		/*
    103		 * Our goal is to evaluate each engine independently, so we run
    104		 * at the lowest clocks required to sustain the heaviest
    105		 * workload. However, a task may be split into sequential
    106		 * dependent operations across a set of engines, such that
    107		 * the independent contributions do not account for high load,
    108		 * but overall the task is GPU bound. For example, consider
    109		 * video decode on vcs followed by colour post-processing
    110		 * on vecs, followed by general post-processing on rcs.
     111		 * Since multiple engines being active does imply a single
    112		 * continuous workload across all engines, we hedge our
    113		 * bets by only contributing a factor of the distributed
    114		 * load into our busyness calculation.
    115		 */
    116		busy = max_busy[0];
    117		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
    118			if (!max_busy[i])
    119				break;
    120
    121			busy += div_u64(max_busy[i], 1 << i);
    122		}
    123		GT_TRACE(rps_to_gt(rps),
    124			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
    125			 busy, (int)div64_u64(100 * busy, dt),
    126			 max_busy[0], max_busy[1], max_busy[2],
    127			 rps->pm_interval);
    128
    129		if (100 * busy > rps->power.up_threshold * dt &&
    130		    rps->cur_freq < rps->max_freq_softlimit) {
    131			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
    132			rps->pm_interval = 1;
    133			schedule_work(&rps->work);
    134		} else if (100 * busy < rps->power.down_threshold * dt &&
    135			   rps->cur_freq > rps->min_freq_softlimit) {
    136			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
    137			rps->pm_interval = 1;
    138			schedule_work(&rps->work);
    139		} else {
    140			rps->last_adj = 0;
    141		}
    142
    143		mod_timer(&rps->timer,
    144			  jiffies + msecs_to_jiffies(rps->pm_interval));
    145		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
    146	}
    147}
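/*
 * Worked example of the hedged busyness sum in rps_timer(): with the
 * three busiest engines reporting max_busy = {8ms, 4ms, 2ms} over a
 * dt of 10ms, busy = 8 + 4/2 + 2/4 = 10.5ms, i.e. 105% of the
 * interval. With the 95% up_threshold set by rps_set_power(),
 * 100 * busy exceeds up_threshold * dt, so an UP_THRESHOLD event is
 * queued and pm_interval is reset to 1ms; otherwise the evaluation
 * interval keeps doubling, capped at BUSY_MAX_EI (20ms).
 */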
    148
    149static void rps_start_timer(struct intel_rps *rps)
    150{
    151	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
    152	rps->pm_interval = 1;
    153	mod_timer(&rps->timer, jiffies + 1);
    154}
    155
    156static void rps_stop_timer(struct intel_rps *rps)
    157{
    158	del_timer_sync(&rps->timer);
    159	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
    160	cancel_work_sync(&rps->work);
    161}
    162
    163static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
    164{
    165	u32 mask = 0;
    166
    167	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
    168	if (val > rps->min_freq_softlimit)
    169		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
    170			 GEN6_PM_RP_DOWN_THRESHOLD |
    171			 GEN6_PM_RP_DOWN_TIMEOUT);
    172
    173	if (val < rps->max_freq_softlimit)
    174		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
    175
    176	mask &= rps->pm_events;
    177
    178	return rps_pm_sanitize_mask(rps, ~mask);
    179}
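/*
 * Note on rps_pm_mask(): GEN6_PMINTRMSK is a disable mask, hence the
 * final ~mask. For a frequency strictly between the soft limits, both
 * the up and down events in rps->pm_events stay unmasked; once val
 * reaches max_freq_softlimit only the down events remain, and at
 * min_freq_softlimit only the up events, so the worker stops being
 * interrupted for directions it can no longer take.
 */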
    180
    181static void rps_reset_ei(struct intel_rps *rps)
    182{
    183	memset(&rps->ei, 0, sizeof(rps->ei));
    184}
    185
    186static void rps_enable_interrupts(struct intel_rps *rps)
    187{
    188	struct intel_gt *gt = rps_to_gt(rps);
    189
    190	GEM_BUG_ON(rps_uses_slpc(rps));
    191
    192	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
    193		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
    194
    195	rps_reset_ei(rps);
    196
    197	spin_lock_irq(&gt->irq_lock);
    198	gen6_gt_pm_enable_irq(gt, rps->pm_events);
    199	spin_unlock_irq(&gt->irq_lock);
    200
    201	intel_uncore_write(gt->uncore,
    202			   GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
    203}
    204
    205static void gen6_rps_reset_interrupts(struct intel_rps *rps)
    206{
    207	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
    208}
    209
    210static void gen11_rps_reset_interrupts(struct intel_rps *rps)
    211{
    212	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
    213		;
    214}
    215
    216static void rps_reset_interrupts(struct intel_rps *rps)
    217{
    218	struct intel_gt *gt = rps_to_gt(rps);
    219
    220	spin_lock_irq(&gt->irq_lock);
    221	if (GRAPHICS_VER(gt->i915) >= 11)
    222		gen11_rps_reset_interrupts(rps);
    223	else
    224		gen6_rps_reset_interrupts(rps);
    225
    226	rps->pm_iir = 0;
    227	spin_unlock_irq(&gt->irq_lock);
    228}
    229
    230static void rps_disable_interrupts(struct intel_rps *rps)
    231{
    232	struct intel_gt *gt = rps_to_gt(rps);
    233
    234	intel_uncore_write(gt->uncore,
    235			   GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
    236
    237	spin_lock_irq(&gt->irq_lock);
    238	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
    239	spin_unlock_irq(&gt->irq_lock);
    240
    241	intel_synchronize_irq(gt->i915);
    242
    243	/*
    244	 * Now that we will not be generating any more work, flush any
    245	 * outstanding tasks. As we are called on the RPS idle path,
    246	 * we will reset the GPU to minimum frequencies, so the current
    247	 * state of the worker can be discarded.
    248	 */
    249	cancel_work_sync(&rps->work);
    250
    251	rps_reset_interrupts(rps);
    252	GT_TRACE(gt, "interrupts:off\n");
    253}
    254
    255static const struct cparams {
    256	u16 i;
    257	u16 t;
    258	u16 m;
    259	u16 c;
    260} cparams[] = {
    261	{ 1, 1333, 301, 28664 },
    262	{ 1, 1066, 294, 24460 },
    263	{ 1, 800, 294, 25192 },
    264	{ 0, 1333, 276, 27605 },
    265	{ 0, 1066, 276, 27605 },
    266	{ 0, 800, 231, 23784 },
    267};
    268
    269static void gen5_rps_init(struct intel_rps *rps)
    270{
    271	struct drm_i915_private *i915 = rps_to_i915(rps);
    272	struct intel_uncore *uncore = rps_to_uncore(rps);
    273	u8 fmax, fmin, fstart;
    274	u32 rgvmodectl;
    275	int c_m, i;
    276
    277	if (i915->fsb_freq <= 3200)
    278		c_m = 0;
    279	else if (i915->fsb_freq <= 4800)
    280		c_m = 1;
    281	else
    282		c_m = 2;
    283
    284	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
    285		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
    286			rps->ips.m = cparams[i].m;
    287			rps->ips.c = cparams[i].c;
    288			break;
    289		}
    290	}
    291
    292	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
    293
    294	/* Set up min, max, and cur for interrupt handling */
    295	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
    296	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
    297	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
    298		MEMMODE_FSTART_SHIFT;
    299	drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
    300		fmax, fmin, fstart);
    301
    302	rps->min_freq = fmax;
    303	rps->efficient_freq = fstart;
    304	rps->max_freq = fmin;
    305}
    306
    307static unsigned long
    308__ips_chipset_val(struct intel_ips *ips)
    309{
    310	struct intel_uncore *uncore =
    311		rps_to_uncore(container_of(ips, struct intel_rps, ips));
    312	unsigned long now = jiffies_to_msecs(jiffies), dt;
    313	unsigned long result;
    314	u64 total, delta;
    315
    316	lockdep_assert_held(&mchdev_lock);
    317
    318	/*
    319	 * Prevent division-by-zero if we are asking too fast.
    320	 * Also, we don't get interesting results if we are polling
    321	 * faster than once in 10ms, so just return the saved value
    322	 * in such cases.
    323	 */
    324	dt = now - ips->last_time1;
    325	if (dt <= 10)
    326		return ips->chipset_power;
    327
    328	/* FIXME: handle per-counter overflow */
    329	total = intel_uncore_read(uncore, DMIEC);
    330	total += intel_uncore_read(uncore, DDREC);
    331	total += intel_uncore_read(uncore, CSIEC);
    332
    333	delta = total - ips->last_count1;
    334
    335	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
    336
    337	ips->last_count1 = total;
    338	ips->last_time1 = now;
    339
    340	ips->chipset_power = result;
    341
    342	return result;
    343}
    344
    345static unsigned long ips_mch_val(struct intel_uncore *uncore)
    346{
    347	unsigned int m, x, b;
    348	u32 tsfs;
    349
    350	tsfs = intel_uncore_read(uncore, TSFS);
    351	x = intel_uncore_read8(uncore, TR1);
    352
    353	b = tsfs & TSFS_INTR_MASK;
    354	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
    355
    356	return m * x / 127 - b;
    357}
    358
    359static int _pxvid_to_vd(u8 pxvid)
    360{
    361	if (pxvid == 0)
    362		return 0;
    363
    364	if (pxvid >= 8 && pxvid < 31)
    365		pxvid = 31;
    366
    367	return (pxvid + 2) * 125;
    368}
    369
    370static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
    371{
    372	const int vd = _pxvid_to_vd(pxvid);
    373
    374	if (INTEL_INFO(i915)->is_mobile)
    375		return max(vd - 1125, 0);
    376
    377	return vd;
    378}
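/*
 * Example for pvid_to_extvid(): a raw pxvid of 17 falls in the 8..30
 * range and is clamped to 31, so _pxvid_to_vd() returns
 * (31 + 2) * 125 = 4125; on mobile parts the additional 1125 offset
 * is subtracted, giving 3000.
 */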
    379
    380static void __gen5_ips_update(struct intel_ips *ips)
    381{
    382	struct intel_uncore *uncore =
    383		rps_to_uncore(container_of(ips, struct intel_rps, ips));
    384	u64 now, delta, dt;
    385	u32 count;
    386
    387	lockdep_assert_held(&mchdev_lock);
    388
    389	now = ktime_get_raw_ns();
    390	dt = now - ips->last_time2;
    391	do_div(dt, NSEC_PER_MSEC);
    392
    393	/* Don't divide by 0 */
    394	if (dt <= 10)
    395		return;
    396
    397	count = intel_uncore_read(uncore, GFXEC);
    398	delta = count - ips->last_count2;
    399
    400	ips->last_count2 = count;
    401	ips->last_time2 = now;
    402
    403	/* More magic constants... */
    404	ips->gfx_power = div_u64(delta * 1181, dt * 10);
    405}
    406
    407static void gen5_rps_update(struct intel_rps *rps)
    408{
    409	spin_lock_irq(&mchdev_lock);
    410	__gen5_ips_update(&rps->ips);
    411	spin_unlock_irq(&mchdev_lock);
    412}
    413
    414static unsigned int gen5_invert_freq(struct intel_rps *rps,
    415				     unsigned int val)
    416{
    417	/* Invert the frequency bin into an ips delay */
    418	val = rps->max_freq - val;
    419	val = rps->min_freq + val;
    420
    421	return val;
    422}
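/*
 * Example for gen5_invert_freq(), using hypothetical fuse values: with
 * gen5_rps_init() having stored min_freq = fmax = 3 and
 * max_freq = fmin = 11, a request for the rps maximum (val = 11) maps
 * to hardware delay bin 3 + (11 - 11) = 3, while the rps minimum
 * (val = 3) maps to bin 3 + (11 - 3) = 11, i.e. back to fmin.
 */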
    423
    424static int __gen5_rps_set(struct intel_rps *rps, u8 val)
    425{
    426	struct intel_uncore *uncore = rps_to_uncore(rps);
    427	u16 rgvswctl;
    428
    429	lockdep_assert_held(&mchdev_lock);
    430
    431	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
    432	if (rgvswctl & MEMCTL_CMD_STS) {
    433		DRM_DEBUG("gpu busy, RCS change rejected\n");
    434		return -EBUSY; /* still busy with another command */
    435	}
    436
    437	/* Invert the frequency bin into an ips delay */
    438	val = gen5_invert_freq(rps, val);
    439
    440	rgvswctl =
    441		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
    442		(val << MEMCTL_FREQ_SHIFT) |
    443		MEMCTL_SFCAVM;
    444	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
    445	intel_uncore_posting_read16(uncore, MEMSWCTL);
    446
    447	rgvswctl |= MEMCTL_CMD_STS;
    448	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
    449
    450	return 0;
    451}
    452
    453static int gen5_rps_set(struct intel_rps *rps, u8 val)
    454{
    455	int err;
    456
    457	spin_lock_irq(&mchdev_lock);
    458	err = __gen5_rps_set(rps, val);
    459	spin_unlock_irq(&mchdev_lock);
    460
    461	return err;
    462}
    463
    464static unsigned long intel_pxfreq(u32 vidfreq)
    465{
    466	int div = (vidfreq & 0x3f0000) >> 16;
    467	int post = (vidfreq & 0x3000) >> 12;
    468	int pre = (vidfreq & 0x7);
    469
    470	if (!pre)
    471		return 0;
    472
    473	return div * 133333 / (pre << post);
    474}
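/*
 * Example for intel_pxfreq(): with div = 18, post = 1 and pre = 1
 * decoded from a PXVFREQ value, the result is
 * 18 * 133333 / (1 << 1) = 1199997, i.e. roughly a 1.2 GHz P-state
 * if the result is read in kHz.
 */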
    475
    476static unsigned int init_emon(struct intel_uncore *uncore)
    477{
    478	u8 pxw[16];
    479	int i;
    480
    481	/* Disable to program */
    482	intel_uncore_write(uncore, ECR, 0);
    483	intel_uncore_posting_read(uncore, ECR);
    484
    485	/* Program energy weights for various events */
    486	intel_uncore_write(uncore, SDEW, 0x15040d00);
    487	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
    488	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
    489	intel_uncore_write(uncore, CSIEW2, 0x04000004);
    490
    491	for (i = 0; i < 5; i++)
    492		intel_uncore_write(uncore, PEW(i), 0);
    493	for (i = 0; i < 3; i++)
    494		intel_uncore_write(uncore, DEW(i), 0);
    495
    496	/* Program P-state weights to account for frequency power adjustment */
    497	for (i = 0; i < 16; i++) {
    498		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
    499		unsigned int freq = intel_pxfreq(pxvidfreq);
    500		unsigned int vid =
    501			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
    502		unsigned int val;
    503
    504		val = vid * vid * freq / 1000 * 255;
    505		val /= 127 * 127 * 900;
    506
    507		pxw[i] = val;
    508	}
    509	/* Render standby states get 0 weight */
    510	pxw[14] = 0;
    511	pxw[15] = 0;
    512
    513	for (i = 0; i < 4; i++) {
    514		intel_uncore_write(uncore, PXW(i),
    515				   pxw[i * 4 + 0] << 24 |
    516				   pxw[i * 4 + 1] << 16 |
    517				   pxw[i * 4 + 2] <<  8 |
    518				   pxw[i * 4 + 3] <<  0);
    519	}
    520
    521	/* Adjust magic regs to magic values (more experimental results) */
    522	intel_uncore_write(uncore, OGW0, 0);
    523	intel_uncore_write(uncore, OGW1, 0);
    524	intel_uncore_write(uncore, EG0, 0x00007f00);
    525	intel_uncore_write(uncore, EG1, 0x0000000e);
    526	intel_uncore_write(uncore, EG2, 0x000e0000);
    527	intel_uncore_write(uncore, EG3, 0x68000300);
    528	intel_uncore_write(uncore, EG4, 0x42000000);
    529	intel_uncore_write(uncore, EG5, 0x00140031);
    530	intel_uncore_write(uncore, EG6, 0);
    531	intel_uncore_write(uncore, EG7, 0);
    532
    533	for (i = 0; i < 8; i++)
    534		intel_uncore_write(uncore, PXWL(i), 0);
    535
    536	/* Enable PMON + select events */
    537	intel_uncore_write(uncore, ECR, 0x80000019);
    538
    539	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
    540}
    541
    542static bool gen5_rps_enable(struct intel_rps *rps)
    543{
    544	struct drm_i915_private *i915 = rps_to_i915(rps);
    545	struct intel_uncore *uncore = rps_to_uncore(rps);
    546	u8 fstart, vstart;
    547	u32 rgvmodectl;
    548
    549	spin_lock_irq(&mchdev_lock);
    550
    551	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
    552
    553	/* Enable temp reporting */
    554	intel_uncore_write16(uncore, PMMISC,
    555			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
    556	intel_uncore_write16(uncore, TSC1,
    557			     intel_uncore_read16(uncore, TSC1) | TSE);
    558
    559	/* 100ms RC evaluation intervals */
    560	intel_uncore_write(uncore, RCUPEI, 100000);
    561	intel_uncore_write(uncore, RCDNEI, 100000);
    562
    563	/* Set max/min thresholds to 90ms and 80ms respectively */
    564	intel_uncore_write(uncore, RCBMAXAVG, 90000);
    565	intel_uncore_write(uncore, RCBMINAVG, 80000);
    566
    567	intel_uncore_write(uncore, MEMIHYST, 1);
    568
    569	/* Set up min, max, and cur for interrupt handling */
    570	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
    571		MEMMODE_FSTART_SHIFT;
    572
    573	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
    574		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
    575
    576	intel_uncore_write(uncore,
    577			   MEMINTREN,
    578			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
    579
    580	intel_uncore_write(uncore, VIDSTART, vstart);
    581	intel_uncore_posting_read(uncore, VIDSTART);
    582
    583	rgvmodectl |= MEMMODE_SWMODE_EN;
    584	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
    585
    586	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
    587			     MEMCTL_CMD_STS) == 0, 10))
    588		drm_err(&uncore->i915->drm,
    589			"stuck trying to change perf mode\n");
    590	mdelay(1);
    591
    592	__gen5_rps_set(rps, rps->cur_freq);
    593
    594	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
    595	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
    596	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
    597	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
    598
    599	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
    600	rps->ips.last_time2 = ktime_get_raw_ns();
    601
    602	spin_lock(&i915->irq_lock);
    603	ilk_enable_display_irq(i915, DE_PCU_EVENT);
    604	spin_unlock(&i915->irq_lock);
    605
    606	spin_unlock_irq(&mchdev_lock);
    607
    608	rps->ips.corr = init_emon(uncore);
    609
    610	return true;
    611}
    612
    613static void gen5_rps_disable(struct intel_rps *rps)
    614{
    615	struct drm_i915_private *i915 = rps_to_i915(rps);
    616	struct intel_uncore *uncore = rps_to_uncore(rps);
    617	u16 rgvswctl;
    618
    619	spin_lock_irq(&mchdev_lock);
    620
    621	spin_lock(&i915->irq_lock);
    622	ilk_disable_display_irq(i915, DE_PCU_EVENT);
    623	spin_unlock(&i915->irq_lock);
    624
    625	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
    626
    627	/* Ack interrupts, disable EFC interrupt */
    628	intel_uncore_write(uncore, MEMINTREN,
    629			   intel_uncore_read(uncore, MEMINTREN) &
    630			   ~MEMINT_EVAL_CHG_EN);
    631	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
    632
    633	/* Go back to the starting frequency */
    634	__gen5_rps_set(rps, rps->idle_freq);
    635	mdelay(1);
    636	rgvswctl |= MEMCTL_CMD_STS;
    637	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
    638	mdelay(1);
    639
    640	spin_unlock_irq(&mchdev_lock);
    641}
    642
    643static u32 rps_limits(struct intel_rps *rps, u8 val)
    644{
    645	u32 limits;
    646
    647	/*
    648	 * Only set the down limit when we've reached the lowest level to avoid
    649	 * getting more interrupts, otherwise leave this clear. This prevents a
    650	 * race in the hw when coming out of rc6: There's a tiny window where
    651	 * the hw runs at the minimal clock before selecting the desired
    652	 * frequency, if the down threshold expires in that window we will not
    653	 * receive a down interrupt.
    654	 */
    655	if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
    656		limits = rps->max_freq_softlimit << 23;
    657		if (val <= rps->min_freq_softlimit)
    658			limits |= rps->min_freq_softlimit << 14;
    659	} else {
    660		limits = rps->max_freq_softlimit << 24;
    661		if (val <= rps->min_freq_softlimit)
    662			limits |= rps->min_freq_softlimit << 16;
    663	}
    664
    665	return limits;
    666}
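/*
 * Example for rps_limits() on gen9+: the max softlimit is placed in
 * bits 31:23 and, only once val has dropped to the min softlimit, the
 * min softlimit in bits 22:14. With max = 0x100, min = 0x30 and
 * val == min this gives (0x100 << 23) | (0x30 << 14) = 0x800c0000;
 * pre-gen9 the same fields sit at bits 31:24 and 23:16.
 */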
    667
    668static void rps_set_power(struct intel_rps *rps, int new_power)
    669{
    670	struct intel_gt *gt = rps_to_gt(rps);
    671	struct intel_uncore *uncore = gt->uncore;
    672	u32 threshold_up = 0, threshold_down = 0; /* in % */
    673	u32 ei_up = 0, ei_down = 0;
    674
    675	lockdep_assert_held(&rps->power.mutex);
    676
    677	if (new_power == rps->power.mode)
    678		return;
    679
    680	threshold_up = 95;
    681	threshold_down = 85;
    682
    683	/* Note the units here are not exactly 1us, but 1280ns. */
    684	switch (new_power) {
    685	case LOW_POWER:
    686		ei_up = 16000;
    687		ei_down = 32000;
    688		break;
    689
    690	case BETWEEN:
    691		ei_up = 13000;
    692		ei_down = 32000;
    693		break;
    694
    695	case HIGH_POWER:
    696		ei_up = 10000;
    697		ei_down = 32000;
    698		break;
    699	}
    700
    701	/* When byt can survive without system hang with dynamic
    702	 * sw freq adjustments, this restriction can be lifted.
    703	 */
    704	if (IS_VALLEYVIEW(gt->i915))
    705		goto skip_hw_write;
    706
    707	GT_TRACE(gt,
    708		 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
    709		 new_power, threshold_up, ei_up, threshold_down, ei_down);
    710
    711	set(uncore, GEN6_RP_UP_EI,
    712	    intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
    713	set(uncore, GEN6_RP_UP_THRESHOLD,
    714	    intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
    715
    716	set(uncore, GEN6_RP_DOWN_EI,
    717	    intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
    718	set(uncore, GEN6_RP_DOWN_THRESHOLD,
    719	    intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
    720
    721	set(uncore, GEN6_RP_CONTROL,
    722	    (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
    723	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
    724	    GEN6_RP_MEDIA_IS_GFX |
    725	    GEN6_RP_ENABLE |
    726	    GEN6_RP_UP_BUSY_AVG |
    727	    GEN6_RP_DOWN_IDLE_AVG);
    728
    729skip_hw_write:
    730	rps->power.mode = new_power;
    731	rps->power.up_threshold = threshold_up;
    732	rps->power.down_threshold = threshold_down;
    733}
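/*
 * Worked example for rps_set_power() in HIGH_POWER mode: ei_up = 10000
 * and threshold_up = 95, so GEN6_RP_UP_EI is programmed with the PM
 * interval equivalent of 10000 * 1000 = 10,000,000 ns and
 * GEN6_RP_UP_THRESHOLD with 10000 * 95 * 10 = 9,500,000 ns, i.e. 95%
 * of the evaluation interval, both converted to hardware units by
 * intel_gt_ns_to_pm_interval().
 */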
    734
    735static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
    736{
    737	int new_power;
    738
    739	new_power = rps->power.mode;
    740	switch (rps->power.mode) {
    741	case LOW_POWER:
    742		if (val > rps->efficient_freq + 1 &&
    743		    val > rps->cur_freq)
    744			new_power = BETWEEN;
    745		break;
    746
    747	case BETWEEN:
    748		if (val <= rps->efficient_freq &&
    749		    val < rps->cur_freq)
    750			new_power = LOW_POWER;
    751		else if (val >= rps->rp0_freq &&
    752			 val > rps->cur_freq)
    753			new_power = HIGH_POWER;
    754		break;
    755
    756	case HIGH_POWER:
    757		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
    758		    val < rps->cur_freq)
    759			new_power = BETWEEN;
    760		break;
    761	}
    762	/* Max/min bins are special */
    763	if (val <= rps->min_freq_softlimit)
    764		new_power = LOW_POWER;
    765	if (val >= rps->max_freq_softlimit)
    766		new_power = HIGH_POWER;
    767
    768	mutex_lock(&rps->power.mutex);
    769	if (rps->power.interactive)
    770		new_power = HIGH_POWER;
    771	rps_set_power(rps, new_power);
    772	mutex_unlock(&rps->power.mutex);
    773}
    774
    775void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
    776{
    777	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n",
    778		 str_yes_no(interactive));
    779
    780	mutex_lock(&rps->power.mutex);
    781	if (interactive) {
    782		if (!rps->power.interactive++ && intel_rps_is_active(rps))
    783			rps_set_power(rps, HIGH_POWER);
    784	} else {
    785		GEM_BUG_ON(!rps->power.interactive);
    786		rps->power.interactive--;
    787	}
    788	mutex_unlock(&rps->power.mutex);
    789}
    790
    791static int gen6_rps_set(struct intel_rps *rps, u8 val)
    792{
    793	struct intel_uncore *uncore = rps_to_uncore(rps);
    794	struct drm_i915_private *i915 = rps_to_i915(rps);
    795	u32 swreq;
    796
    797	GEM_BUG_ON(rps_uses_slpc(rps));
    798
    799	if (GRAPHICS_VER(i915) >= 9)
    800		swreq = GEN9_FREQUENCY(val);
    801	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
    802		swreq = HSW_FREQUENCY(val);
    803	else
    804		swreq = (GEN6_FREQUENCY(val) |
    805			 GEN6_OFFSET(0) |
    806			 GEN6_AGGRESSIVE_TURBO);
    807	set(uncore, GEN6_RPNSWREQ, swreq);
    808
    809	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
    810		 val, intel_gpu_freq(rps, val), swreq);
    811
    812	return 0;
    813}
    814
    815static int vlv_rps_set(struct intel_rps *rps, u8 val)
    816{
    817	struct drm_i915_private *i915 = rps_to_i915(rps);
    818	int err;
    819
    820	vlv_punit_get(i915);
    821	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
    822	vlv_punit_put(i915);
    823
    824	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
    825		 val, intel_gpu_freq(rps, val));
    826
    827	return err;
    828}
    829
    830static int rps_set(struct intel_rps *rps, u8 val, bool update)
    831{
    832	struct drm_i915_private *i915 = rps_to_i915(rps);
    833	int err;
    834
    835	if (val == rps->last_freq)
    836		return 0;
    837
    838	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
    839		err = vlv_rps_set(rps, val);
    840	else if (GRAPHICS_VER(i915) >= 6)
    841		err = gen6_rps_set(rps, val);
    842	else
    843		err = gen5_rps_set(rps, val);
    844	if (err)
    845		return err;
    846
    847	if (update && GRAPHICS_VER(i915) >= 6)
    848		gen6_rps_set_thresholds(rps, val);
    849	rps->last_freq = val;
    850
    851	return 0;
    852}
    853
    854void intel_rps_unpark(struct intel_rps *rps)
    855{
    856	if (!intel_rps_is_enabled(rps))
    857		return;
    858
    859	GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
    860
    861	/*
    862	 * Use the user's desired frequency as a guide, but for better
    863	 * performance, jump directly to RPe as our starting frequency.
    864	 */
    865	mutex_lock(&rps->lock);
    866
    867	intel_rps_set_active(rps);
    868	intel_rps_set(rps,
    869		      clamp(rps->cur_freq,
    870			    rps->min_freq_softlimit,
    871			    rps->max_freq_softlimit));
    872
    873	mutex_unlock(&rps->lock);
    874
    875	rps->pm_iir = 0;
    876	if (intel_rps_has_interrupts(rps))
    877		rps_enable_interrupts(rps);
    878	if (intel_rps_uses_timer(rps))
    879		rps_start_timer(rps);
    880
    881	if (GRAPHICS_VER(rps_to_i915(rps)) == 5)
    882		gen5_rps_update(rps);
    883}
    884
    885void intel_rps_park(struct intel_rps *rps)
    886{
    887	int adj;
    888
    889	if (!intel_rps_is_enabled(rps))
    890		return;
    891
    892	if (!intel_rps_clear_active(rps))
    893		return;
    894
    895	if (intel_rps_uses_timer(rps))
    896		rps_stop_timer(rps);
    897	if (intel_rps_has_interrupts(rps))
    898		rps_disable_interrupts(rps);
    899
    900	if (rps->last_freq <= rps->idle_freq)
    901		return;
    902
    903	/*
    904	 * The punit delays the write of the frequency and voltage until it
    905	 * determines the GPU is awake. During normal usage we don't want to
    906	 * waste power changing the frequency if the GPU is sleeping (rc6).
    907	 * However, the GPU and driver is now idle and we do not want to delay
    908	 * switching to minimum voltage (reducing power whilst idle) as we do
    909	 * not expect to be woken in the near future and so must flush the
    910	 * change by waking the device.
    911	 *
    912	 * We choose to take the media powerwell (either would do to trick the
    913	 * punit into committing the voltage change) as that takes a lot less
    914	 * power than the render powerwell.
    915	 */
    916	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
    917	rps_set(rps, rps->idle_freq, false);
    918	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
    919
    920	/*
    921	 * Since we will try and restart from the previously requested
    922	 * frequency on unparking, treat this idle point as a downclock
    923	 * interrupt and reduce the frequency for resume. If we park/unpark
    924	 * more frequently than the rps worker can run, we will not respond
    925	 * to any EI and never see a change in frequency.
    926	 *
    927	 * (Note we accommodate Cherryview's limitation of only using an
    928	 * even bin by applying it to all.)
    929	 */
    930	adj = rps->last_adj;
    931	if (adj < 0)
    932		adj *= 2;
    933	else /* CHV needs even encode values */
    934		adj = -2;
    935	rps->last_adj = adj;
    936	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
    937	if (rps->cur_freq < rps->efficient_freq) {
    938		rps->cur_freq = rps->efficient_freq;
    939		rps->last_adj = 0;
    940	}
    941
    942	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
    943}
    944
    945u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
    946{
    947	struct intel_guc_slpc *slpc;
    948
    949	if (rps_uses_slpc(rps)) {
    950		slpc = rps_to_slpc(rps);
    951
    952		return slpc->boost_freq;
    953	} else {
    954		return intel_gpu_freq(rps, rps->boost_freq);
    955	}
    956}
    957
    958static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
    959{
    960	bool boost = false;
    961
    962	/* Validate against (static) hardware limits */
    963	val = intel_freq_opcode(rps, val);
    964	if (val < rps->min_freq || val > rps->max_freq)
    965		return -EINVAL;
    966
    967	mutex_lock(&rps->lock);
    968	if (val != rps->boost_freq) {
    969		rps->boost_freq = val;
    970		boost = atomic_read(&rps->num_waiters);
    971	}
    972	mutex_unlock(&rps->lock);
    973	if (boost)
    974		schedule_work(&rps->work);
    975
    976	return 0;
    977}
    978
    979int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
    980{
    981	struct intel_guc_slpc *slpc;
    982
    983	if (rps_uses_slpc(rps)) {
    984		slpc = rps_to_slpc(rps);
    985
    986		return intel_guc_slpc_set_boost_freq(slpc, freq);
    987	} else {
    988		return rps_set_boost_freq(rps, freq);
    989	}
    990}
    991
    992void intel_rps_dec_waiters(struct intel_rps *rps)
    993{
    994	struct intel_guc_slpc *slpc;
    995
    996	if (rps_uses_slpc(rps)) {
    997		slpc = rps_to_slpc(rps);
    998
    999		intel_guc_slpc_dec_waiters(slpc);
   1000	} else {
   1001		atomic_dec(&rps->num_waiters);
   1002	}
   1003}
   1004
   1005void intel_rps_boost(struct i915_request *rq)
   1006{
   1007	struct intel_guc_slpc *slpc;
   1008
   1009	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
   1010		return;
   1011
   1012	/* Serializes with i915_request_retire() */
   1013	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
   1014		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
   1015
   1016		if (rps_uses_slpc(rps)) {
   1017			slpc = rps_to_slpc(rps);
   1018
    1019			/* Return if old value is non-zero */
   1020			if (!atomic_fetch_inc(&slpc->num_waiters))
   1021				schedule_work(&slpc->boost_work);
   1022
   1023			return;
   1024		}
   1025
   1026		if (atomic_fetch_inc(&rps->num_waiters))
   1027			return;
   1028
   1029		if (!intel_rps_is_active(rps))
   1030			return;
   1031
   1032		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
   1033			 rq->fence.context, rq->fence.seqno);
   1034
   1035		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
   1036			schedule_work(&rps->work);
   1037
   1038		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
   1039	}
   1040}
   1041
   1042int intel_rps_set(struct intel_rps *rps, u8 val)
   1043{
   1044	int err;
   1045
   1046	lockdep_assert_held(&rps->lock);
   1047	GEM_BUG_ON(val > rps->max_freq);
   1048	GEM_BUG_ON(val < rps->min_freq);
   1049
   1050	if (intel_rps_is_active(rps)) {
   1051		err = rps_set(rps, val, true);
   1052		if (err)
   1053			return err;
   1054
   1055		/*
   1056		 * Make sure we continue to get interrupts
   1057		 * until we hit the minimum or maximum frequencies.
   1058		 */
   1059		if (intel_rps_has_interrupts(rps)) {
   1060			struct intel_uncore *uncore = rps_to_uncore(rps);
   1061
   1062			set(uncore,
   1063			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
   1064
   1065			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
   1066		}
   1067	}
   1068
   1069	rps->cur_freq = val;
   1070	return 0;
   1071}
   1072
   1073static u32 intel_rps_read_state_cap(struct intel_rps *rps)
   1074{
   1075	struct drm_i915_private *i915 = rps_to_i915(rps);
   1076	struct intel_uncore *uncore = rps_to_uncore(rps);
   1077
   1078	if (IS_XEHPSDV(i915))
   1079		return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
   1080	else if (IS_GEN9_LP(i915))
   1081		return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
   1082	else
   1083		return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
   1084}
   1085
   1086/**
   1087 * gen6_rps_get_freq_caps - Get freq caps exposed by HW
   1088 * @rps: the intel_rps structure
   1089 * @caps: returned freq caps
   1090 *
   1091 * Returned "caps" frequencies should be converted to MHz using
   1092 * intel_gpu_freq()
   1093 */
   1094void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
   1095{
   1096	struct drm_i915_private *i915 = rps_to_i915(rps);
   1097	u32 rp_state_cap;
   1098
   1099	rp_state_cap = intel_rps_read_state_cap(rps);
   1100
   1101	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
   1102	if (IS_GEN9_LP(i915)) {
   1103		caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
   1104		caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
   1105		caps->min_freq = (rp_state_cap >>  0) & 0xff;
   1106	} else {
   1107		caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
   1108		caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
   1109		caps->min_freq = (rp_state_cap >> 16) & 0xff;
   1110	}
   1111
   1112	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
   1113		/*
   1114		 * In this case rp_state_cap register reports frequencies in
   1115		 * units of 50 MHz. Convert these to the actual "hw unit", i.e.
   1116		 * units of 16.67 MHz
   1117		 */
   1118		caps->rp0_freq *= GEN9_FREQ_SCALER;
   1119		caps->rp1_freq *= GEN9_FREQ_SCALER;
   1120		caps->min_freq *= GEN9_FREQ_SCALER;
   1121	}
   1122}
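/*
 * Example for gen6_rps_get_freq_caps() on gen9_bc/gen11+: a raw RP0
 * field of 22 (22 * 50 MHz = 1100 MHz) is multiplied by
 * GEN9_FREQ_SCALER (3) into 66 units of ~16.67 MHz, which
 * intel_gpu_freq() converts back as 66 * 50 / 3 = 1100 MHz.
 */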
   1123
   1124static void gen6_rps_init(struct intel_rps *rps)
   1125{
   1126	struct drm_i915_private *i915 = rps_to_i915(rps);
   1127	struct intel_rps_freq_caps caps;
   1128
   1129	gen6_rps_get_freq_caps(rps, &caps);
   1130	rps->rp0_freq = caps.rp0_freq;
   1131	rps->rp1_freq = caps.rp1_freq;
   1132	rps->min_freq = caps.min_freq;
   1133
   1134	/* hw_max = RP0 until we check for overclocking */
   1135	rps->max_freq = rps->rp0_freq;
   1136
   1137	rps->efficient_freq = rps->rp1_freq;
   1138	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
   1139	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
   1140		u32 ddcc_status = 0;
   1141		u32 mult = 1;
   1142
   1143		if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
   1144			mult = GEN9_FREQ_SCALER;
   1145		if (snb_pcode_read(i915, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
   1146				   &ddcc_status, NULL) == 0)
   1147			rps->efficient_freq =
   1148				clamp_t(u32,
   1149					((ddcc_status >> 8) & 0xff) * mult,
   1150					rps->min_freq,
   1151					rps->max_freq);
   1152	}
   1153}
   1154
   1155static bool rps_reset(struct intel_rps *rps)
   1156{
   1157	struct drm_i915_private *i915 = rps_to_i915(rps);
   1158
   1159	/* force a reset */
   1160	rps->power.mode = -1;
   1161	rps->last_freq = -1;
   1162
   1163	if (rps_set(rps, rps->min_freq, true)) {
   1164		drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
   1165		return false;
   1166	}
   1167
   1168	rps->cur_freq = rps->min_freq;
   1169	return true;
   1170}
   1171
   1172/* See the Gen9_GT_PM_Programming_Guide doc for the below */
   1173static bool gen9_rps_enable(struct intel_rps *rps)
   1174{
   1175	struct intel_gt *gt = rps_to_gt(rps);
   1176	struct intel_uncore *uncore = gt->uncore;
   1177
   1178	/* Program defaults and thresholds for RPS */
   1179	if (GRAPHICS_VER(gt->i915) == 9)
   1180		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
   1181				      GEN9_FREQUENCY(rps->rp1_freq));
   1182
   1183	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
   1184
   1185	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
   1186
   1187	return rps_reset(rps);
   1188}
   1189
   1190static bool gen8_rps_enable(struct intel_rps *rps)
   1191{
   1192	struct intel_uncore *uncore = rps_to_uncore(rps);
   1193
   1194	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
   1195			      HSW_FREQUENCY(rps->rp1_freq));
   1196
   1197	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1198
   1199	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
   1200
   1201	return rps_reset(rps);
   1202}
   1203
   1204static bool gen6_rps_enable(struct intel_rps *rps)
   1205{
   1206	struct intel_uncore *uncore = rps_to_uncore(rps);
   1207
   1208	/* Power down if completely idle for over 50ms */
   1209	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
   1210	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1211
   1212	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
   1213			  GEN6_PM_RP_DOWN_THRESHOLD |
   1214			  GEN6_PM_RP_DOWN_TIMEOUT);
   1215
   1216	return rps_reset(rps);
   1217}
   1218
   1219static int chv_rps_max_freq(struct intel_rps *rps)
   1220{
   1221	struct drm_i915_private *i915 = rps_to_i915(rps);
   1222	struct intel_gt *gt = rps_to_gt(rps);
   1223	u32 val;
   1224
   1225	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
   1226
   1227	switch (gt->info.sseu.eu_total) {
   1228	case 8:
   1229		/* (2 * 4) config */
   1230		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
   1231		break;
   1232	case 12:
   1233		/* (2 * 6) config */
   1234		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
   1235		break;
   1236	case 16:
   1237		/* (2 * 8) config */
   1238	default:
   1239		/* Setting (2 * 8) Min RP0 for any other combination */
   1240		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
   1241		break;
   1242	}
   1243
   1244	return val & FB_GFX_FREQ_FUSE_MASK;
   1245}
   1246
   1247static int chv_rps_rpe_freq(struct intel_rps *rps)
   1248{
   1249	struct drm_i915_private *i915 = rps_to_i915(rps);
   1250	u32 val;
   1251
   1252	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
   1253	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
   1254
   1255	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
   1256}
   1257
   1258static int chv_rps_guar_freq(struct intel_rps *rps)
   1259{
   1260	struct drm_i915_private *i915 = rps_to_i915(rps);
   1261	u32 val;
   1262
   1263	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
   1264
   1265	return val & FB_GFX_FREQ_FUSE_MASK;
   1266}
   1267
   1268static u32 chv_rps_min_freq(struct intel_rps *rps)
   1269{
   1270	struct drm_i915_private *i915 = rps_to_i915(rps);
   1271	u32 val;
   1272
   1273	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
   1274	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
   1275
   1276	return val & FB_GFX_FREQ_FUSE_MASK;
   1277}
   1278
   1279static bool chv_rps_enable(struct intel_rps *rps)
   1280{
   1281	struct intel_uncore *uncore = rps_to_uncore(rps);
   1282	struct drm_i915_private *i915 = rps_to_i915(rps);
   1283	u32 val;
   1284
   1285	/* 1: Program defaults and thresholds for RPS*/
   1286	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
   1287	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
   1288	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
   1289	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
   1290	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
   1291
   1292	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1293
   1294	/* 2: Enable RPS */
   1295	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
   1296			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
   1297			      GEN6_RP_MEDIA_IS_GFX |
   1298			      GEN6_RP_ENABLE |
   1299			      GEN6_RP_UP_BUSY_AVG |
   1300			      GEN6_RP_DOWN_IDLE_AVG);
   1301
   1302	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
   1303			  GEN6_PM_RP_DOWN_THRESHOLD |
   1304			  GEN6_PM_RP_DOWN_TIMEOUT);
   1305
   1306	/* Setting Fixed Bias */
   1307	vlv_punit_get(i915);
   1308
   1309	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
   1310	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
   1311
   1312	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1313
   1314	vlv_punit_put(i915);
   1315
   1316	/* RPS code assumes GPLL is used */
   1317	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
   1318		      "GPLL not enabled\n");
   1319
   1320	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
   1321		str_yes_no(val & GPLLENABLE));
   1322	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
   1323
   1324	return rps_reset(rps);
   1325}
   1326
   1327static int vlv_rps_guar_freq(struct intel_rps *rps)
   1328{
   1329	struct drm_i915_private *i915 = rps_to_i915(rps);
   1330	u32 val, rp1;
   1331
   1332	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
   1333
   1334	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
   1335	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
   1336
   1337	return rp1;
   1338}
   1339
   1340static int vlv_rps_max_freq(struct intel_rps *rps)
   1341{
   1342	struct drm_i915_private *i915 = rps_to_i915(rps);
   1343	u32 val, rp0;
   1344
   1345	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
   1346
   1347	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
   1348	/* Clamp to max */
   1349	rp0 = min_t(u32, rp0, 0xea);
   1350
   1351	return rp0;
   1352}
   1353
   1354static int vlv_rps_rpe_freq(struct intel_rps *rps)
   1355{
   1356	struct drm_i915_private *i915 = rps_to_i915(rps);
   1357	u32 val, rpe;
   1358
   1359	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
   1360	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
   1361	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
   1362	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
   1363
   1364	return rpe;
   1365}
   1366
   1367static int vlv_rps_min_freq(struct intel_rps *rps)
   1368{
   1369	struct drm_i915_private *i915 = rps_to_i915(rps);
   1370	u32 val;
   1371
   1372	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
   1373	/*
   1374	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
   1375	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
   1376	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
   1377	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
   1378	 * to make sure it matches what Punit accepts.
   1379	 */
   1380	return max_t(u32, val, 0xc0);
   1381}
   1382
   1383static bool vlv_rps_enable(struct intel_rps *rps)
   1384{
   1385	struct intel_uncore *uncore = rps_to_uncore(rps);
   1386	struct drm_i915_private *i915 = rps_to_i915(rps);
   1387	u32 val;
   1388
   1389	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
   1390	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
   1391	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
   1392	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
   1393	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
   1394
   1395	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1396
   1397	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
   1398			      GEN6_RP_MEDIA_TURBO |
   1399			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
   1400			      GEN6_RP_MEDIA_IS_GFX |
   1401			      GEN6_RP_ENABLE |
   1402			      GEN6_RP_UP_BUSY_AVG |
   1403			      GEN6_RP_DOWN_IDLE_CONT);
   1404
   1405	/* WaGsvRC0ResidencyMethod:vlv */
   1406	rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
   1407
   1408	vlv_punit_get(i915);
   1409
   1410	/* Setting Fixed Bias */
   1411	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
   1412	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
   1413
   1414	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1415
   1416	vlv_punit_put(i915);
   1417
   1418	/* RPS code assumes GPLL is used */
   1419	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
   1420		      "GPLL not enabled\n");
   1421
   1422	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
   1423		str_yes_no(val & GPLLENABLE));
   1424	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
   1425
   1426	return rps_reset(rps);
   1427}
   1428
   1429static unsigned long __ips_gfx_val(struct intel_ips *ips)
   1430{
   1431	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
   1432	struct intel_uncore *uncore = rps_to_uncore(rps);
   1433	unsigned int t, state1, state2;
   1434	u32 pxvid, ext_v;
   1435	u64 corr, corr2;
   1436
   1437	lockdep_assert_held(&mchdev_lock);
   1438
   1439	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
   1440	pxvid = (pxvid >> 24) & 0x7f;
   1441	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
   1442
   1443	state1 = ext_v;
   1444
   1445	/* Revel in the empirically derived constants */
   1446
   1447	/* Correction factor in 1/100000 units */
   1448	t = ips_mch_val(uncore);
   1449	if (t > 80)
   1450		corr = t * 2349 + 135940;
   1451	else if (t >= 50)
   1452		corr = t * 964 + 29317;
   1453	else /* < 50 */
   1454		corr = t * 301 + 1004;
   1455
   1456	corr = div_u64(corr * 150142 * state1, 10000) - 78642;
   1457	corr2 = div_u64(corr, 100000) * ips->corr;
   1458
   1459	state2 = div_u64(corr2 * state1, 10000);
   1460	state2 /= 100; /* convert to mW */
   1461
   1462	__gen5_ips_update(ips);
   1463
   1464	return ips->gfx_power + state2;
   1465}
   1466
   1467static bool has_busy_stats(struct intel_rps *rps)
   1468{
   1469	struct intel_engine_cs *engine;
   1470	enum intel_engine_id id;
   1471
   1472	for_each_engine(engine, rps_to_gt(rps), id) {
   1473		if (!intel_engine_supports_stats(engine))
   1474			return false;
   1475	}
   1476
   1477	return true;
   1478}
   1479
   1480void intel_rps_enable(struct intel_rps *rps)
   1481{
   1482	struct drm_i915_private *i915 = rps_to_i915(rps);
   1483	struct intel_uncore *uncore = rps_to_uncore(rps);
   1484	bool enabled = false;
   1485
   1486	if (!HAS_RPS(i915))
   1487		return;
   1488
   1489	if (rps_uses_slpc(rps))
   1490		return;
   1491
   1492	intel_gt_check_clock_frequency(rps_to_gt(rps));
   1493
   1494	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
   1495	if (rps->max_freq <= rps->min_freq)
   1496		/* leave disabled, no room for dynamic reclocking */;
   1497	else if (IS_CHERRYVIEW(i915))
   1498		enabled = chv_rps_enable(rps);
   1499	else if (IS_VALLEYVIEW(i915))
   1500		enabled = vlv_rps_enable(rps);
   1501	else if (GRAPHICS_VER(i915) >= 9)
   1502		enabled = gen9_rps_enable(rps);
   1503	else if (GRAPHICS_VER(i915) >= 8)
   1504		enabled = gen8_rps_enable(rps);
   1505	else if (GRAPHICS_VER(i915) >= 6)
   1506		enabled = gen6_rps_enable(rps);
   1507	else if (IS_IRONLAKE_M(i915))
   1508		enabled = gen5_rps_enable(rps);
   1509	else
   1510		MISSING_CASE(GRAPHICS_VER(i915));
   1511	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
   1512	if (!enabled)
   1513		return;
   1514
   1515	GT_TRACE(rps_to_gt(rps),
   1516		 "min:%x, max:%x, freq:[%d, %d]\n",
   1517		 rps->min_freq, rps->max_freq,
   1518		 intel_gpu_freq(rps, rps->min_freq),
   1519		 intel_gpu_freq(rps, rps->max_freq));
   1520
   1521	GEM_BUG_ON(rps->max_freq < rps->min_freq);
   1522	GEM_BUG_ON(rps->idle_freq > rps->max_freq);
   1523
   1524	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
   1525	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
   1526
   1527	if (has_busy_stats(rps))
   1528		intel_rps_set_timer(rps);
   1529	else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
   1530		intel_rps_set_interrupts(rps);
   1531	else
   1532		/* Ironlake currently uses intel_ips.ko */ {}
   1533
   1534	intel_rps_set_enabled(rps);
   1535}
   1536
   1537static void gen6_rps_disable(struct intel_rps *rps)
   1538{
   1539	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
   1540}
   1541
   1542void intel_rps_disable(struct intel_rps *rps)
   1543{
   1544	struct drm_i915_private *i915 = rps_to_i915(rps);
   1545
   1546	intel_rps_clear_enabled(rps);
   1547	intel_rps_clear_interrupts(rps);
   1548	intel_rps_clear_timer(rps);
   1549
   1550	if (GRAPHICS_VER(i915) >= 6)
   1551		gen6_rps_disable(rps);
   1552	else if (IS_IRONLAKE_M(i915))
   1553		gen5_rps_disable(rps);
   1554}
   1555
   1556static int byt_gpu_freq(struct intel_rps *rps, int val)
   1557{
   1558	/*
   1559	 * N = val - 0xb7
   1560	 * Slow = Fast = GPLL ref * N
   1561	 */
   1562	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
   1563}
   1564
   1565static int byt_freq_opcode(struct intel_rps *rps, int val)
   1566{
   1567	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
   1568}
   1569
   1570static int chv_gpu_freq(struct intel_rps *rps, int val)
   1571{
   1572	/*
   1573	 * N = val / 2
   1574	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
   1575	 */
   1576	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
   1577}
   1578
   1579static int chv_freq_opcode(struct intel_rps *rps, int val)
   1580{
   1581	/* CHV needs even values */
   1582	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
   1583}
   1584
   1585int intel_gpu_freq(struct intel_rps *rps, int val)
   1586{
   1587	struct drm_i915_private *i915 = rps_to_i915(rps);
   1588
   1589	if (GRAPHICS_VER(i915) >= 9)
   1590		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
   1591					 GEN9_FREQ_SCALER);
   1592	else if (IS_CHERRYVIEW(i915))
   1593		return chv_gpu_freq(rps, val);
   1594	else if (IS_VALLEYVIEW(i915))
   1595		return byt_gpu_freq(rps, val);
   1596	else if (GRAPHICS_VER(i915) >= 6)
   1597		return val * GT_FREQUENCY_MULTIPLIER;
   1598	else
   1599		return val;
   1600}
   1601
   1602int intel_freq_opcode(struct intel_rps *rps, int val)
   1603{
   1604	struct drm_i915_private *i915 = rps_to_i915(rps);
   1605
   1606	if (GRAPHICS_VER(i915) >= 9)
   1607		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
   1608					 GT_FREQUENCY_MULTIPLIER);
   1609	else if (IS_CHERRYVIEW(i915))
   1610		return chv_freq_opcode(rps, val);
   1611	else if (IS_VALLEYVIEW(i915))
   1612		return byt_freq_opcode(rps, val);
   1613	else if (GRAPHICS_VER(i915) >= 6)
   1614		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
   1615	else
   1616		return val;
   1617}
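/*
 * Round-trip example for intel_gpu_freq()/intel_freq_opcode(): on
 * gen6-gen8 the opcode is a plain 50 MHz multiplier, so opcode 18 is
 * 900 MHz; on gen9+ the same 900 MHz corresponds to opcode
 * 900 * 3 / 50 = 54, and intel_gpu_freq(rps, 54) = 54 * 50 / 3 = 900.
 */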
   1618
   1619static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
   1620{
   1621	struct drm_i915_private *i915 = rps_to_i915(rps);
   1622
   1623	rps->gpll_ref_freq =
   1624		vlv_get_cck_clock(i915, "GPLL ref",
   1625				  CCK_GPLL_CLOCK_CONTROL,
   1626				  i915->czclk_freq);
   1627
   1628	drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
   1629		rps->gpll_ref_freq);
   1630}
   1631
   1632static void vlv_rps_init(struct intel_rps *rps)
   1633{
   1634	struct drm_i915_private *i915 = rps_to_i915(rps);
   1635	u32 val;
   1636
   1637	vlv_iosf_sb_get(i915,
   1638			BIT(VLV_IOSF_SB_PUNIT) |
   1639			BIT(VLV_IOSF_SB_NC) |
   1640			BIT(VLV_IOSF_SB_CCK));
   1641
   1642	vlv_init_gpll_ref_freq(rps);
   1643
   1644	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1645	switch ((val >> 6) & 3) {
   1646	case 0:
   1647	case 1:
   1648		i915->mem_freq = 800;
   1649		break;
   1650	case 2:
   1651		i915->mem_freq = 1066;
   1652		break;
   1653	case 3:
   1654		i915->mem_freq = 1333;
   1655		break;
   1656	}
   1657	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
   1658
   1659	rps->max_freq = vlv_rps_max_freq(rps);
   1660	rps->rp0_freq = rps->max_freq;
   1661	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
   1662		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
   1663
   1664	rps->efficient_freq = vlv_rps_rpe_freq(rps);
   1665	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
   1666		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
   1667
   1668	rps->rp1_freq = vlv_rps_guar_freq(rps);
   1669	drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
   1670		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
   1671
   1672	rps->min_freq = vlv_rps_min_freq(rps);
   1673	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
   1674		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
   1675
   1676	vlv_iosf_sb_put(i915,
   1677			BIT(VLV_IOSF_SB_PUNIT) |
   1678			BIT(VLV_IOSF_SB_NC) |
   1679			BIT(VLV_IOSF_SB_CCK));
   1680}
   1681
   1682static void chv_rps_init(struct intel_rps *rps)
   1683{
   1684	struct drm_i915_private *i915 = rps_to_i915(rps);
   1685	u32 val;
   1686
   1687	vlv_iosf_sb_get(i915,
   1688			BIT(VLV_IOSF_SB_PUNIT) |
   1689			BIT(VLV_IOSF_SB_NC) |
   1690			BIT(VLV_IOSF_SB_CCK));
   1691
   1692	vlv_init_gpll_ref_freq(rps);
   1693
   1694	val = vlv_cck_read(i915, CCK_FUSE_REG);
   1695
   1696	switch ((val >> 2) & 0x7) {
   1697	case 3:
   1698		i915->mem_freq = 2000;
   1699		break;
   1700	default:
   1701		i915->mem_freq = 1600;
   1702		break;
   1703	}
   1704	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
   1705
   1706	rps->max_freq = chv_rps_max_freq(rps);
   1707	rps->rp0_freq = rps->max_freq;
   1708	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
   1709		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
   1710
   1711	rps->efficient_freq = chv_rps_rpe_freq(rps);
   1712	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
   1713		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
   1714
   1715	rps->rp1_freq = chv_rps_guar_freq(rps);
   1716	drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
   1717		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
   1718
   1719	rps->min_freq = chv_rps_min_freq(rps);
   1720	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
   1721		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
   1722
   1723	vlv_iosf_sb_put(i915,
   1724			BIT(VLV_IOSF_SB_PUNIT) |
   1725			BIT(VLV_IOSF_SB_NC) |
   1726			BIT(VLV_IOSF_SB_CCK));
   1727
   1728	drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
   1729				   rps->rp1_freq | rps->min_freq) & 1,
   1730		      "Odd GPU freq values\n");
   1731}
   1732
   1733static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
   1734{
   1735	ei->ktime = ktime_get_raw();
   1736	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
   1737	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
   1738}
   1739
   1740static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
   1741{
   1742	struct intel_uncore *uncore = rps_to_uncore(rps);
   1743	const struct intel_rps_ei *prev = &rps->ei;
   1744	struct intel_rps_ei now;
   1745	u32 events = 0;
   1746
   1747	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
   1748		return 0;
   1749
   1750	vlv_c0_read(uncore, &now);
   1751
   1752	if (prev->ktime) {
   1753		u64 time, c0;
   1754		u32 render, media;
   1755
   1756		time = ktime_us_delta(now.ktime, prev->ktime);
   1757
   1758		time *= rps_to_i915(rps)->czclk_freq;
   1759
   1760		/* Workload can be split between render + media,
   1761		 * e.g. SwapBuffers being blitted in X after being rendered in
   1762		 * mesa. To account for this we need to combine both engines
   1763		 * into our activity counter.
   1764		 */
   1765		render = now.render_c0 - prev->render_c0;
   1766		media = now.media_c0 - prev->media_c0;
   1767		c0 = max(render, media);
   1768		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
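        		/*
        		 * Roughly, the comparison below checks the busy fraction of
        		 * the evaluation interval against the thresholds, which are
        		 * expressed in percent:
        		 *
        		 *   busy% ~ c0 * scale / (elapsed_us * czclk_kHz)
        		 *   events = UP_THRESHOLD   if busy% > power.up_threshold
        		 *            DOWN_THRESHOLD if busy% < power.down_threshold
        		 *
        		 * The "* 1000 * 100 << 8" factor brings the counter delta
        		 * into the same units as elapsed_us * czclk_kHz and expresses
        		 * the ratio as a percentage; the << 8 presumably reflects the
        		 * granularity of the C0 residency counters.
        		 */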
   1769
   1770		if (c0 > time * rps->power.up_threshold)
   1771			events = GEN6_PM_RP_UP_THRESHOLD;
   1772		else if (c0 < time * rps->power.down_threshold)
   1773			events = GEN6_PM_RP_DOWN_THRESHOLD;
   1774	}
   1775
   1776	rps->ei = now;
   1777	return events;
   1778}
   1779
   1780static void rps_work(struct work_struct *work)
   1781{
   1782	struct intel_rps *rps = container_of(work, typeof(*rps), work);
   1783	struct intel_gt *gt = rps_to_gt(rps);
   1784	struct drm_i915_private *i915 = rps_to_i915(rps);
   1785	bool client_boost = false;
   1786	int new_freq, adj, min, max;
   1787	u32 pm_iir = 0;
   1788
   1789	spin_lock_irq(&gt->irq_lock);
   1790	pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
   1791	client_boost = atomic_read(&rps->num_waiters);
   1792	spin_unlock_irq(&gt->irq_lock);
   1793
   1794	/* Make sure we didn't queue anything we're not going to process. */
   1795	if (!pm_iir && !client_boost)
   1796		goto out;
   1797
   1798	mutex_lock(&rps->lock);
   1799	if (!intel_rps_is_active(rps)) {
   1800		mutex_unlock(&rps->lock);
   1801		return;
   1802	}
   1803
   1804	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
   1805
   1806	adj = rps->last_adj;
   1807	new_freq = rps->cur_freq;
   1808	min = rps->min_freq_softlimit;
   1809	max = rps->max_freq_softlimit;
   1810	if (client_boost)
   1811		max = rps->max_freq;
   1812
   1813	GT_TRACE(gt,
   1814		 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
   1815		 pm_iir, str_yes_no(client_boost),
   1816		 adj, new_freq, min, max);
   1817
   1818	if (client_boost && new_freq < rps->boost_freq) {
   1819		new_freq = rps->boost_freq;
   1820		adj = 0;
   1821	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
   1822		if (adj > 0)
   1823			adj *= 2;
    1824		else /* CHV needs even encoded values */
   1825			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
   1826
   1827		if (new_freq >= rps->max_freq_softlimit)
   1828			adj = 0;
   1829	} else if (client_boost) {
   1830		adj = 0;
   1831	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
   1832		if (rps->cur_freq > rps->efficient_freq)
   1833			new_freq = rps->efficient_freq;
   1834		else if (rps->cur_freq > rps->min_freq_softlimit)
   1835			new_freq = rps->min_freq_softlimit;
   1836		adj = 0;
   1837	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
   1838		if (adj < 0)
   1839			adj *= 2;
    1840		else /* CHV needs even encoded values */
   1841			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
   1842
   1843		if (new_freq <= rps->min_freq_softlimit)
   1844			adj = 0;
   1845	} else { /* unknown event */
   1846		adj = 0;
   1847	}
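        
        	/*
        	 * Net effect: the step size doubles on each consecutive event in
        	 * the same direction and resets on a boost, a direction change or
        	 * when a soft limit is reached.  A sustained ramp, for example,
        	 * requests cur+1, +2, +4, +8, ... (or +2, +4, +8, ... on CHV,
        	 * which needs even encodings) until clamped below.
        	 */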
   1848
   1849	/*
   1850	 * sysfs frequency limits may have snuck in while
   1851	 * servicing the interrupt
   1852	 */
   1853	new_freq += adj;
   1854	new_freq = clamp_t(int, new_freq, min, max);
   1855
   1856	if (intel_rps_set(rps, new_freq)) {
   1857		drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
   1858		adj = 0;
   1859	}
   1860	rps->last_adj = adj;
   1861
   1862	mutex_unlock(&rps->lock);
   1863
   1864out:
   1865	spin_lock_irq(&gt->irq_lock);
   1866	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
   1867	spin_unlock_irq(&gt->irq_lock);
   1868}
   1869
   1870void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
   1871{
   1872	struct intel_gt *gt = rps_to_gt(rps);
   1873	const u32 events = rps->pm_events & pm_iir;
   1874
   1875	lockdep_assert_held(&gt->irq_lock);
   1876
   1877	if (unlikely(!events))
   1878		return;
   1879
   1880	GT_TRACE(gt, "irq events:%x\n", events);
   1881
   1882	gen6_gt_pm_mask_irq(gt, events);
   1883
   1884	rps->pm_iir |= events;
   1885	schedule_work(&rps->work);
   1886}
   1887
   1888void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
   1889{
   1890	struct intel_gt *gt = rps_to_gt(rps);
   1891	u32 events;
   1892
   1893	events = pm_iir & rps->pm_events;
   1894	if (events) {
   1895		spin_lock(&gt->irq_lock);
   1896
   1897		GT_TRACE(gt, "irq events:%x\n", events);
   1898
   1899		gen6_gt_pm_mask_irq(gt, events);
   1900		rps->pm_iir |= events;
   1901
   1902		schedule_work(&rps->work);
   1903		spin_unlock(&gt->irq_lock);
   1904	}
   1905
   1906	if (GRAPHICS_VER(gt->i915) >= 8)
   1907		return;
   1908
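        	/*
        	 * Prior to gen8 the video enhancement engine's interrupts arrive
        	 * via the PM interrupt register; the >> 10 below shifts the VECS
        	 * user-interrupt bit down to bit 0, which is presumably where
        	 * intel_engine_cs_irq() expects to find it.
        	 */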
   1909	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
   1910		intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
   1911
   1912	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
   1913		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
   1914}
   1915
   1916void gen5_rps_irq_handler(struct intel_rps *rps)
   1917{
   1918	struct intel_uncore *uncore = rps_to_uncore(rps);
   1919	u32 busy_up, busy_down, max_avg, min_avg;
   1920	u8 new_freq;
   1921
   1922	spin_lock(&mchdev_lock);
   1923
   1924	intel_uncore_write16(uncore,
   1925			     MEMINTRSTS,
   1926			     intel_uncore_read(uncore, MEMINTRSTS));
   1927
   1928	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
   1929	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
   1930	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
   1931	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
   1932	min_avg = intel_uncore_read(uncore, RCBMINAVG);
   1933
   1934	/* Handle RCS change request from hw */
   1935	new_freq = rps->cur_freq;
   1936	if (busy_up > max_avg)
   1937		new_freq++;
   1938	else if (busy_down < min_avg)
   1939		new_freq--;
   1940	new_freq = clamp(new_freq,
   1941			 rps->min_freq_softlimit,
   1942			 rps->max_freq_softlimit);
   1943
   1944	if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
   1945		rps->cur_freq = new_freq;
   1946
   1947	spin_unlock(&mchdev_lock);
   1948}
   1949
   1950void intel_rps_init_early(struct intel_rps *rps)
   1951{
   1952	mutex_init(&rps->lock);
   1953	mutex_init(&rps->power.mutex);
   1954
   1955	INIT_WORK(&rps->work, rps_work);
   1956	timer_setup(&rps->timer, rps_timer, 0);
   1957
   1958	atomic_set(&rps->num_waiters, 0);
   1959}
   1960
   1961void intel_rps_init(struct intel_rps *rps)
   1962{
   1963	struct drm_i915_private *i915 = rps_to_i915(rps);
   1964
   1965	if (rps_uses_slpc(rps))
   1966		return;
   1967
   1968	if (IS_CHERRYVIEW(i915))
   1969		chv_rps_init(rps);
   1970	else if (IS_VALLEYVIEW(i915))
   1971		vlv_rps_init(rps);
   1972	else if (GRAPHICS_VER(i915) >= 6)
   1973		gen6_rps_init(rps);
   1974	else if (IS_IRONLAKE_M(i915))
   1975		gen5_rps_init(rps);
   1976
   1977	/* Derive initial user preferences/limits from the hardware limits */
   1978	rps->max_freq_softlimit = rps->max_freq;
   1979	rps->min_freq_softlimit = rps->min_freq;
   1980
   1981	/* After setting max-softlimit, find the overclock max freq */
   1982	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
   1983		u32 params = 0;
   1984
   1985		snb_pcode_read(i915, GEN6_READ_OC_PARAMS, &params, NULL);
   1986		if (params & BIT(31)) { /* OC supported */
   1987			drm_dbg(&i915->drm,
   1988				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
   1989				(rps->max_freq & 0xff) * 50,
   1990				(params & 0xff) * 50);
   1991			rps->max_freq = params & 0xff;
   1992		}
   1993	}
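        	/*
        	 * In the overclocking check above, frequencies are encoded in
        	 * 50 MHz units: for example, a (hypothetical) pcode reply of
        	 * params == 0x8000002a would advertise overclocking support with
        	 * a ceiling of 0x2a * 50 = 2100 MHz, and rps->max_freq would be
        	 * raised to 0x2a.
        	 */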
   1994
   1995	/* Finally allow us to boost to max by default */
   1996	rps->boost_freq = rps->max_freq;
   1997	rps->idle_freq = rps->min_freq;
   1998
   1999	/* Start in the middle, from here we will autotune based on workload */
   2000	rps->cur_freq = rps->efficient_freq;
   2001
   2002	rps->pm_intrmsk_mbz = 0;
   2003
   2004	/*
    2005	 * SNB, IVB and HSW can hard hang on a looping batchbuffer if
    2006	 * GEN6_PM_UP_EI_EXPIRED is masked; VLV and CHV may do so as well.
   2007	 *
   2008	 * TODO: verify if this can be reproduced on VLV,CHV.
   2009	 */
   2010	if (GRAPHICS_VER(i915) <= 7)
   2011		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
   2012
   2013	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
   2014		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
   2015
   2016	/* GuC needs ARAT expired interrupt unmasked */
   2017	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
   2018		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
   2019}
   2020
   2021void intel_rps_sanitize(struct intel_rps *rps)
   2022{
   2023	if (rps_uses_slpc(rps))
   2024		return;
   2025
   2026	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
   2027		rps_disable_interrupts(rps);
   2028}
   2029
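        /*
         * Decode the current actual GPU frequency (CAGF) field from a raw
         * frequency/status readout; the field's position varies per platform,
         * hence the per-generation cases below.
         */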
   2030u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
   2031{
   2032	struct drm_i915_private *i915 = rps_to_i915(rps);
   2033	u32 cagf;
   2034
   2035	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
   2036		cagf = (rpstat >> 8) & 0xff;
   2037	else if (GRAPHICS_VER(i915) >= 9)
   2038		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
   2039	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
   2040		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
   2041	else if (GRAPHICS_VER(i915) >= 6)
   2042		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
   2043	else
   2044		cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
   2045					MEMSTAT_PSTATE_SHIFT);
   2046
   2047	return cagf;
   2048}
   2049
   2050static u32 read_cagf(struct intel_rps *rps)
   2051{
   2052	struct drm_i915_private *i915 = rps_to_i915(rps);
   2053	struct intel_uncore *uncore = rps_to_uncore(rps);
   2054	u32 freq;
   2055
   2056	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
   2057		vlv_punit_get(i915);
   2058		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   2059		vlv_punit_put(i915);
   2060	} else if (GRAPHICS_VER(i915) >= 6) {
   2061		freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
   2062	} else {
   2063		freq = intel_uncore_read(uncore, MEMSTAT_ILK);
   2064	}
   2065
   2066	return intel_rps_get_cagf(rps, freq);
   2067}
   2068
   2069u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
   2070{
   2071	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
   2072	intel_wakeref_t wakeref;
   2073	u32 freq = 0;
   2074
   2075	with_intel_runtime_pm_if_in_use(rpm, wakeref)
   2076		freq = intel_gpu_freq(rps, read_cagf(rps));
   2077
   2078	return freq;
   2079}
   2080
   2081u32 intel_rps_read_punit_req(struct intel_rps *rps)
   2082{
   2083	struct intel_uncore *uncore = rps_to_uncore(rps);
   2084	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
   2085	intel_wakeref_t wakeref;
   2086	u32 freq = 0;
   2087
   2088	with_intel_runtime_pm_if_in_use(rpm, wakeref)
   2089		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
   2090
   2091	return freq;
   2092}
   2093
   2094static u32 intel_rps_get_req(u32 pureq)
   2095{
   2096	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
   2097
   2098	return req;
   2099}
   2100
   2101u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
   2102{
   2103	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
   2104
   2105	return intel_gpu_freq(rps, freq);
   2106}
   2107
   2108u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
   2109{
   2110	if (rps_uses_slpc(rps))
   2111		return intel_rps_read_punit_req_frequency(rps);
   2112	else
   2113		return intel_gpu_freq(rps, rps->cur_freq);
   2114}
   2115
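        /*
         * The frequency getters below report the GuC SLPC view of the limits
         * when SLPC is managing frequencies, and otherwise decode the legacy
         * RPS bookkeeping via intel_gpu_freq(); in both cases the result is
         * a frequency in MHz.
         */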
   2116u32 intel_rps_get_max_frequency(struct intel_rps *rps)
   2117{
   2118	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2119
   2120	if (rps_uses_slpc(rps))
   2121		return slpc->max_freq_softlimit;
   2122	else
   2123		return intel_gpu_freq(rps, rps->max_freq_softlimit);
   2124}
   2125
   2126u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
   2127{
   2128	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2129
   2130	if (rps_uses_slpc(rps))
   2131		return slpc->rp0_freq;
   2132	else
   2133		return intel_gpu_freq(rps, rps->rp0_freq);
   2134}
   2135
   2136u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
   2137{
   2138	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2139
   2140	if (rps_uses_slpc(rps))
   2141		return slpc->rp1_freq;
   2142	else
   2143		return intel_gpu_freq(rps, rps->rp1_freq);
   2144}
   2145
   2146u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
   2147{
   2148	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2149
   2150	if (rps_uses_slpc(rps))
   2151		return slpc->min_freq;
   2152	else
   2153		return intel_gpu_freq(rps, rps->min_freq);
   2154}
   2155
   2156static int set_max_freq(struct intel_rps *rps, u32 val)
   2157{
   2158	struct drm_i915_private *i915 = rps_to_i915(rps);
   2159	int ret = 0;
   2160
   2161	mutex_lock(&rps->lock);
   2162
   2163	val = intel_freq_opcode(rps, val);
   2164	if (val < rps->min_freq ||
   2165	    val > rps->max_freq ||
   2166	    val < rps->min_freq_softlimit) {
   2167		ret = -EINVAL;
   2168		goto unlock;
   2169	}
   2170
   2171	if (val > rps->rp0_freq)
   2172		drm_dbg(&i915->drm, "User requested overclocking to %d\n",
   2173			intel_gpu_freq(rps, val));
   2174
   2175	rps->max_freq_softlimit = val;
   2176
   2177	val = clamp_t(int, rps->cur_freq,
   2178		      rps->min_freq_softlimit,
   2179		      rps->max_freq_softlimit);
   2180
   2181	/*
   2182	 * We still need *_set_rps to process the new max_delay and
   2183	 * update the interrupt limits and PMINTRMSK even though
   2184	 * frequency request may be unchanged.
   2185	 */
   2186	intel_rps_set(rps, val);
   2187
   2188unlock:
   2189	mutex_unlock(&rps->lock);
   2190
   2191	return ret;
   2192}
   2193
   2194int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
   2195{
   2196	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2197
   2198	if (rps_uses_slpc(rps))
   2199		return intel_guc_slpc_set_max_freq(slpc, val);
   2200	else
   2201		return set_max_freq(rps, val);
   2202}
   2203
   2204u32 intel_rps_get_min_frequency(struct intel_rps *rps)
   2205{
   2206	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2207
   2208	if (rps_uses_slpc(rps))
   2209		return slpc->min_freq_softlimit;
   2210	else
   2211		return intel_gpu_freq(rps, rps->min_freq_softlimit);
   2212}
   2213
   2214static int set_min_freq(struct intel_rps *rps, u32 val)
   2215{
   2216	int ret = 0;
   2217
   2218	mutex_lock(&rps->lock);
   2219
   2220	val = intel_freq_opcode(rps, val);
   2221	if (val < rps->min_freq ||
   2222	    val > rps->max_freq ||
   2223	    val > rps->max_freq_softlimit) {
   2224		ret = -EINVAL;
   2225		goto unlock;
   2226	}
   2227
   2228	rps->min_freq_softlimit = val;
   2229
   2230	val = clamp_t(int, rps->cur_freq,
   2231		      rps->min_freq_softlimit,
   2232		      rps->max_freq_softlimit);
   2233
   2234	/*
   2235	 * We still need *_set_rps to process the new min_delay and
   2236	 * update the interrupt limits and PMINTRMSK even though
   2237	 * frequency request may be unchanged.
   2238	 */
   2239	intel_rps_set(rps, val);
   2240
   2241unlock:
   2242	mutex_unlock(&rps->lock);
   2243
   2244	return ret;
   2245}
   2246
   2247int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
   2248{
   2249	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
   2250
   2251	if (rps_uses_slpc(rps))
   2252		return intel_guc_slpc_set_min_freq(slpc, val);
   2253	else
   2254		return set_min_freq(rps, val);
   2255}
   2256
   2257static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
   2258{
   2259	struct intel_uncore *uncore = rps_to_uncore(rps);
   2260	u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;
   2261
   2262	/* Allow punit to process software requests */
   2263	intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
   2264}
   2265
   2266void intel_rps_raise_unslice(struct intel_rps *rps)
   2267{
   2268	struct intel_uncore *uncore = rps_to_uncore(rps);
   2269
   2270	mutex_lock(&rps->lock);
   2271
   2272	if (rps_uses_slpc(rps)) {
   2273		/* RP limits have not been initialized yet for SLPC path */
   2274		struct intel_rps_freq_caps caps;
   2275
   2276		gen6_rps_get_freq_caps(rps, &caps);
   2277
   2278		intel_rps_set_manual(rps, true);
   2279		intel_uncore_write(uncore, GEN6_RPNSWREQ,
   2280				   ((caps.rp0_freq <<
   2281				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
   2282				   GEN9_IGNORE_SLICE_RATIO));
   2283		intel_rps_set_manual(rps, false);
   2284	} else {
   2285		intel_rps_set(rps, rps->rp0_freq);
   2286	}
   2287
   2288	mutex_unlock(&rps->lock);
   2289}
   2290
   2291void intel_rps_lower_unslice(struct intel_rps *rps)
   2292{
   2293	struct intel_uncore *uncore = rps_to_uncore(rps);
   2294
   2295	mutex_lock(&rps->lock);
   2296
   2297	if (rps_uses_slpc(rps)) {
   2298		/* RP limits have not been initialized yet for SLPC path */
   2299		struct intel_rps_freq_caps caps;
   2300
   2301		gen6_rps_get_freq_caps(rps, &caps);
   2302
   2303		intel_rps_set_manual(rps, true);
   2304		intel_uncore_write(uncore, GEN6_RPNSWREQ,
   2305				   ((caps.min_freq <<
   2306				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
   2307				   GEN9_IGNORE_SLICE_RATIO));
   2308		intel_rps_set_manual(rps, false);
   2309	} else {
   2310		intel_rps_set(rps, rps->min_freq);
   2311	}
   2312
   2313	mutex_unlock(&rps->lock);
   2314}
   2315
   2316static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
   2317{
   2318	struct intel_gt *gt = rps_to_gt(rps);
   2319	intel_wakeref_t wakeref;
   2320	u32 val;
   2321
   2322	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
   2323		val = intel_uncore_read(gt->uncore, reg32);
   2324
   2325	return val;
   2326}
   2327
   2328bool rps_read_mask_mmio(struct intel_rps *rps,
   2329			i915_reg_t reg32, u32 mask)
   2330{
   2331	return rps_read_mmio(rps, reg32) & mask;
   2332}
   2333
   2334/* External interface for intel_ips.ko */
   2335
   2336static struct drm_i915_private __rcu *ips_mchdev;
   2337
   2338/**
    2339	 * ips_ping_for_i915_load - notify intel_ips that i915 has loaded
        	 *
        	 * Tells the intel_ips driver that the i915 driver is now loaded, if
   2340 * IPS got loaded first.
   2341 *
   2342 * This awkward dance is so that neither module has to depend on the
   2343 * other in order for IPS to do the appropriate communication of
   2344 * GPU turbo limits to i915.
   2345 */
   2346static void
   2347ips_ping_for_i915_load(void)
   2348{
   2349	void (*link)(void);
   2350
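        	/*
        	 * symbol_get() resolves ips_link_to_i915_driver only if intel_ips
        	 * is already loaded, pinning that module while we call into it;
        	 * symbol_put() drops the reference again.  If intel_ips is absent,
        	 * link is NULL and this is a no-op.
        	 */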
   2351	link = symbol_get(ips_link_to_i915_driver);
   2352	if (link) {
   2353		link();
   2354		symbol_put(ips_link_to_i915_driver);
   2355	}
   2356}
   2357
   2358void intel_rps_driver_register(struct intel_rps *rps)
   2359{
   2360	struct intel_gt *gt = rps_to_gt(rps);
   2361
   2362	/*
   2363	 * We only register the i915 ips part with intel-ips once everything is
   2364	 * set up, to avoid intel-ips sneaking in and reading bogus values.
   2365	 */
   2366	if (GRAPHICS_VER(gt->i915) == 5) {
   2367		GEM_BUG_ON(ips_mchdev);
   2368		rcu_assign_pointer(ips_mchdev, gt->i915);
   2369		ips_ping_for_i915_load();
   2370	}
   2371}
   2372
   2373void intel_rps_driver_unregister(struct intel_rps *rps)
   2374{
   2375	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
   2376		rcu_assign_pointer(ips_mchdev, NULL);
   2377}
   2378
   2379static struct drm_i915_private *mchdev_get(void)
   2380{
   2381	struct drm_i915_private *i915;
   2382
   2383	rcu_read_lock();
   2384	i915 = rcu_dereference(ips_mchdev);
   2385	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
   2386		i915 = NULL;
   2387	rcu_read_unlock();
   2388
   2389	return i915;
   2390}
   2391
   2392/**
   2393 * i915_read_mch_val - return value for IPS use
   2394 *
   2395 * Calculate and return a value for the IPS driver to use when deciding whether
   2396 * we have thermal and power headroom to increase CPU or GPU power budget.
   2397 */
   2398unsigned long i915_read_mch_val(void)
   2399{
   2400	struct drm_i915_private *i915;
   2401	unsigned long chipset_val = 0;
   2402	unsigned long graphics_val = 0;
   2403	intel_wakeref_t wakeref;
   2404
   2405	i915 = mchdev_get();
   2406	if (!i915)
   2407		return 0;
   2408
   2409	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
   2410		struct intel_ips *ips = &to_gt(i915)->rps.ips;
   2411
   2412		spin_lock_irq(&mchdev_lock);
   2413		chipset_val = __ips_chipset_val(ips);
   2414		graphics_val = __ips_gfx_val(ips);
   2415		spin_unlock_irq(&mchdev_lock);
   2416	}
   2417
   2418	drm_dev_put(&i915->drm);
   2419	return chipset_val + graphics_val;
   2420}
   2421EXPORT_SYMBOL_GPL(i915_read_mch_val);
   2422
   2423/**
   2424 * i915_gpu_raise - raise GPU frequency limit
   2425 *
   2426 * Raise the limit; IPS indicates we have thermal headroom.
   2427 */
   2428bool i915_gpu_raise(void)
   2429{
   2430	struct drm_i915_private *i915;
   2431	struct intel_rps *rps;
   2432
   2433	i915 = mchdev_get();
   2434	if (!i915)
   2435		return false;
   2436
   2437	rps = &to_gt(i915)->rps;
   2438
   2439	spin_lock_irq(&mchdev_lock);
   2440	if (rps->max_freq_softlimit < rps->max_freq)
   2441		rps->max_freq_softlimit++;
   2442	spin_unlock_irq(&mchdev_lock);
   2443
   2444	drm_dev_put(&i915->drm);
   2445	return true;
   2446}
   2447EXPORT_SYMBOL_GPL(i915_gpu_raise);
   2448
   2449/**
   2450 * i915_gpu_lower - lower GPU frequency limit
   2451 *
   2452 * IPS indicates we're close to a thermal limit, so throttle back the GPU
   2453 * frequency maximum.
   2454 */
   2455bool i915_gpu_lower(void)
   2456{
   2457	struct drm_i915_private *i915;
   2458	struct intel_rps *rps;
   2459
   2460	i915 = mchdev_get();
   2461	if (!i915)
   2462		return false;
   2463
   2464	rps = &to_gt(i915)->rps;
   2465
   2466	spin_lock_irq(&mchdev_lock);
   2467	if (rps->max_freq_softlimit > rps->min_freq)
   2468		rps->max_freq_softlimit--;
   2469	spin_unlock_irq(&mchdev_lock);
   2470
   2471	drm_dev_put(&i915->drm);
   2472	return true;
   2473}
   2474EXPORT_SYMBOL_GPL(i915_gpu_lower);
   2475
   2476/**
    2477 * i915_gpu_busy - indicate GPU busyness to IPS
   2478 *
   2479 * Tell the IPS driver whether or not the GPU is busy.
   2480 */
   2481bool i915_gpu_busy(void)
   2482{
   2483	struct drm_i915_private *i915;
   2484	bool ret;
   2485
   2486	i915 = mchdev_get();
   2487	if (!i915)
   2488		return false;
   2489
   2490	ret = to_gt(i915)->awake;
   2491
   2492	drm_dev_put(&i915->drm);
   2493	return ret;
   2494}
   2495EXPORT_SYMBOL_GPL(i915_gpu_busy);
   2496
   2497/**
   2498 * i915_gpu_turbo_disable - disable graphics turbo
   2499 *
   2500 * Disable graphics turbo by resetting the max frequency and setting the
   2501 * current frequency to the default.
   2502 */
   2503bool i915_gpu_turbo_disable(void)
   2504{
   2505	struct drm_i915_private *i915;
   2506	struct intel_rps *rps;
   2507	bool ret;
   2508
   2509	i915 = mchdev_get();
   2510	if (!i915)
   2511		return false;
   2512
   2513	rps = &to_gt(i915)->rps;
   2514
   2515	spin_lock_irq(&mchdev_lock);
   2516	rps->max_freq_softlimit = rps->min_freq;
   2517	ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
   2518	spin_unlock_irq(&mchdev_lock);
   2519
   2520	drm_dev_put(&i915->drm);
   2521	return ret;
   2522}
   2523EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
   2524
   2525#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
   2526#include "selftest_rps.c"
   2527#include "selftest_slpc.c"
   2528#endif