cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_rc6.c (23649B)


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/pm_runtime.h>
#include <linux/string_helpers.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_vgpu.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_pcode.h"
#include "intel_rc6.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V while in this stage.
 * This stage is entered automatically when the GPU is idle and RC6 support
 * is enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6, and in the
 * voltage consumed by the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require a higher latency to enter and wake up from.
 */

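/*
 * struct intel_rc6 is embedded in struct intel_gt, so the GT (and from it
 * the uncore and the i915 device) can be recovered with container_of().
 */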
static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
        return container_of(rc6, struct intel_gt, rc6);
}

static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->uncore;
}

static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->i915;
}

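/*
 * Thin wrapper around intel_uncore_write_fw(): performs the MMIO write
 * without acquiring forcewake, leaving forcewake handling to the callers
 * (e.g. intel_rc6_enable() holds FORCEWAKE_ALL around the *_rc6_enable()
 * helpers below).
 */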
static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
        intel_uncore_write_fw(uncore, reg, val);
}

static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_gt *gt = rc6_to_gt(rc6);
        struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 pg_enable;
        int i;

        /*
         * With GuCRC, these parameters are set by GuC
         */
        if (!intel_uc_uses_guc_rc(&gt->uc)) {
                /* 2b: Program RC6 thresholds.*/
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
                set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

                set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
                set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
                for_each_engine(engine, rc6_to_gt(rc6), id)
                        set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

                set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

                set(uncore, GEN6_RC_SLEEP, 0);

                set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
        }

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it under 10us for Icelake, similar to
         * Broadwell+. To be conservative, we want to factor in a context
         * switch on top (due to ksoftirqd).
         */
        set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
        set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);

        /* 3a: Enable RC6
         *
         * With GuCRC, we do not enable bit 31 of RC_CTL,
         * thus allowing GuC to control RC6 entry/exit fully instead.
         * We will not set the HW ENABLE and EI bits
         */
        if (!intel_guc_rc_enable(&gt->uc.guc))
                rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
        else
                rc6->ctl_enable =
                        GEN6_RC_CTL_HW_ENABLE |
                        GEN6_RC_CTL_RC6_ENABLE |
                        GEN6_RC_CTL_EI_MODE(1);

        /* Wa_16011777198 - Render powergating must remain disabled */
        if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
            IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
                pg_enable =
                        GEN9_MEDIA_PG_ENABLE |
                        GEN11_MEDIA_SAMPLER_PG_ENABLE;
        else
                pg_enable =
                        GEN9_RENDER_PG_ENABLE |
                        GEN9_MEDIA_PG_ENABLE |
                        GEN11_MEDIA_SAMPLER_PG_ENABLE;

        if (GRAPHICS_VER(gt->i915) >= 12) {
                for (i = 0; i < I915_MAX_VCS; i++)
                        if (HAS_ENGINE(gt, _VCS(i)))
                                pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
                                              VDN_MFX_POWERGATE_ENABLE(i));
        }

        set(uncore, GEN9_PG_ENABLE, pg_enable);
}

static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds.*/
        if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) {
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
                set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
        } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
                /*
                 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
                 * when CPG is enabled
                 */
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
        } else {
                set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
        }

        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        set(uncore, GEN6_RC_SLEEP, 0);

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it around 10us for Broadwell (and other
         * big core) and around 40us for Broxton (and other low power cores).
         * [Note that for legacy ringbuffer submission, this is less than 1us!]
         * However, the wakeup latency on Broxton is closer to 100us. To be
         * conservative, we have to factor in a context switch on top (due
         * to ksoftirqd).
         */
        set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
        set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);

        /* 3a: Enable RC6 */
        set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

        rc6->ctl_enable =
                GEN6_RC_CTL_HW_ENABLE |
                GEN6_RC_CTL_RC6_ENABLE |
                GEN6_RC_CTL_EI_MODE(1);

        /*
         * WaRsDisableCoarsePowerGating:skl,cnl
         *   - Render/Media PG need to be disabled with RC6.
         */
        if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
                set(uncore, GEN9_PG_ENABLE,
                    GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds.*/
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        set(uncore, GEN6_RC_SLEEP, 0);
        set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

        /* 3: Enable RC6 */
        rc6->ctl_enable =
            GEN6_RC_CTL_HW_ENABLE |
            GEN7_RC_CTL_TO_MODE |
            GEN6_RC_CTL_RC6_ENABLE;
}

static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 rc6vids, rc6_mask;
        int ret;

        set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
        set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GEN6_RC_SLEEP, 0);
        set(uncore, GEN6_RC1e_THRESHOLD, 1000);
        set(uncore, GEN6_RC6_THRESHOLD, 50000);
        set(uncore, GEN6_RC6p_THRESHOLD, 150000);
        set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

        /* We don't use those on Haswell */
        rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
        if (HAS_RC6p(i915))
                rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
        if (HAS_RC6pp(i915))
                rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
        rc6->ctl_enable =
            rc6_mask |
            GEN6_RC_CTL_EI_MODE(1) |
            GEN6_RC_CTL_HW_ENABLE;

        rc6vids = 0;
        ret = snb_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL);
        if (GRAPHICS_VER(i915) == 6 && ret) {
                drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
        } else if (GRAPHICS_VER(i915) == 6 &&
                   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
                drm_dbg(&i915->drm,
                        "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
                        GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
                rc6vids &= 0xffff00;
                rc6vids |= GEN6_ENCODE_RC6_VID(450);
                ret = snb_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
                if (ret)
                        drm_err(&i915->drm,
                                "Couldn't fix incorrect rc6 voltage\n");
        }
}

/* Check that the pcbr address is not empty. */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        resource_size_t pctx_paddr, paddr;
        resource_size_t pctx_size = 32 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
                drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
                paddr = i915->dsm.end + 1 - pctx_size;
                GEM_BUG_ON(paddr > U32_MAX);

                pctx_paddr = (paddr & ~4095);
                intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
        }

        return 0;
}

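/*
 * On Valleyview the power context (PCTX) must live in stolen memory: either
 * adopt the region the BIOS already programmed into VLV_PCBR, or carve out
 * our own stolen allocation and point VLV_PCBR at it.
 */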
static int vlv_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_gem_object *pctx;
        resource_size_t pctx_paddr;
        resource_size_t pctx_size = 24 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if (pcbr) {
                /* BIOS set it up already, grab the pre-alloc'd space */
                resource_size_t pcbr_offset;

                pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
                pctx = i915_gem_object_create_region_at(i915->mm.stolen_region,
                                                        pcbr_offset,
                                                        pctx_size,
                                                        0);
                if (IS_ERR(pctx))
                        return PTR_ERR(pctx);

                goto out;
        }

        drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");

        /*
         * From the Gunit register HAS:
         * The Gfx driver is expected to program this register and ensure
         * proper allocation within Gfx stolen memory.  For example, this
         * register should be programmed such that the PCBR range does not
         * overlap with other ranges, such as the frame buffer, protected
         * memory, or any other relevant ranges.
         */
        pctx = i915_gem_object_create_stolen(i915, pctx_size);
        if (IS_ERR(pctx)) {
                drm_dbg(&i915->drm,
                        "not enough stolen space for PCTX, disabling\n");
                return PTR_ERR(pctx);
        }

        GEM_BUG_ON(range_overflows_end_t(u64,
                                         i915->dsm.start,
                                         pctx->stolen->start,
                                         U32_MAX));
        pctx_paddr = i915->dsm.start + pctx->stolen->start;
        intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
        rc6->pctx = pctx;
        return 0;
}

static void chv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2a: Program RC6 thresholds.*/
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        set(uncore, GEN6_RC_SLEEP, 0);

        /* TO threshold set to 500 us (0x186 * 1.28 us) */
        set(uncore, GEN6_RC6_THRESHOLD, 0x186);

        /* Allows RC6 residency counter to work */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                               VLV_MEDIA_RC6_COUNT_EN |
                               VLV_RENDER_RC6_COUNT_EN));

        /* 3: Enable RC6 */
        rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}

static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
        set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        set(uncore, GEN6_RC6_THRESHOLD, 0x557);

        /* Allows RC6 residency counter to work */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                               VLV_MEDIA_RC0_COUNT_EN |
                               VLV_RENDER_RC0_COUNT_EN |
                               VLV_MEDIA_RC6_COUNT_EN |
                               VLV_RENDER_RC6_COUNT_EN));

        rc6->ctl_enable =
            GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}

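/*
 * On Gen9 LP the BIOS shares responsibility for RC6 setup: verify that it
 * programmed a sane configuration (RC6 context located in DRAM and within
 * the reserved stolen region, engine idle timers, pushbus, GFX pause and
 * GPM control) before we allow RC6 to be enabled at all.
 */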
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        u32 rc6_ctx_base, rc_ctl, rc_sw_target;
        bool enable_rc6 = true;

        rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
        rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
        rc_sw_target &= RC_SW_TARGET_STATE_MASK;
        rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
        drm_dbg(&i915->drm, "BIOS enabled RC states: "
                         "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
                         str_on_off(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
                         str_on_off(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
                         rc_sw_target);

        if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
                drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
                enable_rc6 = false;
        }

        /*
         * The exact context size is not known for BXT, so assume a page size
         * for this check.
         */
        rc6_ctx_base =
                intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
        if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
              rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
                drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
                enable_rc6 = false;
        }

        if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT(RENDER_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT(GEN6_BSD_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT(BLT_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT(VEBOX_RING_BASE)) & IDLE_TIME_MASK) > 1)) {
                drm_dbg(&i915->drm,
                        "Engine Idle wait time not set properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
                drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
                drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
                drm_dbg(&i915->drm, "GPM control not setup properly.\n");
                enable_rc6 = false;
        }

        return enable_rc6;
}

static bool rc6_supported(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!HAS_RC6(i915))
                return false;

        if (intel_vgpu_active(i915))
                return false;

        if (is_mock_gt(rc6_to_gt(rc6)))
                return false;

        if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
                drm_notice(&i915->drm,
                           "RC6 and powersaving disabled by BIOS\n");
                return false;
        }

        return true;
}

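/*
 * rc6->wakeref tracks a runtime-pm reference on the device: it is taken in
 * intel_rc6_init() and only dropped once RC6 is successfully enabled, so
 * the device cannot runtime-suspend while the GPU state cannot yet be saved
 * into the RC6 power context.
 */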
static void rpm_get(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(rc6->wakeref);
        pm_runtime_get_sync(rc6_to_i915(rc6)->drm.dev);
        rc6->wakeref = true;
}

static void rpm_put(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(!rc6->wakeref);
        pm_runtime_put(rc6_to_i915(rc6)->drm.dev);
        rc6->wakeref = false;
}

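/*
 * RC6 context corruption workaround: on affected platforms a zero read of
 * GEN8_RC6_CTX_INFO indicates the saved RC6 context was lost, in which case
 * runtime power management must stay disabled.
 */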
static bool pctx_corrupted(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                return false;

        if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
                return false;

        drm_notice(&i915->drm,
                   "RC6 context corruption, disabling runtime power management\n");
        return true;
}

static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_gt *gt = rc6_to_gt(rc6);

        /* Take control of RC6 back from GuC */
        intel_guc_rc_disable(&gt->uc.guc);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
        if (GRAPHICS_VER(i915) >= 9)
                set(uncore, GEN9_PG_ENABLE, 0);
        set(uncore, GEN6_RC_CONTROL, 0);
        set(uncore, GEN6_RC_STATE, 0);
        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

void intel_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        int err;

        /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
        rpm_get(rc6);

        if (!rc6_supported(rc6))
                return;

        if (IS_CHERRYVIEW(i915))
                err = chv_rc6_init(rc6);
        else if (IS_VALLEYVIEW(i915))
                err = vlv_rc6_init(rc6);
        else
                err = 0;

        /* Sanitize rc6, ensure it is disabled before we are ready. */
        __intel_rc6_disable(rc6);

        rc6->supported = err == 0;
}

void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
        memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));

        if (rc6->enabled) { /* unbalanced suspend/resume */
                rpm_get(rc6);
                rc6->enabled = false;
        }

        if (rc6->supported)
                __intel_rc6_disable(rc6);
}

void intel_rc6_enable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->supported)
                return;

        GEM_BUG_ON(rc6->enabled);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

        if (IS_CHERRYVIEW(i915))
                chv_rc6_enable(rc6);
        else if (IS_VALLEYVIEW(i915))
                vlv_rc6_enable(rc6);
        else if (GRAPHICS_VER(i915) >= 11)
                gen11_rc6_enable(rc6);
        else if (GRAPHICS_VER(i915) >= 9)
                gen9_rc6_enable(rc6);
        else if (IS_BROADWELL(i915))
                gen8_rc6_enable(rc6);
        else if (GRAPHICS_VER(i915) >= 6)
                gen6_rc6_enable(rc6);

        rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
        if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                rc6->ctl_enable = 0;

        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

        if (unlikely(pctx_corrupted(rc6)))
                return;

        /* rc6 is ready, runtime-pm is go! */
        rpm_put(rc6);
        rc6->enabled = true;
}

void intel_rc6_unpark(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->enabled)
                return;

        /* Restore HW timers for automatic RC6 entry while busy */
        set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}

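/*
 * Parking: the GT is idle, so when we control RC6 manually (rc6->manual)
 * we bypass the HW idle timers and request the deepest supported RC6 state
 * directly through GEN6_RC_STATE.
 */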
void intel_rc6_park(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        unsigned int target;

        if (!rc6->enabled)
                return;

        if (unlikely(pctx_corrupted(rc6))) {
                intel_rc6_disable(rc6);
                return;
        }

        if (!rc6->manual)
                return;

        /* Turn off the HW timers and go directly to rc6 */
        set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);

        if (HAS_RC6pp(rc6_to_i915(rc6)))
                target = 0x6; /* deepest rc6 */
        else if (HAS_RC6p(rc6_to_i915(rc6)))
                target = 0x5; /* deep rc6 */
        else
                target = 0x4; /* normal rc6 */
        set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}

void intel_rc6_disable(struct intel_rc6 *rc6)
{
        if (!rc6->enabled)
                return;

        rpm_get(rc6);
        rc6->enabled = false;

        __intel_rc6_disable(rc6);
}

void intel_rc6_fini(struct intel_rc6 *rc6)
{
        struct drm_i915_gem_object *pctx;

        intel_rc6_disable(rc6);

        pctx = fetch_and_zero(&rc6->pctx);
        if (pctx)
                i915_gem_object_put(pctx);

        if (rc6->wakeref)
                rpm_put(rc6);
}

static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
        u32 lower, upper, tmp;
        int loop = 2;

        /*
         * The registers accessed do not need forcewake. We borrow the
         * uncore lock to prevent concurrent access to the range selector.
         */
        lockdep_assert_held(&uncore->lock);

        /*
         * vlv and chv residency counters are 40 bits in width.
         * With a control bit, we can choose between upper or lower
         * 32bit window into this counter.
         *
         * Although we always use the counter in high-range mode elsewhere,
         * userspace may attempt to read the value before rc6 is initialised,
         * before we have set the default VLV_COUNTER_CONTROL value. So always
         * set the high bit to be safe.
         */
        set(uncore, VLV_COUNTER_CONTROL,
            _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
        upper = intel_uncore_read_fw(uncore, reg);
        do {
                tmp = upper;

                set(uncore, VLV_COUNTER_CONTROL,
                    _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
                lower = intel_uncore_read_fw(uncore, reg);

                set(uncore, VLV_COUNTER_CONTROL,
                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
                upper = intel_uncore_read_fw(uncore, reg);
        } while (upper != tmp && --loop);

        /*
         * Everywhere else we always use VLV_COUNTER_CONTROL with the
         * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
         * now.
         */

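        /*
         * The high window exposes bits [39:8] of the 40-bit counter, so
         * recombine it with the low 32 bits via the 8-bit shift.
         */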
        return lower | (u64)upper << 8;
}

u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        u64 time_hw, prev_hw, overflow_hw;
        unsigned int fw_domains;
        unsigned long flags;
        unsigned int i;
        u32 mul, div;

        if (!rc6->supported)
                return 0;

        /*
         * Store previous hw counter values for counter wrap-around handling.
         *
         * There are only four interesting registers and they live next to each
         * other so we can use the relative address, compared to the smallest
         * one as the index into driver storage.
         */
        i = (i915_mmio_reg_offset(reg) -
             i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
        if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency)))
                return 0;

        fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw_domains);

        /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
                mul = 1000000;
                div = i915->czclk_freq;
                overflow_hw = BIT_ULL(40);
                time_hw = vlv_residency_raw(uncore, reg);
        } else {
                /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
                if (IS_GEN9_LP(i915)) {
                        mul = 10000;
                        div = 12;
                } else {
                        mul = 1280;
                        div = 1;
                }

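                /*
                 * i.e. ns = ticks * mul / div: 10000 / 12 = 833.33ns per
                 * tick on Gen9LP, 1280ns per tick elsewhere; the division
                 * is applied once at the end via mul_u64_u32_div().
                 */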
                overflow_hw = BIT_ULL(32);
                time_hw = intel_uncore_read_fw(uncore, reg);
        }

        /*
         * Counter wrap handling.
         *
         * This relies on a sufficient frequency of queries; otherwise
         * counters can still wrap.
         */
        prev_hw = rc6->prev_hw_residency[i];
        rc6->prev_hw_residency[i] = time_hw;

        /* RC6 delta from last sample. */
        if (time_hw >= prev_hw)
                time_hw -= prev_hw;
        else
                time_hw += overflow_hw - prev_hw;

        /* Add delta to RC6 extended raw driver copy. */
        time_hw += rc6->cur_residency[i];
        rc6->cur_residency[i] = time_hw;

        intel_uncore_forcewake_put__locked(uncore, fw_domains);
        spin_unlock_irqrestore(&uncore->lock, flags);

        return mul_u64_u32_div(time_hw, mul, div);
}

u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
        return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_rc6.c"
#endif