cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_bw.c (30871B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2019 Intel Corporation
      4 */
      5
      6#include <drm/drm_atomic_state_helper.h>
      7
      8#include "i915_reg.h"
      9#include "i915_utils.h"
     10#include "intel_atomic.h"
     11#include "intel_bw.h"
     12#include "intel_cdclk.h"
     13#include "intel_display_types.h"
     14#include "intel_mchbar_regs.h"
     15#include "intel_pcode.h"
     16#include "intel_pm.h"
     17
     18/* Parameters for Qclk Geyserville (QGV) */
     19struct intel_qgv_point {
     20	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
     21};
     22
     23struct intel_psf_gv_point {
     24	u8 clk; /* clock in multiples of 16.6666 MHz */
     25};
     26
     27struct intel_qgv_info {
     28	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
     29	struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS];
     30	u8 num_points;
     31	u8 num_psf_points;
     32	u8 t_bl;
     33	u8 max_numchannels;
     34	u8 channel_width;
     35	u8 deinterleave;
     36};
     37
     38static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
     39					  struct intel_qgv_point *sp,
     40					  int point)
     41{
     42	u32 dclk_ratio, dclk_reference;
     43	u32 val;
     44
     45	val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC);
     46	dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val);
     47	if (val & DG1_QCLK_REFERENCE)
     48		dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
     49	else
     50		dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
     51	sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);
     52
     53	val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
     54	if (val & DG1_GEAR_TYPE)
     55		sp->dclk *= 2;
     56
     57	if (sp->dclk == 0)
     58		return -EINVAL;
     59
     60	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
     61	sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val);
     62	sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val);
     63
     64	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH);
     65	sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val);
     66	sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val);
     67
     68	sp->t_rc = sp->t_rp + sp->t_ras;
     69
     70	return 0;
     71}
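
/*
 * Worked example of the DCLK derivation above, with made-up register
 * values (not from any real DG1 part): dclk_ratio = 15 and a 100 MHz
 * reference (dclk_reference = 6) give
 * DIV_ROUND_UP(16667 * 15 * 6 + 500, 1000) = DIV_ROUND_UP(1500530, 1000)
 * = 1501 MHz, doubled to 3002 when DG1_GEAR_TYPE is set.
 */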
     72
     73static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
     74					 struct intel_qgv_point *sp,
     75					 int point)
     76{
     77	u32 val = 0, val2 = 0;
     78	u16 dclk;
     79	int ret;
     80
     81	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
     82			     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
     83			     &val, &val2);
     84	if (ret)
     85		return ret;
     86
     87	dclk = val & 0xffff;
     88	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
     89	sp->t_rp = (val & 0xff0000) >> 16;
     90	sp->t_rcd = (val & 0xff000000) >> 24;
     91
     92	sp->t_rdpre = val2 & 0xff;
     93	sp->t_ras = (val2 & 0xff00) >> 8;
     94
     95	sp->t_rc = sp->t_rp + sp->t_ras;
     96
     97	return 0;
     98}
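
/*
 * Layout of the two dwords decoded above (field positions as used by
 * the shifts/masks in this function):
 *
 *   val:  bits [15:0]  dclk (in 16.667 MHz units)
 *         bits [23:16] tRP
 *         bits [31:24] tRCD
 *   val2: bits [7:0]   tRDPRE
 *         bits [15:8]  tRAS
 */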
     99
    100static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv,
    101					    struct intel_psf_gv_point *points)
    102{
    103	u32 val = 0;
    104	int ret;
    105	int i;
    106
    107	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
    108			     ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL);
    109	if (ret)
    110		return ret;
    111
    112	for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) {
    113		points[i].clk = val & 0xff;
    114		val >>= 8;
    115	}
    116
    117	return 0;
    118}
    119
    120int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
    121				  u32 points_mask)
    122{
    123	int ret;
    124
    125	/* bspec says to keep retrying for at least 1 ms */
    126	ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
    127				points_mask,
    128				ICL_PCODE_REP_QGV_MASK | ADLS_PCODE_REP_PSF_MASK,
    129				ICL_PCODE_REP_QGV_SAFE | ADLS_PCODE_REP_PSF_SAFE,
    130				1);
    131
    132	if (ret < 0) {
     133		drm_err(&dev_priv->drm, "Failed to disable qgv points (%d), points mask: 0x%x\n", ret, points_mask);
    134		return ret;
    135	}
    136
    137	return 0;
    138}
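
/*
 * Hypothetical usage sketch (no caller in this file passes a literal
 * mask like this): with 3 advertised QGV points, keeping only point 0
 * enabled means masking points 1 and 2:
 *
 *	icl_pcode_restrict_qgv_points(dev_priv,
 *				      ICL_PCODE_REQ_QGV_PT(BIT(1) | BIT(2)));
 *
 * intel_bw_atomic_check() below builds such masks from the points that
 * can satisfy the required data rate.
 */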
    139
    140static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
    141			      struct intel_qgv_info *qi,
    142			      bool is_y_tile)
    143{
    144	const struct dram_info *dram_info = &dev_priv->dram_info;
    145	int i, ret;
    146
    147	qi->num_points = dram_info->num_qgv_points;
    148	qi->num_psf_points = dram_info->num_psf_gv_points;
    149
    150	if (DISPLAY_VER(dev_priv) >= 12)
    151		switch (dram_info->type) {
    152		case INTEL_DRAM_DDR4:
    153			qi->t_bl = is_y_tile ? 8 : 4;
    154			qi->max_numchannels = 2;
    155			qi->channel_width = 64;
    156			qi->deinterleave = is_y_tile ? 1 : 2;
    157			break;
    158		case INTEL_DRAM_DDR5:
    159			qi->t_bl = is_y_tile ? 16 : 8;
    160			qi->max_numchannels = 4;
    161			qi->channel_width = 32;
    162			qi->deinterleave = is_y_tile ? 1 : 2;
    163			break;
    164		case INTEL_DRAM_LPDDR4:
    165			if (IS_ROCKETLAKE(dev_priv)) {
    166				qi->t_bl = 8;
    167				qi->max_numchannels = 4;
    168				qi->channel_width = 32;
    169				qi->deinterleave = 2;
    170				break;
    171			}
    172			fallthrough;
    173		case INTEL_DRAM_LPDDR5:
    174			qi->t_bl = 16;
    175			qi->max_numchannels = 8;
    176			qi->channel_width = 16;
    177			qi->deinterleave = is_y_tile ? 2 : 4;
    178			break;
    179		default:
    180			qi->t_bl = 16;
    181			qi->max_numchannels = 1;
    182			break;
    183		}
    184	else if (DISPLAY_VER(dev_priv) == 11) {
    185		qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
    186		qi->max_numchannels = 1;
    187	}
    188
    189	if (drm_WARN_ON(&dev_priv->drm,
    190			qi->num_points > ARRAY_SIZE(qi->points)))
    191		qi->num_points = ARRAY_SIZE(qi->points);
    192
    193	for (i = 0; i < qi->num_points; i++) {
    194		struct intel_qgv_point *sp = &qi->points[i];
    195
    196		if (IS_DG1(dev_priv))
    197			ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
    198		else
    199			ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
    200
    201		if (ret)
    202			return ret;
    203
    204		drm_dbg_kms(&dev_priv->drm,
    205			    "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
    206			    i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
    207			    sp->t_rcd, sp->t_rc);
    208	}
    209
    210	if (qi->num_psf_points > 0) {
    211		ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points);
    212		if (ret) {
    213			drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n");
    214			qi->num_psf_points = 0;
    215		}
    216
    217		for (i = 0; i < qi->num_psf_points; i++)
    218			drm_dbg_kms(&dev_priv->drm,
     219				    "PSF GV %d: CLK=%d\n",
    220				    i, qi->psf_points[i].clk);
    221	}
    222
    223	return 0;
    224}
    225
    226static int adl_calc_psf_bw(int clk)
    227{
    228	/*
     229	 * clk is in multiples of 16.666 MHz (100/6).
     230	 * According to BSpec, PSF GV bandwidth is
     231	 * calculated as BW = 64 * clk * 16.666 MHz.
    232	 */
    233	return DIV_ROUND_CLOSEST(64 * clk * 100, 6);
    234}
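
/*
 * E.g. a (hypothetical) PSF GV point with clk = 38, i.e. ~633 MHz,
 * yields DIV_ROUND_CLOSEST(64 * 38 * 100, 6) = 40533, roughly 40.5 GB/s.
 */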
    235
    236static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
    237{
    238	u16 dclk = 0;
    239	int i;
    240
    241	for (i = 0; i < qi->num_points; i++)
    242		dclk = max(dclk, qi->points[i].dclk);
    243
    244	return dclk;
    245}
    246
    247struct intel_sa_info {
    248	u16 displayrtids;
    249	u8 deburst, deprogbwlimit, derating;
    250};
    251
    252static const struct intel_sa_info icl_sa_info = {
    253	.deburst = 8,
    254	.deprogbwlimit = 25, /* GB/s */
    255	.displayrtids = 128,
    256	.derating = 10,
    257};
    258
    259static const struct intel_sa_info tgl_sa_info = {
    260	.deburst = 16,
    261	.deprogbwlimit = 34, /* GB/s */
    262	.displayrtids = 256,
    263	.derating = 10,
    264};
    265
    266static const struct intel_sa_info rkl_sa_info = {
    267	.deburst = 8,
    268	.deprogbwlimit = 20, /* GB/s */
    269	.displayrtids = 128,
    270	.derating = 10,
    271};
    272
    273static const struct intel_sa_info adls_sa_info = {
    274	.deburst = 16,
    275	.deprogbwlimit = 38, /* GB/s */
    276	.displayrtids = 256,
    277	.derating = 10,
    278};
    279
    280static const struct intel_sa_info adlp_sa_info = {
    281	.deburst = 16,
    282	.deprogbwlimit = 38, /* GB/s */
    283	.displayrtids = 256,
    284	.derating = 20,
    285};
    286
    287static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
    288{
    289	struct intel_qgv_info qi = {};
    290	bool is_y_tile = true; /* assume y tile may be used */
    291	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
    292	int ipqdepth, ipqdepthpch = 16;
    293	int dclk_max;
    294	int maxdebw;
    295	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
    296	int i, ret;
    297
    298	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
    299	if (ret) {
    300		drm_dbg_kms(&dev_priv->drm,
    301			    "Failed to get memory subsystem information, ignoring bandwidth limits");
    302		return ret;
    303	}
    304
    305	dclk_max = icl_sagv_max_dclk(&qi);
    306	maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
    307	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
    308	qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
    309
    310	for (i = 0; i < num_groups; i++) {
    311		struct intel_bw_info *bi = &dev_priv->max_bw[i];
    312		int clpchgroup;
    313		int j;
    314
    315		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
    316		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
    317
    318		bi->num_qgv_points = qi.num_points;
    319		bi->num_psf_gv_points = qi.num_psf_points;
    320
    321		for (j = 0; j < qi.num_points; j++) {
    322			const struct intel_qgv_point *sp = &qi.points[j];
    323			int ct, bw;
    324
    325			/*
    326			 * Max row cycle time
    327			 *
    328			 * FIXME what is the logic behind the
    329			 * assumed burst length?
    330			 */
    331			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
    332				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
    333			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);
    334
    335			bi->deratedbw[j] = min(maxdebw,
    336					       bw * (100 - sa->derating) / 100);
    337
    338			drm_dbg_kms(&dev_priv->drm,
    339				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
    340				    i, j, bi->num_planes, bi->deratedbw[j]);
    341		}
    342	}
    343	/*
     344	 * If SAGV is disabled in BIOS, we always get 1 SAGV point,
     345	 * but we can't send PCode commands to restrict it as they
     346	 * would fail and be pointless anyway.
    347	 */
    348	if (qi.num_points == 1)
    349		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
    350	else
    351		dev_priv->sagv_status = I915_SAGV_ENABLED;
    352
    353	return 0;
    354}
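
/*
 * Illustrative numbers for the derated bandwidth math above (made up,
 * not from any real platform): with dclk = 1200, clpchgroup = 8,
 * num_channels = 2 and ct = 40,
 * bw = DIV_ROUND_UP(1200 * 8 * 32 * 2, 40) = 15360, and with a 10%
 * derating the QGV point is credited min(maxdebw, 13824).
 */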
    355
    356static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
    357{
    358	struct intel_qgv_info qi = {};
    359	const struct dram_info *dram_info = &dev_priv->dram_info;
    360	bool is_y_tile = true; /* assume y tile may be used */
    361	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
    362	int ipqdepth, ipqdepthpch = 16;
    363	int dclk_max;
    364	int maxdebw, peakbw;
    365	int clperchgroup;
    366	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
    367	int i, ret;
    368
    369	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
    370	if (ret) {
    371		drm_dbg_kms(&dev_priv->drm,
    372			    "Failed to get memory subsystem information, ignoring bandwidth limits");
    373		return ret;
    374	}
    375
    376	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
    377		num_channels *= 2;
    378
    379	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
    380
    381	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
    382		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
    383
    384	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
    385		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
    386	if (qi.max_numchannels != 0)
    387		num_channels = min_t(u8, num_channels, qi.max_numchannels);
    388
    389	dclk_max = icl_sagv_max_dclk(&qi);
    390
    391	peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
    392	maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */
    393
    394	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
    395	/*
    396	 * clperchgroup = 4kpagespermempage * clperchperblock,
    397	 * clperchperblock = 8 / num_channels * interleave
    398	 */
    399	clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;
    400
    401	for (i = 0; i < num_groups; i++) {
    402		struct intel_bw_info *bi = &dev_priv->max_bw[i];
    403		struct intel_bw_info *bi_next;
    404		int clpchgroup;
    405		int j;
    406
     407		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
     408
     409		if (i < num_groups - 1) {
     410			bi_next = &dev_priv->max_bw[i + 1];
     411			if (clpchgroup < clperchgroup)
     412				bi_next->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
     413			else
     414				bi_next->num_planes = 0;
     415		}
    416
    417		bi->num_qgv_points = qi.num_points;
    418		bi->num_psf_gv_points = qi.num_psf_points;
    419
    420		for (j = 0; j < qi.num_points; j++) {
    421			const struct intel_qgv_point *sp = &qi.points[j];
    422			int ct, bw;
    423
    424			/*
    425			 * Max row cycle time
    426			 *
    427			 * FIXME what is the logic behind the
    428			 * assumed burst length?
    429			 */
    430			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
    431				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
    432			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);
    433
    434			bi->deratedbw[j] = min(maxdebw,
    435					       bw * (100 - sa->derating) / 100);
    436
    437			drm_dbg_kms(&dev_priv->drm,
    438				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
    439				    i, j, bi->num_planes, bi->deratedbw[j]);
    440		}
    441
    442		for (j = 0; j < qi.num_psf_points; j++) {
    443			const struct intel_psf_gv_point *sp = &qi.psf_points[j];
    444
    445			bi->psf_bw[j] = adl_calc_psf_bw(sp->clk);
    446
    447			drm_dbg_kms(&dev_priv->drm,
    448				    "BW%d / PSF GV %d: num_planes=%d bw=%u\n",
    449				    i, j, bi->num_planes, bi->psf_bw[j]);
    450		}
    451	}
    452
    453	/*
     454	 * If SAGV is disabled in BIOS, we always get 1 SAGV point,
     455	 * but we can't send PCode commands to restrict it as they
     456	 * would fail and be pointless anyway.
    457	 */
    458	if (qi.num_points == 1)
    459		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
    460	else
    461		dev_priv->sagv_status = I915_SAGV_ENABLED;
    462
    463	return 0;
    464}
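
/*
 * Illustrative values for the group bookkeeping above: with
 * deburst = 16, deinterleave = 2 and num_channels = 4, clpchgroup
 * starts at 16 * 2 / 4 = 8 and doubles per group, while
 * clperchgroup = 4 * DIV_ROUND_UP(8, 4) * 2 = 16; so only groups whose
 * clpchgroup is still below 16 hand a non-zero num_planes to the next
 * group (e.g. ipqdepth = 16 gives (16 - 8) / 8 + 1 = 2 planes).
 */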
    465
    466static void dg2_get_bw_info(struct drm_i915_private *i915)
    467{
    468	unsigned int deratedbw = IS_DG2_G11(i915) ? 38000 : 50000;
    469	int num_groups = ARRAY_SIZE(i915->max_bw);
    470	int i;
    471
    472	/*
    473	 * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth
    474	 * that doesn't depend on the number of planes enabled. So fill all the
     475	 * plane groups with constant bw information for uniformity with other
    476	 * platforms. DG2-G10 platforms have a constant 50 GB/s bandwidth,
    477	 * whereas DG2-G11 platforms have 38 GB/s.
    478	 */
    479	for (i = 0; i < num_groups; i++) {
    480		struct intel_bw_info *bi = &i915->max_bw[i];
    481
    482		bi->num_planes = 1;
    483		/* Need only one dummy QGV point per group */
    484		bi->num_qgv_points = 1;
    485		bi->deratedbw[0] = deratedbw;
    486	}
    487
    488	i915->sagv_status = I915_SAGV_NOT_CONTROLLED;
    489}
    490
    491static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
    492			       int num_planes, int qgv_point)
    493{
    494	int i;
    495
    496	/*
    497	 * Let's return max bw for 0 planes
    498	 */
    499	num_planes = max(1, num_planes);
    500
    501	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
    502		const struct intel_bw_info *bi =
    503			&dev_priv->max_bw[i];
    504
    505		/*
    506		 * Pcode will not expose all QGV points when
    507		 * SAGV is forced to off/min/med/max.
    508		 */
    509		if (qgv_point >= bi->num_qgv_points)
    510			return UINT_MAX;
    511
    512		if (num_planes >= bi->num_planes)
    513			return bi->deratedbw[qgv_point];
    514	}
    515
    516	return 0;
    517}
    518
    519static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
    520			       int num_planes, int qgv_point)
    521{
    522	int i;
    523
    524	/*
    525	 * Let's return max bw for 0 planes
    526	 */
    527	num_planes = max(1, num_planes);
    528
    529	for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
    530		const struct intel_bw_info *bi =
    531			&dev_priv->max_bw[i];
    532
    533		/*
    534		 * Pcode will not expose all QGV points when
    535		 * SAGV is forced to off/min/med/max.
    536		 */
    537		if (qgv_point >= bi->num_qgv_points)
    538			return UINT_MAX;
    539
    540		if (num_planes <= bi->num_planes)
    541			return bi->deratedbw[qgv_point];
    542	}
    543
    544	return dev_priv->max_bw[0].deratedbw[qgv_point];
    545}
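
/*
 * Note the lookup direction: icl_max_bw() above scans from the first
 * (largest num_planes) group and returns the first group whose
 * num_planes threshold is met, while this variant scans from the last
 * group back and falls back to max_bw[0]'s derated value if no group
 * covers num_planes.
 */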
    546
    547static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
    548			       int psf_gv_point)
    549{
    550	const struct intel_bw_info *bi =
    551			&dev_priv->max_bw[0];
    552
    553	return bi->psf_bw[psf_gv_point];
    554}
    555
    556void intel_bw_init_hw(struct drm_i915_private *dev_priv)
    557{
    558	if (!HAS_DISPLAY(dev_priv))
    559		return;
    560
    561	if (IS_DG2(dev_priv))
    562		dg2_get_bw_info(dev_priv);
    563	else if (IS_ALDERLAKE_P(dev_priv))
    564		tgl_get_bw_info(dev_priv, &adlp_sa_info);
    565	else if (IS_ALDERLAKE_S(dev_priv))
    566		tgl_get_bw_info(dev_priv, &adls_sa_info);
    567	else if (IS_ROCKETLAKE(dev_priv))
    568		tgl_get_bw_info(dev_priv, &rkl_sa_info);
    569	else if (DISPLAY_VER(dev_priv) == 12)
    570		tgl_get_bw_info(dev_priv, &tgl_sa_info);
    571	else if (DISPLAY_VER(dev_priv) == 11)
    572		icl_get_bw_info(dev_priv, &icl_sa_info);
    573}
    574
    575static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
    576{
    577	/*
    578	 * We assume cursors are small enough
     579	 * to not cause bandwidth problems.
    580	 */
    581	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
    582}
    583
    584static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
    585{
    586	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
    587	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
    588	unsigned int data_rate = 0;
    589	enum plane_id plane_id;
    590
    591	for_each_plane_id_on_crtc(crtc, plane_id) {
    592		/*
    593		 * We assume cursors are small enough
     594		 * to not cause bandwidth problems.
    595		 */
    596		if (plane_id == PLANE_CURSOR)
    597			continue;
    598
    599		data_rate += crtc_state->data_rate[plane_id];
    600
    601		if (DISPLAY_VER(i915) < 11)
    602			data_rate += crtc_state->data_rate_y[plane_id];
    603	}
    604
    605	return data_rate;
    606}
    607
    608/* "Maximum Pipe Read Bandwidth" */
    609static int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state)
    610{
    611	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
    612	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
    613
    614	if (DISPLAY_VER(i915) < 12)
    615		return 0;
    616
    617	return DIV_ROUND_UP_ULL(mul_u32_u32(intel_bw_crtc_data_rate(crtc_state), 10), 512);
    618}
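
/*
 * The 10/512 factor models a pipe consuming at most 51.2 bytes per
 * cdclk cycle. E.g. an aggregate pipe data rate of 5120000 gives
 * DIV_ROUND_UP_ULL(5120000 * 10, 512) = 100000, i.e. a 100 MHz floor
 * in the kHz cdclk units used here.
 */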
    619
    620void intel_bw_crtc_update(struct intel_bw_state *bw_state,
    621			  const struct intel_crtc_state *crtc_state)
    622{
    623	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
    624	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
    625
    626	bw_state->data_rate[crtc->pipe] =
    627		intel_bw_crtc_data_rate(crtc_state);
    628	bw_state->num_active_planes[crtc->pipe] =
    629		intel_bw_crtc_num_active_planes(crtc_state);
    630
    631	drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
    632		    pipe_name(crtc->pipe),
    633		    bw_state->data_rate[crtc->pipe],
    634		    bw_state->num_active_planes[crtc->pipe]);
    635}
    636
    637static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
    638					       const struct intel_bw_state *bw_state)
    639{
    640	unsigned int num_active_planes = 0;
    641	enum pipe pipe;
    642
    643	for_each_pipe(dev_priv, pipe)
    644		num_active_planes += bw_state->num_active_planes[pipe];
    645
    646	return num_active_planes;
    647}
    648
    649static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
    650				       const struct intel_bw_state *bw_state)
    651{
    652	unsigned int data_rate = 0;
    653	enum pipe pipe;
    654
    655	for_each_pipe(dev_priv, pipe)
    656		data_rate += bw_state->data_rate[pipe];
    657
    658	if (DISPLAY_VER(dev_priv) >= 13 && i915_vtd_active(dev_priv))
    659		data_rate = DIV_ROUND_UP(data_rate * 105, 100);
    660
    661	return data_rate;
    662}
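
/*
 * E.g. with VT-d active on display version 13+, an aggregate data rate
 * of 1000000 is padded to DIV_ROUND_UP(1000000 * 105, 100) = 1050000,
 * i.e. a flat 5% overhead to account for the IOMMU.
 */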
    663
    664struct intel_bw_state *
    665intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
    666{
    667	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
    668	struct intel_global_state *bw_state;
    669
    670	bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);
    671
    672	return to_intel_bw_state(bw_state);
    673}
    674
    675struct intel_bw_state *
    676intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
    677{
    678	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
    679	struct intel_global_state *bw_state;
    680
    681	bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);
    682
    683	return to_intel_bw_state(bw_state);
    684}
    685
    686struct intel_bw_state *
    687intel_atomic_get_bw_state(struct intel_atomic_state *state)
    688{
    689	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
    690	struct intel_global_state *bw_state;
    691
    692	bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj);
    693	if (IS_ERR(bw_state))
    694		return ERR_CAST(bw_state);
    695
    696	return to_intel_bw_state(bw_state);
    697}
    698
    699static bool intel_bw_state_changed(struct drm_i915_private *i915,
    700				   const struct intel_bw_state *old_bw_state,
    701				   const struct intel_bw_state *new_bw_state)
    702{
    703	enum pipe pipe;
    704
    705	for_each_pipe(i915, pipe) {
    706		const struct intel_dbuf_bw *old_crtc_bw =
    707			&old_bw_state->dbuf_bw[pipe];
    708		const struct intel_dbuf_bw *new_crtc_bw =
    709			&new_bw_state->dbuf_bw[pipe];
    710		enum dbuf_slice slice;
    711
    712		for_each_dbuf_slice(i915, slice) {
    713			if (old_crtc_bw->max_bw[slice] != new_crtc_bw->max_bw[slice] ||
    714			    old_crtc_bw->active_planes[slice] != new_crtc_bw->active_planes[slice])
    715				return true;
    716		}
    717
    718		if (old_bw_state->min_cdclk[pipe] != new_bw_state->min_cdclk[pipe])
    719			return true;
    720	}
    721
    722	return false;
    723}
    724
    725static void skl_plane_calc_dbuf_bw(struct intel_bw_state *bw_state,
    726				   struct intel_crtc *crtc,
    727				   enum plane_id plane_id,
    728				   const struct skl_ddb_entry *ddb,
    729				   unsigned int data_rate)
    730{
    731	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
    732	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
    733	unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(i915, ddb);
    734	enum dbuf_slice slice;
    735
    736	/*
    737	 * The arbiter can only really guarantee an
    738	 * equal share of the total bw to each plane.
    739	 */
    740	for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask) {
    741		crtc_bw->max_bw[slice] = max(crtc_bw->max_bw[slice], data_rate);
    742		crtc_bw->active_planes[slice] |= BIT(plane_id);
    743	}
    744}
    745
    746static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
    747				  const struct intel_crtc_state *crtc_state)
    748{
    749	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
    750	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
    751	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
    752	enum plane_id plane_id;
    753
    754	memset(crtc_bw, 0, sizeof(*crtc_bw));
    755
    756	if (!crtc_state->hw.active)
    757		return;
    758
    759	for_each_plane_id_on_crtc(crtc, plane_id) {
    760		/*
    761		 * We assume cursors are small enough
    762		 * to not cause bandwidth problems.
    763		 */
    764		if (plane_id == PLANE_CURSOR)
    765			continue;
    766
    767		skl_plane_calc_dbuf_bw(bw_state, crtc, plane_id,
    768				       &crtc_state->wm.skl.plane_ddb[plane_id],
    769				       crtc_state->data_rate[plane_id]);
    770
    771		if (DISPLAY_VER(i915) < 11)
    772			skl_plane_calc_dbuf_bw(bw_state, crtc, plane_id,
    773					       &crtc_state->wm.skl.plane_ddb_y[plane_id],
    774					       crtc_state->data_rate[plane_id]);
    775	}
    776}
    777
    778/* "Maximum Data Buffer Bandwidth" */
    779static int
    780intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
    781			const struct intel_bw_state *bw_state)
    782{
    783	unsigned int total_max_bw = 0;
    784	enum dbuf_slice slice;
    785
    786	for_each_dbuf_slice(i915, slice) {
    787		int num_active_planes = 0;
    788		unsigned int max_bw = 0;
    789		enum pipe pipe;
    790
    791		/*
    792		 * The arbiter can only really guarantee an
    793		 * equal share of the total bw to each plane.
    794		 */
    795		for_each_pipe(i915, pipe) {
    796			const struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[pipe];
    797
    798			max_bw = max(crtc_bw->max_bw[slice], max_bw);
    799			num_active_planes += hweight8(crtc_bw->active_planes[slice]);
    800		}
    801		max_bw *= num_active_planes;
    802
    803		total_max_bw = max(total_max_bw, max_bw);
    804	}
    805
    806	return DIV_ROUND_UP(total_max_bw, 64);
    807}
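
/*
 * Illustrative numbers: if the busiest dbuf slice serves 3 planes and
 * the worst plane on it needs 819200, total_max_bw = 3 * 819200 =
 * 2457600 and the dbuf-imposed cdclk floor is
 * DIV_ROUND_UP(2457600, 64) = 38400.
 */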
    808
    809int intel_bw_min_cdclk(struct drm_i915_private *i915,
    810		       const struct intel_bw_state *bw_state)
    811{
    812	enum pipe pipe;
    813	int min_cdclk;
    814
    815	min_cdclk = intel_bw_dbuf_min_cdclk(i915, bw_state);
    816
    817	for_each_pipe(i915, pipe)
    818		min_cdclk = max(bw_state->min_cdclk[pipe], min_cdclk);
    819
    820	return min_cdclk;
    821}
    822
    823int intel_bw_calc_min_cdclk(struct intel_atomic_state *state,
    824			    bool *need_cdclk_calc)
    825{
    826	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
    827	struct intel_bw_state *new_bw_state = NULL;
    828	const struct intel_bw_state *old_bw_state = NULL;
    829	const struct intel_cdclk_state *cdclk_state;
    830	const struct intel_crtc_state *crtc_state;
    831	int old_min_cdclk, new_min_cdclk;
    832	struct intel_crtc *crtc;
    833	int i;
    834
    835	if (DISPLAY_VER(dev_priv) < 9)
    836		return 0;
    837
    838	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
    839		new_bw_state = intel_atomic_get_bw_state(state);
    840		if (IS_ERR(new_bw_state))
    841			return PTR_ERR(new_bw_state);
    842
    843		old_bw_state = intel_atomic_get_old_bw_state(state);
    844
    845		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);
    846
    847		new_bw_state->min_cdclk[crtc->pipe] =
    848			intel_bw_crtc_min_cdclk(crtc_state);
    849	}
    850
    851	if (!old_bw_state)
    852		return 0;
    853
    854	if (intel_bw_state_changed(dev_priv, old_bw_state, new_bw_state)) {
    855		int ret = intel_atomic_lock_global_state(&new_bw_state->base);
    856		if (ret)
    857			return ret;
    858	}
    859
    860	old_min_cdclk = intel_bw_min_cdclk(dev_priv, old_bw_state);
    861	new_min_cdclk = intel_bw_min_cdclk(dev_priv, new_bw_state);
    862
    863	/*
    864	 * No need to check against the cdclk state if
    865	 * the min cdclk doesn't increase.
    866	 *
     867	 * I.e. we only ever increase the cdclk due to bandwidth
    868	 * requirements. This can reduce back and forth
    869	 * display blinking due to constant cdclk changes.
    870	 */
    871	if (new_min_cdclk <= old_min_cdclk)
    872		return 0;
    873
    874	cdclk_state = intel_atomic_get_cdclk_state(state);
    875	if (IS_ERR(cdclk_state))
    876		return PTR_ERR(cdclk_state);
    877
    878	/*
    879	 * No need to recalculate the cdclk state if
    880	 * the min cdclk doesn't increase.
    881	 *
     882	 * I.e. we only ever increase the cdclk due to bandwidth
    883	 * requirements. This can reduce back and forth
    884	 * display blinking due to constant cdclk changes.
    885	 */
    886	if (new_min_cdclk <= cdclk_state->bw_min_cdclk)
    887		return 0;
    888
    889	drm_dbg_kms(&dev_priv->drm,
    890		    "new bandwidth min cdclk (%d kHz) > old min cdclk (%d kHz)\n",
    891		    new_min_cdclk, cdclk_state->bw_min_cdclk);
    892	*need_cdclk_calc = true;
    893
    894	return 0;
    895}
    896
    897static u16 icl_qgv_points_mask(struct drm_i915_private *i915)
    898{
    899	unsigned int num_psf_gv_points = i915->max_bw[0].num_psf_gv_points;
    900	unsigned int num_qgv_points = i915->max_bw[0].num_qgv_points;
    901	u16 qgv_points = 0, psf_points = 0;
    902
    903	/*
     904	 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
     905	 * the request if we try to mask any unadvertised points.
     906	 * So we must operate only on the points returned by PCode.
    907	 */
    908	if (num_qgv_points > 0)
    909		qgv_points = GENMASK(num_qgv_points - 1, 0);
    910
    911	if (num_psf_gv_points > 0)
    912		psf_points = GENMASK(num_psf_gv_points - 1, 0);
    913
    914	return ICL_PCODE_REQ_QGV_PT(qgv_points) | ADLS_PCODE_REQ_PSF_PT(psf_points);
    915}
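
/*
 * E.g. with 8 QGV points and 2 PSF GV points advertised, qgv_points =
 * GENMASK(7, 0) = 0xff and psf_points = GENMASK(1, 0) = 0x3, which the
 * REQ_QGV_PT/REQ_PSF_PT encodings then place in their request fields.
 */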
    916
    917static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *changed)
    918{
    919	struct drm_i915_private *i915 = to_i915(state->base.dev);
    920	const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
    921	struct intel_crtc *crtc;
    922	int i;
    923
    924	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
    925					    new_crtc_state, i) {
    926		unsigned int old_data_rate =
    927			intel_bw_crtc_data_rate(old_crtc_state);
    928		unsigned int new_data_rate =
    929			intel_bw_crtc_data_rate(new_crtc_state);
    930		unsigned int old_active_planes =
    931			intel_bw_crtc_num_active_planes(old_crtc_state);
    932		unsigned int new_active_planes =
    933			intel_bw_crtc_num_active_planes(new_crtc_state);
    934		struct intel_bw_state *new_bw_state;
    935
    936		/*
    937		 * Avoid locking the bw state when
    938		 * nothing significant has changed.
    939		 */
    940		if (old_data_rate == new_data_rate &&
    941		    old_active_planes == new_active_planes)
    942			continue;
    943
    944		new_bw_state = intel_atomic_get_bw_state(state);
    945		if (IS_ERR(new_bw_state))
    946			return PTR_ERR(new_bw_state);
    947
    948		new_bw_state->data_rate[crtc->pipe] = new_data_rate;
    949		new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;
    950
    951		*changed = true;
    952
    953		drm_dbg_kms(&i915->drm,
    954			    "[CRTC:%d:%s] data rate %u num active planes %u\n",
    955			    crtc->base.base.id, crtc->base.name,
    956			    new_bw_state->data_rate[crtc->pipe],
    957			    new_bw_state->num_active_planes[crtc->pipe]);
    958	}
    959
    960	return 0;
    961}
    962
    963int intel_bw_atomic_check(struct intel_atomic_state *state)
    964{
    965	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
    966	const struct intel_bw_state *old_bw_state;
    967	struct intel_bw_state *new_bw_state;
    968	unsigned int data_rate;
    969	unsigned int num_active_planes;
    970	int i, ret;
    971	u16 qgv_points = 0, psf_points = 0;
    972	unsigned int max_bw_point = 0, max_bw = 0;
    973	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
    974	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
    975	bool changed = false;
    976
    977	/* FIXME earlier gens need some checks too */
    978	if (DISPLAY_VER(dev_priv) < 11)
    979		return 0;
    980
    981	ret = intel_bw_check_data_rate(state, &changed);
    982	if (ret)
    983		return ret;
    984
    985	old_bw_state = intel_atomic_get_old_bw_state(state);
    986	new_bw_state = intel_atomic_get_new_bw_state(state);
    987
    988	if (new_bw_state &&
    989	    intel_can_enable_sagv(dev_priv, old_bw_state) !=
    990	    intel_can_enable_sagv(dev_priv, new_bw_state))
    991		changed = true;
    992
    993	/*
    994	 * If none of our inputs (data rates, number of active
    995	 * planes, SAGV yes/no) changed then nothing to do here.
    996	 */
    997	if (!changed)
    998		return 0;
    999
   1000	ret = intel_atomic_lock_global_state(&new_bw_state->base);
   1001	if (ret)
   1002		return ret;
   1003
   1004	data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
   1005	data_rate = DIV_ROUND_UP(data_rate, 1000);
   1006
   1007	num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);
   1008
   1009	for (i = 0; i < num_qgv_points; i++) {
   1010		unsigned int max_data_rate;
   1011
   1012		if (DISPLAY_VER(dev_priv) > 11)
   1013			max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
   1014		else
   1015			max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
   1016		/*
   1017		 * We need to know which qgv point gives us
   1018		 * maximum bandwidth in order to disable SAGV
   1019		 * if we find that we exceed SAGV block time
   1020		 * with watermarks. By that moment we already
    1021		 * have those, as they are calculated earlier in
    1022		 * intel_atomic_check().
   1023		 */
   1024		if (max_data_rate > max_bw) {
   1025			max_bw_point = i;
   1026			max_bw = max_data_rate;
   1027		}
   1028		if (max_data_rate >= data_rate)
   1029			qgv_points |= BIT(i);
   1030
   1031		drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
   1032			    i, max_data_rate, data_rate);
   1033	}
   1034
   1035	for (i = 0; i < num_psf_gv_points; i++) {
   1036		unsigned int max_data_rate = adl_psf_bw(dev_priv, i);
   1037
   1038		if (max_data_rate >= data_rate)
   1039			psf_points |= BIT(i);
   1040
   1041		drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d"
   1042			    " required %d\n",
   1043			    i, max_data_rate, data_rate);
   1044	}
   1045
   1046	/*
    1047	 * BSpec states that we should always have at least one allowed point
    1048	 * left, so if we don't, simply reject the configuration as it
    1049	 * can't be supported.
   1050	 */
   1051	if (qgv_points == 0) {
   1052		drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
    1053			    " bandwidth %d for display configuration (%d active planes).\n",
   1054			    data_rate, num_active_planes);
   1055		return -EINVAL;
   1056	}
   1057
   1058	if (num_psf_gv_points > 0 && psf_points == 0) {
   1059		drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory"
    1060			    " bandwidth %d for display configuration (%d active planes).\n",
   1061			    data_rate, num_active_planes);
   1062		return -EINVAL;
   1063	}
   1064
   1065	/*
    1066	 * Leave only a single point with the highest bandwidth if
   1067	 * we can't enable SAGV due to the increased memory latency it may
   1068	 * cause.
   1069	 */
   1070	if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
   1071		qgv_points = BIT(max_bw_point);
   1072		drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
   1073			    max_bw_point);
   1074	}
   1075
   1076	/*
   1077	 * We store the ones which need to be masked as that is what PCode
   1078	 * actually accepts as a parameter.
   1079	 */
   1080	new_bw_state->qgv_points_mask =
   1081		~(ICL_PCODE_REQ_QGV_PT(qgv_points) |
   1082		  ADLS_PCODE_REQ_PSF_PT(psf_points)) &
   1083		icl_qgv_points_mask(dev_priv);
   1084
   1085	/*
    1086	 * If the actual mask has changed, we need to make sure that
    1087	 * the commits are serialized (in case this is a nomodeset, nonblocking)
   1088	 */
   1089	if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
   1090		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
   1091		if (ret)
   1092			return ret;
   1093	}
   1094
   1095	return 0;
   1096}
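
/*
 * Mask polarity example for the qgv_points_mask computation above
 * (ignoring PSF points): if 4 QGV points exist and only points 2 and 3
 * can satisfy the data rate, qgv_points = 0xc, and the stored mask
 * keeps exactly the bits for points 0 and 1 -- the points PCode is
 * asked to disable.
 */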
   1097
   1098static struct intel_global_state *
   1099intel_bw_duplicate_state(struct intel_global_obj *obj)
   1100{
   1101	struct intel_bw_state *state;
   1102
   1103	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
   1104	if (!state)
   1105		return NULL;
   1106
   1107	return &state->base;
   1108}
   1109
   1110static void intel_bw_destroy_state(struct intel_global_obj *obj,
   1111				   struct intel_global_state *state)
   1112{
   1113	kfree(state);
   1114}
   1115
   1116static const struct intel_global_state_funcs intel_bw_funcs = {
   1117	.atomic_duplicate_state = intel_bw_duplicate_state,
   1118	.atomic_destroy_state = intel_bw_destroy_state,
   1119};
   1120
   1121int intel_bw_init(struct drm_i915_private *dev_priv)
   1122{
   1123	struct intel_bw_state *state;
   1124
   1125	state = kzalloc(sizeof(*state), GFP_KERNEL);
   1126	if (!state)
   1127		return -ENOMEM;
   1128
   1129	intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj,
   1130				     &state->base, &intel_bw_funcs);
   1131
   1132	return 0;
   1133}