cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dcn30_hubbub.c (15680B)


      1/*
      2 * Copyright 2020 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 * Authors: AMD
     23 *
     24 */
     25
     26
     27#include "dm_services.h"
     28#include "reg_helper.h"
     29#include "dcn30_hubbub.h"
     30
     31
     32#define CTX \
     33	hubbub1->base.ctx
     34#define DC_LOGGER \
     35	hubbub1->base.ctx->logger
     36#define REG(reg)\
     37	hubbub1->regs->reg
     38
     39#undef FN
     40#define FN(reg_name, field_name) \
     41	hubbub1->shifts->field_name, hubbub1->masks->field_name
     42
     43#ifdef NUM_VMID
     44#undef NUM_VMID
     45#endif
     46#define NUM_VMID 16
     47
     48
     49static uint32_t convert_and_clamp(
     50	uint32_t wm_ns,
     51	uint32_t refclk_mhz,
     52	uint32_t clamp_value)
     53{
     54	uint32_t ret_val = 0;
     55	ret_val = wm_ns * refclk_mhz;
     56	ret_val /= 1000;
     57
     58	if (ret_val > clamp_value)
     59		ret_val = clamp_value;
     60
     61	return ret_val;
     62}
     63
     64int hubbub3_init_dchub_sys_ctx(struct hubbub *hubbub,
     65		struct dcn_hubbub_phys_addr_config *pa_config)
     66{
     67	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
     68	struct dcn_vmid_page_table_config phys_config;
     69
     70	REG_SET(DCN_VM_FB_LOCATION_BASE, 0,
     71			FB_BASE, pa_config->system_aperture.fb_base >> 24);
     72	REG_SET(DCN_VM_FB_LOCATION_TOP, 0,
     73			FB_TOP, pa_config->system_aperture.fb_top >> 24);
     74	REG_SET(DCN_VM_FB_OFFSET, 0,
     75			FB_OFFSET, pa_config->system_aperture.fb_offset >> 24);
     76	REG_SET(DCN_VM_AGP_BOT, 0,
     77			AGP_BOT, pa_config->system_aperture.agp_bot >> 24);
     78	REG_SET(DCN_VM_AGP_TOP, 0,
     79			AGP_TOP, pa_config->system_aperture.agp_top >> 24);
     80	REG_SET(DCN_VM_AGP_BASE, 0,
     81			AGP_BASE, pa_config->system_aperture.agp_base >> 24);
     82
     83	if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) {
     84		phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12;
     85		phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12;
     86		phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
     87		phys_config.depth = 0;
     88		phys_config.block_size = 0;
     89		// Init VMID 0 based on PA config
     90		dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config);
     91	}
     92
     93	return NUM_VMID;
     94}
     95
     96bool hubbub3_program_watermarks(
     97		struct hubbub *hubbub,
     98		struct dcn_watermark_set *watermarks,
     99		unsigned int refclk_mhz,
    100		bool safe_to_lower)
    101{
    102	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
    103	bool wm_pending = false;
    104
    105	if (hubbub21_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
    106		wm_pending = true;
    107
    108	if (hubbub21_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
    109		wm_pending = true;
    110
    111	if (hubbub21_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
    112		wm_pending = true;
    113
    114	/*
    115	 * The DCHub arbiter has a mechanism to dynamically rate limit the DCHub request stream to the fabric.
    116	 * If the memory controller is fully utilized and the DCHub requestors are
    117	 * well ahead of their amortized schedule, then it is safe to prevent the next winner
    118	 * from being committed and sent to the fabric.
    119	 * The utilization of the memory controller is approximated by ensuring that
    120	 * the number of outstanding requests is greater than a threshold specified
    121	 * by the ARB_MIN_REQ_OUTSTANDING. To determine that the DCHub requestors are well ahead of the amortized schedule,
    122	 * the slack of the next winner is compared with the ARB_SAT_LEVEL in DLG RefClk cycles.
    123	 *
    124	 * TODO: Revisit request limit after figure out right number. request limit for Renoir isn't decided yet, set maximum value (0x1FF)
    125	 * to turn off it for now.
    126	 */
    127	REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0,
    128			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
    129	REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
    130			DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);
    131
    132	hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
    133
    134	return wm_pending;
    135}
    136
    137bool hubbub3_dcc_support_swizzle(
    138		enum swizzle_mode_values swizzle,
    139		unsigned int bytes_per_element,
    140		enum segment_order *segment_order_horz,
    141		enum segment_order *segment_order_vert)
    142{
    143	bool standard_swizzle = false;
    144	bool display_swizzle = false;
    145	bool render_swizzle = false;
    146
    147	switch (swizzle) {
    148	case DC_SW_4KB_S:
    149	case DC_SW_64KB_S:
    150	case DC_SW_VAR_S:
    151	case DC_SW_4KB_S_X:
    152	case DC_SW_64KB_S_X:
    153	case DC_SW_VAR_S_X:
    154		standard_swizzle = true;
    155		break;
    156	case DC_SW_4KB_R:
    157	case DC_SW_64KB_R:
    158	case DC_SW_VAR_R:
    159	case DC_SW_4KB_R_X:
    160	case DC_SW_64KB_R_X:
    161	case DC_SW_VAR_R_X:
    162		render_swizzle = true;
    163		break;
    164	case DC_SW_4KB_D:
    165	case DC_SW_64KB_D:
    166	case DC_SW_VAR_D:
    167	case DC_SW_4KB_D_X:
    168	case DC_SW_64KB_D_X:
    169	case DC_SW_VAR_D_X:
    170		display_swizzle = true;
    171		break;
    172	default:
    173		break;
    174	}
    175
    176	if (standard_swizzle) {
    177		if (bytes_per_element == 1) {
    178			*segment_order_horz = segment_order__contiguous;
    179			*segment_order_vert = segment_order__na;
    180			return true;
    181		}
    182		if (bytes_per_element == 2) {
    183			*segment_order_horz = segment_order__non_contiguous;
    184			*segment_order_vert = segment_order__contiguous;
    185			return true;
    186		}
    187		if (bytes_per_element == 4) {
    188			*segment_order_horz = segment_order__non_contiguous;
    189			*segment_order_vert = segment_order__contiguous;
    190			return true;
    191		}
    192		if (bytes_per_element == 8) {
    193			*segment_order_horz = segment_order__na;
    194			*segment_order_vert = segment_order__contiguous;
    195			return true;
    196		}
    197	}
    198	if (render_swizzle) {
    199		if (bytes_per_element == 1) {
    200			*segment_order_horz = segment_order__contiguous;
    201			*segment_order_vert = segment_order__na;
    202			return true;
    203		}
    204		if (bytes_per_element == 2) {
    205			*segment_order_horz = segment_order__non_contiguous;
    206			*segment_order_vert = segment_order__contiguous;
    207			return true;
    208		}
    209		if (bytes_per_element == 4) {
    210			*segment_order_horz = segment_order__contiguous;
    211			*segment_order_vert = segment_order__non_contiguous;
    212			return true;
    213		}
    214		if (bytes_per_element == 8) {
    215			*segment_order_horz = segment_order__contiguous;
    216			*segment_order_vert = segment_order__non_contiguous;
    217			return true;
    218		}
    219	}
    220	if (display_swizzle && bytes_per_element == 8) {
    221		*segment_order_horz = segment_order__contiguous;
    222		*segment_order_vert = segment_order__non_contiguous;
    223		return true;
    224	}
    225
    226	return false;
    227}
    228
    229static void hubbub3_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
    230		unsigned int bytes_per_element)
    231{
    232	/* copied from DML.  might want to refactor DML to leverage from DML */
    233	/* DML : get_blk256_size */
    234	if (bytes_per_element == 1) {
    235		*blk256_width = 16;
    236		*blk256_height = 16;
    237	} else if (bytes_per_element == 2) {
    238		*blk256_width = 16;
    239		*blk256_height = 8;
    240	} else if (bytes_per_element == 4) {
    241		*blk256_width = 8;
    242		*blk256_height = 8;
    243	} else if (bytes_per_element == 8) {
    244		*blk256_width = 8;
    245		*blk256_height = 4;
    246	}
    247}
    248
    249static void hubbub3_det_request_size(
    250		unsigned int detile_buf_size,
    251		unsigned int height,
    252		unsigned int width,
    253		unsigned int bpe,
    254		bool *req128_horz_wc,
    255		bool *req128_vert_wc)
    256{
    257	unsigned int blk256_height = 0;
    258	unsigned int blk256_width = 0;
    259	unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc;
    260
    261	hubbub3_get_blk256_size(&blk256_width, &blk256_height, bpe);
    262
    263	swath_bytes_horz_wc = width * blk256_height * bpe;
    264	swath_bytes_vert_wc = height * blk256_width * bpe;
    265
    266	*req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ?
    267			false : /* full 256B request */
    268			true; /* half 128b request */
    269
    270	*req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ?
    271			false : /* full 256B request */
    272			true; /* half 128b request */
    273}
    274
    275bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
    276		const struct dc_dcc_surface_param *input,
    277		struct dc_surface_dcc_cap *output)
    278{
    279	struct dc *dc = hubbub->ctx->dc;
    280	/* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */
    281	enum dcc_control dcc_control;
    282	unsigned int bpe;
    283	enum segment_order segment_order_horz, segment_order_vert;
    284	bool req128_horz_wc, req128_vert_wc;
    285
    286	memset(output, 0, sizeof(*output));
    287
    288	if (dc->debug.disable_dcc == DCC_DISABLE)
    289		return false;
    290
    291	if (!hubbub->funcs->dcc_support_pixel_format(input->format,
    292			&bpe))
    293		return false;
    294
    295	if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe,
    296			&segment_order_horz, &segment_order_vert))
    297		return false;
    298
    299	hubbub3_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size,
    300			input->surface_size.height,  input->surface_size.width,
    301			bpe, &req128_horz_wc, &req128_vert_wc);
    302
    303	if (!req128_horz_wc && !req128_vert_wc) {
    304		dcc_control = dcc_control__256_256_xxx;
    305	} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
    306		if (!req128_horz_wc)
    307			dcc_control = dcc_control__256_256_xxx;
    308		else if (segment_order_horz == segment_order__contiguous)
    309			dcc_control = dcc_control__128_128_xxx;
    310		else
    311			dcc_control = dcc_control__256_64_64;
    312	} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
    313		if (!req128_vert_wc)
    314			dcc_control = dcc_control__256_256_xxx;
    315		else if (segment_order_vert == segment_order__contiguous)
    316			dcc_control = dcc_control__128_128_xxx;
    317		else
    318			dcc_control = dcc_control__256_64_64;
    319	} else {
    320		if ((req128_horz_wc &&
    321			segment_order_horz == segment_order__non_contiguous) ||
    322			(req128_vert_wc &&
    323			segment_order_vert == segment_order__non_contiguous))
    324			/* access_dir not known, must use most constraining */
    325			dcc_control = dcc_control__256_64_64;
    326		else
    327			/* reg128 is true for either horz and vert
    328			 * but segment_order is contiguous
    329			 */
    330			dcc_control = dcc_control__128_128_xxx;
    331	}
    332
    333	/* Exception for 64KB_R_X */
    334	if ((bpe == 2) && (input->swizzle_mode == DC_SW_64KB_R_X))
    335		dcc_control = dcc_control__128_128_xxx;
    336
    337	if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
    338		dcc_control != dcc_control__256_256_xxx)
    339		return false;
    340
    341	switch (dcc_control) {
    342	case dcc_control__256_256_xxx:
    343		output->grph.rgb.max_uncompressed_blk_size = 256;
    344		output->grph.rgb.max_compressed_blk_size = 256;
    345		output->grph.rgb.independent_64b_blks = false;
    346		output->grph.rgb.dcc_controls.dcc_256_256_unconstrained = 1;
    347		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
    348		break;
    349	case dcc_control__128_128_xxx:
    350		output->grph.rgb.max_uncompressed_blk_size = 128;
    351		output->grph.rgb.max_compressed_blk_size = 128;
    352		output->grph.rgb.independent_64b_blks = false;
    353		output->grph.rgb.dcc_controls.dcc_128_128_uncontrained = 1;
    354		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
    355		break;
    356	case dcc_control__256_64_64:
    357		output->grph.rgb.max_uncompressed_blk_size = 256;
    358		output->grph.rgb.max_compressed_blk_size = 64;
    359		output->grph.rgb.independent_64b_blks = true;
    360		output->grph.rgb.dcc_controls.dcc_256_64_64 = 1;
    361		break;
    362	case dcc_control__256_128_128:
    363		output->grph.rgb.max_uncompressed_blk_size = 256;
    364		output->grph.rgb.max_compressed_blk_size = 128;
    365		output->grph.rgb.independent_64b_blks = false;
    366		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
    367		break;
    368	}
    369	output->capable = true;
    370	output->const_color_support = true;
    371
    372	return true;
    373}
    374
    375void hubbub3_force_wm_propagate_to_pipes(struct hubbub *hubbub)
    376{
    377	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
    378	uint32_t refclk_mhz = hubbub->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000;
    379	uint32_t prog_wm_value = convert_and_clamp(hubbub1->watermarks.a.urgent_ns,
    380			refclk_mhz, 0x1fffff);
    381
    382	REG_SET_2(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0,
    383			DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value,
    384			DCHUBBUB_ARB_VM_ROW_URGENCY_WATERMARK_A, prog_wm_value);
    385}
    386
    387void hubbub3_force_pstate_change_control(struct hubbub *hubbub,
    388		bool force, bool allow)
    389{
    390	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
    391
    392	REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL,
    393			DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, allow,
    394			DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, force);
    395}
    396
    397/* Copy values from WM set A to all other sets */
    398void hubbub3_init_watermarks(struct hubbub *hubbub)
    399{
    400	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
    401	uint32_t reg;
    402
    403	reg = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A);
    404	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, reg);
    405	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, reg);
    406	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, reg);
    407
    408	reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A);
    409	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, reg);
    410	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C, reg);
    411	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D, reg);
    412
    413	reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A);
    414	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, reg);
    415	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C, reg);
    416	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, reg);
    417
    418	reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A);
    419	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, reg);
    420	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, reg);
    421	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, reg);
    422
    423	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
    424	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, reg);
    425	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, reg);
    426	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, reg);
    427
    428	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
    429	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, reg);
    430	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, reg);
    431	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, reg);
    432
    433	reg = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
    434	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, reg);
    435	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, reg);
    436	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg);
    437}
    438
    439static const struct hubbub_funcs hubbub30_funcs = {
    440	.update_dchub = hubbub2_update_dchub,
    441	.init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx,
    442	.init_vm_ctx = hubbub2_init_vm_ctx,
    443	.dcc_support_swizzle = hubbub3_dcc_support_swizzle,
    444	.dcc_support_pixel_format = hubbub2_dcc_support_pixel_format,
    445	.get_dcc_compression_cap = hubbub3_get_dcc_compression_cap,
    446	.wm_read_state = hubbub21_wm_read_state,
    447	.get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
    448	.program_watermarks = hubbub3_program_watermarks,
    449	.allow_self_refresh_control = hubbub1_allow_self_refresh_control,
    450	.is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
    451	.verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high,
    452	.force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes,
    453	.force_pstate_change_control = hubbub3_force_pstate_change_control,
    454	.init_watermarks = hubbub3_init_watermarks,
    455	.hubbub_read_state = hubbub2_read_state,
    456};
    457
    458void hubbub3_construct(struct dcn20_hubbub *hubbub3,
    459	struct dc_context *ctx,
    460	const struct dcn_hubbub_registers *hubbub_regs,
    461	const struct dcn_hubbub_shift *hubbub_shift,
    462	const struct dcn_hubbub_mask *hubbub_mask)
    463{
    464	hubbub3->base.ctx = ctx;
    465	hubbub3->base.funcs = &hubbub30_funcs;
    466	hubbub3->regs = hubbub_regs;
    467	hubbub3->shifts = hubbub_shift;
    468	hubbub3->masks = hubbub_mask;
    469
    470	hubbub3->debug_test_index_pstate = 0xB;
    471	hubbub3->detile_buf_size = 184 * 1024; /* 184KB for DCN3 */
    472}
    473