dcn30_fpu.c (26222B)
1/* 2 * Copyright 2020-2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25#include "resource.h" 26#include "clk_mgr.h" 27#include "reg_helper.h" 28#include "dcn_calc_math.h" 29#include "dcn20/dcn20_resource.h" 30#include "dcn30/dcn30_resource.h" 31 32 33#include "display_mode_vba_30.h" 34#include "dcn30_fpu.h" 35 36#define REG(reg)\ 37 optc1->tg_regs->reg 38 39#define CTX \ 40 optc1->base.ctx 41 42#undef FN 43#define FN(reg_name, field_name) \ 44 optc1->tg_shift->field_name, optc1->tg_mask->field_name 45 46 47struct _vcs_dpi_ip_params_st dcn3_0_ip = { 48 .use_min_dcfclk = 0, 49 .clamp_min_dcfclk = 0, 50 .odm_capable = 1, 51 .gpuvm_enable = 0, 52 .hostvm_enable = 0, 53 .gpuvm_max_page_table_levels = 4, 54 .hostvm_max_page_table_levels = 4, 55 .hostvm_cached_page_table_levels = 0, 56 .pte_group_size_bytes = 2048, 57 .num_dsc = 6, 58 .rob_buffer_size_kbytes = 184, 59 .det_buffer_size_kbytes = 184, 60 .dpte_buffer_size_in_pte_reqs_luma = 84, 61 .pde_proc_buffer_size_64k_reqs = 48, 62 .dpp_output_buffer_pixels = 2560, 63 .opp_output_buffer_lines = 1, 64 .pixel_chunk_size_kbytes = 8, 65 .pte_enable = 1, 66 .max_page_table_levels = 2, 67 .pte_chunk_size_kbytes = 2, // ? 68 .meta_chunk_size_kbytes = 2, 69 .writeback_chunk_size_kbytes = 8, 70 .line_buffer_size_bits = 789504, 71 .is_line_buffer_bpp_fixed = 0, // ? 72 .line_buffer_fixed_bpp = 0, // ? 73 .dcc_supported = true, 74 .writeback_interface_buffer_size_kbytes = 90, 75 .writeback_line_buffer_buffer_size = 0, 76 .max_line_buffer_lines = 12, 77 .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 78 .writeback_chroma_buffer_size_kbytes = 8, 79 .writeback_chroma_line_buffer_width_pixels = 4, 80 .writeback_max_hscl_ratio = 1, 81 .writeback_max_vscl_ratio = 1, 82 .writeback_min_hscl_ratio = 1, 83 .writeback_min_vscl_ratio = 1, 84 .writeback_max_hscl_taps = 1, 85 .writeback_max_vscl_taps = 1, 86 .writeback_line_buffer_luma_buffer_size = 0, 87 .writeback_line_buffer_chroma_buffer_size = 14643, 88 .cursor_buffer_size = 8, 89 .cursor_chunk_size = 2, 90 .max_num_otg = 6, 91 .max_num_dpp = 6, 92 .max_num_wb = 1, 93 .max_dchub_pscl_bw_pix_per_clk = 4, 94 .max_pscl_lb_bw_pix_per_clk = 2, 95 .max_lb_vscl_bw_pix_per_clk = 4, 96 .max_vscl_hscl_bw_pix_per_clk = 4, 97 .max_hscl_ratio = 6, 98 .max_vscl_ratio = 6, 99 .hscl_mults = 4, 100 .vscl_mults = 4, 101 .max_hscl_taps = 8, 102 .max_vscl_taps = 8, 103 .dispclk_ramp_margin_percent = 1, 104 .underscan_factor = 1.11, 105 .min_vblank_lines = 32, 106 .dppclk_delay_subtotal = 46, 107 .dynamic_metadata_vm_enabled = true, 108 .dppclk_delay_scl_lb_only = 16, 109 .dppclk_delay_scl = 50, 110 .dppclk_delay_cnvc_formatter = 27, 111 .dppclk_delay_cnvc_cursor = 6, 112 .dispclk_delay_subtotal = 119, 113 .dcfclk_cstate_latency = 5.2, // SRExitTime 114 .max_inter_dcn_tile_repeaters = 8, 115 .max_num_hdmi_frl_outputs = 1, 116 .odm_combine_4to1_supported = true, 117 118 .xfc_supported = false, 119 .xfc_fill_bw_overhead_percent = 10.0, 120 .xfc_fill_constant_bytes = 0, 121 .gfx7_compat_tiling_supported = 0, 122 .number_of_cursors = 1, 123}; 124 125struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { 126 .clock_limits = { 127 { 128 .state = 0, 129 .dispclk_mhz = 562.0, 130 .dppclk_mhz = 300.0, 131 .phyclk_mhz = 300.0, 132 .phyclk_d18_mhz = 667.0, 133 .dscclk_mhz = 405.6, 134 }, 135 }, 136 137 .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ 138 .num_states = 1, 139 .sr_exit_time_us = 15.5, 140 .sr_enter_plus_exit_time_us = 20, 141 .urgent_latency_us = 4.0, 142 .urgent_latency_pixel_data_only_us = 4.0, 143 .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 144 .urgent_latency_vm_data_only_us = 4.0, 145 .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 146 .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 147 .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 148 .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, 149 .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 150 .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, 151 .max_avg_sdp_bw_use_normal_percent = 60.0, 152 .max_avg_dram_bw_use_normal_percent = 40.0, 153 .writeback_latency_us = 12.0, 154 .max_request_size_bytes = 256, 155 .fabric_datapath_to_dcn_data_return_bytes = 64, 156 .dcn_downspread_percent = 0.5, 157 .downspread_percent = 0.38, 158 .dram_page_open_time_ns = 50.0, 159 .dram_rw_turnaround_time_ns = 17.5, 160 .dram_return_buffer_per_channel_bytes = 8192, 161 .round_trip_ping_latency_dcfclk_cycles = 191, 162 .urgent_out_of_order_return_per_channel_bytes = 4096, 163 .channel_interleave_bytes = 256, 164 .num_banks = 8, 165 .gpuvm_min_page_size_bytes = 4096, 166 .hostvm_min_page_size_bytes = 4096, 167 .dram_clock_change_latency_us = 404, 168 .dummy_pstate_latency_us = 5, 169 .writeback_dram_clock_change_latency_us = 23.0, 170 .return_bus_width_bytes = 64, 171 .dispclk_dppclk_vco_speed_mhz = 3650, 172 .xfc_bus_transport_time_us = 20, // ? 173 .xfc_xbuf_latency_tolerance_us = 4, // ? 174 .use_urgent_burst_bw = 1, // ? 175 .do_urgent_latency_adjustment = true, 176 .urgent_latency_adjustment_fabric_clock_component_us = 1.0, 177 .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, 178}; 179 180 181void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, 182 double vtotal_avg) 183{ 184struct optc *optc1 = DCN10TG_FROM_TG(optc); 185 double vtotal_min, vtotal_max; 186 double ratio, modulo, phase; 187 uint32_t vblank_start; 188 uint32_t v_total_mask_value = 0; 189 190 dc_assert_fp_enabled(); 191 192 /* Compute VTOTAL_MIN and VTOTAL_MAX, so that 193 * VOTAL_MAX - VTOTAL_MIN = 1 194 */ 195 v_total_mask_value = 16; 196 vtotal_min = dcn_bw_floor(vtotal_avg); 197 vtotal_max = dcn_bw_ceil(vtotal_avg); 198 199 /* Check that bottom VBLANK is at least 2 lines tall when running with 200 * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number 201 * of lines in a frame - 1'. 202 */ 203 REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START, 204 &vblank_start); 205 ASSERT(vtotal_min >= vblank_start + 1); 206 207 /* Special case where the average frame rate can be achieved 208 * without using the DTO 209 */ 210 if (vtotal_min == vtotal_max) { 211 REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); 212 213 optc->funcs->set_vtotal_min_max(optc, 0, 0); 214 REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); 215 REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); 216 REG_UPDATE_3(OTG_V_TOTAL_CONTROL, 217 OTG_V_TOTAL_MIN_SEL, 0, 218 OTG_V_TOTAL_MAX_SEL, 0, 219 OTG_SET_V_TOTAL_MIN_MASK_EN, 0); 220 return; 221 } 222 223 ratio = vtotal_max - vtotal_avg; 224 modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */ 225 phase = ratio * modulo; 226 227 /* Special cases where the DTO phase gets rounded to 0 or 228 * to DTO modulo 229 */ 230 if (phase <= 0 || phase >= modulo) { 231 REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, 232 phase <= 0 ? 233 (uint32_t)vtotal_max : (uint32_t)vtotal_min); 234 REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0); 235 REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0); 236 REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); 237 REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); 238 REG_UPDATE_3(OTG_V_TOTAL_CONTROL, 239 OTG_V_TOTAL_MIN_SEL, 0, 240 OTG_V_TOTAL_MAX_SEL, 0, 241 OTG_SET_V_TOTAL_MIN_MASK_EN, 0); 242 return; 243 } 244 REG_UPDATE_6(OTG_V_TOTAL_CONTROL, 245 OTG_V_TOTAL_MIN_SEL, 1, 246 OTG_V_TOTAL_MAX_SEL, 1, 247 OTG_SET_V_TOTAL_MIN_MASK_EN, 1, 248 OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value, 249 OTG_VTOTAL_MID_REPLACING_MIN_EN, 0, 250 OTG_VTOTAL_MID_REPLACING_MAX_EN, 0); 251 REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); 252 optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max); 253 REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase); 254 REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo); 255} 256 257void dcn30_fpu_populate_dml_writeback_from_context( 258 struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) 259{ 260 int pipe_cnt, i, j; 261 double max_calc_writeback_dispclk; 262 double writeback_dispclk; 263 struct writeback_st dout_wb; 264 265 dc_assert_fp_enabled(); 266 267 for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { 268 struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; 269 270 if (!stream) 271 continue; 272 max_calc_writeback_dispclk = 0; 273 274 /* Set writeback information */ 275 pipes[pipe_cnt].dout.wb_enable = 0; 276 pipes[pipe_cnt].dout.num_active_wb = 0; 277 for (j = 0; j < stream->num_wb_info; j++) { 278 struct dc_writeback_info *wb_info = &stream->writeback_info[j]; 279 280 if (wb_info->wb_enabled && wb_info->writeback_source_plane && 281 (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { 282 pipes[pipe_cnt].dout.wb_enable = 1; 283 pipes[pipe_cnt].dout.num_active_wb++; 284 dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? 285 wb_info->dwb_params.cnv_params.crop_height : 286 wb_info->dwb_params.cnv_params.src_height; 287 dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? 288 wb_info->dwb_params.cnv_params.crop_width : 289 wb_info->dwb_params.cnv_params.src_width; 290 dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; 291 dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; 292 293 /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ 294 if (dc->dml.ip.writeback_max_hscl_taps > 1) { 295 dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; 296 dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; 297 } else { 298 dout_wb.wb_htaps_luma = 1; 299 dout_wb.wb_vtaps_luma = 1; 300 } 301 dout_wb.wb_htaps_chroma = 0; 302 dout_wb.wb_vtaps_chroma = 0; 303 dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? 304 (double)wb_info->dwb_params.cnv_params.crop_width / 305 (double)wb_info->dwb_params.dest_width : 306 (double)wb_info->dwb_params.cnv_params.src_width / 307 (double)wb_info->dwb_params.dest_width; 308 dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? 309 (double)wb_info->dwb_params.cnv_params.crop_height / 310 (double)wb_info->dwb_params.dest_height : 311 (double)wb_info->dwb_params.cnv_params.src_height / 312 (double)wb_info->dwb_params.dest_height; 313 if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || 314 wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) 315 dout_wb.wb_pixel_format = dm_444_64; 316 else 317 dout_wb.wb_pixel_format = dm_444_32; 318 319 /* Workaround for cases where multiple writebacks are connected to same plane 320 * In which case, need to compute worst case and set the associated writeback parameters 321 * This workaround is necessary due to DML computation assuming only 1 set of writeback 322 * parameters per pipe 323 */ 324 writeback_dispclk = dml30_CalculateWriteBackDISPCLK( 325 dout_wb.wb_pixel_format, 326 pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, 327 dout_wb.wb_hratio, 328 dout_wb.wb_vratio, 329 dout_wb.wb_htaps_luma, 330 dout_wb.wb_vtaps_luma, 331 dout_wb.wb_src_width, 332 dout_wb.wb_dst_width, 333 pipes[pipe_cnt].pipe.dest.htotal, 334 dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); 335 336 if (writeback_dispclk > max_calc_writeback_dispclk) { 337 max_calc_writeback_dispclk = writeback_dispclk; 338 pipes[pipe_cnt].dout.wb = dout_wb; 339 } 340 } 341 } 342 343 pipe_cnt++; 344 } 345} 346 347void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, 348 struct display_mode_lib *dml, 349 display_e2e_pipe_params_st *pipes, 350 int pipe_cnt, 351 int cur_pipe) 352{ 353 int i; 354 355 dc_assert_fp_enabled(); 356 357 for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) { 358 wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; 359 wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000; 360 } 361 362 wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ 363} 364 365void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) 366{ 367 368dc_assert_fp_enabled(); 369 370if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { 371 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; 372 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; 373 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; 374 } 375} 376 377void dcn30_fpu_calculate_wm_and_dlg( 378 struct dc *dc, struct dc_state *context, 379 display_e2e_pipe_params_st *pipes, 380 int pipe_cnt, 381 int vlevel) 382{ 383int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; 384 int i, pipe_idx; 385 double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; 386 bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; 387 388dc_assert_fp_enabled(); 389 390 if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) 391 dcfclk = context->bw_ctx.dml.soc.min_dcfclk; 392 393 pipes[0].clks_cfg.voltage = vlevel; 394 pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 395 pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; 396 397 /* Set B: 398 * DCFCLK: 1GHz or min required above 1GHz 399 * FCLK/UCLK: Max 400 */ 401 if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { 402 if (vlevel == 0) { 403 pipes[0].clks_cfg.voltage = 1; 404 pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; 405 } 406 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; 407 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; 408 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; 409 } 410 context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 411 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 412 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 413 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 414 context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 415 context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 416 context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 417 context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 418 419 pipes[0].clks_cfg.voltage = vlevel; 420 pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 421 422 /* Set D: 423 * DCFCLK: Min Required 424 * FCLK(proportional to UCLK): 1GHz or Max 425 * MALL stutter, sr_enter_exit = 4, sr_exit = 2us 426 */ 427 /* 428 if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { 429 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; 430 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; 431 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; 432 } 433 context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 434 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 435 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 436 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 437 context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 438 context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 439 context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 440 context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 441 */ 442 443 /* Set C: 444 * DCFCLK: Min Required 445 * FCLK(proportional to UCLK): 1GHz or Max 446 * pstate latency overridden to 5us 447 */ 448 if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { 449 unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; 450 unsigned int min_dram_speed_mts_margin = 160; 451 452 if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) 453 min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; 454 455 /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ 456 for (i = 3; i > 0; i--) 457 if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) 458 break; 459 460 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; 461 462 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; 463 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; 464 } 465 466 context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 467 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 468 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 469 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 470 context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 471 context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 472 context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 473 context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 474 475 if (!pstate_en) { 476 /* The only difference between A and C is p-state latency, if p-state is not supported we want to 477 * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark 478 */ 479 context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; 480 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; 481 } else { 482 /* Set A: 483 * DCFCLK: Min Required 484 * FCLK(proportional to UCLK): 1GHz or Max 485 * 486 * Set A calculated last so that following calculations are based on Set A 487 */ 488 dc->res_pool->funcs->update_soc_for_wm_a(dc, context); 489 context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 490 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 491 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 492 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 493 context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 494 context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 495 context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 496 context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 497 } 498 499 context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; 500 501 /* Make set D = set A until set D is enabled */ 502 context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; 503 504 for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { 505 if (!context->res_ctx.pipe_ctx[i].stream) 506 continue; 507 508 pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); 509 pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); 510 511 if (dc->config.forced_clocks) { 512 pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; 513 pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; 514 } 515 if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) 516 pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; 517 if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) 518 pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; 519 520 pipe_idx++; 521 } 522 523 DC_FP_START(); 524 dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); 525 DC_FP_END(); 526 527 if (!pstate_en) 528 /* Restore full p-state latency */ 529 context->bw_ctx.dml.soc.dram_clock_change_latency_us = 530 dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; 531 532} 533 534void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc) 535{ 536 dc_assert_fp_enabled(); 537 538 if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) 539 dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; 540} 541 542void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk) 543{ 544 dc_assert_fp_enabled(); 545 546 if (!dcn30_bb_max_clk->max_dcfclk_mhz) 547 dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; 548 if (!dcn30_bb_max_clk->max_dispclk_mhz) 549 dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; 550 if (!dcn30_bb_max_clk->max_dppclk_mhz) 551 dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; 552 if (!dcn30_bb_max_clk->max_phyclk_mhz) 553 dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; 554} 555 556void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, 557 unsigned int *optimal_dcfclk, 558 unsigned int *optimal_fclk) 559{ 560 double bw_from_dram, bw_from_dram1, bw_from_dram2; 561 562 dc_assert_fp_enabled(); 563 564 bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * 565 dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); 566 bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * 567 dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); 568 569 bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; 570 571 if (optimal_fclk) 572 *optimal_fclk = bw_from_dram / 573 (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); 574 575 if (optimal_dcfclk) 576 *optimal_dcfclk = bw_from_dram / 577 (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); 578} 579 580void dcn30_fpu_update_bw_bounding_box(struct dc *dc, 581 struct clk_bw_params *bw_params, 582 struct dc_bounding_box_max_clk *dcn30_bb_max_clk, 583 unsigned int *dcfclk_mhz, 584 unsigned int *dram_speed_mts) 585{ 586 unsigned int i; 587 588 dc_assert_fp_enabled(); 589 590 dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 591 dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 592 593 for (i = 0; i < dcn3_0_soc.num_states; i++) { 594 dcn3_0_soc.clock_limits[i].state = i; 595 dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; 596 dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; 597 dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; 598 599 /* Fill all states with max values of all other clocks */ 600 dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz; 601 dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz; 602 dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz; 603 dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; 604 /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ 605 /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ 606 dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; 607 dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; 608 dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; 609 } 610 /* re-init DML with updated bb */ 611 dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); 612 if (dc->current_state) 613 dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); 614 615} 616 617