dcn31_fpu.c (32538B)
1/* 2 * Copyright 2019-2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26#include "resource.h" 27#include "clk_mgr.h" 28 29#include "dml/dcn20/dcn20_fpu.h" 30#include "dcn31_fpu.h" 31 32/** 33 * DOC: DCN31x FPU manipulation Overview 34 * 35 * The DCN architecture relies on FPU operations, which require special 36 * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we 37 * want to avoid spreading FPU access across multiple files. With this idea in 38 * mind, this file aims to centralize all DCN3.1.x functions that require FPU 39 * access in a single place. Code in this file follows the following code 40 * pattern: 41 * 42 * 1. Functions that use FPU operations should be isolated in static functions. 43 * 2. The FPU functions should have the noinline attribute to ensure anything 44 * that deals with FP register is contained within this call. 45 * 3. All function that needs to be accessed outside this file requires a 46 * public interface that not uses any FPU reference. 47 * 4. Developers **must not** use DC_FP_START/END in this file, but they need 48 * to ensure that the caller invokes it before access any function available 49 * in this file. For this reason, public functions in this file must invoke 50 * dc_assert_fp_enabled(); 51 */ 52 53struct _vcs_dpi_ip_params_st dcn3_1_ip = { 54 .gpuvm_enable = 1, 55 .gpuvm_max_page_table_levels = 1, 56 .hostvm_enable = 1, 57 .hostvm_max_page_table_levels = 2, 58 .rob_buffer_size_kbytes = 64, 59 .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, 60 .config_return_buffer_size_in_kbytes = 1792, 61 .compressed_buffer_segment_size_in_kbytes = 64, 62 .meta_fifo_size_in_kentries = 32, 63 .zero_size_buffer_entries = 512, 64 .compbuf_reserved_space_64b = 256, 65 .compbuf_reserved_space_zs = 64, 66 .dpp_output_buffer_pixels = 2560, 67 .opp_output_buffer_lines = 1, 68 .pixel_chunk_size_kbytes = 8, 69 .meta_chunk_size_kbytes = 2, 70 .min_meta_chunk_size_bytes = 256, 71 .writeback_chunk_size_kbytes = 8, 72 .ptoi_supported = false, 73 .num_dsc = 3, 74 .maximum_dsc_bits_per_component = 10, 75 .dsc422_native_support = false, 76 .is_line_buffer_bpp_fixed = true, 77 .line_buffer_fixed_bpp = 48, 78 .line_buffer_size_bits = 789504, 79 .max_line_buffer_lines = 12, 80 .writeback_interface_buffer_size_kbytes = 90, 81 .max_num_dpp = 4, 82 .max_num_otg = 4, 83 .max_num_hdmi_frl_outputs = 1, 84 .max_num_wb = 1, 85 .max_dchub_pscl_bw_pix_per_clk = 4, 86 .max_pscl_lb_bw_pix_per_clk = 2, 87 .max_lb_vscl_bw_pix_per_clk = 4, 88 .max_vscl_hscl_bw_pix_per_clk = 4, 89 .max_hscl_ratio = 6, 90 .max_vscl_ratio = 6, 91 .max_hscl_taps = 8, 92 .max_vscl_taps = 8, 93 .dpte_buffer_size_in_pte_reqs_luma = 64, 94 .dpte_buffer_size_in_pte_reqs_chroma = 34, 95 .dispclk_ramp_margin_percent = 1, 96 .max_inter_dcn_tile_repeaters = 8, 97 .cursor_buffer_size = 16, 98 .cursor_chunk_size = 2, 99 .writeback_line_buffer_buffer_size = 0, 100 .writeback_min_hscl_ratio = 1, 101 .writeback_min_vscl_ratio = 1, 102 .writeback_max_hscl_ratio = 1, 103 .writeback_max_vscl_ratio = 1, 104 .writeback_max_hscl_taps = 1, 105 .writeback_max_vscl_taps = 1, 106 .dppclk_delay_subtotal = 46, 107 .dppclk_delay_scl = 50, 108 .dppclk_delay_scl_lb_only = 16, 109 .dppclk_delay_cnvc_formatter = 27, 110 .dppclk_delay_cnvc_cursor = 6, 111 .dispclk_delay_subtotal = 119, 112 .dynamic_metadata_vm_enabled = false, 113 .odm_combine_4to1_supported = false, 114 .dcc_supported = true, 115}; 116 117struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { 118 /*TODO: correct dispclk/dppclk voltage level determination*/ 119 .clock_limits = { 120 { 121 .state = 0, 122 .dispclk_mhz = 1200.0, 123 .dppclk_mhz = 1200.0, 124 .phyclk_mhz = 600.0, 125 .phyclk_d18_mhz = 667.0, 126 .dscclk_mhz = 186.0, 127 .dtbclk_mhz = 625.0, 128 }, 129 { 130 .state = 1, 131 .dispclk_mhz = 1200.0, 132 .dppclk_mhz = 1200.0, 133 .phyclk_mhz = 810.0, 134 .phyclk_d18_mhz = 667.0, 135 .dscclk_mhz = 209.0, 136 .dtbclk_mhz = 625.0, 137 }, 138 { 139 .state = 2, 140 .dispclk_mhz = 1200.0, 141 .dppclk_mhz = 1200.0, 142 .phyclk_mhz = 810.0, 143 .phyclk_d18_mhz = 667.0, 144 .dscclk_mhz = 209.0, 145 .dtbclk_mhz = 625.0, 146 }, 147 { 148 .state = 3, 149 .dispclk_mhz = 1200.0, 150 .dppclk_mhz = 1200.0, 151 .phyclk_mhz = 810.0, 152 .phyclk_d18_mhz = 667.0, 153 .dscclk_mhz = 371.0, 154 .dtbclk_mhz = 625.0, 155 }, 156 { 157 .state = 4, 158 .dispclk_mhz = 1200.0, 159 .dppclk_mhz = 1200.0, 160 .phyclk_mhz = 810.0, 161 .phyclk_d18_mhz = 667.0, 162 .dscclk_mhz = 417.0, 163 .dtbclk_mhz = 625.0, 164 }, 165 }, 166 .num_states = 5, 167 .sr_exit_time_us = 9.0, 168 .sr_enter_plus_exit_time_us = 11.0, 169 .sr_exit_z8_time_us = 442.0, 170 .sr_enter_plus_exit_z8_time_us = 560.0, 171 .writeback_latency_us = 12.0, 172 .dram_channel_width_bytes = 4, 173 .round_trip_ping_latency_dcfclk_cycles = 106, 174 .urgent_latency_pixel_data_only_us = 4.0, 175 .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 176 .urgent_latency_vm_data_only_us = 4.0, 177 .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 178 .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 179 .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 180 .pct_ideal_sdp_bw_after_urgent = 80.0, 181 .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, 182 .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 183 .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, 184 .max_avg_sdp_bw_use_normal_percent = 60.0, 185 .max_avg_dram_bw_use_normal_percent = 60.0, 186 .fabric_datapath_to_dcn_data_return_bytes = 32, 187 .return_bus_width_bytes = 64, 188 .downspread_percent = 0.38, 189 .dcn_downspread_percent = 0.5, 190 .gpuvm_min_page_size_bytes = 4096, 191 .hostvm_min_page_size_bytes = 4096, 192 .do_urgent_latency_adjustment = false, 193 .urgent_latency_adjustment_fabric_clock_component_us = 0, 194 .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, 195}; 196 197struct _vcs_dpi_ip_params_st dcn3_15_ip = { 198 .gpuvm_enable = 1, 199 .gpuvm_max_page_table_levels = 1, 200 .hostvm_enable = 1, 201 .hostvm_max_page_table_levels = 2, 202 .rob_buffer_size_kbytes = 64, 203 .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, 204 .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB, 205 .config_return_buffer_size_in_kbytes = 1024, 206 .compressed_buffer_segment_size_in_kbytes = 64, 207 .meta_fifo_size_in_kentries = 32, 208 .zero_size_buffer_entries = 512, 209 .compbuf_reserved_space_64b = 256, 210 .compbuf_reserved_space_zs = 64, 211 .dpp_output_buffer_pixels = 2560, 212 .opp_output_buffer_lines = 1, 213 .pixel_chunk_size_kbytes = 8, 214 .meta_chunk_size_kbytes = 2, 215 .min_meta_chunk_size_bytes = 256, 216 .writeback_chunk_size_kbytes = 8, 217 .ptoi_supported = false, 218 .num_dsc = 3, 219 .maximum_dsc_bits_per_component = 10, 220 .dsc422_native_support = false, 221 .is_line_buffer_bpp_fixed = true, 222 .line_buffer_fixed_bpp = 49, 223 .line_buffer_size_bits = 789504, 224 .max_line_buffer_lines = 12, 225 .writeback_interface_buffer_size_kbytes = 90, 226 .max_num_dpp = 4, 227 .max_num_otg = 4, 228 .max_num_hdmi_frl_outputs = 1, 229 .max_num_wb = 1, 230 .max_dchub_pscl_bw_pix_per_clk = 4, 231 .max_pscl_lb_bw_pix_per_clk = 2, 232 .max_lb_vscl_bw_pix_per_clk = 4, 233 .max_vscl_hscl_bw_pix_per_clk = 4, 234 .max_hscl_ratio = 6, 235 .max_vscl_ratio = 6, 236 .max_hscl_taps = 8, 237 .max_vscl_taps = 8, 238 .dpte_buffer_size_in_pte_reqs_luma = 64, 239 .dpte_buffer_size_in_pte_reqs_chroma = 34, 240 .dispclk_ramp_margin_percent = 1, 241 .max_inter_dcn_tile_repeaters = 9, 242 .cursor_buffer_size = 16, 243 .cursor_chunk_size = 2, 244 .writeback_line_buffer_buffer_size = 0, 245 .writeback_min_hscl_ratio = 1, 246 .writeback_min_vscl_ratio = 1, 247 .writeback_max_hscl_ratio = 1, 248 .writeback_max_vscl_ratio = 1, 249 .writeback_max_hscl_taps = 1, 250 .writeback_max_vscl_taps = 1, 251 .dppclk_delay_subtotal = 46, 252 .dppclk_delay_scl = 50, 253 .dppclk_delay_scl_lb_only = 16, 254 .dppclk_delay_cnvc_formatter = 27, 255 .dppclk_delay_cnvc_cursor = 6, 256 .dispclk_delay_subtotal = 119, 257 .dynamic_metadata_vm_enabled = false, 258 .odm_combine_4to1_supported = false, 259 .dcc_supported = true, 260}; 261 262struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { 263 .sr_exit_time_us = 9.0, 264 .sr_enter_plus_exit_time_us = 11.0, 265 .sr_exit_z8_time_us = 50.0, 266 .sr_enter_plus_exit_z8_time_us = 50.0, 267 .writeback_latency_us = 12.0, 268 .dram_channel_width_bytes = 4, 269 .round_trip_ping_latency_dcfclk_cycles = 106, 270 .urgent_latency_pixel_data_only_us = 4.0, 271 .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 272 .urgent_latency_vm_data_only_us = 4.0, 273 .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 274 .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 275 .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 276 .pct_ideal_sdp_bw_after_urgent = 80.0, 277 .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, 278 .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 279 .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, 280 .max_avg_sdp_bw_use_normal_percent = 60.0, 281 .max_avg_dram_bw_use_normal_percent = 60.0, 282 .fabric_datapath_to_dcn_data_return_bytes = 32, 283 .return_bus_width_bytes = 64, 284 .downspread_percent = 0.38, 285 .dcn_downspread_percent = 0.38, 286 .gpuvm_min_page_size_bytes = 4096, 287 .hostvm_min_page_size_bytes = 4096, 288 .do_urgent_latency_adjustment = false, 289 .urgent_latency_adjustment_fabric_clock_component_us = 0, 290 .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, 291}; 292 293struct _vcs_dpi_ip_params_st dcn3_16_ip = { 294 .gpuvm_enable = 1, 295 .gpuvm_max_page_table_levels = 1, 296 .hostvm_enable = 1, 297 .hostvm_max_page_table_levels = 2, 298 .rob_buffer_size_kbytes = 64, 299 .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, 300 .config_return_buffer_size_in_kbytes = 1024, 301 .compressed_buffer_segment_size_in_kbytes = 64, 302 .meta_fifo_size_in_kentries = 32, 303 .zero_size_buffer_entries = 512, 304 .compbuf_reserved_space_64b = 256, 305 .compbuf_reserved_space_zs = 64, 306 .dpp_output_buffer_pixels = 2560, 307 .opp_output_buffer_lines = 1, 308 .pixel_chunk_size_kbytes = 8, 309 .meta_chunk_size_kbytes = 2, 310 .min_meta_chunk_size_bytes = 256, 311 .writeback_chunk_size_kbytes = 8, 312 .ptoi_supported = false, 313 .num_dsc = 3, 314 .maximum_dsc_bits_per_component = 10, 315 .dsc422_native_support = false, 316 .is_line_buffer_bpp_fixed = true, 317 .line_buffer_fixed_bpp = 48, 318 .line_buffer_size_bits = 789504, 319 .max_line_buffer_lines = 12, 320 .writeback_interface_buffer_size_kbytes = 90, 321 .max_num_dpp = 4, 322 .max_num_otg = 4, 323 .max_num_hdmi_frl_outputs = 1, 324 .max_num_wb = 1, 325 .max_dchub_pscl_bw_pix_per_clk = 4, 326 .max_pscl_lb_bw_pix_per_clk = 2, 327 .max_lb_vscl_bw_pix_per_clk = 4, 328 .max_vscl_hscl_bw_pix_per_clk = 4, 329 .max_hscl_ratio = 6, 330 .max_vscl_ratio = 6, 331 .max_hscl_taps = 8, 332 .max_vscl_taps = 8, 333 .dpte_buffer_size_in_pte_reqs_luma = 64, 334 .dpte_buffer_size_in_pte_reqs_chroma = 34, 335 .dispclk_ramp_margin_percent = 1, 336 .max_inter_dcn_tile_repeaters = 8, 337 .cursor_buffer_size = 16, 338 .cursor_chunk_size = 2, 339 .writeback_line_buffer_buffer_size = 0, 340 .writeback_min_hscl_ratio = 1, 341 .writeback_min_vscl_ratio = 1, 342 .writeback_max_hscl_ratio = 1, 343 .writeback_max_vscl_ratio = 1, 344 .writeback_max_hscl_taps = 1, 345 .writeback_max_vscl_taps = 1, 346 .dppclk_delay_subtotal = 46, 347 .dppclk_delay_scl = 50, 348 .dppclk_delay_scl_lb_only = 16, 349 .dppclk_delay_cnvc_formatter = 27, 350 .dppclk_delay_cnvc_cursor = 6, 351 .dispclk_delay_subtotal = 119, 352 .dynamic_metadata_vm_enabled = false, 353 .odm_combine_4to1_supported = false, 354 .dcc_supported = true, 355}; 356 357struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { 358 /*TODO: correct dispclk/dppclk voltage level determination*/ 359 .clock_limits = { 360 { 361 .state = 0, 362 .dispclk_mhz = 556.0, 363 .dppclk_mhz = 556.0, 364 .phyclk_mhz = 600.0, 365 .phyclk_d18_mhz = 445.0, 366 .dscclk_mhz = 186.0, 367 .dtbclk_mhz = 625.0, 368 }, 369 { 370 .state = 1, 371 .dispclk_mhz = 625.0, 372 .dppclk_mhz = 625.0, 373 .phyclk_mhz = 810.0, 374 .phyclk_d18_mhz = 667.0, 375 .dscclk_mhz = 209.0, 376 .dtbclk_mhz = 625.0, 377 }, 378 { 379 .state = 2, 380 .dispclk_mhz = 625.0, 381 .dppclk_mhz = 625.0, 382 .phyclk_mhz = 810.0, 383 .phyclk_d18_mhz = 667.0, 384 .dscclk_mhz = 209.0, 385 .dtbclk_mhz = 625.0, 386 }, 387 { 388 .state = 3, 389 .dispclk_mhz = 1112.0, 390 .dppclk_mhz = 1112.0, 391 .phyclk_mhz = 810.0, 392 .phyclk_d18_mhz = 667.0, 393 .dscclk_mhz = 371.0, 394 .dtbclk_mhz = 625.0, 395 }, 396 { 397 .state = 4, 398 .dispclk_mhz = 1250.0, 399 .dppclk_mhz = 1250.0, 400 .phyclk_mhz = 810.0, 401 .phyclk_d18_mhz = 667.0, 402 .dscclk_mhz = 417.0, 403 .dtbclk_mhz = 625.0, 404 }, 405 }, 406 .num_states = 5, 407 .sr_exit_time_us = 9.0, 408 .sr_enter_plus_exit_time_us = 11.0, 409 .sr_exit_z8_time_us = 442.0, 410 .sr_enter_plus_exit_z8_time_us = 560.0, 411 .writeback_latency_us = 12.0, 412 .dram_channel_width_bytes = 4, 413 .round_trip_ping_latency_dcfclk_cycles = 106, 414 .urgent_latency_pixel_data_only_us = 4.0, 415 .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 416 .urgent_latency_vm_data_only_us = 4.0, 417 .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 418 .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 419 .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 420 .pct_ideal_sdp_bw_after_urgent = 80.0, 421 .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, 422 .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 423 .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, 424 .max_avg_sdp_bw_use_normal_percent = 60.0, 425 .max_avg_dram_bw_use_normal_percent = 60.0, 426 .fabric_datapath_to_dcn_data_return_bytes = 32, 427 .return_bus_width_bytes = 64, 428 .downspread_percent = 0.38, 429 .dcn_downspread_percent = 0.5, 430 .gpuvm_min_page_size_bytes = 4096, 431 .hostvm_min_page_size_bytes = 4096, 432 .do_urgent_latency_adjustment = false, 433 .urgent_latency_adjustment_fabric_clock_component_us = 0, 434 .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, 435}; 436 437void dcn31_calculate_wm_and_dlg_fp( 438 struct dc *dc, struct dc_state *context, 439 display_e2e_pipe_params_st *pipes, 440 int pipe_cnt, 441 int vlevel) 442{ 443 int i, pipe_idx; 444 double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; 445 446 dc_assert_fp_enabled(); 447 448 if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) 449 dcfclk = context->bw_ctx.dml.soc.min_dcfclk; 450 451 /* We don't recalculate clocks for 0 pipe configs, which can block 452 * S0i3 as high clocks will block low power states 453 * Override any clocks that can block S0i3 to min here 454 */ 455 if (pipe_cnt == 0) { 456 context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 457 return; 458 } 459 460 pipes[0].clks_cfg.voltage = vlevel; 461 pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 462 pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; 463 464#if 0 // TODO 465 /* Set B: 466 * TODO 467 */ 468 if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { 469 if (vlevel == 0) { 470 pipes[0].clks_cfg.voltage = 1; 471 pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; 472 } 473 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; 474 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; 475 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; 476 } 477 context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 478 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 479 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 480 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 481 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 482 context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 483 context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 484 context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 485 context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 486 context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 487 488 pipes[0].clks_cfg.voltage = vlevel; 489 pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 490 491 /* Set C: 492 * TODO 493 */ 494 if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { 495 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us; 496 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; 497 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; 498 } 499 context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 500 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 501 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 502 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 503 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 504 context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 505 context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 506 context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 507 context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 508 context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 509 510 /* Set D: 511 * TODO 512 */ 513 if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { 514 context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; 515 context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; 516 context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; 517 } 518 context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 519 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 520 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 521 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 522 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 523 context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 524 context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 525 context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 526 context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 527 context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 528#endif 529 530 /* Set A: 531 * All clocks min required 532 * 533 * Set A calculated last so that following calculations are based on Set A 534 */ 535 dc->res_pool->funcs->update_soc_for_wm_a(dc, context); 536 context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 537 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 538 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 539 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 540 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 541 context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 542 context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 543 context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 544 context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 545 context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 546 /* TODO: remove: */ 547 context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; 548 context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; 549 context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; 550 /* end remove*/ 551 552 for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { 553 if (!context->res_ctx.pipe_ctx[i].stream) 554 continue; 555 556 pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); 557 pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); 558 559 if (dc->config.forced_clocks || dc->debug.max_disp_clk) { 560 pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; 561 pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; 562 } 563 if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) 564 pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; 565 if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) 566 pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; 567 568 pipe_idx++; 569 } 570 571 dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); 572} 573 574void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) 575{ 576 struct clk_limit_table *clk_table = &bw_params->clk_table; 577 struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; 578 unsigned int i, closest_clk_lvl; 579 int j; 580 581 dc_assert_fp_enabled(); 582 583 // Default clock levels are used for diags, which may lead to overclocking. 584 if (!IS_DIAG_DC(dc->ctx->dce_environment)) { 585 int max_dispclk_mhz = 0, max_dppclk_mhz = 0; 586 587 dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; 588 dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count; 589 dcn3_1_soc.num_chans = bw_params->num_channels; 590 591 ASSERT(clk_table->num_entries); 592 593 /* Prepass to find max clocks independent of voltage level. */ 594 for (i = 0; i < clk_table->num_entries; ++i) { 595 if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) 596 max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; 597 if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) 598 max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; 599 } 600 601 for (i = 0; i < clk_table->num_entries; i++) { 602 /* loop backwards*/ 603 for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) { 604 if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { 605 closest_clk_lvl = j; 606 break; 607 } 608 } 609 610 clock_limits[i].state = i; 611 612 /* Clocks dependent on voltage level. */ 613 clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; 614 clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; 615 clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; 616 clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; 617 618 /* Clocks independent of voltage level. */ 619 clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : 620 dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; 621 622 clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : 623 dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; 624 625 clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; 626 clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; 627 clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; 628 clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; 629 clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; 630 } 631 for (i = 0; i < clk_table->num_entries; i++) 632 dcn3_1_soc.clock_limits[i] = clock_limits[i]; 633 if (clk_table->num_entries) { 634 dcn3_1_soc.num_states = clk_table->num_entries; 635 } 636 } 637 638 dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 639 dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 640 641 if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) 642 dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); 643 else 644 dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA); 645} 646 647void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) 648{ 649 struct clk_limit_table *clk_table = &bw_params->clk_table; 650 int i, max_dispclk_mhz = 0, max_dppclk_mhz = 0; 651 652 dc_assert_fp_enabled(); 653 654 dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; 655 dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count; 656 dcn3_15_soc.num_chans = bw_params->num_channels; 657 658 ASSERT(clk_table->num_entries); 659 660 /* Setup soc to always use max dispclk/dppclk to avoid odm-to-lower-voltage */ 661 for (i = 0; i < clk_table->num_entries; ++i) { 662 if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) 663 max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; 664 if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) 665 max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; 666 } 667 668 for (i = 0; i < clk_table->num_entries; i++) { 669 dcn3_15_soc.clock_limits[i].state = i; 670 671 /* Clocks dependent on voltage level. */ 672 dcn3_15_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; 673 dcn3_15_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; 674 dcn3_15_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; 675 dcn3_15_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; 676 677 /* These aren't actually read from smu, but rather set in clk_mgr defaults */ 678 dcn3_15_soc.clock_limits[i].dtbclk_mhz = clk_table->entries[i].dtbclk_mhz; 679 dcn3_15_soc.clock_limits[i].phyclk_d18_mhz = clk_table->entries[i].phyclk_d18_mhz; 680 dcn3_15_soc.clock_limits[i].phyclk_mhz = clk_table->entries[i].phyclk_mhz; 681 682 /* Clocks independent of voltage level. */ 683 dcn3_15_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; 684 dcn3_15_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; 685 dcn3_15_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3.0; 686 } 687 dcn3_15_soc.num_states = clk_table->num_entries; 688 689 690 /* Set vco to max_dispclk * 2 to make sure the highest dispclk is always available for dml calcs, 691 * no impact outside of dml validation 692 */ 693 dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 694 695 if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) 696 dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31); 697 else 698 dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA); 699} 700 701void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) 702{ 703 struct clk_limit_table *clk_table = &bw_params->clk_table; 704 struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; 705 unsigned int i, closest_clk_lvl; 706 int max_dispclk_mhz = 0, max_dppclk_mhz = 0; 707 int j; 708 709 dc_assert_fp_enabled(); 710 711 // Default clock levels are used for diags, which may lead to overclocking. 712 if (!IS_DIAG_DC(dc->ctx->dce_environment)) { 713 714 dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; 715 dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count; 716 dcn3_16_soc.num_chans = bw_params->num_channels; 717 718 ASSERT(clk_table->num_entries); 719 720 /* Prepass to find max clocks independent of voltage level. */ 721 for (i = 0; i < clk_table->num_entries; ++i) { 722 if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) 723 max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; 724 if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) 725 max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; 726 } 727 728 for (i = 0; i < clk_table->num_entries; i++) { 729 /* loop backwards*/ 730 for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { 731 if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { 732 closest_clk_lvl = j; 733 break; 734 } 735 } 736 // Ported from DCN315 737 if (clk_table->num_entries == 1) { 738 /*smu gives one DPM level, let's take the highest one*/ 739 closest_clk_lvl = dcn3_16_soc.num_states - 1; 740 } 741 742 clock_limits[i].state = i; 743 744 /* Clocks dependent on voltage level. */ 745 clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; 746 if (clk_table->num_entries == 1 && 747 clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { 748 /*SMU fix not released yet*/ 749 clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; 750 } 751 clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; 752 clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; 753 clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; 754 755 /* Clocks independent of voltage level. */ 756 clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : 757 dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; 758 759 clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : 760 dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; 761 762 clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; 763 clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; 764 clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; 765 clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; 766 clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; 767 } 768 for (i = 0; i < clk_table->num_entries; i++) 769 dcn3_16_soc.clock_limits[i] = clock_limits[i]; 770 if (clk_table->num_entries) { 771 dcn3_16_soc.num_states = clk_table->num_entries; 772 } 773 } 774 775 if (max_dispclk_mhz) { 776 dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 777 dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; 778 } 779 780 if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) 781 dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31); 782 else 783 dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); 784}