hfi_plat_bufs_v6.c (43462B)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020, The Linux Foundation. All rights reserved.
 */
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/videodev2.h>

#include "hfi.h"
#include "hfi_plat_bufs.h"
#include "helpers.h"

#define MIN_INPUT_BUFFERS		4
#define MIN_ENC_OUTPUT_BUFFERS		4

/* Tile dimensions (in pixels) and metadata plane rounding multiples. */
#define NV12_UBWC_Y_TILE_WIDTH		32
#define NV12_UBWC_Y_TILE_HEIGHT		8
#define NV12_UBWC_UV_TILE_WIDTH		16
#define NV12_UBWC_UV_TILE_HEIGHT	8
#define TP10_UBWC_Y_TILE_WIDTH		48
#define TP10_UBWC_Y_TILE_HEIGHT		4
#define METADATA_STRIDE_MULTIPLE	64
#define METADATA_HEIGHT_MULTIPLE	16
/* Every sub-buffer size in this file is rounded up to this many bytes. */
#define HFI_DMA_ALIGNMENT		256

/* Per-unit line buffer sizes for the FE, SE and PE stages. */
#define MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE	64
#define MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE	64
#define MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE	64
#define MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE	640
#define MAX_FE_NBR_DATA_CB_LINE_BUFFER_SIZE	320
#define MAX_FE_NBR_DATA_CR_LINE_BUFFER_SIZE	320

#define MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE	(128 / 8)
#define MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE	(128 / 8)
#define MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE	(128 / 8)

#define MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE	(64 * 2 * 3)
#define MAX_PE_NBR_DATA_LCU32_LINE_BUFFER_SIZE	(32 * 2 * 3)
#define MAX_PE_NBR_DATA_LCU16_LINE_BUFFER_SIZE	(16 * 2 * 3)

#define MAX_TILE_COLUMNS	32 /* 8K/256 */

#define VPP_CMD_MAX_SIZE		BIT(20)
#define NUM_HW_PIC_BUF			32
/* Pixel count below which bin buffers are sized for 1280x736 instead. */
#define BIN_BUFFER_THRESHOLD		(1280 * 736)
#define H264D_MAX_SLICE			1800
/* sizeof(h264d_buftab_t) aligned to 256 */
#define SIZE_H264D_BUFTAB_T		256
/* sizeof(h264d_hw_pic_t) aligned to 32 */
#define SIZE_H264D_HW_PIC_T		BIT(11)
#define SIZE_H264D_BSE_CMD_PER_BUF	(32 * 4)
#define SIZE_H264D_VPP_CMD_PER_BUF	512

/*
 * H.264 decoder line buffer sizes.
 *
 * All of the macros below take (width, height) so that they can be used
 * interchangeably, even though most of them only evaluate one of the two.
 */

/* Line Buffer definitions, One for Luma and 1/2 for each Chroma */
#define SIZE_H264D_LB_FE_TOP_DATA(width, height) \
	(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * ALIGN((width), 16) * 3)

#define SIZE_H264D_LB_FE_TOP_CTRL(width, height) \
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_FE_LEFT_CTRL(width, height) \
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4))

#define SIZE_H264D_LB_SE_TOP_CTRL(width, height) \
	(MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_SE_LEFT_CTRL(width, height) \
	(MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4))

#define SIZE_H264D_LB_PE_TOP_DATA(width, height) \
	(MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4))

#define SIZE_H264D_LB_VSP_TOP(width, height) (((((width) + 15) >> 4) << 7))

#define SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height) \
	(ALIGN((height), 16) * 32)

/* 128 bytes per 64x64 block, with both dimensions rounded up to 64. */
#define SIZE_H264D_QP(width, height) \
	((((width) + 63) >> 6) * (((height) + 63) >> 6) * 128)

#define SIZE_HW_PIC(size_per_buf) (NUM_HW_PIC_BUF * (size_per_buf))

/* Multipliers applied to the YUV size for the header/residual bin buffers. */
#define H264_CABAC_HDR_RATIO_HD_TOT	1
#define H264_CABAC_RES_RATIO_HD_TOT	3

/*
 * Some content needs more bin buffer, but limit the buffer
 * size for high resolution
 */
#define NUM_SLIST_BUF_H264	(256 + 32)
#define SIZE_SLIST_BUF_H264	512
#define LCU_MAX_SIZE_PELS	64
#define LCU_MIN_SIZE_PELS	16
#define SIZE_SEI_USERDATA	4096

#define H265D_MAX_SLICE			600
#define SIZE_H265D_HW_PIC_T		SIZE_H264D_HW_PIC_T
#define SIZE_H265D_BSE_CMD_PER_BUF	(16 * sizeof(u32))
#define SIZE_H265D_VPP_CMD_PER_BUF	256

/* H.265 decoder line buffer sizes; same (width, height) convention. */
#define SIZE_H265D_LB_FE_TOP_DATA(width, height) \
	(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * (ALIGN(width, 64) + 8) * 2)

#define SIZE_H265D_LB_FE_TOP_CTRL(width, height) \
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * \
	(ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_FE_LEFT_CTRL(width, height) \
	(MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * \
	(ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_SE_TOP_CTRL(width, height) \
	((LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * (((width) + 15) >> 4))

/*
 * H.265 decoder SE left control line buffer size.
 *
 * Evaluates three candidates, one per LCU size (16, 32, 64): height is
 * padded by (lcu - 1), divided by 8 and multiplied by the matching
 * MAX_SE_NBR_CTRL_LCUxx_LINE_BUFFER_SIZE. The largest candidate is returned.
 * @width is not used.
 */
static inline u32 size_h265d_lb_se_left_ctrl(u32 width, u32 height)
{
	u32 x, y, z;

	x = ((height + 16 - 1) / 8) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
	y = ((height + 32 - 1) / 8) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
	z = ((height + 64 - 1) / 8) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;

	return max3(x, y, z);
}

#define SIZE_H265D_LB_PE_TOP_DATA(width, height) \
	(MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * \
	(ALIGN(width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS))

#define SIZE_H265D_LB_VSP_TOP(width, height) ((((width) + 63) >> 6) * 128)

#define SIZE_H265D_LB_VSP_LEFT(width, height) ((((height) + 63) >> 6) * 128)

#define SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height) \
	SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height)

#define SIZE_H265D_QP(width, height) SIZE_H264D_QP(width, height)

/* Multipliers applied to the YUV size for the header/residual bin buffers. */
#define H265_CABAC_HDR_RATIO_HD_TOT	2
#define H265_CABAC_RES_RATIO_HD_TOT	2

/*
 * Some content needs more bin buffer, but limit the buffer size
 * for high resolution
 */
#define SIZE_SLIST_BUF_H265	BIT(10)
#define NUM_SLIST_BUF_H265	(80 + 20)
#define H265_NUM_TILE_COL	32
#define H265_NUM_TILE_ROW	128
#define H265_NUM_TILE	(H265_NUM_TILE_ROW * H265_NUM_TILE_COL + 1)

/*
 * VPx decoder FE left control line buffer size.
 *
 * Evaluates three candidates: height rounded up to units of 16, 32 and 64
 * rows, each multiplied by the matching
 * MAX_FE_NBR_CTRL_LCUxx_LINE_BUFFER_SIZE. The largest candidate is returned.
 * @width is not used.
 */
static inline u32 size_vpxd_lb_fe_left_ctrl(u32 width, u32 height)
{
	u32 x, y, z;

	x = ((height + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
	y = ((height + 31) >> 5) * MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
	z = ((height + 63) >> 6) * MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;

	return max3(x, y, z);
}

#define SIZE_VPXD_LB_FE_TOP_CTRL(width, height) \
	(((ALIGN(width, 64) + 8) * 10 * 2)) /* small line */
#define SIZE_VPXD_LB_SE_TOP_CTRL(width, height) \
	((((width) + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE)

/*
 * VPx decoder SE left control line buffer size.
 *
 * Same scheme as size_vpxd_lb_fe_left_ctrl(), but using the
 * MAX_SE_NBR_CTRL_LCUxx_LINE_BUFFER_SIZE constants. @width is not used.
 */
static inline u32 size_vpxd_lb_se_left_ctrl(u32 width, u32 height)
{
	u32 x, y, z;

	x = ((height + 15) >> 4) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE;
	y = ((height + 31) >> 5) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE;
	z = ((height + 63) >> 6) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE;

	return max3(x, y, z);
}

/* VP8/VP9 decoder line buffer sizes; same (width, height) convention. */
#define SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height) \
	ALIGN((ALIGN(height, 16) / (4 / 2)) * 64, 32)
#define SIZE_VP8D_LB_FE_TOP_DATA(width, height) \
	((ALIGN(width, 16) + 8) * 10 * 2)
#define SIZE_VP9D_LB_FE_TOP_DATA(width, height) \
	((ALIGN(ALIGN(width, 16), 64) + 8) * 10 * 2)
#define SIZE_VP8D_LB_PE_TOP_DATA(width, height) \
	((ALIGN(width, 16) >> 4) * 64)
#define SIZE_VP9D_LB_PE_TOP_DATA(width, height) \
	((ALIGN(ALIGN(width, 16), 64) >> 6) * 176)
#define SIZE_VP8D_LB_VSP_TOP(width, height) \
	(((ALIGN(width, 16) >> 4) * 64 / 2) + 256)
#define SIZE_VP9D_LB_VSP_TOP(width, height) \
	(((ALIGN(ALIGN(width, 16), 64) >> 6) * 64 * 8) + 256)

/* Constant computed for a fixed 8192x4320 frame split into 64x64 blocks. */
#define HFI_IRIS2_VP9D_COMV_SIZE \
	((((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8))

/* Scale factors used by vpxd_scratch_size() for its two bin buffers. */
#define VPX_DECODER_FRAME_CONCURENCY_LVL		2
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM	1
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN	2
#define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM	3
#define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN	2

#define VP8_NUM_FRAME_INFO_BUF			(5 + 1)
#define VP9_NUM_FRAME_INFO_BUF			32
#define VP8_NUM_PROBABILITY_TABLE_BUF		VP8_NUM_FRAME_INFO_BUF
#define VP9_NUM_PROBABILITY_TABLE_BUF		(VP9_NUM_FRAME_INFO_BUF + 4)
#define VP8_PROB_TABLE_SIZE			3840
#define VP9_PROB_TABLE_SIZE			3840

#define VP9_UDC_HEADER_BUF_SIZE			(3 * 128)
#define MAX_SUPERFRAME_HEADER_LEN		34
#define CCE_TILE_OFFSET_SIZE			ALIGN(32 * 4 * 4, 32)

#define QMATRIX_SIZE				(sizeof(u32) * 128 + 256)
#define MP2D_QPDUMP_SIZE			115200
#define HFI_IRIS2_ENC_PERSIST_SIZE		204800
#define HFI_MAX_COL_FRAME			6
#define HFI_VENUS_VENC_TRE_WB_BUFF_SIZE		(65 << 4) /* in Bytes */
#define HFI_VENUS_VENC_DB_LINE_BUFF_PER_MB	512
#define HFI_VENUS_VPPSG_MAX_REGISTERS		2048
#define HFI_VENUS_WIDTH_ALIGNMENT		128
#define HFI_VENUS_WIDTH_TEN_BIT_ALIGNMENT	192
#define HFI_VENUS_HEIGHT_ALIGNMENT		32

#define SYSTEM_LAL_TILE10	192
/* Macroblock (16x16) counts for 1280x720 and 4096x2304 frames. */
#define NUM_MBS_720P		(((1280 + 15) >> 4) * ((720 + 15) >> 4))
#define NUM_MBS_4K		(((4096 + 15) >> 4) * ((2304 + 15) >> 4))
#define MB_SIZE_IN_PIXEL	(16 * 16)
#define HDR10PLUS_PAYLOAD_SIZE		1024
#define HDR10_HIST_EXTRADATA_SIZE	4096

/*
 * Size of the VPSS line buffers added to decoder scratch1 in split mode.
 *
 * The result is the sum of the OPB write top-line luma and chroma buffers
 * (identical sizes) and the OPB LLB Y and UV buffers (identical sizes).
 * The four vpss_4tap and vpss_div2 terms are hard-coded to zero here.
 *
 * NOTE(review): in the final sum "num_vpp_pipes *" binds only to the first
 * term, which is always zero, so @num_vpp_pipes currently has no effect on
 * the returned size. Confirm whether that is intended.
 */
static u32 size_vpss_lb(u32 width, u32 height, u32 num_vpp_pipes)
{
	u32 vpss_4tap_top_buffer_size, vpss_div2_top_buffer_size;
	u32 vpss_4tap_left_buffer_size, vpss_div2_left_buffer_size;
	u32 opb_wr_top_line_luma_buf_size, opb_wr_top_line_chroma_buf_size;
	u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size;
	u32 macrotiling_size;
	u32 size = 0;

	vpss_4tap_top_buffer_size = 0;
	vpss_div2_top_buffer_size = 0;
	vpss_4tap_left_buffer_size = 0;
	vpss_div2_left_buffer_size = 0;

	macrotiling_size = 32;
	/* 256 bytes per 32-pixel column of the (rounded-up) width ... */
	opb_wr_top_line_luma_buf_size =
		ALIGN(width, macrotiling_size) / macrotiling_size * 256;
	/* ... plus 256 bytes for each tile column beyond the first ... */
	opb_wr_top_line_luma_buf_size =
		ALIGN(opb_wr_top_line_luma_buf_size, HFI_DMA_ALIGNMENT) +
		(MAX_TILE_COLUMNS - 1) * 256;
	/* ... but never less than 32 bytes per row of the 16-aligned height. */
	opb_wr_top_line_luma_buf_size =
		max(opb_wr_top_line_luma_buf_size, (32 * ALIGN(height, 16)));
	opb_wr_top_line_chroma_buf_size = opb_wr_top_line_luma_buf_size;
	opb_lb_wr_llb_y_buffer_size = ALIGN((ALIGN(height, 16) / 2) * 64, 32);
	opb_lb_wr_llb_uv_buffer_size = opb_lb_wr_llb_y_buffer_size;
	size = num_vpp_pipes *
		2 * (vpss_4tap_top_buffer_size + vpss_div2_top_buffer_size) +
		2 * (vpss_4tap_left_buffer_size + vpss_div2_left_buffer_size) +
		opb_wr_top_line_luma_buf_size +
		opb_wr_top_line_chroma_buf_size +
		opb_lb_wr_llb_uv_buffer_size +
		opb_lb_wr_llb_y_buffer_size;

	return size;
}

/*
 * H.264 decoder bin buffer size.
 *
 * The base is 1.5 bytes per pixel, using BIN_BUFFER_THRESHOLD as the pixel
 * count whenever width * height does not exceed it. The header and residual
 * parts are that base scaled by H264_CABAC_HDR_RATIO_HD_TOT and
 * H264_CABAC_RES_RATIO_HD_TOT; each part is rounded up to HFI_DMA_ALIGNMENT
 * and the two are added.
 */
static u32 size_h264d_hw_bin_buffer(u32 width, u32 height)
{
	u32 size_yuv, size_bin_hdr, size_bin_res;
	u32 size = 0;
	u32 product;

	product = width * height;
	size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
		((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);

	size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT;
	size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT;
	size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT);
	size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT);
	size = size_bin_hdr + size_bin_res;

	return size;
}

/*
 * H.264 decoder scratch size: the bin buffer size for the 16-aligned
 * dimensions, or 0 when @is_interlaced is set.
 */
static u32 h264d_scratch_size(u32 width, u32 height, bool is_interlaced)
{
	u32 aligned_width = ALIGN(width, 16);
	u32 aligned_height = ALIGN(height, 16);
	u32 size = 0;

	if (!is_interlaced)
		size = size_h264d_hw_bin_buffer(aligned_width, aligned_height);

	return size;
}

/*
 * H.265 decoder bin buffer size.
 *
 * Same computation as size_h264d_hw_bin_buffer(), but scaled by
 * H265_CABAC_HDR_RATIO_HD_TOT and H265_CABAC_RES_RATIO_HD_TOT.
 */
static u32 size_h265d_hw_bin_buffer(u32 width, u32 height)
{
	u32 size_yuv, size_bin_hdr, size_bin_res;
	u32 size = 0;
	u32 product;

	product = width * height;
	size_yuv = (product <= BIN_BUFFER_THRESHOLD) ?
		((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1);
	size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT;
	size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT;
	size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT);
	size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT);
	size = size_bin_hdr + size_bin_res;

	return size;
}

/*
 * H.265 decoder scratch size: the bin buffer size for the 16-aligned
 * dimensions, or 0 when @is_interlaced is set.
 */
static u32 h265d_scratch_size(u32 width, u32 height, bool is_interlaced)
{
	u32 aligned_width = ALIGN(width, 16);
	u32 aligned_height = ALIGN(height, 16);
	u32 size = 0;

	if (!is_interlaced)
		size = size_h265d_hw_bin_buffer(aligned_width, aligned_height);

	return size;
}

/*
 * VP8/VP9 decoder scratch size, or 0 when @is_interlaced is set.
 *
 * Both bin buffers start from max(1.5 bytes per pixel of the 16-aligned
 * frame, 1.5 * BIN_BUFFER_THRESHOLD). Each is then multiplied by
 * VPX_DECODER_FRAME_CONCURENCY_LVL * RATIO_NUM / RATIO_DEN, using the HDR
 * ratio for the first buffer and the RES ratio for the second. The
 * right-hand side of each "*=" is an integer constant expression, so it is
 * evaluated (with integer division) before the multiplication. The sum is
 * rounded up to HFI_DMA_ALIGNMENT.
 */
static u32 vpxd_scratch_size(u32 width, u32 height, bool is_interlaced)
{
	u32 aligned_width = ALIGN(width, 16);
	u32 aligned_height = ALIGN(height, 16);
	u32 size_yuv = aligned_width * aligned_height * 3 / 2;
	u32 size = 0;

	if (!is_interlaced) {
		u32 binbuffer1_size, binbufer2_size;

		binbuffer1_size = max_t(u32, size_yuv,
					((BIN_BUFFER_THRESHOLD * 3) >> 1));
		binbuffer1_size *= VPX_DECODER_FRAME_CONCURENCY_LVL *
				   VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM /
				   VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN;
		binbufer2_size = max_t(u32, size_yuv,
				       ((BIN_BUFFER_THRESHOLD * 3) >> 1));
		binbufer2_size *= VPX_DECODER_FRAME_CONCURENCY_LVL *
				  VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM /
				  VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN;
		size = ALIGN(binbuffer1_size + binbufer2_size,
			     HFI_DMA_ALIGNMENT);
	}

	return size;
}

/* MPEG-2 decoder scratch size: always 0; all arguments are ignored. */
static u32 mpeg2d_scratch_size(u32 width, u32 height, bool is_interlaced)
{
	return 0;
}

/*
 * Encoder output (bitstream) frame size, rounded up to 4 KiB.
 *
 * The resolution bucket is chosen from the macroblock count of the
 * 32-aligned frame. The size is doubled when @rc_type is
 * HFI_RATE_CONTROL_OFF or HFI_RATE_CONTROL_CQ, then scaled by 5/4.
 *
 * NOTE(review): aligned_width/aligned_height are only used to pick the
 * bucket; frame_size itself is computed from the unaligned @width and
 * @height, although the comment below talks about 32-aligned dimensions.
 * Confirm which is intended.
 */
static u32 calculate_enc_output_frame_size(u32 width, u32 height, u32 rc_type)
{
	u32 aligned_width, aligned_height;
	u32 mbs_per_frame;
	u32 frame_size;

	/*
	 * Encoder output size calculation: 32 Align width/height
	 * For resolution < 720p : YUVsize * 4
	 * For resolution > 720p & <= 4K : YUVsize / 2
	 * For resolution > 4k : YUVsize / 4
	 * Initially frame_size = YUVsize * 2;
	 */
	aligned_width = ALIGN(width, 32);
	aligned_height = ALIGN(height, 32);
	mbs_per_frame = (ALIGN(aligned_height, 16) *
			 ALIGN(aligned_width, 16)) / 256;
	frame_size = width * height * 3;

	if (mbs_per_frame < NUM_MBS_720P)
		frame_size = frame_size << 1;
	else if (mbs_per_frame <= NUM_MBS_4K)
		frame_size = frame_size >> 2;
	else
		frame_size = frame_size >> 3;

	if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ)
		frame_size = frame_size << 1;

	/*
	 * In case of opaque color format bitdepth will be known
	 * with first ETB, buffers allocated already with 8 bit
	 * won't be sufficient for 10 bit
	 * calculate size considering 10-bit by default
	 * For 10-bit cases size = size * 1.25
	 */
	frame_size *= 5;
	frame_size /= 4;

	return ALIGN(frame_size, SZ_4K);
}

/*
 * Encoder scratch size shared by the per-codec wrappers below.
 *
 * With @work_mode == VIDC_WORK_MODE_2 three bit-bin buffers are accounted
 * for, each based on 1.7x the output frame size; otherwise a single one
 * based on 3 bytes per pixel of the @lcu_size aligned frame. The per-pipe
 * share is half of that when @num_vpp_pipes > 2, the full amount otherwise.
 * A SAO bin buffer is added per pipe, the result is multiplied by
 * @num_vpp_pipes and by the number of bit-bin buffers, and 512 is added.
 */
static u32 calculate_enc_scratch_size(u32 width, u32 height, u32 work_mode,
				      u32 lcu_size, u32 num_vpp_pipes,
				      u32 rc_type)
{
	u32 aligned_width, aligned_height, bitstream_size;
	u32 total_bitbin_buffers, size_single_pipe, bitbin_size;
	u32 sao_bin_buffer_size, padded_bin_size, size;

	aligned_width = ALIGN(width, lcu_size);
	aligned_height = ALIGN(height, lcu_size);
	bitstream_size =
		calculate_enc_output_frame_size(width, height, rc_type);

	bitstream_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT);

	if (work_mode == VIDC_WORK_MODE_2) {
		total_bitbin_buffers = 3;
		bitbin_size = bitstream_size * 17 / 10;
		bitbin_size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT);
	} else {
		total_bitbin_buffers = 1;
		bitstream_size = aligned_width * aligned_height * 3;
		bitbin_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT);
	}

	if (num_vpp_pipes > 2)
		size_single_pipe = bitbin_size / 2;
	else
		size_single_pipe = bitbin_size;

	size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
	sao_bin_buffer_size =
		(64 * (((width + 32) * (height + 32)) >> 10)) + 384;
	padded_bin_size = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
	size_single_pipe = sao_bin_buffer_size + padded_bin_size;
	size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT);
	bitbin_size = size_single_pipe * num_vpp_pipes;
	size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT) *
		total_bitbin_buffers + 512;

	return size;
}

/* H.264 encoder scratch size: calculate_enc_scratch_size() with LCU 16. */
static u32 h264e_scratch_size(u32 width, u32 height, u32 work_mode,
			      u32 num_vpp_pipes, u32 rc_type)
{
	return calculate_enc_scratch_size(width, height, work_mode, 16,
					  num_vpp_pipes, rc_type);
}

/* H.265 encoder scratch size: calculate_enc_scratch_size() with LCU 32. */
static u32 h265e_scratch_size(u32 width, u32 height, u32 work_mode,
			      u32 num_vpp_pipes, u32 rc_type)
{
	return calculate_enc_scratch_size(width, height, work_mode, 32,
					  num_vpp_pipes, rc_type);
}

/* VP8 encoder scratch size: calculate_enc_scratch_size() with LCU 16. */
static u32 vp8e_scratch_size(u32 width, u32 height, u32 work_mode,
			     u32 num_vpp_pipes, u32 rc_type)
{
	return calculate_enc_scratch_size(width, height, work_mode, 16,
					  num_vpp_pipes, rc_type);
}

/*
 * H.264 decoder co-located MV buffer size.
 *
 * Per frame: a col-MV part (128 bytes per macroblock column) and a col-zero
 * part (4 bytes per macroblock column), each taken over half the macroblock
 * rows (rounded up), aligned to 64, doubled and aligned to 512, plus two
 * SIZE_H264D_BUFTAB_T entries. That per-frame size is multiplied by
 * @yuv_buf_min_count and 512 is added.
 */
static u32 hfi_iris2_h264d_comv_size(u32 width, u32 height,
				     u32 yuv_buf_min_count)
{
	u32 frame_width_in_mbs = ((width + 15) >> 4);
	u32 frame_height_in_mbs = ((height + 15) >> 4);
	u32 col_mv_aligned_width = (frame_width_in_mbs << 7);
	u32 col_zero_aligned_width = (frame_width_in_mbs << 2);
	u32 col_zero_size = 0, size_colloc = 0, comv_size = 0;

	col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16);
	col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16);
	col_zero_size =
		col_zero_aligned_width * ((frame_height_in_mbs + 1) >> 1);
	col_zero_size = ALIGN(col_zero_size, 64);
	col_zero_size <<= 1;
	col_zero_size = ALIGN(col_zero_size, 512);
	size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1);
	size_colloc = ALIGN(size_colloc, 64);
	size_colloc <<= 1;
	size_colloc = ALIGN(size_colloc, 512);
	size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2);
	comv_size = size_colloc * yuv_buf_min_count;
	comv_size += 512;

	return comv_size;
}

/*
 * H.264 decoder BSE command buffer size: 12 entries per macroblock row of
 * the 32-aligned height, capped at H264D_MAX_SLICE entries, times
 * SIZE_H264D_BSE_CMD_PER_BUF.
 */
static u32 size_h264d_bse_cmd_buf(u32 height)
{
	u32 aligned_height = ALIGN(height, 32);

	return min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4),
		     H264D_MAX_SLICE) * SIZE_H264D_BSE_CMD_PER_BUF;
}

/*
 * H.264 decoder VPP command buffer size: same entry count as
 * size_h264d_bse_cmd_buf(), times SIZE_H264D_VPP_CMD_PER_BUF, capped at
 * VPP_CMD_MAX_SIZE.
 */
static u32 size_h264d_vpp_cmd_buf(u32 height)
{
	u32 aligned_height = ALIGN(height, 32);
	u32 size;

	size = min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4),
		     H264D_MAX_SLICE) * SIZE_H264D_VPP_CMD_PER_BUF;
	if (size > VPP_CMD_MAX_SIZE)
		size = VPP_CMD_MAX_SIZE;

	return size;
}

/*
 * H.264 decoder non-co-located-MV part of scratch1.
 *
 * Sum of the BSE/VPP command buffers, the HW picture buffers, the line
 * buffers and the QP buffer, each rounded up to HFI_DMA_ALIGNMENT. The two
 * "left control" line buffers are counted once per VPP pipe and the recon
 * DMA metadata buffer is counted twice.
 */
static u32 hfi_iris2_h264d_non_comv_size(u32 width, u32 height,
					 u32 num_vpp_pipes)
{
	u32 size_bse, size_vpp, size;

	size_bse = size_h264d_bse_cmd_buf(height);
	size_vpp = size_h264d_vpp_cmd_buf(height);
	size =
		ALIGN(size_bse, HFI_DMA_ALIGNMENT) +
		ALIGN(size_vpp, HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H264D_LB_FE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H264D_LB_FE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H264D_LB_FE_LEFT_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_H264D_LB_SE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H264D_LB_SE_LEFT_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_H264D_LB_PE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H264D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height),
		      HFI_DMA_ALIGNMENT) * 2 +
		ALIGN(SIZE_H264D_QP(width, height), HFI_DMA_ALIGNMENT);

	return ALIGN(size, HFI_DMA_ALIGNMENT);
}

/*
 * H.265 decoder BSE command buffer size.
 *
 * The entry count is the number of LCU_MIN_SIZE_PELS units covering the
 * LCU_MAX_SIZE_PELS aligned frame times NUM_HW_PIC_BUF, capped at
 * H265D_MAX_SLICE + 1. The size is twice that count times
 * SIZE_H265D_BSE_CMD_PER_BUF, rounded up to HFI_DMA_ALIGNMENT.
 */
static u32 size_h265d_bse_cmd_buf(u32 width, u32 height)
{
	u32 size;

	size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
		(ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
		NUM_HW_PIC_BUF;
	size = min_t(u32, size, H265D_MAX_SLICE + 1);
	size = 2 * size * SIZE_H265D_BSE_CMD_PER_BUF;

	return ALIGN(size, HFI_DMA_ALIGNMENT);
}

/*
 * H.265 decoder VPP command buffer size.
 *
 * Same capped entry count as size_h265d_bse_cmd_buf(), rounded up to a
 * multiple of 4, then doubled and multiplied by SIZE_H265D_VPP_CMD_PER_BUF.
 * The result is rounded up to HFI_DMA_ALIGNMENT and capped at
 * VPP_CMD_MAX_SIZE.
 */
static u32 size_h265d_vpp_cmd_buf(u32 width, u32 height)
{
	u32 size;

	size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
		(ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
		NUM_HW_PIC_BUF;
	size = min_t(u32, size, H265D_MAX_SLICE + 1);
	size = ALIGN(size, 4);
	size = 2 * size * SIZE_H265D_VPP_CMD_PER_BUF;
	size = ALIGN(size, HFI_DMA_ALIGNMENT);
	if (size > VPP_CMD_MAX_SIZE)
		size = VPP_CMD_MAX_SIZE;

	return size;
}

/*
 * H.265 decoder co-located MV buffer size: 256 bytes per 16x16 block,
 * rounded up to 512, times @yuv_buf_count_min, plus 512.
 */
static u32 hfi_iris2_h265d_comv_size(u32 width, u32 height,
				     u32 yuv_buf_count_min)
{
	u32 size;

	size = ALIGN(((((width + 15) >> 4) * ((height + 15) >> 4)) << 8), 512);
	size *= yuv_buf_count_min;
	size += 512;

	return size;
}

/*
 * H.265 decoder non-co-located-MV part of scratch1.
 *
 * Sum of the BSE/VPP command buffers, two fixed-formula tables, the HW
 * picture buffers, the line buffers and the QP buffer, each rounded up to
 * HFI_DMA_ALIGNMENT. The "left" line buffers are counted once per VPP pipe
 * and the recon DMA metadata buffer is counted four times.
 */
static u32 hfi_iris2_h265d_non_comv_size(u32 width, u32 height,
					 u32 num_vpp_pipes)
{
	u32 size_bse, size_vpp, size;

	size_bse = size_h265d_bse_cmd_buf(width, height);
	size_vpp = size_h265d_vpp_cmd_buf(width, height);
	size =
		ALIGN(size_bse, HFI_DMA_ALIGNMENT) +
		ALIGN(size_vpp, HFI_DMA_ALIGNMENT) +
		ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, HFI_DMA_ALIGNMENT) +
		ALIGN(2 * sizeof(u16) *
		      (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
		      (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H265D_LB_FE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H265D_LB_FE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H265D_LB_FE_LEFT_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_h265d_lb_se_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_H265D_LB_SE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H265D_LB_PE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H265D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_H265D_LB_VSP_LEFT(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height),
		      HFI_DMA_ALIGNMENT)
			* 4 +
		ALIGN(SIZE_H265D_QP(width, height), HFI_DMA_ALIGNMENT);

	return ALIGN(size, HFI_DMA_ALIGNMENT);
}

/*
 * VP8 decoder co-located MV buffer size: 16 bytes per 16x16 block.
 * @yuv_min_buf_count is not used.
 */
static u32 hfi_iris2_vp8d_comv_size(u32 width, u32 height,
				    u32 yuv_min_buf_count)
{
	return (((width + 15) >> 4) * ((height + 15) >> 4) * 8 * 2);
}

/*
 * H.264 decoder scratch1 size: co-located MV part plus non-co-located part,
 * plus the VPSS line buffers when @split_mode_enabled is set.
 */
static u32 h264d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
			       bool split_mode_enabled, u32 num_vpp_pipes)
{
	u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0;

	co_mv_size = hfi_iris2_h264d_comv_size(width, height, min_buf_count);
	nonco_mv_size = hfi_iris2_h264d_non_comv_size(width, height,
						      num_vpp_pipes);
	if (split_mode_enabled)
		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);

	return co_mv_size + nonco_mv_size + vpss_lb_size;
}

/*
 * H.265 decoder scratch1 size: co-located MV part plus non-co-located part,
 * plus the VPSS line buffers when @split_mode_enabled is set, plus
 * HDR10_HIST_EXTRADATA_SIZE.
 */
static u32 h265d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
			       bool split_mode_enabled, u32 num_vpp_pipes)
{
	u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0;

	co_mv_size = hfi_iris2_h265d_comv_size(width, height, min_buf_count);
	nonco_mv_size = hfi_iris2_h265d_non_comv_size(width, height,
						      num_vpp_pipes);
	if (split_mode_enabled)
		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);

	return co_mv_size + nonco_mv_size + vpss_lb_size +
		HDR10_HIST_EXTRADATA_SIZE;
}

/*
 * VP8 decoder scratch1 size: the VP8 co-located MV buffer plus the VPx/VP8
 * line buffers (each rounded up to HFI_DMA_ALIGNMENT, left control buffers
 * once per VPP pipe, recon DMA metadata twice), plus the VPSS line buffers
 * when @split_mode_enabled is set. @min_buf_count is not used; 0 is passed
 * to hfi_iris2_vp8d_comv_size(), which ignores that argument.
 */
static u32 vp8d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
			      bool split_mode_enabled, u32 num_vpp_pipes)
{
	u32 vpss_lb_size = 0, size;

	size = hfi_iris2_vp8d_comv_size(width, height, 0);
	size += ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
			  HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT);
	if (split_mode_enabled)
		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);

	size += vpss_lb_size;

	return size;
}

/*
 * VP9 decoder scratch1 size: the VPx/VP9 line buffers (each rounded up to
 * HFI_DMA_ALIGNMENT, left control buffers once per VPP pipe, recon DMA
 * metadata twice), plus the VPSS line buffers when @split_mode_enabled is
 * set, plus HDR10_HIST_EXTRADATA_SIZE. @min_buf_count is not used.
 */
static u32 vp9d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
			      bool split_mode_enabled, u32 num_vpp_pipes)
{
	u32 vpss_lb_size = 0;
	u32 size;

	size =
		ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_VP9D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
			  HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VP9D_LB_PE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VP9D_LB_FE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT);

	if (split_mode_enabled)
		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);

	size += vpss_lb_size + HDR10_HIST_EXTRADATA_SIZE;

	return size;
}

/*
 * MPEG-2 decoder scratch1 size: the same line buffer sum as
 * vp8d_scratch1_size() (it uses the VPx/VP8 macros) but without a
 * co-located MV buffer, plus the VPSS line buffers when
 * @split_mode_enabled is set. @min_buf_count is not used.
 */
static u32 mpeg2d_scratch1_size(u32 width, u32 height, u32 min_buf_count,
				bool split_mode_enabled, u32 num_vpp_pipes)
{
	u32 vpss_lb_size = 0;
	u32 size;

	size =
		ALIGN(size_vpxd_lb_fe_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(size_vpxd_lb_se_left_ctrl(width, height),
		      HFI_DMA_ALIGNMENT) * num_vpp_pipes +
		ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height),
			  HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT) +
		ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height),
		      HFI_DMA_ALIGNMENT);

	if (split_mode_enabled)
		vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes);

	size += vpss_lb_size;

	return size;
}

/*
 * Encoder scratch1 size shared by the per-codec wrappers below.
 *
 * Computes the size of each encoder sub-buffer from the frame dimensions
 * rounded up to whole LCUs of @lcu_size, then returns their sum plus 1024.
 * @is_h265 selects between two formulas for several sub-buffers, @ten_bit
 * selects the bit depth (10 or 8) and related factors, and @num_ref scales
 * the co-located MV buffer by (@num_ref + 1).
 *
 * NOTE(review): several roundings below use
 * HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1); with @num_vpp_pipes == 0 the
 * shift count wraps. Presumably callers always pass at least 1 - confirm.
 */
static u32
calculate_enc_scratch1_size(u32 width, u32 height, u32 lcu_size, u32 num_ref,
			    bool ten_bit, u32 num_vpp_pipes, bool is_h265)
{
	u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size;
	u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE;
	u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size;
	u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size;
	u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size;
	u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size;
	u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize;
	u32 h265e_lcubitmap_bufsize, se_stats_bufsize;
	u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize;
	u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size;
	u32 width_lcu_num, height_lcu_num, width_coded, height_coded;
	u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao;
	u32 size, bit_depth, num_lcu_mb;
	u32 vpss_line_buffer_size_1;

	/* Frame dimensions in LCUs and in pixels rounded up to whole LCUs. */
	width_lcu_num = (width + lcu_size - 1) / lcu_size;
	height_lcu_num = (height + lcu_size - 1) / lcu_size;
	frame_num_lcu = width_lcu_num * height_lcu_num;
	width_coded = width_lcu_num * lcu_size;
	height_coded = height_lcu_num * lcu_size;
	num_lcu_mb = (height_coded / lcu_size) *
		     ((width_coded + lcu_size * 8) / lcu_size);
	slice_info_bufsize = 256 + (frame_num_lcu << 4);
	slice_info_bufsize = ALIGN(slice_info_bufsize, HFI_DMA_ALIGNMENT);
	line_buf_ctrl_size = ALIGN(width_coded, HFI_DMA_ALIGNMENT);
	line_buf_ctrl_size_buffid2 = ALIGN(width_coded, HFI_DMA_ALIGNMENT);

	bit_depth = ten_bit ? 10 : 8;
	/*
	 * One full-size term plus two half-size terms, each rounded up to
	 * HFI_DMA_ALIGNMENT with an open-coded mask.
	 */
	line_buf_data_size =
		(((((bit_depth * width_coded + 1024) +
		(HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 1) +
		(((((bit_depth * width_coded + 1024) >> 1) +
		(HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 2));

	leftline_buf_ctrl_size = is_h265 ?
		((height_coded + 32) / 32 * 4 * 16) :
		((height_coded + 15) / 16 * 5 * 16);

	if (num_vpp_pipes > 1) {
		leftline_buf_ctrl_size += 512;
		leftline_buf_ctrl_size =
			ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes;
	}

	leftline_buf_ctrl_size =
		ALIGN(leftline_buf_ctrl_size, HFI_DMA_ALIGNMENT);
	/*
	 * Rounded up to HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1). The
	 * trailing "* 1" binds to the mask operand and changes nothing.
	 */
	leftline_buf_recon_pix_size = (((ten_bit + 1) * 2 *
		(height_coded) + HFI_DMA_ALIGNMENT) +
		(HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) &
		(~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1;

	topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) :
		(HFI_DMA_ALIGNMENT + 16 * (width_coded >> 4));
	topline_buf_ctrl_size_FE =
		ALIGN(topline_buf_ctrl_size_FE, HFI_DMA_ALIGNMENT);
	leftline_buf_ctrl_size_FE =
		(((HFI_DMA_ALIGNMENT + 64 * (height_coded >> 4)) +
		(HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) &
		(~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1) *
		num_vpp_pipes;
	leftline_buf_meta_recony = (HFI_DMA_ALIGNMENT + 64 *
		((height_coded) / (8 * (ten_bit ? 4 : 8))));
	leftline_buf_meta_recony =
		ALIGN(leftline_buf_meta_recony, HFI_DMA_ALIGNMENT);
	leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes;
	linebuf_meta_recon_uv = (HFI_DMA_ALIGNMENT + 64 *
		((height_coded) / (4 * (ten_bit ? 4 : 8))));
	linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, HFI_DMA_ALIGNMENT);
	linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes;
	line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded);
	line_buf_recon_pix_size =
		ALIGN(line_buf_recon_pix_size, HFI_DMA_ALIGNMENT);
	slice_cmd_buffer_size = ALIGN(20480, HFI_DMA_ALIGNMENT);
	sps_pps_slice_hdr = 2048 + 4096;
	col_mv_buf_size = is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) :
		(3 * 16 * (width_lcu_num * height_lcu_num + 32));
	col_mv_buf_size =
		ALIGN(col_mv_buf_size, HFI_DMA_ALIGNMENT) * (num_ref + 1);
	h265e_colrcbuf_size =
		(((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num);
	if (num_vpp_pipes > 1)
		h265e_colrcbuf_size =
			ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) *
			num_vpp_pipes;

	h265e_colrcbuf_size = ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) *
				HFI_MAX_COL_FRAME;
	h265e_framerc_bufsize = (is_h265) ? (256 + 16 *
		(14 + (((height_coded >> 5) + 7) >> 3))) :
		(256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3)));
	h265e_framerc_bufsize *= 6; /* multiply by max numtilescol */
	if (num_vpp_pipes > 1)
		h265e_framerc_bufsize =
			ALIGN(h265e_framerc_bufsize, HFI_DMA_ALIGNMENT) *
			num_vpp_pipes;

	h265e_framerc_bufsize = ALIGN(h265e_framerc_bufsize, 512) *
				HFI_MAX_COL_FRAME;
	h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu;
	h265e_lcubitcnt_bufsize =
		ALIGN(h265e_lcubitcnt_bufsize, HFI_DMA_ALIGNMENT);
	h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3);
	h265e_lcubitmap_bufsize =
		ALIGN(h265e_lcubitmap_bufsize, HFI_DMA_ALIGNMENT);
	line_buf_sde_size = 256 + 16 * (width_coded >> 4);
	line_buf_sde_size = ALIGN(line_buf_sde_size, HFI_DMA_ALIGNMENT);
	/* No SE stats buffer above 4096x2160; smaller per-LCU share above 1920x1088. */
	if ((width_coded * height_coded) > (4096 * 2160))
		se_stats_bufsize = 0;
	else if ((width_coded * height_coded) > (1920 * 1088))
		se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256);
	else
		se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256);

	se_stats_bufsize = ALIGN(se_stats_bufsize, HFI_DMA_ALIGNMENT) * 2;
	bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6;
	bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4;
	vpp_reg_buffer_size =
		(((HFI_VENUS_VPPSG_MAX_REGISTERS << 3) + 31) & (~31)) * 10;
	lambda_lut_size = 256 * 11;
	override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3);
	override_buffer_size =
		ALIGN(override_buffer_size, HFI_DMA_ALIGNMENT) * 2;
	ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3;
	vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64;
	vpss_line_buf =
		(((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) *
		16) + vpss_line_buffer_size_1;
	topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5);
	topline_bufsize_fe_1stg_sao =
		ALIGN(topline_bufsize_fe_1stg_sao, HFI_DMA_ALIGNMENT);

	size =
		line_buf_ctrl_size + line_buf_data_size +
		line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size +
		vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE +
		leftline_buf_ctrl_size_FE + line_buf_recon_pix_size +
		leftline_buf_recon_pix_size +
		leftline_buf_meta_recony + linebuf_meta_recon_uv +
		h265e_colrcbuf_size + h265e_framerc_bufsize +
		h265e_lcubitcnt_bufsize + h265e_lcubitmap_bufsize +
		line_buf_sde_size +
		topline_bufsize_fe_1stg_sao + override_buffer_size +
		bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr +
		slice_cmd_buffer_size + bse_slice_cmd_buffer_size +
		ir_buffer_size + slice_info_bufsize + lambda_lut_size +
		se_stats_bufsize + 1024;

	return size;
}

/* H.264 encoder scratch1 size: LCU size 16, non-H.265 formulas. */
static u32 h264e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
			       u32 num_vpp_pipes)
{
	return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit,
					   num_vpp_pipes, false);
}

/* H.265 encoder scratch1 size: LCU size 32, H.265 formulas. */
static u32 h265e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
			       u32 num_vpp_pipes)
{
	return calculate_enc_scratch1_size(width, height, 32, num_ref, ten_bit,
					   num_vpp_pipes, true);
}

/*
 * VP8 encoder scratch1 size: LCU size 16, non-H.265 formulas.
 * @num_vpp_pipes is ignored; a pipe count of 1 is always passed down.
 */
static u32 vp8e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit,
			      u32 num_vpp_pipes)
{
	return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit,
					   1, false);
}

/*
 * Metadata plane stride: number of @tile_width_pels wide tiles needed to
 * cover @width, rounded up to a multiple of @metadata_stride_multi.
 */
static u32 ubwc_metadata_plane_stride(u32 width, u32 metadata_stride_multi,
				      u32 tile_width_pels)
{
	return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels),
			metadata_stride_multi);
}

/*
 * Metadata plane height: number of @tile_height_pels high tiles needed to
 * cover @height, rounded up to a multiple of @metadata_height_multi.
 */
static u32 ubwc_metadata_plane_bufheight(u32 height, u32 metadata_height_multi,
					 u32 tile_height_pels)
{
	return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels),
			metadata_height_multi);
}

/* Metadata plane size: stride times height, rounded up to 4 KiB. */
static u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride,
					   u32 metadata_buf_height)
{
	return ALIGN(metadata_stride * metadata_buf_height, SZ_4K);
}

948static u32 enc_scratch2_size(u32 width, u32 height, u32 num_ref, bool ten_bit) 949{ 950 u32 aligned_width, aligned_height, chroma_height, ref_buf_height; 951 u32 luma_size, chroma_size; 952 u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c; 953 u32 ref_luma_stride_bytes, ref_chroma_height_bytes; 954 u32 ref_buf_size, ref_stride; 955 u32 size; 956 957 if (!ten_bit) { 958 aligned_height = ALIGN(height, HFI_VENUS_HEIGHT_ALIGNMENT); 959 chroma_height = height >> 1; 960 chroma_height = ALIGN(chroma_height, 961 HFI_VENUS_HEIGHT_ALIGNMENT); 962 aligned_width = ALIGN(width, HFI_VENUS_WIDTH_ALIGNMENT); 963 metadata_stride = 964 ubwc_metadata_plane_stride(width, 64, 965 NV12_UBWC_Y_TILE_WIDTH); 966 meta_buf_height = 967 ubwc_metadata_plane_bufheight(height, 16, 968 NV12_UBWC_Y_TILE_HEIGHT); 969 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 970 meta_buf_height); 971 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 972 meta_buf_height); 973 size = (aligned_height + chroma_height) * aligned_width + 974 meta_size_y + meta_size_c; 975 size = (size * (num_ref + 3)) + 4096; 976 } else { 977 ref_buf_height = (height + (HFI_VENUS_HEIGHT_ALIGNMENT - 1)) 978 & (~(HFI_VENUS_HEIGHT_ALIGNMENT - 1)); 979 ref_luma_stride_bytes = 980 ((width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) * 981 SYSTEM_LAL_TILE10; 982 ref_stride = 4 * (ref_luma_stride_bytes / 3); 983 ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1)); 984 luma_size = ref_buf_height * ref_stride; 985 ref_chroma_height_bytes = (((height + 1) >> 1) + 986 (32 - 1)) & (~(32 - 1)); 987 chroma_size = ref_stride * ref_chroma_height_bytes; 988 luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 989 chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 990 ref_buf_size = luma_size + chroma_size; 991 metadata_stride = 992 ubwc_metadata_plane_stride(width, 993 METADATA_STRIDE_MULTIPLE, 994 TP10_UBWC_Y_TILE_WIDTH); 995 meta_buf_height = 996 ubwc_metadata_plane_bufheight(height, 997 
METADATA_HEIGHT_MULTIPLE, 998 TP10_UBWC_Y_TILE_HEIGHT); 999 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 1000 meta_buf_height); 1001 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 1002 meta_buf_height); 1003 size = ref_buf_size + meta_size_y + meta_size_c; 1004 size = (size * (num_ref + 3)) + 4096; 1005 } 1006 1007 return size; 1008} 1009 1010static u32 enc_persist_size(void) 1011{ 1012 return HFI_IRIS2_ENC_PERSIST_SIZE; 1013} 1014 1015static u32 h264d_persist1_size(void) 1016{ 1017 return ALIGN((SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264 1018 + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA), HFI_DMA_ALIGNMENT); 1019} 1020 1021static u32 h265d_persist1_size(void) 1022{ 1023 return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_TILE 1024 * sizeof(u32)), HFI_DMA_ALIGNMENT); 1025} 1026 1027static u32 vp8d_persist1_size(void) 1028{ 1029 return ALIGN(VP8_NUM_PROBABILITY_TABLE_BUF * VP8_PROB_TABLE_SIZE, 1030 HFI_DMA_ALIGNMENT); 1031} 1032 1033static u32 vp9d_persist1_size(void) 1034{ 1035 return 1036 ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, 1037 HFI_DMA_ALIGNMENT) + 1038 ALIGN(HFI_IRIS2_VP9D_COMV_SIZE, HFI_DMA_ALIGNMENT) + 1039 ALIGN(MAX_SUPERFRAME_HEADER_LEN, HFI_DMA_ALIGNMENT) + 1040 ALIGN(VP9_UDC_HEADER_BUF_SIZE, HFI_DMA_ALIGNMENT) + 1041 ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE, 1042 HFI_DMA_ALIGNMENT); 1043} 1044 1045static u32 mpeg2d_persist1_size(void) 1046{ 1047 return QMATRIX_SIZE + MP2D_QPDUMP_SIZE; 1048} 1049 1050struct dec_bufsize_ops { 1051 u32 (*scratch)(u32 width, u32 height, bool is_interlaced); 1052 u32 (*scratch1)(u32 width, u32 height, u32 min_buf_count, 1053 bool split_mode_enabled, u32 num_vpp_pipes); 1054 u32 (*persist1)(void); 1055}; 1056 1057struct enc_bufsize_ops { 1058 u32 (*scratch)(u32 width, u32 height, u32 work_mode, u32 num_vpp_pipes, 1059 u32 rc_type); 1060 u32 (*scratch1)(u32 width, u32 height, u32 num_ref, bool ten_bit, 1061 u32 num_vpp_pipes); 1062 u32 (*scratch2)(u32 width, 
u32 height, u32 num_ref, bool ten_bit); 1063 u32 (*persist)(void); 1064}; 1065 1066static struct dec_bufsize_ops dec_h264_ops = { 1067 .scratch = h264d_scratch_size, 1068 .scratch1 = h264d_scratch1_size, 1069 .persist1 = h264d_persist1_size, 1070}; 1071 1072static struct dec_bufsize_ops dec_h265_ops = { 1073 .scratch = h265d_scratch_size, 1074 .scratch1 = h265d_scratch1_size, 1075 .persist1 = h265d_persist1_size, 1076}; 1077 1078static struct dec_bufsize_ops dec_vp8_ops = { 1079 .scratch = vpxd_scratch_size, 1080 .scratch1 = vp8d_scratch1_size, 1081 .persist1 = vp8d_persist1_size, 1082}; 1083 1084static struct dec_bufsize_ops dec_vp9_ops = { 1085 .scratch = vpxd_scratch_size, 1086 .scratch1 = vp9d_scratch1_size, 1087 .persist1 = vp9d_persist1_size, 1088}; 1089 1090static struct dec_bufsize_ops dec_mpeg2_ops = { 1091 .scratch = mpeg2d_scratch_size, 1092 .scratch1 = mpeg2d_scratch1_size, 1093 .persist1 = mpeg2d_persist1_size, 1094}; 1095 1096static struct enc_bufsize_ops enc_h264_ops = { 1097 .scratch = h264e_scratch_size, 1098 .scratch1 = h264e_scratch1_size, 1099 .scratch2 = enc_scratch2_size, 1100 .persist = enc_persist_size, 1101}; 1102 1103static struct enc_bufsize_ops enc_h265_ops = { 1104 .scratch = h265e_scratch_size, 1105 .scratch1 = h265e_scratch1_size, 1106 .scratch2 = enc_scratch2_size, 1107 .persist = enc_persist_size, 1108}; 1109 1110static struct enc_bufsize_ops enc_vp8_ops = { 1111 .scratch = vp8e_scratch_size, 1112 .scratch1 = vp8e_scratch1_size, 1113 .scratch2 = enc_scratch2_size, 1114 .persist = enc_persist_size, 1115}; 1116 1117static u32 1118calculate_dec_input_frame_size(u32 width, u32 height, u32 codec, 1119 u32 max_mbs_per_frame, u32 buffer_size_limit) 1120{ 1121 u32 frame_size, num_mbs; 1122 u32 div_factor = 1; 1123 u32 base_res_mbs = NUM_MBS_4K; 1124 1125 /* 1126 * Decoder input size calculation: 1127 * If clip is 8k buffer size is calculated for 8k : 8k mbs/4 1128 * For 8k cases we expect width/height to be set always. 
1129 * In all other cases size is calculated for 4k: 1130 * 4k mbs for VP8/VP9 and 4k/2 for remaining codecs 1131 */ 1132 num_mbs = (ALIGN(height, 16) * ALIGN(width, 16)) / 256; 1133 if (num_mbs > NUM_MBS_4K) { 1134 div_factor = 4; 1135 base_res_mbs = max_mbs_per_frame; 1136 } else { 1137 base_res_mbs = NUM_MBS_4K; 1138 if (codec == V4L2_PIX_FMT_VP9) 1139 div_factor = 1; 1140 else 1141 div_factor = 2; 1142 } 1143 1144 frame_size = base_res_mbs * MB_SIZE_IN_PIXEL * 3 / 2 / div_factor; 1145 1146 /* multiply by 10/8 (1.25) to get size for 10 bit case */ 1147 if (codec == V4L2_PIX_FMT_VP9 || codec == V4L2_PIX_FMT_HEVC) 1148 frame_size = frame_size + (frame_size >> 2); 1149 1150 if (buffer_size_limit && buffer_size_limit < frame_size) 1151 frame_size = buffer_size_limit; 1152 1153 return ALIGN(frame_size, SZ_4K); 1154} 1155 1156static int output_buffer_count(u32 session_type, u32 codec) 1157{ 1158 u32 output_min_count; 1159 1160 if (session_type == VIDC_SESSION_TYPE_DEC) { 1161 switch (codec) { 1162 case V4L2_PIX_FMT_MPEG2: 1163 case V4L2_PIX_FMT_VP8: 1164 output_min_count = 6; 1165 break; 1166 case V4L2_PIX_FMT_VP9: 1167 output_min_count = 11; 1168 break; 1169 case V4L2_PIX_FMT_H264: 1170 case V4L2_PIX_FMT_HEVC: 1171 default: 1172 output_min_count = 18; 1173 break; 1174 } 1175 } else { 1176 output_min_count = MIN_ENC_OUTPUT_BUFFERS; 1177 } 1178 1179 return output_min_count; 1180} 1181 1182static int bufreq_dec(struct hfi_plat_buffers_params *params, u32 buftype, 1183 struct hfi_buffer_requirements *bufreq) 1184{ 1185 enum hfi_version version = params->version; 1186 u32 codec = params->codec; 1187 u32 width = params->width, height = params->height, out_min_count; 1188 struct dec_bufsize_ops *dec_ops; 1189 bool is_secondary_output = params->dec.is_secondary_output; 1190 bool is_interlaced = params->dec.is_interlaced; 1191 u32 max_mbs_per_frame = params->dec.max_mbs_per_frame; 1192 u32 buffer_size_limit = params->dec.buffer_size_limit; 1193 u32 num_vpp_pipes = 
params->num_vpp_pipes; 1194 1195 switch (codec) { 1196 case V4L2_PIX_FMT_H264: 1197 dec_ops = &dec_h264_ops; 1198 break; 1199 case V4L2_PIX_FMT_HEVC: 1200 dec_ops = &dec_h265_ops; 1201 break; 1202 case V4L2_PIX_FMT_VP8: 1203 dec_ops = &dec_vp8_ops; 1204 break; 1205 case V4L2_PIX_FMT_VP9: 1206 dec_ops = &dec_vp9_ops; 1207 break; 1208 case V4L2_PIX_FMT_MPEG2: 1209 dec_ops = &dec_mpeg2_ops; 1210 break; 1211 default: 1212 return -EINVAL; 1213 } 1214 1215 out_min_count = output_buffer_count(VIDC_SESSION_TYPE_DEC, codec); 1216 /* Max of driver and FW count */ 1217 out_min_count = max(out_min_count, bufreq->count_min); 1218 1219 bufreq->type = buftype; 1220 bufreq->region_size = 0; 1221 bufreq->count_min = 1; 1222 bufreq->count_actual = 1; 1223 bufreq->hold_count = 1; 1224 bufreq->contiguous = 1; 1225 bufreq->alignment = 256; 1226 1227 if (buftype == HFI_BUFFER_INPUT) { 1228 bufreq->count_min = MIN_INPUT_BUFFERS; 1229 bufreq->size = 1230 calculate_dec_input_frame_size(width, height, codec, 1231 max_mbs_per_frame, 1232 buffer_size_limit); 1233 } else if (buftype == HFI_BUFFER_OUTPUT || 1234 buftype == HFI_BUFFER_OUTPUT2) { 1235 bufreq->count_min = out_min_count; 1236 bufreq->size = 1237 venus_helper_get_framesz_raw(params->hfi_color_fmt, 1238 width, height); 1239 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) { 1240 bufreq->size = dec_ops->scratch(width, height, is_interlaced); 1241 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) { 1242 bufreq->size = dec_ops->scratch1(width, height, VB2_MAX_FRAME, 1243 is_secondary_output, 1244 num_vpp_pipes); 1245 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST_1) { 1246 bufreq->size = dec_ops->persist1(); 1247 } else { 1248 bufreq->size = 0; 1249 } 1250 1251 return 0; 1252} 1253 1254static int bufreq_enc(struct hfi_plat_buffers_params *params, u32 buftype, 1255 struct hfi_buffer_requirements *bufreq) 1256{ 1257 enum hfi_version version = params->version; 1258 struct enc_bufsize_ops *enc_ops; 1259 u32 width = 
params->width; 1260 u32 height = params->height; 1261 bool is_tenbit = params->enc.is_tenbit; 1262 u32 num_bframes = params->enc.num_b_frames; 1263 u32 codec = params->codec; 1264 u32 work_mode = params->enc.work_mode; 1265 u32 rc_type = params->enc.rc_type; 1266 u32 num_vpp_pipes = params->num_vpp_pipes; 1267 u32 num_ref; 1268 1269 switch (codec) { 1270 case V4L2_PIX_FMT_H264: 1271 enc_ops = &enc_h264_ops; 1272 break; 1273 case V4L2_PIX_FMT_HEVC: 1274 enc_ops = &enc_h265_ops; 1275 break; 1276 case V4L2_PIX_FMT_VP8: 1277 enc_ops = &enc_vp8_ops; 1278 break; 1279 default: 1280 return -EINVAL; 1281 } 1282 1283 num_ref = num_bframes > 0 ? num_bframes + 1 : 1; 1284 1285 bufreq->type = buftype; 1286 bufreq->region_size = 0; 1287 bufreq->count_min = 1; 1288 bufreq->count_actual = 1; 1289 bufreq->hold_count = 1; 1290 bufreq->contiguous = 1; 1291 bufreq->alignment = 256; 1292 1293 if (buftype == HFI_BUFFER_INPUT) { 1294 bufreq->count_min = MIN_INPUT_BUFFERS; 1295 bufreq->size = 1296 venus_helper_get_framesz_raw(params->hfi_color_fmt, 1297 width, height); 1298 } else if (buftype == HFI_BUFFER_OUTPUT || 1299 buftype == HFI_BUFFER_OUTPUT2) { 1300 bufreq->count_min = 1301 output_buffer_count(VIDC_SESSION_TYPE_ENC, codec); 1302 bufreq->size = calculate_enc_output_frame_size(width, height, 1303 rc_type); 1304 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) { 1305 bufreq->size = enc_ops->scratch(width, height, work_mode, 1306 num_vpp_pipes, rc_type); 1307 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) { 1308 bufreq->size = enc_ops->scratch1(width, height, num_ref, 1309 is_tenbit, num_vpp_pipes); 1310 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_2(version)) { 1311 bufreq->size = enc_ops->scratch2(width, height, num_ref, 1312 is_tenbit); 1313 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST) { 1314 bufreq->size = enc_ops->persist(); 1315 } else { 1316 bufreq->size = 0; 1317 } 1318 1319 return 0; 1320} 1321 1322int hfi_plat_bufreq_v6(struct 
hfi_plat_buffers_params *params, u32 session_type, 1323 u32 buftype, struct hfi_buffer_requirements *bufreq) 1324{ 1325 if (session_type == VIDC_SESSION_TYPE_DEC) 1326 return bufreq_dec(params, buftype, bufreq); 1327 else 1328 return bufreq_enc(params, buftype, bufreq); 1329}