venc_vp8_if.c (12889B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (c) 2016 MediaTek Inc. 4 * Author: Daniel Hsiao <daniel.hsiao@mediatek.com> 5 * PoChun Lin <pochun.lin@mediatek.com> 6 */ 7 8#include <linux/interrupt.h> 9#include <linux/kernel.h> 10#include <linux/slab.h> 11 12#include "../mtk_vcodec_drv.h" 13#include "../mtk_vcodec_util.h" 14#include "../mtk_vcodec_intr.h" 15#include "../mtk_vcodec_enc.h" 16#include "../mtk_vcodec_enc_pm.h" 17#include "../venc_drv_base.h" 18#include "../venc_ipi_msg.h" 19#include "../venc_vpu_if.h" 20 21#define VENC_BITSTREAM_FRAME_SIZE 0x0098 22#define VENC_BITSTREAM_HEADER_LEN 0x00e8 23 24/* This ac_tag is vp8 frame tag. */ 25#define MAX_AC_TAG_SIZE 10 26 27/* 28 * enum venc_vp8_vpu_work_buf - vp8 encoder buffer index 29 */ 30enum venc_vp8_vpu_work_buf { 31 VENC_VP8_VPU_WORK_BUF_LUMA, 32 VENC_VP8_VPU_WORK_BUF_LUMA2, 33 VENC_VP8_VPU_WORK_BUF_LUMA3, 34 VENC_VP8_VPU_WORK_BUF_CHROMA, 35 VENC_VP8_VPU_WORK_BUF_CHROMA2, 36 VENC_VP8_VPU_WORK_BUF_CHROMA3, 37 VENC_VP8_VPU_WORK_BUF_MV_INFO, 38 VENC_VP8_VPU_WORK_BUF_BS_HEADER, 39 VENC_VP8_VPU_WORK_BUF_PROB_BUF, 40 VENC_VP8_VPU_WORK_BUF_RC_INFO, 41 VENC_VP8_VPU_WORK_BUF_RC_CODE, 42 VENC_VP8_VPU_WORK_BUF_RC_CODE2, 43 VENC_VP8_VPU_WORK_BUF_RC_CODE3, 44 VENC_VP8_VPU_WORK_BUF_MAX, 45}; 46 47/* 48 * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration 49 * AP-W/R : AP is writer/reader on this item 50 * VPU-W/R: VPU is write/reader on this item 51 * @input_fourcc: input fourcc 52 * @bitrate: target bitrate (in bps) 53 * @pic_w: picture width. Picture size is visible stream resolution, in pixels, 54 * to be used for display purposes; must be smaller or equal to buffer 55 * size. 56 * @pic_h: picture height 57 * @buf_w: buffer width (with 16 alignment). Buffer size is stream resolution 58 * in pixels aligned to hardware requirements. 59 * @buf_h: buffer height (with 16 alignment) 60 * @gop_size: group of picture size (key frame) 61 * @framerate: frame rate in fps 62 * @ts_mode: temporal scalability mode (0: disable, 1: enable) 63 * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps. 64 */ 65struct venc_vp8_vpu_config { 66 u32 input_fourcc; 67 u32 bitrate; 68 u32 pic_w; 69 u32 pic_h; 70 u32 buf_w; 71 u32 buf_h; 72 u32 gop_size; 73 u32 framerate; 74 u32 ts_mode; 75}; 76 77/* 78 * struct venc_vp8_vpu_buf - Structure for buffer information 79 * AP-W/R : AP is writer/reader on this item 80 * VPU-W/R: VPU is write/reader on this item 81 * @iova: IO virtual address 82 * @vpua: VPU side memory addr which is used by RC_CODE 83 * @size: buffer size (in bytes) 84 */ 85struct venc_vp8_vpu_buf { 86 u32 iova; 87 u32 vpua; 88 u32 size; 89}; 90 91/* 92 * struct venc_vp8_vsi - Structure for VPU driver control and info share 93 * AP-W/R : AP is writer/reader on this item 94 * VPU-W/R: VPU is write/reader on this item 95 * This structure is allocated in VPU side and shared to AP side. 96 * @config: vp8 encoder configuration 97 * @work_bufs: working buffer information in VPU side 98 * The work_bufs here is for storing the 'size' info shared to AP side. 99 * The similar item in struct venc_vp8_inst is for memory allocation 100 * in AP side. The AP driver will copy the 'size' from here to the one in 101 * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate 102 * the buffer. After that, bypass the 'dma_addr' to the 'iova' field here for 103 * register setting in VPU side. 104 */ 105struct venc_vp8_vsi { 106 struct venc_vp8_vpu_config config; 107 struct venc_vp8_vpu_buf work_bufs[VENC_VP8_VPU_WORK_BUF_MAX]; 108}; 109 110/* 111 * struct venc_vp8_inst - vp8 encoder AP driver instance 112 * @hw_base: vp8 encoder hardware register base 113 * @work_bufs: working buffer 114 * @work_buf_allocated: working buffer allocated flag 115 * @frm_cnt: encoded frame count, it's used for I-frame judgement and 116 * reset when force intra cmd received. 117 * @ts_mode: temporal scalability mode (0: disable, 1: enable) 118 * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps. 119 * @vpu_inst: VPU instance to exchange information between AP and VPU 120 * @vsi: driver structure allocated by VPU side and shared to AP side for 121 * control and info share 122 * @ctx: context for v4l2 layer integration 123 */ 124struct venc_vp8_inst { 125 void __iomem *hw_base; 126 struct mtk_vcodec_mem work_bufs[VENC_VP8_VPU_WORK_BUF_MAX]; 127 bool work_buf_allocated; 128 unsigned int frm_cnt; 129 unsigned int ts_mode; 130 struct venc_vpu_inst vpu_inst; 131 struct venc_vp8_vsi *vsi; 132 struct mtk_vcodec_ctx *ctx; 133}; 134 135static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr) 136{ 137 return readl(inst->hw_base + addr); 138} 139 140static void vp8_enc_free_work_buf(struct venc_vp8_inst *inst) 141{ 142 int i; 143 144 mtk_vcodec_debug_enter(inst); 145 146 /* Buffers need to be freed by AP. */ 147 for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) { 148 if (inst->work_bufs[i].size == 0) 149 continue; 150 mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]); 151 } 152 153 mtk_vcodec_debug_leave(inst); 154} 155 156static int vp8_enc_alloc_work_buf(struct venc_vp8_inst *inst) 157{ 158 int i; 159 int ret = 0; 160 struct venc_vp8_vpu_buf *wb = inst->vsi->work_bufs; 161 162 mtk_vcodec_debug_enter(inst); 163 164 for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) { 165 if (wb[i].size == 0) 166 continue; 167 /* 168 * This 'wb' structure is set by VPU side and shared to AP for 169 * buffer allocation and IO virtual addr mapping. For most of 170 * the buffers, AP will allocate the buffer according to 'size' 171 * field and store the IO virtual addr in 'iova' field. For the 172 * RC_CODEx buffers, they are pre-allocated in the VPU side 173 * because they are inside VPU SRAM, and save the VPU addr in 174 * the 'vpua' field. The AP will translate the VPU addr to the 175 * corresponding IO virtual addr and store in 'iova' field. 176 */ 177 inst->work_bufs[i].size = wb[i].size; 178 ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]); 179 if (ret) { 180 mtk_vcodec_err(inst, 181 "cannot alloc work_bufs[%d]", i); 182 goto err_alloc; 183 } 184 /* 185 * This RC_CODEx is pre-allocated by VPU and saved in VPU addr. 186 * So we need use memcpy to copy RC_CODEx from VPU addr into IO 187 * virtual addr in 'iova' field for reg setting in VPU side. 188 */ 189 if (i == VENC_VP8_VPU_WORK_BUF_RC_CODE || 190 i == VENC_VP8_VPU_WORK_BUF_RC_CODE2 || 191 i == VENC_VP8_VPU_WORK_BUF_RC_CODE3) { 192 struct mtk_vcodec_fw *handler; 193 void *tmp_va; 194 195 handler = inst->vpu_inst.ctx->dev->fw_handler; 196 tmp_va = mtk_vcodec_fw_map_dm_addr(handler, 197 wb[i].vpua); 198 memcpy(inst->work_bufs[i].va, tmp_va, wb[i].size); 199 } 200 wb[i].iova = inst->work_bufs[i].dma_addr; 201 202 mtk_vcodec_debug(inst, 203 "work_bufs[%d] va=0x%p,iova=%pad,size=%zu", 204 i, inst->work_bufs[i].va, 205 &inst->work_bufs[i].dma_addr, 206 inst->work_bufs[i].size); 207 } 208 209 mtk_vcodec_debug_leave(inst); 210 211 return ret; 212 213err_alloc: 214 vp8_enc_free_work_buf(inst); 215 216 return ret; 217} 218 219static unsigned int vp8_enc_wait_venc_done(struct venc_vp8_inst *inst) 220{ 221 unsigned int irq_status = 0; 222 struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)inst->ctx; 223 224 if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 225 WAIT_INTR_TIMEOUT_MS, 0)) { 226 irq_status = ctx->irq_status; 227 mtk_vcodec_debug(inst, "isr return %x", irq_status); 228 } 229 return irq_status; 230} 231 232/* 233 * Compose ac_tag, bitstream header and bitstream payload into 234 * one bitstream buffer. 235 */ 236static int vp8_enc_compose_one_frame(struct venc_vp8_inst *inst, 237 struct mtk_vcodec_mem *bs_buf, 238 unsigned int *bs_size) 239{ 240 unsigned int not_key; 241 u32 bs_frm_size; 242 u32 bs_hdr_len; 243 unsigned int ac_tag_size; 244 u8 ac_tag[MAX_AC_TAG_SIZE]; 245 u32 tag; 246 247 bs_frm_size = vp8_enc_read_reg(inst, VENC_BITSTREAM_FRAME_SIZE); 248 bs_hdr_len = vp8_enc_read_reg(inst, VENC_BITSTREAM_HEADER_LEN); 249 250 /* if a frame is key frame, not_key is 0 */ 251 not_key = !inst->vpu_inst.is_key_frm; 252 tag = (bs_hdr_len << 5) | 0x10 | not_key; 253 ac_tag[0] = tag & 0xff; 254 ac_tag[1] = (tag >> 8) & 0xff; 255 ac_tag[2] = (tag >> 16) & 0xff; 256 257 /* key frame */ 258 if (not_key == 0) { 259 ac_tag_size = MAX_AC_TAG_SIZE; 260 ac_tag[3] = 0x9d; 261 ac_tag[4] = 0x01; 262 ac_tag[5] = 0x2a; 263 ac_tag[6] = inst->vsi->config.pic_w; 264 ac_tag[7] = inst->vsi->config.pic_w >> 8; 265 ac_tag[8] = inst->vsi->config.pic_h; 266 ac_tag[9] = inst->vsi->config.pic_h >> 8; 267 } else { 268 ac_tag_size = 3; 269 } 270 271 if (bs_buf->size < bs_hdr_len + bs_frm_size + ac_tag_size) { 272 mtk_vcodec_err(inst, "bitstream buf size is too small(%zu)", 273 bs_buf->size); 274 return -EINVAL; 275 } 276 277 /* 278 * (1) The vp8 bitstream header and body are generated by the HW vp8 279 * encoder separately at the same time. We cannot know the bitstream 280 * header length in advance. 281 * (2) From the vp8 spec, there is no stuffing byte allowed between the 282 * ac tag, bitstream header and bitstream body. 283 */ 284 memmove(bs_buf->va + bs_hdr_len + ac_tag_size, 285 bs_buf->va, bs_frm_size); 286 memcpy(bs_buf->va + ac_tag_size, 287 inst->work_bufs[VENC_VP8_VPU_WORK_BUF_BS_HEADER].va, 288 bs_hdr_len); 289 memcpy(bs_buf->va, ac_tag, ac_tag_size); 290 *bs_size = bs_frm_size + bs_hdr_len + ac_tag_size; 291 292 return 0; 293} 294 295static int vp8_enc_encode_frame(struct venc_vp8_inst *inst, 296 struct venc_frm_buf *frm_buf, 297 struct mtk_vcodec_mem *bs_buf, 298 unsigned int *bs_size) 299{ 300 int ret = 0; 301 unsigned int irq_status; 302 303 mtk_vcodec_debug(inst, "->frm_cnt=%d", inst->frm_cnt); 304 305 ret = vpu_enc_encode(&inst->vpu_inst, 0, frm_buf, bs_buf, NULL); 306 if (ret) 307 return ret; 308 309 irq_status = vp8_enc_wait_venc_done(inst); 310 if (irq_status != MTK_VENC_IRQ_STATUS_FRM) { 311 mtk_vcodec_err(inst, "irq_status=%d failed", irq_status); 312 return -EIO; 313 } 314 315 if (vp8_enc_compose_one_frame(inst, bs_buf, bs_size)) { 316 mtk_vcodec_err(inst, "vp8_enc_compose_one_frame failed"); 317 return -EINVAL; 318 } 319 320 inst->frm_cnt++; 321 mtk_vcodec_debug(inst, "<-size=%d key_frm=%d", *bs_size, 322 inst->vpu_inst.is_key_frm); 323 324 return ret; 325} 326 327static int vp8_enc_init(struct mtk_vcodec_ctx *ctx) 328{ 329 int ret = 0; 330 struct venc_vp8_inst *inst; 331 332 inst = kzalloc(sizeof(*inst), GFP_KERNEL); 333 if (!inst) 334 return -ENOMEM; 335 336 inst->ctx = ctx; 337 inst->vpu_inst.ctx = ctx; 338 inst->vpu_inst.id = IPI_VENC_VP8; 339 inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx, VENC_LT_SYS); 340 341 mtk_vcodec_debug_enter(inst); 342 343 ret = vpu_enc_init(&inst->vpu_inst); 344 345 inst->vsi = (struct venc_vp8_vsi *)inst->vpu_inst.vsi; 346 347 mtk_vcodec_debug_leave(inst); 348 349 if (ret) 350 kfree(inst); 351 else 352 ctx->drv_handle = inst; 353 354 return ret; 355} 356 357static int vp8_enc_encode(void *handle, 358 enum venc_start_opt opt, 359 struct venc_frm_buf *frm_buf, 360 struct mtk_vcodec_mem *bs_buf, 361 struct venc_done_result *result) 362{ 363 int ret = 0; 364 struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; 365 struct mtk_vcodec_ctx *ctx = inst->ctx; 366 367 mtk_vcodec_debug_enter(inst); 368 369 enable_irq(ctx->dev->enc_irq); 370 371 switch (opt) { 372 case VENC_START_OPT_ENCODE_FRAME: 373 ret = vp8_enc_encode_frame(inst, frm_buf, bs_buf, 374 &result->bs_size); 375 if (ret) 376 goto encode_err; 377 result->is_key_frm = inst->vpu_inst.is_key_frm; 378 break; 379 380 default: 381 mtk_vcodec_err(inst, "opt not support:%d", opt); 382 ret = -EINVAL; 383 break; 384 } 385 386encode_err: 387 388 disable_irq(ctx->dev->enc_irq); 389 mtk_vcodec_debug_leave(inst); 390 391 return ret; 392} 393 394static int vp8_enc_set_param(void *handle, 395 enum venc_set_param_type type, 396 struct venc_enc_param *enc_prm) 397{ 398 int ret = 0; 399 struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; 400 401 mtk_vcodec_debug(inst, "->type=%d", type); 402 403 switch (type) { 404 case VENC_SET_PARAM_ENC: 405 inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt; 406 inst->vsi->config.bitrate = enc_prm->bitrate; 407 inst->vsi->config.pic_w = enc_prm->width; 408 inst->vsi->config.pic_h = enc_prm->height; 409 inst->vsi->config.buf_w = enc_prm->buf_width; 410 inst->vsi->config.buf_h = enc_prm->buf_height; 411 inst->vsi->config.gop_size = enc_prm->gop_size; 412 inst->vsi->config.framerate = enc_prm->frm_rate; 413 inst->vsi->config.ts_mode = inst->ts_mode; 414 ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); 415 if (ret) 416 break; 417 if (inst->work_buf_allocated) { 418 vp8_enc_free_work_buf(inst); 419 inst->work_buf_allocated = false; 420 } 421 ret = vp8_enc_alloc_work_buf(inst); 422 if (ret) 423 break; 424 inst->work_buf_allocated = true; 425 break; 426 427 /* 428 * VENC_SET_PARAM_TS_MODE must be called before VENC_SET_PARAM_ENC 429 */ 430 case VENC_SET_PARAM_TS_MODE: 431 inst->ts_mode = 1; 432 mtk_vcodec_debug(inst, "set ts_mode"); 433 break; 434 435 default: 436 ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); 437 break; 438 } 439 440 mtk_vcodec_debug_leave(inst); 441 442 return ret; 443} 444 445static int vp8_enc_deinit(void *handle) 446{ 447 int ret = 0; 448 struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; 449 450 mtk_vcodec_debug_enter(inst); 451 452 ret = vpu_enc_deinit(&inst->vpu_inst); 453 454 if (inst->work_buf_allocated) 455 vp8_enc_free_work_buf(inst); 456 457 mtk_vcodec_debug_leave(inst); 458 kfree(inst); 459 460 return ret; 461} 462 463const struct venc_common_if venc_vp8_if = { 464 .init = vp8_enc_init, 465 .encode = vp8_enc_encode, 466 .set_param = vp8_enc_set_param, 467 .deinit = vp8_enc_deinit, 468};