coda-bit.c (77171B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Coda multi-standard codec IP - BIT processor functions 4 * 5 * Copyright (C) 2012 Vista Silicon S.L. 6 * Javier Martin, <javier.martin@vista-silicon.com> 7 * Xavier Duret 8 * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix 9 */ 10 11#include <linux/clk.h> 12#include <linux/irqreturn.h> 13#include <linux/kernel.h> 14#include <linux/log2.h> 15#include <linux/platform_device.h> 16#include <linux/ratelimit.h> 17#include <linux/reset.h> 18#include <linux/slab.h> 19#include <linux/videodev2.h> 20 21#include <media/v4l2-common.h> 22#include <media/v4l2-ctrls.h> 23#include <media/v4l2-fh.h> 24#include <media/v4l2-mem2mem.h> 25#include <media/videobuf2-v4l2.h> 26#include <media/videobuf2-dma-contig.h> 27#include <media/videobuf2-vmalloc.h> 28 29#include "coda.h" 30#include "imx-vdoa.h" 31#define CREATE_TRACE_POINTS 32#include "trace.h" 33 34#define CODA_PARA_BUF_SIZE (10 * 1024) 35#define CODA7_PS_BUF_SIZE 0x28000 36#define CODA9_PS_SAVE_SIZE (512 * 1024) 37 38#define CODA_DEFAULT_GAMMA 4096 39#define CODA9_DEFAULT_GAMMA 24576 /* 0.75 * 32768 */ 40 41static void coda_free_bitstream_buffer(struct coda_ctx *ctx); 42 43static inline int coda_is_initialized(struct coda_dev *dev) 44{ 45 return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0; 46} 47 48static inline unsigned long coda_isbusy(struct coda_dev *dev) 49{ 50 return coda_read(dev, CODA_REG_BIT_BUSY); 51} 52 53static int coda_wait_timeout(struct coda_dev *dev) 54{ 55 unsigned long timeout = jiffies + msecs_to_jiffies(1000); 56 57 while (coda_isbusy(dev)) { 58 if (time_after(jiffies, timeout)) 59 return -ETIMEDOUT; 60 } 61 return 0; 62} 63 64static void coda_command_async(struct coda_ctx *ctx, int cmd) 65{ 66 struct coda_dev *dev = ctx->dev; 67 68 if (dev->devtype->product == CODA_HX4 || 69 dev->devtype->product == CODA_7541 || 70 dev->devtype->product == CODA_960) { 71 /* Restore context related registers to CODA */ 72 coda_write(dev, ctx->bit_stream_param, 73 
CODA_REG_BIT_BIT_STREAM_PARAM); 74 coda_write(dev, ctx->frm_dis_flg, 75 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 76 coda_write(dev, ctx->frame_mem_ctrl, 77 CODA_REG_BIT_FRAME_MEM_CTRL); 78 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR); 79 } 80 81 if (dev->devtype->product == CODA_960) { 82 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR); 83 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN); 84 } 85 86 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 87 88 coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX); 89 coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD); 90 coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD); 91 92 trace_coda_bit_run(ctx, cmd); 93 94 coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND); 95} 96 97static int coda_command_sync(struct coda_ctx *ctx, int cmd) 98{ 99 struct coda_dev *dev = ctx->dev; 100 int ret; 101 102 lockdep_assert_held(&dev->coda_mutex); 103 104 coda_command_async(ctx, cmd); 105 ret = coda_wait_timeout(dev); 106 trace_coda_bit_done(ctx); 107 108 return ret; 109} 110 111int coda_hw_reset(struct coda_ctx *ctx) 112{ 113 struct coda_dev *dev = ctx->dev; 114 unsigned long timeout; 115 unsigned int idx; 116 int ret; 117 118 lockdep_assert_held(&dev->coda_mutex); 119 120 if (!dev->rstc) 121 return -ENOENT; 122 123 idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX); 124 125 if (dev->devtype->product == CODA_960) { 126 timeout = jiffies + msecs_to_jiffies(100); 127 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL); 128 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) { 129 if (time_after(jiffies, timeout)) 130 return -ETIME; 131 cpu_relax(); 132 } 133 } 134 135 ret = reset_control_reset(dev->rstc); 136 if (ret < 0) 137 return ret; 138 139 if (dev->devtype->product == CODA_960) 140 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL); 141 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 142 coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN); 143 ret = coda_wait_timeout(dev); 144 coda_write(dev, 
idx, CODA_REG_BIT_RUN_INDEX); 145 146 return ret; 147} 148 149static void coda_kfifo_sync_from_device(struct coda_ctx *ctx) 150{ 151 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 152 struct coda_dev *dev = ctx->dev; 153 u32 rd_ptr; 154 155 rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 156 kfifo->out = (kfifo->in & ~kfifo->mask) | 157 (rd_ptr - ctx->bitstream.paddr); 158 if (kfifo->out > kfifo->in) 159 kfifo->out -= kfifo->mask + 1; 160} 161 162static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx) 163{ 164 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 165 struct coda_dev *dev = ctx->dev; 166 u32 rd_ptr, wr_ptr; 167 168 rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask); 169 coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 170 wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); 171 coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 172} 173 174static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx) 175{ 176 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 177 struct coda_dev *dev = ctx->dev; 178 u32 wr_ptr; 179 180 wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); 181 coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 182} 183 184static int coda_h264_bitstream_pad(struct coda_ctx *ctx, u32 size) 185{ 186 unsigned char *buf; 187 u32 n; 188 189 if (size < 6) 190 size = 6; 191 192 buf = kmalloc(size, GFP_KERNEL); 193 if (!buf) 194 return -ENOMEM; 195 196 coda_h264_filler_nal(size, buf); 197 n = kfifo_in(&ctx->bitstream_fifo, buf, size); 198 kfree(buf); 199 200 return (n < size) ? 
-ENOSPC : 0; 201} 202 203int coda_bitstream_flush(struct coda_ctx *ctx) 204{ 205 int ret; 206 207 if (ctx->inst_type != CODA_INST_DECODER || !ctx->use_bit) 208 return 0; 209 210 ret = coda_command_sync(ctx, CODA_COMMAND_DEC_BUF_FLUSH); 211 if (ret < 0) { 212 v4l2_err(&ctx->dev->v4l2_dev, "failed to flush bitstream\n"); 213 return ret; 214 } 215 216 kfifo_init(&ctx->bitstream_fifo, ctx->bitstream.vaddr, 217 ctx->bitstream.size); 218 coda_kfifo_sync_to_device_full(ctx); 219 220 return 0; 221} 222 223static int coda_bitstream_queue(struct coda_ctx *ctx, const u8 *buf, u32 size) 224{ 225 u32 n = kfifo_in(&ctx->bitstream_fifo, buf, size); 226 227 return (n < size) ? -ENOSPC : 0; 228} 229 230static u32 coda_buffer_parse_headers(struct coda_ctx *ctx, 231 struct vb2_v4l2_buffer *src_buf, 232 u32 payload) 233{ 234 u8 *vaddr = vb2_plane_vaddr(&src_buf->vb2_buf, 0); 235 u32 size = 0; 236 237 switch (ctx->codec->src_fourcc) { 238 case V4L2_PIX_FMT_MPEG2: 239 size = coda_mpeg2_parse_headers(ctx, vaddr, payload); 240 break; 241 case V4L2_PIX_FMT_MPEG4: 242 size = coda_mpeg4_parse_headers(ctx, vaddr, payload); 243 break; 244 default: 245 break; 246 } 247 248 return size; 249} 250 251static bool coda_bitstream_try_queue(struct coda_ctx *ctx, 252 struct vb2_v4l2_buffer *src_buf) 253{ 254 unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 255 u8 *vaddr = vb2_plane_vaddr(&src_buf->vb2_buf, 0); 256 int ret; 257 int i; 258 259 if (coda_get_bitstream_payload(ctx) + payload + 512 >= 260 ctx->bitstream.size) 261 return false; 262 263 if (!vaddr) { 264 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n"); 265 return true; 266 } 267 268 if (ctx->qsequence == 0 && payload < 512) { 269 /* 270 * Add padding after the first buffer, if it is too small to be 271 * fetched by the CODA, by repeating the headers. 
Without 272 * repeated headers, or the first frame already queued, decoder 273 * sequence initialization fails with error code 0x2000 on i.MX6 274 * or error code 0x1 on i.MX51. 275 */ 276 u32 header_size = coda_buffer_parse_headers(ctx, src_buf, 277 payload); 278 279 if (header_size) { 280 coda_dbg(1, ctx, "pad with %u-byte header\n", 281 header_size); 282 for (i = payload; i < 512; i += header_size) { 283 ret = coda_bitstream_queue(ctx, vaddr, 284 header_size); 285 if (ret < 0) { 286 v4l2_err(&ctx->dev->v4l2_dev, 287 "bitstream buffer overflow\n"); 288 return false; 289 } 290 if (ctx->dev->devtype->product == CODA_960) 291 break; 292 } 293 } else { 294 coda_dbg(1, ctx, 295 "could not parse header, sequence initialization might fail\n"); 296 } 297 298 /* Add padding before the first buffer, if it is too small */ 299 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) 300 coda_h264_bitstream_pad(ctx, 512 - payload); 301 } 302 303 ret = coda_bitstream_queue(ctx, vaddr, payload); 304 if (ret < 0) { 305 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n"); 306 return false; 307 } 308 309 src_buf->sequence = ctx->qsequence++; 310 311 /* Sync read pointer to device */ 312 if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev)) 313 coda_kfifo_sync_to_device_write(ctx); 314 315 /* Set the stream-end flag after the last buffer is queued */ 316 if (src_buf->flags & V4L2_BUF_FLAG_LAST) 317 coda_bit_stream_end_flag(ctx); 318 ctx->hold = false; 319 320 return true; 321} 322 323void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list) 324{ 325 struct vb2_v4l2_buffer *src_buf; 326 struct coda_buffer_meta *meta; 327 u32 start; 328 329 lockdep_assert_held(&ctx->bitstream_mutex); 330 331 if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) 332 return; 333 334 while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) { 335 /* 336 * Only queue two JPEGs into the bitstream buffer to keep 337 * latency low. 
We need at least one complete buffer and the 338 * header of another buffer (for prescan) in the bitstream. 339 */ 340 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && 341 ctx->num_metas > 1) 342 break; 343 344 if (ctx->num_internal_frames && 345 ctx->num_metas >= ctx->num_internal_frames) { 346 meta = list_first_entry(&ctx->buffer_meta_list, 347 struct coda_buffer_meta, list); 348 349 /* 350 * If we managed to fill in at least a full reorder 351 * window of buffers (num_internal_frames is a 352 * conservative estimate for this) and the bitstream 353 * prefetcher has at least 2 256 bytes periods beyond 354 * the first buffer to fetch, we can safely stop queuing 355 * in order to limit the decoder drain latency. 356 */ 357 if (coda_bitstream_can_fetch_past(ctx, meta->end)) 358 break; 359 } 360 361 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); 362 363 /* Drop frames that do not start/end with a SOI/EOI markers */ 364 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && 365 !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) { 366 v4l2_err(&ctx->dev->v4l2_dev, 367 "dropping invalid JPEG frame %d\n", 368 ctx->qsequence); 369 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 370 if (buffer_list) { 371 struct v4l2_m2m_buffer *m2m_buf; 372 373 m2m_buf = container_of(src_buf, 374 struct v4l2_m2m_buffer, 375 vb); 376 list_add_tail(&m2m_buf->list, buffer_list); 377 } else { 378 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); 379 } 380 continue; 381 } 382 383 /* Dump empty buffers */ 384 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) { 385 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 386 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 387 continue; 388 } 389 390 /* Buffer start position */ 391 start = ctx->bitstream_fifo.kfifo.in; 392 393 if (coda_bitstream_try_queue(ctx, src_buf)) { 394 /* 395 * Source buffer is queued in the bitstream ringbuffer; 396 * queue the timestamp and mark source buffer as done 397 */ 398 src_buf = 
v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 399 400 meta = kmalloc(sizeof(*meta), GFP_KERNEL); 401 if (meta) { 402 meta->sequence = src_buf->sequence; 403 meta->timecode = src_buf->timecode; 404 meta->timestamp = src_buf->vb2_buf.timestamp; 405 meta->start = start; 406 meta->end = ctx->bitstream_fifo.kfifo.in; 407 meta->last = src_buf->flags & V4L2_BUF_FLAG_LAST; 408 if (meta->last) 409 coda_dbg(1, ctx, "marking last meta"); 410 spin_lock(&ctx->buffer_meta_lock); 411 list_add_tail(&meta->list, 412 &ctx->buffer_meta_list); 413 ctx->num_metas++; 414 spin_unlock(&ctx->buffer_meta_lock); 415 416 trace_coda_bit_queue(ctx, src_buf, meta); 417 } 418 419 if (buffer_list) { 420 struct v4l2_m2m_buffer *m2m_buf; 421 422 m2m_buf = container_of(src_buf, 423 struct v4l2_m2m_buffer, 424 vb); 425 list_add_tail(&m2m_buf->list, buffer_list); 426 } else { 427 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 428 } 429 } else { 430 break; 431 } 432 } 433} 434 435void coda_bit_stream_end_flag(struct coda_ctx *ctx) 436{ 437 struct coda_dev *dev = ctx->dev; 438 439 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; 440 441 /* If this context is currently running, update the hardware flag */ 442 if ((dev->devtype->product == CODA_960) && 443 coda_isbusy(dev) && 444 (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) { 445 coda_write(dev, ctx->bit_stream_param, 446 CODA_REG_BIT_BIT_STREAM_PARAM); 447 } 448} 449 450static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value) 451{ 452 struct coda_dev *dev = ctx->dev; 453 u32 *p = ctx->parabuf.vaddr; 454 455 if (dev->devtype->product == CODA_DX6) 456 p[index] = value; 457 else 458 p[index ^ 1] = value; 459} 460 461static inline int coda_alloc_context_buf(struct coda_ctx *ctx, 462 struct coda_aux_buf *buf, size_t size, 463 const char *name) 464{ 465 return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry); 466} 467 468 469static void coda_free_framebuffers(struct coda_ctx *ctx) 470{ 471 int i; 472 473 for (i = 0; i 
< CODA_MAX_FRAMEBUFFERS; i++) 474 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i].buf); 475} 476 477static int coda_alloc_framebuffers(struct coda_ctx *ctx, 478 struct coda_q_data *q_data, u32 fourcc) 479{ 480 struct coda_dev *dev = ctx->dev; 481 unsigned int ysize, ycbcr_size; 482 int ret; 483 int i; 484 485 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || 486 ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 || 487 ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 || 488 ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) 489 ysize = round_up(q_data->rect.width, 16) * 490 round_up(q_data->rect.height, 16); 491 else 492 ysize = round_up(q_data->rect.width, 8) * q_data->rect.height; 493 494 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 495 ycbcr_size = round_up(ysize, 4096) + ysize / 2; 496 else 497 ycbcr_size = ysize + ysize / 2; 498 499 /* Allocate frame buffers */ 500 for (i = 0; i < ctx->num_internal_frames; i++) { 501 size_t size = ycbcr_size; 502 char *name; 503 504 /* Add space for mvcol buffers */ 505 if (dev->devtype->product != CODA_DX6 && 506 (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || 507 (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0))) 508 size += ysize / 4; 509 name = kasprintf(GFP_KERNEL, "fb%d", i); 510 if (!name) { 511 coda_free_framebuffers(ctx); 512 return -ENOMEM; 513 } 514 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i].buf, 515 size, name); 516 kfree(name); 517 if (ret < 0) { 518 coda_free_framebuffers(ctx); 519 return ret; 520 } 521 } 522 523 /* Register frame buffers in the parameter buffer */ 524 for (i = 0; i < ctx->num_internal_frames; i++) { 525 u32 y, cb, cr, mvcol; 526 527 /* Start addresses of Y, Cb, Cr planes */ 528 y = ctx->internal_frames[i].buf.paddr; 529 cb = y + ysize; 530 cr = y + ysize + ysize/4; 531 mvcol = y + ysize + ysize/4 + ysize/4; 532 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) { 533 cb = round_up(cb, 4096); 534 mvcol = cb + ysize/2; 535 cr = 0; 536 /* Packed 20-bit MSB of base 
addresses */ 537 /* YYYYYCCC, CCyyyyyc, cccc.... */ 538 y = (y & 0xfffff000) | cb >> 20; 539 cb = (cb & 0x000ff000) << 12; 540 } 541 coda_parabuf_write(ctx, i * 3 + 0, y); 542 coda_parabuf_write(ctx, i * 3 + 1, cb); 543 coda_parabuf_write(ctx, i * 3 + 2, cr); 544 545 if (dev->devtype->product == CODA_DX6) 546 continue; 547 548 /* mvcol buffer for h.264 and mpeg4 */ 549 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) 550 coda_parabuf_write(ctx, 96 + i, mvcol); 551 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0) 552 coda_parabuf_write(ctx, 97, mvcol); 553 } 554 555 return 0; 556} 557 558static void coda_free_context_buffers(struct coda_ctx *ctx) 559{ 560 struct coda_dev *dev = ctx->dev; 561 562 coda_free_aux_buf(dev, &ctx->slicebuf); 563 coda_free_aux_buf(dev, &ctx->psbuf); 564 if (dev->devtype->product != CODA_DX6) 565 coda_free_aux_buf(dev, &ctx->workbuf); 566 coda_free_aux_buf(dev, &ctx->parabuf); 567} 568 569static int coda_alloc_context_buffers(struct coda_ctx *ctx, 570 struct coda_q_data *q_data) 571{ 572 struct coda_dev *dev = ctx->dev; 573 size_t size; 574 int ret; 575 576 if (!ctx->parabuf.vaddr) { 577 ret = coda_alloc_context_buf(ctx, &ctx->parabuf, 578 CODA_PARA_BUF_SIZE, "parabuf"); 579 if (ret < 0) 580 return ret; 581 } 582 583 if (dev->devtype->product == CODA_DX6) 584 return 0; 585 586 if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) { 587 /* worst case slice size */ 588 size = (DIV_ROUND_UP(q_data->rect.width, 16) * 589 DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512; 590 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size, 591 "slicebuf"); 592 if (ret < 0) 593 goto err; 594 } 595 596 if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 || 597 dev->devtype->product == CODA_7541)) { 598 ret = coda_alloc_context_buf(ctx, &ctx->psbuf, 599 CODA7_PS_BUF_SIZE, "psbuf"); 600 if (ret < 0) 601 goto err; 602 } 603 604 if (!ctx->workbuf.vaddr) { 605 size = dev->devtype->workbuf_size; 606 if (dev->devtype->product == 
CODA_960 && 607 q_data->fourcc == V4L2_PIX_FMT_H264) 608 size += CODA9_PS_SAVE_SIZE; 609 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size, 610 "workbuf"); 611 if (ret < 0) 612 goto err; 613 } 614 615 return 0; 616 617err: 618 coda_free_context_buffers(ctx); 619 return ret; 620} 621 622static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf, 623 int header_code, u8 *header, int *size) 624{ 625 struct vb2_buffer *vb = &buf->vb2_buf; 626 struct coda_dev *dev = ctx->dev; 627 struct coda_q_data *q_data_src; 628 struct v4l2_rect *r; 629 size_t bufsize; 630 int ret; 631 int i; 632 633 if (dev->devtype->product == CODA_960) 634 memset(vb2_plane_vaddr(vb, 0), 0, 64); 635 636 coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0), 637 CODA_CMD_ENC_HEADER_BB_START); 638 bufsize = vb2_plane_size(vb, 0); 639 if (dev->devtype->product == CODA_960) 640 bufsize /= 1024; 641 coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE); 642 if (dev->devtype->product == CODA_960 && 643 ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 && 644 header_code == CODA_HEADER_H264_SPS) { 645 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 646 r = &q_data_src->rect; 647 648 if (r->width % 16 || r->height % 16) { 649 u32 crop_right = round_up(r->width, 16) - r->width; 650 u32 crop_bottom = round_up(r->height, 16) - r->height; 651 652 coda_write(dev, crop_right, 653 CODA9_CMD_ENC_HEADER_FRAME_CROP_H); 654 coda_write(dev, crop_bottom, 655 CODA9_CMD_ENC_HEADER_FRAME_CROP_V); 656 header_code |= CODA9_HEADER_FRAME_CROP; 657 } 658 } 659 coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE); 660 ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER); 661 if (ret < 0) { 662 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n"); 663 return ret; 664 } 665 666 if (dev->devtype->product == CODA_960) { 667 for (i = 63; i > 0; i--) 668 if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0) 669 break; 670 *size = i + 1; 671 } else { 672 *size = coda_read(dev, 
CODA_REG_BIT_WR_PTR(ctx->reg_idx)) - 673 coda_read(dev, CODA_CMD_ENC_HEADER_BB_START); 674 } 675 memcpy(header, vb2_plane_vaddr(vb, 0), *size); 676 677 return 0; 678} 679 680static u32 coda_slice_mode(struct coda_ctx *ctx) 681{ 682 int size, unit; 683 684 switch (ctx->params.slice_mode) { 685 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE: 686 default: 687 return 0; 688 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB: 689 size = ctx->params.slice_max_mb; 690 unit = 1; 691 break; 692 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_BYTES: 693 size = ctx->params.slice_max_bits; 694 unit = 0; 695 break; 696 } 697 698 return ((size & CODA_SLICING_SIZE_MASK) << CODA_SLICING_SIZE_OFFSET) | 699 ((unit & CODA_SLICING_UNIT_MASK) << CODA_SLICING_UNIT_OFFSET) | 700 ((1 & CODA_SLICING_MODE_MASK) << CODA_SLICING_MODE_OFFSET); 701} 702 703static int coda_enc_param_change(struct coda_ctx *ctx) 704{ 705 struct coda_dev *dev = ctx->dev; 706 u32 change_enable = 0; 707 u32 success; 708 int ret; 709 710 if (ctx->params.gop_size_changed) { 711 change_enable |= CODA_PARAM_CHANGE_RC_GOP; 712 coda_write(dev, ctx->params.gop_size, 713 CODA_CMD_ENC_PARAM_RC_GOP); 714 ctx->gopcounter = ctx->params.gop_size - 1; 715 ctx->params.gop_size_changed = false; 716 } 717 if (ctx->params.h264_intra_qp_changed) { 718 coda_dbg(1, ctx, "parameter change: intra Qp %u\n", 719 ctx->params.h264_intra_qp); 720 721 if (ctx->params.bitrate) { 722 change_enable |= CODA_PARAM_CHANGE_RC_INTRA_QP; 723 coda_write(dev, ctx->params.h264_intra_qp, 724 CODA_CMD_ENC_PARAM_RC_INTRA_QP); 725 } 726 ctx->params.h264_intra_qp_changed = false; 727 } 728 if (ctx->params.bitrate_changed) { 729 coda_dbg(1, ctx, "parameter change: bitrate %u kbit/s\n", 730 ctx->params.bitrate); 731 change_enable |= CODA_PARAM_CHANGE_RC_BITRATE; 732 coda_write(dev, ctx->params.bitrate, 733 CODA_CMD_ENC_PARAM_RC_BITRATE); 734 ctx->params.bitrate_changed = false; 735 } 736 if (ctx->params.framerate_changed) { 737 coda_dbg(1, ctx, "parameter change: frame rate 
%u/%u Hz\n", 738 ctx->params.framerate & 0xffff, 739 (ctx->params.framerate >> 16) + 1); 740 change_enable |= CODA_PARAM_CHANGE_RC_FRAME_RATE; 741 coda_write(dev, ctx->params.framerate, 742 CODA_CMD_ENC_PARAM_RC_FRAME_RATE); 743 ctx->params.framerate_changed = false; 744 } 745 if (ctx->params.intra_refresh_changed) { 746 coda_dbg(1, ctx, "parameter change: intra refresh MBs %u\n", 747 ctx->params.intra_refresh); 748 change_enable |= CODA_PARAM_CHANGE_INTRA_MB_NUM; 749 coda_write(dev, ctx->params.intra_refresh, 750 CODA_CMD_ENC_PARAM_INTRA_MB_NUM); 751 ctx->params.intra_refresh_changed = false; 752 } 753 if (ctx->params.slice_mode_changed) { 754 change_enable |= CODA_PARAM_CHANGE_SLICE_MODE; 755 coda_write(dev, coda_slice_mode(ctx), 756 CODA_CMD_ENC_PARAM_SLICE_MODE); 757 ctx->params.slice_mode_changed = false; 758 } 759 760 if (!change_enable) 761 return 0; 762 763 coda_write(dev, change_enable, CODA_CMD_ENC_PARAM_CHANGE_ENABLE); 764 765 ret = coda_command_sync(ctx, CODA_COMMAND_RC_CHANGE_PARAMETER); 766 if (ret < 0) 767 return ret; 768 769 success = coda_read(dev, CODA_RET_ENC_PARAM_CHANGE_SUCCESS); 770 if (success != 1) 771 coda_dbg(1, ctx, "parameter change failed: %u\n", success); 772 773 return 0; 774} 775 776static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size) 777{ 778 phys_addr_t ret; 779 780 size = round_up(size, 1024); 781 if (size > iram->remaining) 782 return 0; 783 iram->remaining -= size; 784 785 ret = iram->next_paddr; 786 iram->next_paddr += size; 787 788 return ret; 789} 790 791static void coda_setup_iram(struct coda_ctx *ctx) 792{ 793 struct coda_iram_info *iram_info = &ctx->iram_info; 794 struct coda_dev *dev = ctx->dev; 795 int w64, w128; 796 int mb_width; 797 int dbk_bits; 798 int bit_bits; 799 int ip_bits; 800 int me_bits; 801 802 memset(iram_info, 0, sizeof(*iram_info)); 803 iram_info->next_paddr = dev->iram.paddr; 804 iram_info->remaining = dev->iram.size; 805 806 if (!dev->iram.vaddr) 807 return; 808 809 switch 
(dev->devtype->product) { 810 case CODA_HX4: 811 dbk_bits = CODA7_USE_HOST_DBK_ENABLE; 812 bit_bits = CODA7_USE_HOST_BIT_ENABLE; 813 ip_bits = CODA7_USE_HOST_IP_ENABLE; 814 me_bits = CODA7_USE_HOST_ME_ENABLE; 815 break; 816 case CODA_7541: 817 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE; 818 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; 819 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; 820 me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE; 821 break; 822 case CODA_960: 823 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE; 824 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; 825 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; 826 me_bits = 0; 827 break; 828 default: /* CODA_DX6 */ 829 return; 830 } 831 832 if (ctx->inst_type == CODA_INST_ENCODER) { 833 struct coda_q_data *q_data_src; 834 835 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 836 mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16); 837 w128 = mb_width * 128; 838 w64 = mb_width * 64; 839 840 /* Prioritize in case IRAM is too small for everything */ 841 if (dev->devtype->product == CODA_HX4 || 842 dev->devtype->product == CODA_7541) { 843 iram_info->search_ram_size = round_up(mb_width * 16 * 844 36 + 2048, 1024); 845 iram_info->search_ram_paddr = coda_iram_alloc(iram_info, 846 iram_info->search_ram_size); 847 if (!iram_info->search_ram_paddr) { 848 pr_err("IRAM is smaller than the search ram size\n"); 849 goto out; 850 } 851 iram_info->axi_sram_use |= me_bits; 852 } 853 854 /* Only H.264BP and H.263P3 are considered */ 855 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64); 856 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64); 857 if (!iram_info->buf_dbk_c_use) 858 goto out; 859 iram_info->axi_sram_use |= dbk_bits; 860 861 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128); 862 if (!iram_info->buf_bit_use) 863 goto out; 864 iram_info->axi_sram_use |= bit_bits; 865 866 
iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128); 867 if (!iram_info->buf_ip_ac_dc_use) 868 goto out; 869 iram_info->axi_sram_use |= ip_bits; 870 871 /* OVL and BTP disabled for encoder */ 872 } else if (ctx->inst_type == CODA_INST_DECODER) { 873 struct coda_q_data *q_data_dst; 874 875 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 876 mb_width = DIV_ROUND_UP(q_data_dst->width, 16); 877 w128 = mb_width * 128; 878 879 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128); 880 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128); 881 if (!iram_info->buf_dbk_c_use) 882 goto out; 883 iram_info->axi_sram_use |= dbk_bits; 884 885 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128); 886 if (!iram_info->buf_bit_use) 887 goto out; 888 iram_info->axi_sram_use |= bit_bits; 889 890 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128); 891 if (!iram_info->buf_ip_ac_dc_use) 892 goto out; 893 iram_info->axi_sram_use |= ip_bits; 894 895 /* OVL and BTP unused as there is no VC1 support yet */ 896 } 897 898out: 899 if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE)) 900 coda_dbg(1, ctx, "IRAM smaller than needed\n"); 901 902 if (dev->devtype->product == CODA_HX4 || 903 dev->devtype->product == CODA_7541) { 904 /* TODO - Enabling these causes picture errors on CODA7541 */ 905 if (ctx->inst_type == CODA_INST_DECODER) { 906 /* fw 1.4.50 */ 907 iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | 908 CODA7_USE_IP_ENABLE); 909 } else { 910 /* fw 13.4.29 */ 911 iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | 912 CODA7_USE_HOST_DBK_ENABLE | 913 CODA7_USE_IP_ENABLE | 914 CODA7_USE_DBK_ENABLE); 915 } 916 } 917} 918 919static u32 coda_supported_firmwares[] = { 920 CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5), 921 CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50), 922 CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50), 923 CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5), 924 CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9), 925 CODA_FIRMWARE_VERNUM(CODA_960, 2, 
3, 10), 926 CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1), 927}; 928 929static bool coda_firmware_supported(u32 vernum) 930{ 931 int i; 932 933 for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++) 934 if (vernum == coda_supported_firmwares[i]) 935 return true; 936 return false; 937} 938 939int coda_check_firmware(struct coda_dev *dev) 940{ 941 u16 product, major, minor, release; 942 u32 data; 943 int ret; 944 945 ret = clk_prepare_enable(dev->clk_per); 946 if (ret) 947 goto err_clk_per; 948 949 ret = clk_prepare_enable(dev->clk_ahb); 950 if (ret) 951 goto err_clk_ahb; 952 953 coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM); 954 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 955 coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX); 956 coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD); 957 coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND); 958 if (coda_wait_timeout(dev)) { 959 v4l2_err(&dev->v4l2_dev, "firmware get command error\n"); 960 ret = -EIO; 961 goto err_run_cmd; 962 } 963 964 if (dev->devtype->product == CODA_960) { 965 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV); 966 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n", 967 data); 968 } 969 970 /* Check we are compatible with the loaded firmware */ 971 data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM); 972 product = CODA_FIRMWARE_PRODUCT(data); 973 major = CODA_FIRMWARE_MAJOR(data); 974 minor = CODA_FIRMWARE_MINOR(data); 975 release = CODA_FIRMWARE_RELEASE(data); 976 977 clk_disable_unprepare(dev->clk_per); 978 clk_disable_unprepare(dev->clk_ahb); 979 980 if (product != dev->devtype->product) { 981 v4l2_err(&dev->v4l2_dev, 982 "Wrong firmware. 
Hw: %s, Fw: %s, Version: %u.%u.%u\n", 983 coda_product_name(dev->devtype->product), 984 coda_product_name(product), major, minor, release); 985 return -EINVAL; 986 } 987 988 v4l2_info(&dev->v4l2_dev, "Initialized %s.\n", 989 coda_product_name(product)); 990 991 if (coda_firmware_supported(data)) { 992 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n", 993 major, minor, release); 994 } else { 995 v4l2_warn(&dev->v4l2_dev, 996 "Unsupported firmware version: %u.%u.%u\n", 997 major, minor, release); 998 } 999 1000 return 0; 1001 1002err_run_cmd: 1003 clk_disable_unprepare(dev->clk_ahb); 1004err_clk_ahb: 1005 clk_disable_unprepare(dev->clk_per); 1006err_clk_per: 1007 return ret; 1008} 1009 1010static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc) 1011{ 1012 u32 cache_size, cache_config; 1013 1014 if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) { 1015 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */ 1016 cache_size = 0x20262024; 1017 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET; 1018 } else { 1019 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */ 1020 cache_size = 0x02440243; 1021 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET; 1022 } 1023 coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE); 1024 if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) { 1025 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | 1026 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | 1027 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; 1028 } else { 1029 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | 1030 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | 1031 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; 1032 } 1033 coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG); 1034} 1035 1036/* 1037 * Encoder context operations 1038 */ 1039 1040static int coda_encoder_reqbufs(struct coda_ctx *ctx, 1041 struct v4l2_requestbuffers *rb) 1042{ 1043 struct coda_q_data *q_data_src; 1044 int ret; 1045 1046 if (rb->type != 
V4L2_BUF_TYPE_VIDEO_OUTPUT) 1047 return 0; 1048 1049 if (rb->count) { 1050 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1051 ret = coda_alloc_context_buffers(ctx, q_data_src); 1052 if (ret < 0) 1053 return ret; 1054 } else { 1055 coda_free_context_buffers(ctx); 1056 } 1057 1058 return 0; 1059} 1060 1061static int coda_start_encoding(struct coda_ctx *ctx) 1062{ 1063 struct coda_dev *dev = ctx->dev; 1064 struct v4l2_device *v4l2_dev = &dev->v4l2_dev; 1065 struct coda_q_data *q_data_src, *q_data_dst; 1066 u32 bitstream_buf, bitstream_size; 1067 struct vb2_v4l2_buffer *buf; 1068 int gamma, ret, value; 1069 u32 dst_fourcc; 1070 int num_fb; 1071 u32 stride; 1072 1073 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1074 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 1075 dst_fourcc = q_data_dst->fourcc; 1076 1077 buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1078 bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0); 1079 bitstream_size = q_data_dst->sizeimage; 1080 1081 if (!coda_is_initialized(dev)) { 1082 v4l2_err(v4l2_dev, "coda is not initialized.\n"); 1083 return -EFAULT; 1084 } 1085 1086 if (dst_fourcc == V4L2_PIX_FMT_JPEG) { 1087 if (!ctx->params.jpeg_qmat_tab[0]) 1088 ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL); 1089 if (!ctx->params.jpeg_qmat_tab[1]) 1090 ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL); 1091 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality); 1092 } 1093 1094 mutex_lock(&dev->coda_mutex); 1095 1096 coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); 1097 coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 1098 coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 1099 switch (dev->devtype->product) { 1100 case CODA_DX6: 1101 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN | 1102 CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL); 1103 break; 1104 case CODA_960: 1105 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN); 1106 
fallthrough; 1107 case CODA_HX4: 1108 case CODA_7541: 1109 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN | 1110 CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL); 1111 break; 1112 } 1113 1114 ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | 1115 CODA9_FRAME_TILED2LINEAR); 1116 if (q_data_src->fourcc == V4L2_PIX_FMT_NV12) 1117 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; 1118 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 1119 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR; 1120 coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL); 1121 1122 if (dev->devtype->product == CODA_DX6) { 1123 /* Configure the coda */ 1124 coda_write(dev, dev->iram.paddr, 1125 CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR); 1126 } 1127 1128 /* Could set rotation here if needed */ 1129 value = 0; 1130 switch (dev->devtype->product) { 1131 case CODA_DX6: 1132 value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK) 1133 << CODADX6_PICWIDTH_OFFSET; 1134 value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK) 1135 << CODA_PICHEIGHT_OFFSET; 1136 break; 1137 case CODA_HX4: 1138 case CODA_7541: 1139 if (dst_fourcc == V4L2_PIX_FMT_H264) { 1140 value = (round_up(q_data_src->rect.width, 16) & 1141 CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET; 1142 value |= (round_up(q_data_src->rect.height, 16) & 1143 CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET; 1144 break; 1145 } 1146 fallthrough; 1147 case CODA_960: 1148 value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK) 1149 << CODA7_PICWIDTH_OFFSET; 1150 value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK) 1151 << CODA_PICHEIGHT_OFFSET; 1152 } 1153 coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE); 1154 if (dst_fourcc == V4L2_PIX_FMT_JPEG) 1155 ctx->params.framerate = 0; 1156 coda_write(dev, ctx->params.framerate, 1157 CODA_CMD_ENC_SEQ_SRC_F_RATE); 1158 1159 ctx->params.codec_mode = ctx->codec->mode; 1160 switch (dst_fourcc) { 1161 case V4L2_PIX_FMT_MPEG4: 1162 if (dev->devtype->product 
== CODA_960) 1163 coda_write(dev, CODA9_STD_MPEG4, 1164 CODA_CMD_ENC_SEQ_COD_STD); 1165 else 1166 coda_write(dev, CODA_STD_MPEG4, 1167 CODA_CMD_ENC_SEQ_COD_STD); 1168 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA); 1169 break; 1170 case V4L2_PIX_FMT_H264: 1171 if (dev->devtype->product == CODA_960) 1172 coda_write(dev, CODA9_STD_H264, 1173 CODA_CMD_ENC_SEQ_COD_STD); 1174 else 1175 coda_write(dev, CODA_STD_H264, 1176 CODA_CMD_ENC_SEQ_COD_STD); 1177 value = ((ctx->params.h264_disable_deblocking_filter_idc & 1178 CODA_264PARAM_DISABLEDEBLK_MASK) << 1179 CODA_264PARAM_DISABLEDEBLK_OFFSET) | 1180 ((ctx->params.h264_slice_alpha_c0_offset_div2 & 1181 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) << 1182 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) | 1183 ((ctx->params.h264_slice_beta_offset_div2 & 1184 CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) << 1185 CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET) | 1186 (ctx->params.h264_constrained_intra_pred_flag << 1187 CODA_264PARAM_CONSTRAINEDINTRAPREDFLAG_OFFSET) | 1188 (ctx->params.h264_chroma_qp_index_offset & 1189 CODA_264PARAM_CHROMAQPOFFSET_MASK); 1190 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA); 1191 break; 1192 case V4L2_PIX_FMT_JPEG: 1193 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA); 1194 coda_write(dev, ctx->params.jpeg_restart_interval, 1195 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL); 1196 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN); 1197 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE); 1198 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET); 1199 1200 coda_jpeg_write_tables(ctx); 1201 break; 1202 default: 1203 v4l2_err(v4l2_dev, 1204 "dst format (0x%08x) invalid.\n", dst_fourcc); 1205 ret = -EINVAL; 1206 goto out; 1207 } 1208 1209 /* 1210 * slice mode and GOP size registers are used for thumb size/offset 1211 * in JPEG mode 1212 */ 1213 if (dst_fourcc != V4L2_PIX_FMT_JPEG) { 1214 value = coda_slice_mode(ctx); 1215 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE); 1216 value = ctx->params.gop_size; 1217 
coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE); 1218 } 1219 1220 if (ctx->params.bitrate && (ctx->params.frame_rc_enable || 1221 ctx->params.mb_rc_enable)) { 1222 ctx->params.bitrate_changed = false; 1223 ctx->params.h264_intra_qp_changed = false; 1224 1225 /* Rate control enabled */ 1226 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK) 1227 << CODA_RATECONTROL_BITRATE_OFFSET; 1228 value |= 1 & CODA_RATECONTROL_ENABLE_MASK; 1229 value |= (ctx->params.vbv_delay & 1230 CODA_RATECONTROL_INITIALDELAY_MASK) 1231 << CODA_RATECONTROL_INITIALDELAY_OFFSET; 1232 if (dev->devtype->product == CODA_960) 1233 value |= BIT(31); /* disable autoskip */ 1234 } else { 1235 value = 0; 1236 } 1237 coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA); 1238 1239 coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE); 1240 coda_write(dev, ctx->params.intra_refresh, 1241 CODA_CMD_ENC_SEQ_INTRA_REFRESH); 1242 1243 coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START); 1244 coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE); 1245 1246 1247 value = 0; 1248 if (dev->devtype->product == CODA_960) 1249 gamma = CODA9_DEFAULT_GAMMA; 1250 else 1251 gamma = CODA_DEFAULT_GAMMA; 1252 if (gamma > 0) { 1253 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET, 1254 CODA_CMD_ENC_SEQ_RC_GAMMA); 1255 } 1256 1257 if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) { 1258 coda_write(dev, 1259 ctx->params.h264_min_qp << CODA_QPMIN_OFFSET | 1260 ctx->params.h264_max_qp << CODA_QPMAX_OFFSET, 1261 CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX); 1262 } 1263 if (dev->devtype->product == CODA_960) { 1264 if (ctx->params.h264_max_qp) 1265 value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET; 1266 if (CODA_DEFAULT_GAMMA > 0) 1267 value |= 1 << CODA9_OPTION_GAMMA_OFFSET; 1268 } else { 1269 if (CODA_DEFAULT_GAMMA > 0) { 1270 if (dev->devtype->product == CODA_DX6) 1271 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET; 1272 else 1273 value |= 1 << CODA7_OPTION_GAMMA_OFFSET; 1274 } 1275 if 
(ctx->params.h264_min_qp) 1276 value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET; 1277 if (ctx->params.h264_max_qp) 1278 value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET; 1279 } 1280 coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION); 1281 1282 if (ctx->params.frame_rc_enable && !ctx->params.mb_rc_enable) 1283 value = 1; 1284 else 1285 value = 0; 1286 coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE); 1287 1288 coda_setup_iram(ctx); 1289 1290 if (dst_fourcc == V4L2_PIX_FMT_H264) { 1291 switch (dev->devtype->product) { 1292 case CODA_DX6: 1293 value = FMO_SLICE_SAVE_BUF_SIZE << 7; 1294 coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO); 1295 break; 1296 case CODA_HX4: 1297 case CODA_7541: 1298 coda_write(dev, ctx->iram_info.search_ram_paddr, 1299 CODA7_CMD_ENC_SEQ_SEARCH_BASE); 1300 coda_write(dev, ctx->iram_info.search_ram_size, 1301 CODA7_CMD_ENC_SEQ_SEARCH_SIZE); 1302 break; 1303 case CODA_960: 1304 coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION); 1305 coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT); 1306 } 1307 } 1308 1309 ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT); 1310 if (ret < 0) { 1311 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n"); 1312 goto out; 1313 } 1314 1315 if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) { 1316 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n"); 1317 ret = -EFAULT; 1318 goto out; 1319 } 1320 ctx->initialized = 1; 1321 1322 if (dst_fourcc != V4L2_PIX_FMT_JPEG) { 1323 if (dev->devtype->product == CODA_960) 1324 ctx->num_internal_frames = 4; 1325 else 1326 ctx->num_internal_frames = 2; 1327 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc); 1328 if (ret < 0) { 1329 v4l2_err(v4l2_dev, "failed to allocate framebuffers\n"); 1330 goto out; 1331 } 1332 num_fb = 2; 1333 stride = q_data_src->bytesperline; 1334 } else { 1335 ctx->num_internal_frames = 0; 1336 num_fb = 0; 1337 stride = 0; 1338 } 1339 coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM); 1340 coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE); 1341 1342 if 
(dev->devtype->product == CODA_HX4 || 1343 dev->devtype->product == CODA_7541) { 1344 coda_write(dev, q_data_src->bytesperline, 1345 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE); 1346 } 1347 if (dev->devtype->product != CODA_DX6) { 1348 coda_write(dev, ctx->iram_info.buf_bit_use, 1349 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); 1350 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, 1351 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); 1352 coda_write(dev, ctx->iram_info.buf_dbk_y_use, 1353 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); 1354 coda_write(dev, ctx->iram_info.buf_dbk_c_use, 1355 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); 1356 coda_write(dev, ctx->iram_info.buf_ovl_use, 1357 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); 1358 if (dev->devtype->product == CODA_960) { 1359 coda_write(dev, ctx->iram_info.buf_btp_use, 1360 CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); 1361 1362 coda9_set_frame_cache(ctx, q_data_src->fourcc); 1363 1364 /* FIXME */ 1365 coda_write(dev, ctx->internal_frames[2].buf.paddr, 1366 CODA9_CMD_SET_FRAME_SUBSAMP_A); 1367 coda_write(dev, ctx->internal_frames[3].buf.paddr, 1368 CODA9_CMD_SET_FRAME_SUBSAMP_B); 1369 } 1370 } 1371 1372 ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF); 1373 if (ret < 0) { 1374 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n"); 1375 goto out; 1376 } 1377 1378 coda_dbg(1, ctx, "start encoding %dx%d %4.4s->%4.4s @ %d/%d Hz\n", 1379 q_data_src->rect.width, q_data_src->rect.height, 1380 (char *)&ctx->codec->src_fourcc, (char *)&dst_fourcc, 1381 ctx->params.framerate & 0xffff, 1382 (ctx->params.framerate >> 16) + 1); 1383 1384 /* Save stream headers */ 1385 buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1386 switch (dst_fourcc) { 1387 case V4L2_PIX_FMT_H264: 1388 /* 1389 * Get SPS in the first frame and copy it to an 1390 * intermediate buffer. 
1391 */ 1392 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS, 1393 &ctx->vpu_header[0][0], 1394 &ctx->vpu_header_size[0]); 1395 if (ret < 0) 1396 goto out; 1397 1398 /* 1399 * If visible width or height are not aligned to macroblock 1400 * size, the crop_right and crop_bottom SPS fields must be set 1401 * to the difference between visible and coded size. This is 1402 * only supported by CODA960 firmware. All others do not allow 1403 * writing frame cropping parameters, so we have to manually 1404 * fix up the SPS RBSP (Sequence Parameter Set Raw Byte 1405 * Sequence Payload) ourselves. 1406 */ 1407 if (ctx->dev->devtype->product != CODA_960 && 1408 ((q_data_src->rect.width % 16) || 1409 (q_data_src->rect.height % 16))) { 1410 ret = coda_h264_sps_fixup(ctx, q_data_src->rect.width, 1411 q_data_src->rect.height, 1412 &ctx->vpu_header[0][0], 1413 &ctx->vpu_header_size[0], 1414 sizeof(ctx->vpu_header[0])); 1415 if (ret < 0) 1416 goto out; 1417 } 1418 1419 /* 1420 * Get PPS in the first frame and copy it to an 1421 * intermediate buffer. 1422 */ 1423 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS, 1424 &ctx->vpu_header[1][0], 1425 &ctx->vpu_header_size[1]); 1426 if (ret < 0) 1427 goto out; 1428 1429 /* 1430 * Length of H.264 headers is variable and thus it might not be 1431 * aligned for the coda to append the encoded frame. In that is 1432 * the case a filler NAL must be added to header 2. 
1433 */ 1434 ctx->vpu_header_size[2] = coda_h264_padding( 1435 (ctx->vpu_header_size[0] + 1436 ctx->vpu_header_size[1]), 1437 ctx->vpu_header[2]); 1438 break; 1439 case V4L2_PIX_FMT_MPEG4: 1440 /* 1441 * Get VOS in the first frame and copy it to an 1442 * intermediate buffer 1443 */ 1444 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS, 1445 &ctx->vpu_header[0][0], 1446 &ctx->vpu_header_size[0]); 1447 if (ret < 0) 1448 goto out; 1449 1450 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS, 1451 &ctx->vpu_header[1][0], 1452 &ctx->vpu_header_size[1]); 1453 if (ret < 0) 1454 goto out; 1455 1456 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL, 1457 &ctx->vpu_header[2][0], 1458 &ctx->vpu_header_size[2]); 1459 if (ret < 0) 1460 goto out; 1461 break; 1462 default: 1463 /* No more formats need to save headers at the moment */ 1464 break; 1465 } 1466 1467out: 1468 mutex_unlock(&dev->coda_mutex); 1469 return ret; 1470} 1471 1472static int coda_prepare_encode(struct coda_ctx *ctx) 1473{ 1474 struct coda_q_data *q_data_src, *q_data_dst; 1475 struct vb2_v4l2_buffer *src_buf, *dst_buf; 1476 struct coda_dev *dev = ctx->dev; 1477 int force_ipicture; 1478 int quant_param = 0; 1479 u32 pic_stream_buffer_addr, pic_stream_buffer_size; 1480 u32 rot_mode = 0; 1481 u32 dst_fourcc; 1482 u32 reg; 1483 int ret; 1484 1485 ret = coda_enc_param_change(ctx); 1486 if (ret < 0) { 1487 v4l2_warn(&ctx->dev->v4l2_dev, "parameter change failed: %d\n", 1488 ret); 1489 } 1490 1491 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); 1492 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1493 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1494 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 1495 dst_fourcc = q_data_dst->fourcc; 1496 1497 src_buf->sequence = ctx->osequence; 1498 dst_buf->sequence = ctx->osequence; 1499 ctx->osequence++; 1500 1501 force_ipicture = ctx->params.force_ipicture; 1502 if (force_ipicture) 1503 ctx->params.force_ipicture = false; 1504 else 
if (ctx->params.gop_size != 0 && 1505 (src_buf->sequence % ctx->params.gop_size) == 0) 1506 force_ipicture = 1; 1507 1508 /* 1509 * Workaround coda firmware BUG that only marks the first 1510 * frame as IDR. This is a problem for some decoders that can't 1511 * recover when a frame is lost. 1512 */ 1513 if (!force_ipicture) { 1514 src_buf->flags |= V4L2_BUF_FLAG_PFRAME; 1515 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME; 1516 } else { 1517 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; 1518 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME; 1519 } 1520 1521 if (dev->devtype->product == CODA_960) 1522 coda_set_gdi_regs(ctx); 1523 1524 /* 1525 * Copy headers in front of the first frame and forced I frames for 1526 * H.264 only. In MPEG4 they are already copied by the CODA. 1527 */ 1528 if (src_buf->sequence == 0 || force_ipicture) { 1529 pic_stream_buffer_addr = 1530 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) + 1531 ctx->vpu_header_size[0] + 1532 ctx->vpu_header_size[1] + 1533 ctx->vpu_header_size[2]; 1534 pic_stream_buffer_size = q_data_dst->sizeimage - 1535 ctx->vpu_header_size[0] - 1536 ctx->vpu_header_size[1] - 1537 ctx->vpu_header_size[2]; 1538 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0), 1539 &ctx->vpu_header[0][0], ctx->vpu_header_size[0]); 1540 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) 1541 + ctx->vpu_header_size[0], &ctx->vpu_header[1][0], 1542 ctx->vpu_header_size[1]); 1543 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) 1544 + ctx->vpu_header_size[0] + ctx->vpu_header_size[1], 1545 &ctx->vpu_header[2][0], ctx->vpu_header_size[2]); 1546 } else { 1547 pic_stream_buffer_addr = 1548 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 1549 pic_stream_buffer_size = q_data_dst->sizeimage; 1550 } 1551 1552 if (force_ipicture) { 1553 switch (dst_fourcc) { 1554 case V4L2_PIX_FMT_H264: 1555 quant_param = ctx->params.h264_intra_qp; 1556 break; 1557 case V4L2_PIX_FMT_MPEG4: 1558 quant_param = ctx->params.mpeg4_intra_qp; 1559 break; 1560 case V4L2_PIX_FMT_JPEG: 1561 quant_param 
= 30; 1562 break; 1563 default: 1564 v4l2_warn(&ctx->dev->v4l2_dev, 1565 "cannot set intra qp, fmt not supported\n"); 1566 break; 1567 } 1568 } else { 1569 switch (dst_fourcc) { 1570 case V4L2_PIX_FMT_H264: 1571 quant_param = ctx->params.h264_inter_qp; 1572 break; 1573 case V4L2_PIX_FMT_MPEG4: 1574 quant_param = ctx->params.mpeg4_inter_qp; 1575 break; 1576 default: 1577 v4l2_warn(&ctx->dev->v4l2_dev, 1578 "cannot set inter qp, fmt not supported\n"); 1579 break; 1580 } 1581 } 1582 1583 /* submit */ 1584 if (ctx->params.rot_mode) 1585 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; 1586 coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE); 1587 coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS); 1588 1589 if (dev->devtype->product == CODA_960) { 1590 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX); 1591 coda_write(dev, q_data_src->bytesperline, 1592 CODA9_CMD_ENC_PIC_SRC_STRIDE); 1593 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC); 1594 1595 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y; 1596 } else { 1597 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y; 1598 } 1599 coda_write_base(ctx, q_data_src, src_buf, reg); 1600 1601 coda_write(dev, force_ipicture << 1 & 0x2, 1602 CODA_CMD_ENC_PIC_OPTION); 1603 1604 coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START); 1605 coda_write(dev, pic_stream_buffer_size / 1024, 1606 CODA_CMD_ENC_PIC_BB_SIZE); 1607 1608 if (!ctx->streamon_out) { 1609 /* After streamoff on the output side, set stream end flag */ 1610 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; 1611 coda_write(dev, ctx->bit_stream_param, 1612 CODA_REG_BIT_BIT_STREAM_PARAM); 1613 } 1614 1615 if (dev->devtype->product != CODA_DX6) 1616 coda_write(dev, ctx->iram_info.axi_sram_use, 1617 CODA7_REG_BIT_AXI_SRAM_USE); 1618 1619 trace_coda_enc_pic_run(ctx, src_buf); 1620 1621 coda_command_async(ctx, CODA_COMMAND_PIC_RUN); 1622 1623 return 0; 1624} 1625 1626static char coda_frame_type_char(u32 flags) 1627{ 1628 return (flags & V4L2_BUF_FLAG_KEYFRAME) ? 
'I' : 1629 (flags & V4L2_BUF_FLAG_PFRAME) ? 'P' : 1630 (flags & V4L2_BUF_FLAG_BFRAME) ? 'B' : '?'; 1631} 1632 1633static void coda_finish_encode(struct coda_ctx *ctx) 1634{ 1635 struct vb2_v4l2_buffer *src_buf, *dst_buf; 1636 struct coda_dev *dev = ctx->dev; 1637 u32 wr_ptr, start_ptr; 1638 1639 if (ctx->aborting) 1640 return; 1641 1642 /* 1643 * Lock to make sure that an encoder stop command running in parallel 1644 * will either already have marked src_buf as last, or it will wake up 1645 * the capture queue after the buffers are returned. 1646 */ 1647 mutex_lock(&ctx->wakeup_mutex); 1648 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 1649 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1650 1651 trace_coda_enc_pic_done(ctx, dst_buf); 1652 1653 /* Get results from the coda */ 1654 start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START); 1655 wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 1656 1657 /* Calculate bytesused field */ 1658 if (dst_buf->sequence == 0 || 1659 src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) { 1660 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr + 1661 ctx->vpu_header_size[0] + 1662 ctx->vpu_header_size[1] + 1663 ctx->vpu_header_size[2]); 1664 } else { 1665 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr); 1666 } 1667 1668 coda_dbg(1, ctx, "frame size = %u\n", wr_ptr - start_ptr); 1669 1670 coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM); 1671 coda_read(dev, CODA_RET_ENC_PIC_FLAG); 1672 1673 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | 1674 V4L2_BUF_FLAG_PFRAME | 1675 V4L2_BUF_FLAG_LAST); 1676 if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) 1677 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; 1678 else 1679 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME; 1680 dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST; 1681 1682 v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false); 1683 1684 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 1685 1686 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 1687 
coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE); 1688 mutex_unlock(&ctx->wakeup_mutex); 1689 1690 ctx->gopcounter--; 1691 if (ctx->gopcounter < 0) 1692 ctx->gopcounter = ctx->params.gop_size - 1; 1693 1694 coda_dbg(1, ctx, "job finished: encoded %c frame (%d)%s\n", 1695 coda_frame_type_char(dst_buf->flags), dst_buf->sequence, 1696 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : ""); 1697} 1698 1699static void coda_seq_end_work(struct work_struct *work) 1700{ 1701 struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work); 1702 struct coda_dev *dev = ctx->dev; 1703 1704 mutex_lock(&ctx->buffer_mutex); 1705 mutex_lock(&dev->coda_mutex); 1706 1707 if (ctx->initialized == 0) 1708 goto out; 1709 1710 coda_dbg(1, ctx, "%s: sent command 'SEQ_END' to coda\n", __func__); 1711 if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { 1712 v4l2_err(&dev->v4l2_dev, 1713 "CODA_COMMAND_SEQ_END failed\n"); 1714 } 1715 1716 /* 1717 * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing 1718 * from the output stream after the h.264 decoder has run. Resetting the 1719 * hardware after the decoder has finished seems to help. 
1720 */ 1721 if (dev->devtype->product == CODA_960) 1722 coda_hw_reset(ctx); 1723 1724 kfifo_init(&ctx->bitstream_fifo, 1725 ctx->bitstream.vaddr, ctx->bitstream.size); 1726 1727 coda_free_framebuffers(ctx); 1728 1729 ctx->initialized = 0; 1730 1731out: 1732 mutex_unlock(&dev->coda_mutex); 1733 mutex_unlock(&ctx->buffer_mutex); 1734} 1735 1736static void coda_bit_release(struct coda_ctx *ctx) 1737{ 1738 mutex_lock(&ctx->buffer_mutex); 1739 coda_free_framebuffers(ctx); 1740 coda_free_context_buffers(ctx); 1741 coda_free_bitstream_buffer(ctx); 1742 mutex_unlock(&ctx->buffer_mutex); 1743} 1744 1745const struct coda_context_ops coda_bit_encode_ops = { 1746 .queue_init = coda_encoder_queue_init, 1747 .reqbufs = coda_encoder_reqbufs, 1748 .start_streaming = coda_start_encoding, 1749 .prepare_run = coda_prepare_encode, 1750 .finish_run = coda_finish_encode, 1751 .seq_end_work = coda_seq_end_work, 1752 .release = coda_bit_release, 1753}; 1754 1755/* 1756 * Decoder context operations 1757 */ 1758 1759static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx, 1760 struct coda_q_data *q_data) 1761{ 1762 if (ctx->bitstream.vaddr) 1763 return 0; 1764 1765 ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2); 1766 ctx->bitstream.vaddr = dma_alloc_wc(ctx->dev->dev, ctx->bitstream.size, 1767 &ctx->bitstream.paddr, GFP_KERNEL); 1768 if (!ctx->bitstream.vaddr) { 1769 v4l2_err(&ctx->dev->v4l2_dev, 1770 "failed to allocate bitstream ringbuffer"); 1771 return -ENOMEM; 1772 } 1773 kfifo_init(&ctx->bitstream_fifo, 1774 ctx->bitstream.vaddr, ctx->bitstream.size); 1775 1776 return 0; 1777} 1778 1779static void coda_free_bitstream_buffer(struct coda_ctx *ctx) 1780{ 1781 if (ctx->bitstream.vaddr == NULL) 1782 return; 1783 1784 dma_free_wc(ctx->dev->dev, ctx->bitstream.size, ctx->bitstream.vaddr, 1785 ctx->bitstream.paddr); 1786 ctx->bitstream.vaddr = NULL; 1787 kfifo_init(&ctx->bitstream_fifo, NULL, 0); 1788} 1789 1790static int coda_decoder_reqbufs(struct coda_ctx *ctx, 1791 
struct v4l2_requestbuffers *rb) 1792{ 1793 struct coda_q_data *q_data_src; 1794 int ret; 1795 1796 if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) 1797 return 0; 1798 1799 if (rb->count) { 1800 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1801 ret = coda_alloc_context_buffers(ctx, q_data_src); 1802 if (ret < 0) 1803 return ret; 1804 ret = coda_alloc_bitstream_buffer(ctx, q_data_src); 1805 if (ret < 0) { 1806 coda_free_context_buffers(ctx); 1807 return ret; 1808 } 1809 } else { 1810 coda_free_bitstream_buffer(ctx); 1811 coda_free_context_buffers(ctx); 1812 } 1813 1814 return 0; 1815} 1816 1817static bool coda_reorder_enable(struct coda_ctx *ctx) 1818{ 1819 struct coda_dev *dev = ctx->dev; 1820 int profile; 1821 1822 if (dev->devtype->product != CODA_HX4 && 1823 dev->devtype->product != CODA_7541 && 1824 dev->devtype->product != CODA_960) 1825 return false; 1826 1827 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) 1828 return false; 1829 1830 if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264) 1831 return true; 1832 1833 profile = coda_h264_profile(ctx->params.h264_profile_idc); 1834 if (profile < 0) 1835 v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n", 1836 ctx->params.h264_profile_idc); 1837 1838 /* Baseline profile does not support reordering */ 1839 return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; 1840} 1841 1842static void coda_decoder_drop_used_metas(struct coda_ctx *ctx) 1843{ 1844 struct coda_buffer_meta *meta, *tmp; 1845 1846 /* 1847 * All metas that end at or before the RD pointer (fifo out), 1848 * are now consumed by the VPU and should be released. 
/*
 * Run the decoder sequence initialization (CODA_COMMAND_SEQ_INIT) and
 * evaluate its results.
 *
 * Must be called with dev->coda_mutex held (checked via lockdep).
 *
 * Programs the frame memory control, bitstream buffer and codec specific
 * sequence registers, issues SEQ_INIT, then reads back the stream size, the
 * number of required frame buffers, the H.264 crop rectangle and (on
 * everything but CODA_DX6) the profile/level header report.
 *
 * Return: 0 on success, the coda_command_sync() error if SEQ_INIT did not
 * complete, -EAGAIN if the CODA reports SEQ_INIT failure, or -EINVAL if the
 * stream does not fit the capture format or needs more than
 * CODA_MAX_FRAMEBUFFERS frame buffers.
 */
static int __coda_decoder_seq_init(struct coda_ctx *ctx)
{
	struct coda_q_data *q_data_src, *q_data_dst;
	u32 bitstream_buf, bitstream_size;
	struct coda_dev *dev = ctx->dev;
	int width, height;
	u32 src_fourcc, dst_fourcc;
	u32 val;
	int ret;

	lockdep_assert_held(&dev->coda_mutex);

	coda_dbg(1, ctx, "Video Data Order Adapter: %s\n",
		 ctx->use_vdoa ? "Enabled" : "Disabled");

	/* Start decoding */
	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
	bitstream_buf = ctx->bitstream.paddr;
	bitstream_size = ctx->bitstream.size;
	src_fourcc = q_data_src->fourcc;
	dst_fourcc = q_data_dst->fourcc;

	/* Update coda bitstream read and write pointers from kfifo */
	coda_kfifo_sync_to_device_full(ctx);

	/* Rebuild the chroma interleave / tiled map bits from scratch */
	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
				 CODA9_FRAME_TILED2LINEAR);
	if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
	/* With the VDOA in use, TILED2LINEAR is left cleared */
	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
		ctx->frame_mem_ctrl |= (0x3 << 9) |
			((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);

	/* No frame selected for display, all display flags cleared */
	ctx->display_idx = -1;
	ctx->frm_dis_flg = 0;
	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

	/* The bitstream buffer size register is written in units of 1024 */
	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
	val = 0;
	if (coda_reorder_enable(ctx))
		val |= CODA_REORDER_ENABLE;
	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
		val |= CODA_NO_INT_ENABLE;
	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);

	ctx->params.codec_mode = ctx->codec->mode;
	/* The aux codec mode is only set for MPEG-4 on CODA_960 */
	if (dev->devtype->product == CODA_960 &&
	    src_fourcc == V4L2_PIX_FMT_MPEG4)
		ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
	else
		ctx->params.codec_mode_aux = 0;
	if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
		coda_write(dev, CODA_MP4_CLASS_MPEG4,
			   CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
	}
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		/* HX4/7541 get the PS buffer, CODA_960 the settings below */
		if (dev->devtype->product == CODA_HX4 ||
		    dev->devtype->product == CODA_7541) {
			coda_write(dev, ctx->psbuf.paddr,
				   CODA_CMD_DEC_SEQ_PS_BB_START);
			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
				   CODA_CMD_DEC_SEQ_PS_BB_SIZE);
		}
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
			coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
		}
	}
	if (src_fourcc == V4L2_PIX_FMT_JPEG)
		coda_write(dev, 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN);
	if (dev->devtype->product != CODA_960)
		coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);

	/*
	 * bit_stream_param is set to the SEQ_INIT escape value only for the
	 * duration of the command and is reset to 0 right afterwards.
	 */
	ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
	ctx->bit_stream_param = 0;
	if (ret) {
		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
		return ret;
	}
	/*
	 * NOTE: the context is marked initialized as soon as the command
	 * completed, before the success register and the stream parameters
	 * are checked below; the error returns further down leave
	 * ctx->initialized set.
	 */
	ctx->sequence_offset = ~0U;
	ctx->initialized = 1;
	ctx->first_frame_sequence = 0;

	/* Update kfifo out pointer from coda bitstream read pointer */
	coda_kfifo_sync_from_device(ctx);

	/*
	 * After updating the read pointer, we need to check if
	 * any metas are consumed and should be released.
	 */
	coda_decoder_drop_used_metas(ctx);

	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
		v4l2_err(&dev->v4l2_dev,
			"CODA_COMMAND_SEQ_INIT failed, error code = 0x%x\n",
			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
		return -EAGAIN;
	}

	/* Width and height are packed into one register; layout differs on DX6 */
	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
	if (dev->devtype->product == CODA_DX6) {
		width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
		height = val & CODADX6_PICHEIGHT_MASK;
	} else {
		width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
		height = val & CODA7_PICHEIGHT_MASK;
	}

	/* The stream width is checked against the capture bytesperline */
	if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
			 width, height, q_data_dst->bytesperline,
			 q_data_dst->height);
		return -EINVAL;
	}

	width = round_up(width, 16);
	height = round_up(height, 16);

	coda_dbg(1, ctx, "start decoding: %dx%d\n", width, height);

	ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
	/*
	 * If the VDOA is used, the decoder needs one additional frame,
	 * because the frames are freed when the next frame is decoded.
	 * Otherwise there are visible errors in the decoded frames (green
	 * regions in displayed frames) and a broken order of frames (earlier
	 * frames are sporadically displayed after later frames).
	 */
	if (ctx->use_vdoa)
		ctx->num_internal_frames += 1;
	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
		v4l2_err(&dev->v4l2_dev,
			 "not enough framebuffers to decode (%d < %d)\n",
			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
		return -EINVAL;
	}

	if (src_fourcc == V4L2_PIX_FMT_H264) {
		u32 left_right;
		u32 top_bottom;

		/*
		 * Each crop register packs two 10-bit values: left/top in
		 * bits 19:10, right/bottom in bits 9:0. The capture rectangle
		 * is the rounded-up size minus the crop on both sides.
		 */
		left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
		top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);

		q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
		q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
		q_data_dst->rect.width = width - q_data_dst->rect.left -
					 (left_right & 0x3ff);
		q_data_dst->rect.height = height - q_data_dst->rect.top -
					  (top_bottom & 0x3ff);
	}

	if (dev->devtype->product != CODA_DX6) {
		u8 profile, level;

		/* Profile in bits 7:0, level in bits 14:8 of the report */
		val = coda_read(dev, CODA7_RET_DEC_SEQ_HEADER_REPORT);
		profile = val & 0xff;
		level = (val >> 8) & 0x7f;

		/* Skip the control update if nothing was reported */
		if (profile || level)
			coda_update_profile_level_ctrls(ctx, profile, level);
	}

	return 0;
}

/*
 * Work item that runs the decoder sequence initialization for a context
 * that is not initialized yet. Takes ctx->buffer_mutex and then
 * dev->coda_mutex; the return value of the initialization is not evaluated.
 */
static void coda_dec_seq_init_work(struct work_struct *work)
{
	struct coda_ctx *ctx = container_of(work,
					    struct coda_ctx, seq_init_work);
	struct coda_dev *dev = ctx->dev;

	mutex_lock(&ctx->buffer_mutex);
	mutex_lock(&dev->coda_mutex);

	if (!ctx->initialized)
		__coda_decoder_seq_init(ctx);

	mutex_unlock(&dev->coda_mutex);
	mutex_unlock(&ctx->buffer_mutex);
}
2062 2063 if (!ctx->initialized) { 2064 ret = __coda_decoder_seq_init(ctx); 2065 if (ret < 0) 2066 return ret; 2067 } else { 2068 ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | 2069 CODA9_FRAME_TILED2LINEAR); 2070 if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) 2071 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; 2072 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 2073 ctx->frame_mem_ctrl |= (0x3 << 9) | 2074 ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); 2075 } 2076 2077 coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); 2078 2079 ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc); 2080 if (ret < 0) { 2081 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n"); 2082 return ret; 2083 } 2084 2085 /* Tell the decoder how many frame buffers we allocated. */ 2086 coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM); 2087 coda_write(dev, round_up(q_data_dst->rect.width, 16), 2088 CODA_CMD_SET_FRAME_BUF_STRIDE); 2089 2090 if (dev->devtype->product != CODA_DX6) { 2091 /* Set secondary AXI IRAM */ 2092 coda_setup_iram(ctx); 2093 2094 coda_write(dev, ctx->iram_info.buf_bit_use, 2095 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); 2096 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, 2097 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); 2098 coda_write(dev, ctx->iram_info.buf_dbk_y_use, 2099 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); 2100 coda_write(dev, ctx->iram_info.buf_dbk_c_use, 2101 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); 2102 coda_write(dev, ctx->iram_info.buf_ovl_use, 2103 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); 2104 if (dev->devtype->product == CODA_960) { 2105 coda_write(dev, ctx->iram_info.buf_btp_use, 2106 CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); 2107 2108 coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY); 2109 coda9_set_frame_cache(ctx, dst_fourcc); 2110 } 2111 } 2112 2113 if (src_fourcc == V4L2_PIX_FMT_H264) { 2114 coda_write(dev, ctx->slicebuf.paddr, 2115 CODA_CMD_SET_FRAME_SLICE_BB_START); 2116 
coda_write(dev, ctx->slicebuf.size / 1024, 2117 CODA_CMD_SET_FRAME_SLICE_BB_SIZE); 2118 } 2119 2120 if (dev->devtype->product == CODA_HX4 || 2121 dev->devtype->product == CODA_7541) { 2122 int max_mb_x = 1920 / 16; 2123 int max_mb_y = 1088 / 16; 2124 int max_mb_num = max_mb_x * max_mb_y; 2125 2126 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, 2127 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE); 2128 } else if (dev->devtype->product == CODA_960) { 2129 int max_mb_x = 1920 / 16; 2130 int max_mb_y = 1088 / 16; 2131 int max_mb_num = max_mb_x * max_mb_y; 2132 2133 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, 2134 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE); 2135 } 2136 2137 if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) { 2138 v4l2_err(&ctx->dev->v4l2_dev, 2139 "CODA_COMMAND_SET_FRAME_BUF timeout\n"); 2140 return -ETIMEDOUT; 2141 } 2142 2143 return 0; 2144} 2145 2146static int coda_start_decoding(struct coda_ctx *ctx) 2147{ 2148 struct coda_dev *dev = ctx->dev; 2149 int ret; 2150 2151 mutex_lock(&dev->coda_mutex); 2152 ret = __coda_start_decoding(ctx); 2153 mutex_unlock(&dev->coda_mutex); 2154 2155 return ret; 2156} 2157 2158static int coda_prepare_decode(struct coda_ctx *ctx) 2159{ 2160 struct vb2_v4l2_buffer *dst_buf; 2161 struct coda_dev *dev = ctx->dev; 2162 struct coda_q_data *q_data_dst; 2163 struct coda_buffer_meta *meta; 2164 u32 rot_mode = 0; 2165 u32 reg_addr, reg_stride; 2166 2167 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 2168 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 2169 2170 /* Try to copy source buffer contents into the bitstream ringbuffer */ 2171 mutex_lock(&ctx->bitstream_mutex); 2172 coda_fill_bitstream(ctx, NULL); 2173 mutex_unlock(&ctx->bitstream_mutex); 2174 2175 if (coda_get_bitstream_payload(ctx) < 512 && 2176 (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) { 2177 coda_dbg(1, ctx, "bitstream payload: %d, skipping\n", 2178 coda_get_bitstream_payload(ctx)); 2179 return -EAGAIN; 2180 } 2181 2182 /* 
Run coda_start_decoding (again) if not yet initialized */ 2183 if (!ctx->initialized) { 2184 int ret = __coda_start_decoding(ctx); 2185 2186 if (ret < 0) { 2187 v4l2_err(&dev->v4l2_dev, "failed to start decoding\n"); 2188 return -EAGAIN; 2189 } else { 2190 ctx->initialized = 1; 2191 } 2192 } 2193 2194 if (dev->devtype->product == CODA_960) 2195 coda_set_gdi_regs(ctx); 2196 2197 if (ctx->use_vdoa && 2198 ctx->display_idx >= 0 && 2199 ctx->display_idx < ctx->num_internal_frames) { 2200 vdoa_device_run(ctx->vdoa, 2201 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0), 2202 ctx->internal_frames[ctx->display_idx].buf.paddr); 2203 } else { 2204 if (dev->devtype->product == CODA_960) { 2205 /* 2206 * It was previously assumed that the CODA960 has an 2207 * internal list of 64 buffer entries that contains 2208 * both the registered internal frame buffers as well 2209 * as the rotator buffer output, and that the ROT_INDEX 2210 * register must be set to a value between the last 2211 * internal frame buffers' index and 64. 2212 * At least on firmware version 3.1.1 it turns out that 2213 * setting ROT_INDEX to any value >= 32 causes CODA 2214 * hangups that it can not recover from with the SRC VPU 2215 * reset. 2216 * It does appear to work however, to just set it to a 2217 * fixed value in the [ctx->num_internal_frames, 31] 2218 * range, for example CODA_MAX_FRAMEBUFFERS. 
2219 */ 2220 coda_write(dev, CODA_MAX_FRAMEBUFFERS, 2221 CODA9_CMD_DEC_PIC_ROT_INDEX); 2222 2223 reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y; 2224 reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE; 2225 } else { 2226 reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y; 2227 reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE; 2228 } 2229 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr); 2230 coda_write(dev, q_data_dst->bytesperline, reg_stride); 2231 2232 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; 2233 } 2234 2235 coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE); 2236 2237 switch (dev->devtype->product) { 2238 case CODA_DX6: 2239 /* TBD */ 2240 case CODA_HX4: 2241 case CODA_7541: 2242 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION); 2243 break; 2244 case CODA_960: 2245 /* 'hardcode to use interrupt disable mode'? */ 2246 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION); 2247 break; 2248 } 2249 2250 coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM); 2251 2252 coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START); 2253 coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE); 2254 2255 if (dev->devtype->product != CODA_DX6) 2256 coda_write(dev, ctx->iram_info.axi_sram_use, 2257 CODA7_REG_BIT_AXI_SRAM_USE); 2258 2259 spin_lock(&ctx->buffer_meta_lock); 2260 meta = list_first_entry_or_null(&ctx->buffer_meta_list, 2261 struct coda_buffer_meta, list); 2262 2263 if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) { 2264 2265 /* If this is the last buffer in the bitstream, add padding */ 2266 if (meta->end == ctx->bitstream_fifo.kfifo.in) { 2267 static unsigned char buf[512]; 2268 unsigned int pad; 2269 2270 /* Pad to multiple of 256 and then add 256 more */ 2271 pad = ((0 - meta->end) & 0xff) + 256; 2272 2273 memset(buf, 0xff, sizeof(buf)); 2274 2275 kfifo_in(&ctx->bitstream_fifo, buf, pad); 2276 } 2277 } 2278 spin_unlock(&ctx->buffer_meta_lock); 2279 2280 coda_kfifo_sync_to_device_full(ctx); 2281 2282 /* Clear decode success flag */ 2283 coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS); 2284 
2285 /* Clear error return value */ 2286 coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB); 2287 2288 trace_coda_dec_pic_run(ctx, meta); 2289 2290 coda_command_async(ctx, CODA_COMMAND_PIC_RUN); 2291 2292 return 0; 2293} 2294 2295static void coda_finish_decode(struct coda_ctx *ctx) 2296{ 2297 struct coda_dev *dev = ctx->dev; 2298 struct coda_q_data *q_data_src; 2299 struct coda_q_data *q_data_dst; 2300 struct vb2_v4l2_buffer *dst_buf; 2301 struct coda_buffer_meta *meta; 2302 int width, height; 2303 int decoded_idx; 2304 int display_idx; 2305 struct coda_internal_frame *decoded_frame = NULL; 2306 u32 src_fourcc; 2307 int success; 2308 u32 err_mb; 2309 int err_vdoa = 0; 2310 u32 val; 2311 2312 if (ctx->aborting) 2313 return; 2314 2315 /* Update kfifo out pointer from coda bitstream read pointer */ 2316 coda_kfifo_sync_from_device(ctx); 2317 2318 /* 2319 * in stream-end mode, the read pointer can overshoot the write pointer 2320 * by up to 512 bytes 2321 */ 2322 if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) { 2323 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512) 2324 kfifo_init(&ctx->bitstream_fifo, 2325 ctx->bitstream.vaddr, ctx->bitstream.size); 2326 } 2327 2328 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 2329 src_fourcc = q_data_src->fourcc; 2330 2331 val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS); 2332 if (val != 1) 2333 pr_err("DEC_PIC_SUCCESS = %d\n", val); 2334 2335 success = val & 0x1; 2336 if (!success) 2337 v4l2_err(&dev->v4l2_dev, "decode failed\n"); 2338 2339 if (src_fourcc == V4L2_PIX_FMT_H264) { 2340 if (val & (1 << 3)) 2341 v4l2_err(&dev->v4l2_dev, 2342 "insufficient PS buffer space (%d bytes)\n", 2343 ctx->psbuf.size); 2344 if (val & (1 << 2)) 2345 v4l2_err(&dev->v4l2_dev, 2346 "insufficient slice buffer space (%d bytes)\n", 2347 ctx->slicebuf.size); 2348 } 2349 2350 val = coda_read(dev, CODA_RET_DEC_PIC_SIZE); 2351 width = (val >> 16) & 0xffff; 2352 height = val & 0xffff; 2353 2354 q_data_dst = get_q_data(ctx, 
V4L2_BUF_TYPE_VIDEO_CAPTURE); 2355 2356 /* frame crop information */ 2357 if (src_fourcc == V4L2_PIX_FMT_H264) { 2358 u32 left_right; 2359 u32 top_bottom; 2360 2361 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT); 2362 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM); 2363 2364 if (left_right == 0xffffffff && top_bottom == 0xffffffff) { 2365 /* Keep current crop information */ 2366 } else { 2367 struct v4l2_rect *rect = &q_data_dst->rect; 2368 2369 rect->left = left_right >> 16 & 0xffff; 2370 rect->top = top_bottom >> 16 & 0xffff; 2371 rect->width = width - rect->left - 2372 (left_right & 0xffff); 2373 rect->height = height - rect->top - 2374 (top_bottom & 0xffff); 2375 } 2376 } else { 2377 /* no cropping */ 2378 } 2379 2380 err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB); 2381 if (err_mb > 0) { 2382 if (__ratelimit(&dev->mb_err_rs)) 2383 coda_dbg(1, ctx, "errors in %d macroblocks\n", err_mb); 2384 v4l2_ctrl_s_ctrl(ctx->mb_err_cnt_ctrl, 2385 v4l2_ctrl_g_ctrl(ctx->mb_err_cnt_ctrl) + err_mb); 2386 } 2387 2388 if (dev->devtype->product == CODA_HX4 || 2389 dev->devtype->product == CODA_7541) { 2390 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION); 2391 if (val == 0) { 2392 /* not enough bitstream data */ 2393 coda_dbg(1, ctx, "prescan failed: %d\n", val); 2394 ctx->hold = true; 2395 return; 2396 } 2397 } 2398 2399 /* Wait until the VDOA finished writing the previous display frame */ 2400 if (ctx->use_vdoa && 2401 ctx->display_idx >= 0 && 2402 ctx->display_idx < ctx->num_internal_frames) { 2403 err_vdoa = vdoa_wait_for_completion(ctx->vdoa); 2404 } 2405 2406 ctx->frm_dis_flg = coda_read(dev, 2407 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 2408 2409 /* The previous display frame was copied out and can be overwritten */ 2410 if (ctx->display_idx >= 0 && 2411 ctx->display_idx < ctx->num_internal_frames) { 2412 ctx->frm_dis_flg &= ~(1 << ctx->display_idx); 2413 coda_write(dev, ctx->frm_dis_flg, 2414 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 2415 } 2416 
2417 /* 2418 * The index of the last decoded frame, not necessarily in 2419 * display order, and the index of the next display frame. 2420 * The latter could have been decoded in a previous run. 2421 */ 2422 decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX); 2423 display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX); 2424 2425 if (decoded_idx == -1) { 2426 /* no frame was decoded, but we might have a display frame */ 2427 if (display_idx >= 0 && display_idx < ctx->num_internal_frames) 2428 ctx->sequence_offset++; 2429 else if (ctx->display_idx < 0) 2430 ctx->hold = true; 2431 } else if (decoded_idx == -2) { 2432 if (ctx->display_idx >= 0 && 2433 ctx->display_idx < ctx->num_internal_frames) 2434 ctx->sequence_offset++; 2435 /* no frame was decoded, we still return remaining buffers */ 2436 } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) { 2437 v4l2_err(&dev->v4l2_dev, 2438 "decoded frame index out of range: %d\n", decoded_idx); 2439 } else { 2440 int sequence; 2441 2442 decoded_frame = &ctx->internal_frames[decoded_idx]; 2443 2444 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM); 2445 if (ctx->sequence_offset == -1) 2446 ctx->sequence_offset = val; 2447 2448 sequence = val + ctx->first_frame_sequence 2449 - ctx->sequence_offset; 2450 spin_lock(&ctx->buffer_meta_lock); 2451 if (!list_empty(&ctx->buffer_meta_list)) { 2452 meta = list_first_entry(&ctx->buffer_meta_list, 2453 struct coda_buffer_meta, list); 2454 list_del(&meta->list); 2455 ctx->num_metas--; 2456 spin_unlock(&ctx->buffer_meta_lock); 2457 /* 2458 * Clamp counters to 16 bits for comparison, as the HW 2459 * counter rolls over at this point for h.264. This 2460 * may be different for other formats, but using 16 bits 2461 * should be enough to detect most errors and saves us 2462 * from doing different things based on the format. 
2463 */ 2464 if ((sequence & 0xffff) != (meta->sequence & 0xffff)) { 2465 v4l2_err(&dev->v4l2_dev, 2466 "sequence number mismatch (%d(%d) != %d)\n", 2467 sequence, ctx->sequence_offset, 2468 meta->sequence); 2469 } 2470 decoded_frame->meta = *meta; 2471 kfree(meta); 2472 } else { 2473 spin_unlock(&ctx->buffer_meta_lock); 2474 v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n"); 2475 memset(&decoded_frame->meta, 0, 2476 sizeof(struct coda_buffer_meta)); 2477 decoded_frame->meta.sequence = sequence; 2478 decoded_frame->meta.last = false; 2479 ctx->sequence_offset++; 2480 } 2481 2482 trace_coda_dec_pic_done(ctx, &decoded_frame->meta); 2483 2484 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7; 2485 decoded_frame->type = (val == 0) ? V4L2_BUF_FLAG_KEYFRAME : 2486 (val == 1) ? V4L2_BUF_FLAG_PFRAME : 2487 V4L2_BUF_FLAG_BFRAME; 2488 2489 decoded_frame->error = err_mb; 2490 } 2491 2492 if (display_idx == -1) { 2493 /* 2494 * no more frames to be decoded, but there could still 2495 * be rotator output to dequeue 2496 */ 2497 ctx->hold = true; 2498 } else if (display_idx == -3) { 2499 /* possibly prescan failure */ 2500 } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) { 2501 v4l2_err(&dev->v4l2_dev, 2502 "presentation frame index out of range: %d\n", 2503 display_idx); 2504 } 2505 2506 /* If a frame was copied out, return it */ 2507 if (ctx->display_idx >= 0 && 2508 ctx->display_idx < ctx->num_internal_frames) { 2509 struct coda_internal_frame *ready_frame; 2510 2511 ready_frame = &ctx->internal_frames[ctx->display_idx]; 2512 2513 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 2514 dst_buf->sequence = ctx->osequence++; 2515 2516 dst_buf->field = V4L2_FIELD_NONE; 2517 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | 2518 V4L2_BUF_FLAG_PFRAME | 2519 V4L2_BUF_FLAG_BFRAME); 2520 dst_buf->flags |= ready_frame->type; 2521 meta = &ready_frame->meta; 2522 if (meta->last && !coda_reorder_enable(ctx)) { 2523 /* 2524 * If this was the last decoded frame, and 
reordering 2525 * is disabled, this will be the last display frame. 2526 */ 2527 coda_dbg(1, ctx, "last meta, marking as last frame\n"); 2528 dst_buf->flags |= V4L2_BUF_FLAG_LAST; 2529 } else if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG && 2530 display_idx == -1) { 2531 /* 2532 * If there is no designated presentation frame anymore, 2533 * this frame has to be the last one. 2534 */ 2535 coda_dbg(1, ctx, 2536 "no more frames to return, marking as last frame\n"); 2537 dst_buf->flags |= V4L2_BUF_FLAG_LAST; 2538 } 2539 dst_buf->timecode = meta->timecode; 2540 dst_buf->vb2_buf.timestamp = meta->timestamp; 2541 2542 trace_coda_dec_rot_done(ctx, dst_buf, meta); 2543 2544 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 2545 q_data_dst->sizeimage); 2546 2547 if (ready_frame->error || err_vdoa) 2548 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR); 2549 else 2550 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE); 2551 2552 if (decoded_frame) { 2553 coda_dbg(1, ctx, "job finished: decoded %c frame %u, returned %c frame %u (%u/%u)%s\n", 2554 coda_frame_type_char(decoded_frame->type), 2555 decoded_frame->meta.sequence, 2556 coda_frame_type_char(dst_buf->flags), 2557 ready_frame->meta.sequence, 2558 dst_buf->sequence, ctx->qsequence, 2559 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? 2560 " (last)" : ""); 2561 } else { 2562 coda_dbg(1, ctx, "job finished: no frame decoded (%d), returned %c frame %u (%u/%u)%s\n", 2563 decoded_idx, 2564 coda_frame_type_char(dst_buf->flags), 2565 ready_frame->meta.sequence, 2566 dst_buf->sequence, ctx->qsequence, 2567 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? 
2568 " (last)" : ""); 2569 } 2570 } else { 2571 if (decoded_frame) { 2572 coda_dbg(1, ctx, "job finished: decoded %c frame %u, no frame returned (%d)\n", 2573 coda_frame_type_char(decoded_frame->type), 2574 decoded_frame->meta.sequence, 2575 ctx->display_idx); 2576 } else { 2577 coda_dbg(1, ctx, "job finished: no frame decoded (%d) or returned (%d)\n", 2578 decoded_idx, ctx->display_idx); 2579 } 2580 } 2581 2582 /* The rotator will copy the current display frame next time */ 2583 ctx->display_idx = display_idx; 2584 2585 /* 2586 * The current decode run might have brought the bitstream fill level 2587 * below the size where we can start the next decode run. As userspace 2588 * might have filled the output queue completely and might thus be 2589 * blocked, we can't rely on the next qbuf to trigger the bitstream 2590 * refill. Check if we have data to refill the bitstream now. 2591 */ 2592 mutex_lock(&ctx->bitstream_mutex); 2593 coda_fill_bitstream(ctx, NULL); 2594 mutex_unlock(&ctx->bitstream_mutex); 2595} 2596 2597static void coda_decode_timeout(struct coda_ctx *ctx) 2598{ 2599 struct vb2_v4l2_buffer *dst_buf; 2600 2601 /* 2602 * For now this only handles the case where we would deadlock with 2603 * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS, 2604 * but after a failed decode run we would hold the context and wait for 2605 * userspace to queue more buffers. 
2606 */ 2607 if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)) 2608 return; 2609 2610 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 2611 dst_buf->sequence = ctx->qsequence - 1; 2612 2613 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR); 2614} 2615 2616const struct coda_context_ops coda_bit_decode_ops = { 2617 .queue_init = coda_decoder_queue_init, 2618 .reqbufs = coda_decoder_reqbufs, 2619 .start_streaming = coda_start_decoding, 2620 .prepare_run = coda_prepare_decode, 2621 .finish_run = coda_finish_decode, 2622 .run_timeout = coda_decode_timeout, 2623 .seq_init_work = coda_dec_seq_init_work, 2624 .seq_end_work = coda_seq_end_work, 2625 .release = coda_bit_release, 2626}; 2627 2628irqreturn_t coda_irq_handler(int irq, void *data) 2629{ 2630 struct coda_dev *dev = data; 2631 struct coda_ctx *ctx; 2632 2633 /* read status register to attend the IRQ */ 2634 coda_read(dev, CODA_REG_BIT_INT_STATUS); 2635 coda_write(dev, 0, CODA_REG_BIT_INT_REASON); 2636 coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET, 2637 CODA_REG_BIT_INT_CLEAR); 2638 2639 ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); 2640 if (ctx == NULL) { 2641 v4l2_err(&dev->v4l2_dev, 2642 "Instance released before the end of transaction\n"); 2643 return IRQ_HANDLED; 2644 } 2645 2646 trace_coda_bit_done(ctx); 2647 2648 if (ctx->aborting) { 2649 coda_dbg(1, ctx, "task has been aborted\n"); 2650 } 2651 2652 if (coda_isbusy(ctx->dev)) { 2653 coda_dbg(1, ctx, "coda is still busy!!!!\n"); 2654 return IRQ_NONE; 2655 } 2656 2657 complete(&ctx->completion); 2658 2659 return IRQ_HANDLED; 2660}