cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

h264.c (25733B)


      1// SPDX-License-Identifier: GPL-2.0+
      2/*
      3 * NVIDIA Tegra Video decoder driver
      4 *
      5 * Copyright (C) 2016-2022 Dmitry Osipenko <digetx@gmail.com>
      6 *
      7 */
      8
      9#include <linux/iopoll.h>
     10#include <linux/pm_runtime.h>
     11#include <linux/reset.h>
     12#include <linux/slab.h>
     13
     14#include <media/v4l2-h264.h>
     15
     16#include "trace.h"
     17#include "vde.h"
     18
     19#define FLAG_B_FRAME		0x1
     20#define FLAG_REFERENCE		0x2
     21
     22struct tegra_vde_h264_frame {
     23	unsigned int frame_num;
     24	unsigned int flags;
     25};
     26
     27struct tegra_vde_h264_decoder_ctx {
     28	unsigned int dpb_frames_nb;
     29	unsigned int dpb_ref_frames_with_earlier_poc_nb;
     30	unsigned int baseline_profile;
     31	unsigned int level_idc;
     32	unsigned int log2_max_pic_order_cnt_lsb;
     33	unsigned int log2_max_frame_num;
     34	unsigned int pic_order_cnt_type;
     35	unsigned int direct_8x8_inference_flag;
     36	unsigned int pic_width_in_mbs;
     37	unsigned int pic_height_in_mbs;
     38	unsigned int pic_init_qp;
     39	unsigned int deblocking_filter_control_present_flag;
     40	unsigned int constrained_intra_pred_flag;
     41	unsigned int chroma_qp_index_offset;
     42	unsigned int pic_order_present_flag;
     43	unsigned int num_ref_idx_l0_active_minus1;
     44	unsigned int num_ref_idx_l1_active_minus1;
     45};
     46
     47struct h264_reflists {
     48	struct v4l2_h264_reference p[V4L2_H264_NUM_DPB_ENTRIES];
     49	struct v4l2_h264_reference b0[V4L2_H264_NUM_DPB_ENTRIES];
     50	struct v4l2_h264_reference b1[V4L2_H264_NUM_DPB_ENTRIES];
     51};
     52
     53static int tegra_vde_wait_mbe(struct tegra_vde *vde)
     54{
     55	u32 tmp;
     56
     57	return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
     58					  tmp >= 0x10, 1, 100);
     59}
     60
     61static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
     62					 unsigned int refs_nb,
     63					 bool setup_refs)
     64{
     65	u32 value, frame_idx_enb_mask = 0;
     66	unsigned int frame_idx;
     67	unsigned int idx;
     68	int err;
     69
     70	tegra_vde_writel(vde, 0xD0000000 | (0 << 23), vde->mbe, 0x80);
     71	tegra_vde_writel(vde, 0xD0200000 | (0 << 23), vde->mbe, 0x80);
     72
     73	err = tegra_vde_wait_mbe(vde);
     74	if (err)
     75		return err;
     76
     77	if (!setup_refs)
     78		return 0;
     79
     80	for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
     81		tegra_vde_writel(vde, 0xD0000000 | (frame_idx << 23),
     82				 vde->mbe, 0x80);
     83		tegra_vde_writel(vde, 0xD0200000 | (frame_idx << 23),
     84				 vde->mbe, 0x80);
     85
     86		frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));
     87
     88		if (idx % 4 == 3 || idx == refs_nb - 1) {
     89			value = 0xC0000000;
     90			value |= (idx >> 2) << 24;
     91			value |= frame_idx_enb_mask;
     92
     93			tegra_vde_writel(vde, value, vde->mbe, 0x80);
     94
     95			err = tegra_vde_wait_mbe(vde);
     96			if (err)
     97				return err;
     98
     99			frame_idx_enb_mask = 0;
    100		}
    101	}
    102
    103	return 0;
    104}
    105
    106static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
    107{
    108	tegra_vde_writel(vde, 0xA0000000 | (reg << 24) | (val & 0xFFFF),
    109			 vde->mbe, 0x80);
    110	tegra_vde_writel(vde, 0xA0000000 | ((reg + 1) << 24) | (val >> 16),
    111			 vde->mbe, 0x80);
    112}
    113
    114static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
    115{
    116	struct device *dev = vde->dev;
    117	u32 value;
    118	int err;
    119
    120	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
    121					 !(value & BIT(2)), 1, 100);
    122	if (err) {
    123		dev_err(dev, "BSEV unknown bit timeout\n");
    124		return err;
    125	}
    126
    127	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
    128					 (value & BSE_ICMDQUE_EMPTY), 1, 100);
    129	if (err) {
    130		dev_err(dev, "BSEV ICMDQUE flush timeout\n");
    131		return err;
    132	}
    133
    134	if (!wait_dma)
    135		return 0;
    136
    137	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
    138					 !(value & BSE_DMA_BUSY), 1, 1000);
    139	if (err) {
    140		dev_err(dev, "BSEV DMA timeout\n");
    141		return err;
    142	}
    143
    144	return 0;
    145}
    146
    147static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
    148					    u32 value, bool wait_dma)
    149{
    150	tegra_vde_writel(vde, value, vde->bsev, ICMDQUE_WR);
    151
    152	return tegra_vde_wait_bsev(vde, wait_dma);
    153}
    154
    155static void tegra_vde_setup_frameid(struct tegra_vde *vde,
    156				    struct tegra_video_frame *frame,
    157				    unsigned int frameid,
    158				    u32 mbs_width, u32 mbs_height)
    159{
    160	u32 y_addr  = frame ? frame->y_addr  : 0x6CDEAD00;
    161	u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
    162	u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
    163	u32 value1 = frame ? ((frame->luma_atoms_pitch << 16) | mbs_height) : 0;
    164	u32 value2 = frame ? ((frame->chroma_atoms_pitch << 6) | 1) : 0;
    165
    166	tegra_vde_writel(vde, y_addr  >> 8, vde->frameid, 0x000 + frameid * 4);
    167	tegra_vde_writel(vde, cb_addr >> 8, vde->frameid, 0x100 + frameid * 4);
    168	tegra_vde_writel(vde, cr_addr >> 8, vde->frameid, 0x180 + frameid * 4);
    169	tegra_vde_writel(vde, value1,       vde->frameid, 0x080 + frameid * 4);
    170	tegra_vde_writel(vde, value2,       vde->frameid, 0x280 + frameid * 4);
    171}
    172
    173static void tegra_setup_frameidx(struct tegra_vde *vde,
    174				 struct tegra_video_frame *frames,
    175				 unsigned int frames_nb,
    176				 u32 mbs_width, u32 mbs_height)
    177{
    178	unsigned int idx;
    179
    180	for (idx = 0; idx < frames_nb; idx++)
    181		tegra_vde_setup_frameid(vde, &frames[idx], idx,
    182					mbs_width, mbs_height);
    183
    184	for (; idx < 17; idx++)
    185		tegra_vde_setup_frameid(vde, NULL, idx, 0, 0);
    186}
    187
    188static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
    189				       unsigned int table,
    190				       unsigned int row,
    191				       u32 value1, u32 value2)
    192{
    193	u32 *iram_tables = vde->iram;
    194
    195	trace_vde_setup_iram_entry(table, row, value1, value2);
    196
    197	iram_tables[0x20 * table + row * 2 + 0] = value1;
    198	iram_tables[0x20 * table + row * 2 + 1] = value2;
    199}
    200
    201static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
    202					struct tegra_video_frame *dpb_frames,
    203					unsigned int ref_frames_nb,
    204					unsigned int with_earlier_poc_nb)
    205{
    206	struct tegra_video_frame *frame;
    207	int with_later_poc_nb;
    208	u32 value, aux_addr;
    209	unsigned int i, k;
    210
    211	trace_vde_ref_l0(dpb_frames[0].frame_num);
    212
    213	for (i = 0; i < 16; i++) {
    214		if (i < ref_frames_nb) {
    215			frame = &dpb_frames[i + 1];
    216
    217			aux_addr = frame->aux_addr;
    218
    219			value  = (i + 1) << 26;
    220			value |= !(frame->flags & FLAG_B_FRAME) << 25;
    221			value |= 1 << 24;
    222			value |= frame->frame_num;
    223		} else {
    224			aux_addr = 0x6ADEAD00;
    225			value = 0x3f;
    226		}
    227
    228		tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
    229		tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
    230		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
    231		tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
    232	}
    233
    234	if (!(dpb_frames[0].flags & FLAG_B_FRAME))
    235		return;
    236
    237	if (with_earlier_poc_nb >= ref_frames_nb)
    238		return;
    239
    240	with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;
    241
    242	trace_vde_ref_l1(with_later_poc_nb, with_earlier_poc_nb);
    243
    244	for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
    245		frame = &dpb_frames[k + 1];
    246
    247		aux_addr = frame->aux_addr;
    248
    249		value  = (k + 1) << 26;
    250		value |= !(frame->flags & FLAG_B_FRAME) << 25;
    251		value |= 1 << 24;
    252		value |= frame->frame_num;
    253
    254		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
    255	}
    256
    257	for (k = 0; i < ref_frames_nb; i++, k++) {
    258		frame = &dpb_frames[k + 1];
    259
    260		aux_addr = frame->aux_addr;
    261
    262		value  = (k + 1) << 26;
    263		value |= !(frame->flags & FLAG_B_FRAME) << 25;
    264		value |= 1 << 24;
    265		value |= frame->frame_num;
    266
    267		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
    268	}
    269}
    270
    271static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
    272				      struct tegra_vde_h264_decoder_ctx *ctx,
    273				      struct tegra_video_frame *dpb_frames,
    274				      dma_addr_t bitstream_data_addr,
    275				      size_t bitstream_data_size,
    276				      unsigned int macroblocks_nb)
    277{
    278	struct device *dev = vde->dev;
    279	u32 value;
    280	int err;
    281
    282	tegra_vde_set_bits(vde, 0x000A, vde->sxe, 0xF0);
    283	tegra_vde_set_bits(vde, 0x000B, vde->bsev, CMDQUE_CONTROL);
    284	tegra_vde_set_bits(vde, 0x8002, vde->mbe, 0x50);
    285	tegra_vde_set_bits(vde, 0x000A, vde->mbe, 0xA0);
    286	tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x14);
    287	tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x28);
    288	tegra_vde_set_bits(vde, 0x0A00, vde->mce, 0x08);
    289	tegra_vde_set_bits(vde, 0x000A, vde->tfe, 0x00);
    290	tegra_vde_set_bits(vde, 0x0005, vde->vdma, 0x04);
    291
    292	tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x1C);
    293	tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x00);
    294	tegra_vde_writel(vde, 0x00000007, vde->vdma, 0x04);
    295	tegra_vde_writel(vde, 0x00000007, vde->frameid, 0x200);
    296	tegra_vde_writel(vde, 0x00000005, vde->tfe, 0x04);
    297	tegra_vde_writel(vde, 0x00000000, vde->mbe, 0x84);
    298	tegra_vde_writel(vde, 0x00000010, vde->sxe, 0x08);
    299	tegra_vde_writel(vde, 0x00000150, vde->sxe, 0x54);
    300	tegra_vde_writel(vde, 0x0000054C, vde->sxe, 0x58);
    301	tegra_vde_writel(vde, 0x00000E34, vde->sxe, 0x5C);
    302	tegra_vde_writel(vde, 0x063C063C, vde->mce, 0x10);
    303	tegra_vde_writel(vde, 0x0003FC00, vde->bsev, INTR_STATUS);
    304	tegra_vde_writel(vde, 0x0000150D, vde->bsev, BSE_CONFIG);
    305	tegra_vde_writel(vde, 0x00000100, vde->bsev, BSE_INT_ENB);
    306	tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x98);
    307	tegra_vde_writel(vde, 0x00000060, vde->bsev, 0x9C);
    308
    309	memset(vde->iram + 128, 0, macroblocks_nb / 2);
    310
    311	tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
    312			     ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);
    313
    314	tegra_vde_setup_iram_tables(vde, dpb_frames,
    315				    ctx->dpb_frames_nb - 1,
    316				    ctx->dpb_ref_frames_with_earlier_poc_nb);
    317
    318	/*
    319	 * The IRAM mapping is write-combine, ensure that CPU buffers have
    320	 * been flushed at this point.
    321	 */
    322	wmb();
    323
    324	tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x8C);
    325	tegra_vde_writel(vde, bitstream_data_addr + bitstream_data_size,
    326			 vde->bsev, 0x54);
    327
    328	vde->bitstream_data_addr = bitstream_data_addr;
    329
    330	value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;
    331
    332	tegra_vde_writel(vde, value, vde->bsev, 0x88);
    333
    334	err = tegra_vde_wait_bsev(vde, false);
    335	if (err)
    336		return err;
    337
    338	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
    339	if (err)
    340		return err;
    341
    342	value = 0x01500000;
    343	value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;
    344
    345	err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
    346	if (err)
    347		return err;
    348
    349	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
    350	if (err)
    351		return err;
    352
    353	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
    354	if (err)
    355		return err;
    356
    357	value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);
    358
    359	err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
    360	if (err)
    361		return err;
    362
    363	value = 0x00800005;
    364	value |= ctx->pic_width_in_mbs << 11;
    365	value |= ctx->pic_height_in_mbs << 3;
    366
    367	tegra_vde_writel(vde, value, vde->sxe, 0x10);
    368
    369	value = !ctx->baseline_profile << 17;
    370	value |= ctx->level_idc << 13;
    371	value |= ctx->log2_max_pic_order_cnt_lsb << 7;
    372	value |= ctx->pic_order_cnt_type << 5;
    373	value |= ctx->log2_max_frame_num;
    374
    375	tegra_vde_writel(vde, value, vde->sxe, 0x40);
    376
    377	value = ctx->pic_init_qp << 25;
    378	value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
    379	value |= !!ctx->pic_order_present_flag;
    380
    381	tegra_vde_writel(vde, value, vde->sxe, 0x44);
    382
    383	value = ctx->chroma_qp_index_offset;
    384	value |= ctx->num_ref_idx_l0_active_minus1 << 5;
    385	value |= ctx->num_ref_idx_l1_active_minus1 << 10;
    386	value |= !!ctx->constrained_intra_pred_flag << 15;
    387
    388	tegra_vde_writel(vde, value, vde->sxe, 0x48);
    389
    390	value = 0x0C000000;
    391	value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;
    392
    393	tegra_vde_writel(vde, value, vde->sxe, 0x4C);
    394
    395	value = 0x03800000;
    396	value |= bitstream_data_size & GENMASK(19, 15);
    397
    398	tegra_vde_writel(vde, value, vde->sxe, 0x68);
    399
    400	tegra_vde_writel(vde, bitstream_data_addr, vde->sxe, 0x6C);
    401
    402	if (vde->soc->supports_ref_pic_marking)
    403		tegra_vde_writel(vde, vde->secure_bo->dma_addr, vde->sxe, 0x7c);
    404
    405	value = 0x10000005;
    406	value |= ctx->pic_width_in_mbs << 11;
    407	value |= ctx->pic_height_in_mbs << 3;
    408
    409	tegra_vde_writel(vde, value, vde->mbe, 0x80);
    410
    411	value = 0x26800000;
    412	value |= ctx->level_idc << 4;
    413	value |= !ctx->baseline_profile << 1;
    414	value |= !!ctx->direct_8x8_inference_flag;
    415
    416	tegra_vde_writel(vde, value, vde->mbe, 0x80);
    417
    418	tegra_vde_writel(vde, 0xF4000001, vde->mbe, 0x80);
    419	tegra_vde_writel(vde, 0x20000000, vde->mbe, 0x80);
    420	tegra_vde_writel(vde, 0xF4000101, vde->mbe, 0x80);
    421
    422	value = 0x20000000;
    423	value |= ctx->chroma_qp_index_offset << 8;
    424
    425	tegra_vde_writel(vde, value, vde->mbe, 0x80);
    426
    427	err = tegra_vde_setup_mbe_frame_idx(vde,
    428					    ctx->dpb_frames_nb - 1,
    429					    ctx->pic_order_cnt_type == 0);
    430	if (err) {
    431		dev_err(dev, "MBE frames setup failed %d\n", err);
    432		return err;
    433	}
    434
    435	tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
    436	tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
    437	tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
    438	tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
    439	tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);
    440
    441	value = 0xFC000000;
    442	value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;
    443
    444	if (!ctx->baseline_profile)
    445		value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;
    446
    447	tegra_vde_writel(vde, value, vde->mbe, 0x80);
    448
    449	err = tegra_vde_wait_mbe(vde);
    450	if (err) {
    451		dev_err(dev, "MBE programming failed %d\n", err);
    452		return err;
    453	}
    454
    455	return 0;
    456}
    457
    458static void tegra_vde_decode_frame(struct tegra_vde *vde,
    459				   unsigned int macroblocks_nb)
    460{
    461	reinit_completion(&vde->decode_completion);
    462
    463	tegra_vde_writel(vde, 0x00000001, vde->bsev, 0x8C);
    464	tegra_vde_writel(vde, 0x20000000 | (macroblocks_nb - 1),
    465			 vde->sxe, 0x00);
    466}
    467
    468static int tegra_vde_validate_h264_ctx(struct device *dev,
    469				       struct tegra_vde_h264_decoder_ctx *ctx)
    470{
    471	if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
    472		dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
    473		return -EINVAL;
    474	}
    475
    476	if (ctx->level_idc > 15) {
    477		dev_err(dev, "Bad level value %u\n", ctx->level_idc);
    478		return -EINVAL;
    479	}
    480
    481	if (ctx->pic_init_qp > 52) {
    482		dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
    483		return -EINVAL;
    484	}
    485
    486	if (ctx->log2_max_pic_order_cnt_lsb > 16) {
    487		dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
    488			ctx->log2_max_pic_order_cnt_lsb);
    489		return -EINVAL;
    490	}
    491
    492	if (ctx->log2_max_frame_num > 16) {
    493		dev_err(dev, "Bad log2_max_frame_num value %u\n",
    494			ctx->log2_max_frame_num);
    495		return -EINVAL;
    496	}
    497
    498	if (ctx->chroma_qp_index_offset > 31) {
    499		dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
    500			ctx->chroma_qp_index_offset);
    501		return -EINVAL;
    502	}
    503
    504	if (ctx->pic_order_cnt_type > 2) {
    505		dev_err(dev, "Bad pic_order_cnt_type value %u\n",
    506			ctx->pic_order_cnt_type);
    507		return -EINVAL;
    508	}
    509
    510	if (ctx->num_ref_idx_l0_active_minus1 > 15) {
    511		dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
    512			ctx->num_ref_idx_l0_active_minus1);
    513		return -EINVAL;
    514	}
    515
    516	if (ctx->num_ref_idx_l1_active_minus1 > 15) {
    517		dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
    518			ctx->num_ref_idx_l1_active_minus1);
    519		return -EINVAL;
    520	}
    521
    522	if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
    523		dev_err(dev, "Bad pic_width_in_mbs value %u\n",
    524			ctx->pic_width_in_mbs);
    525		return -EINVAL;
    526	}
    527
    528	if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
    529		dev_err(dev, "Bad pic_height_in_mbs value %u\n",
    530			ctx->pic_height_in_mbs);
    531		return -EINVAL;
    532	}
    533
    534	return 0;
    535}
    536
    537static int tegra_vde_decode_begin(struct tegra_vde *vde,
    538				  struct tegra_vde_h264_decoder_ctx *ctx,
    539				  struct tegra_video_frame *dpb_frames,
    540				  dma_addr_t bitstream_data_addr,
    541				  size_t bitstream_data_size)
    542{
    543	struct device *dev = vde->dev;
    544	unsigned int macroblocks_nb;
    545	int err;
    546
    547	err = mutex_lock_interruptible(&vde->lock);
    548	if (err)
    549		return err;
    550
    551	err = pm_runtime_resume_and_get(dev);
    552	if (err < 0)
    553		goto unlock;
    554
    555	/*
    556	 * We rely on the VDE registers reset value, otherwise VDE
    557	 * causes bus lockup.
    558	 */
    559	err = reset_control_assert(vde->rst_mc);
    560	if (err) {
    561		dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
    562			err);
    563		goto put_runtime_pm;
    564	}
    565
    566	err = reset_control_reset(vde->rst);
    567	if (err) {
    568		dev_err(dev, "DEC start: Failed to reset HW: %d\n", err);
    569		goto put_runtime_pm;
    570	}
    571
    572	err = reset_control_deassert(vde->rst_mc);
    573	if (err) {
    574		dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
    575			err);
    576		goto put_runtime_pm;
    577	}
    578
    579	macroblocks_nb = ctx->pic_width_in_mbs * ctx->pic_height_in_mbs;
    580
    581	err = tegra_vde_setup_hw_context(vde, ctx, dpb_frames,
    582					 bitstream_data_addr,
    583					 bitstream_data_size,
    584					 macroblocks_nb);
    585	if (err)
    586		goto put_runtime_pm;
    587
    588	tegra_vde_decode_frame(vde, macroblocks_nb);
    589
    590	return 0;
    591
    592put_runtime_pm:
    593	pm_runtime_mark_last_busy(dev);
    594	pm_runtime_put_autosuspend(dev);
    595
    596unlock:
    597	mutex_unlock(&vde->lock);
    598
    599	return err;
    600}
    601
    602static void tegra_vde_decode_abort(struct tegra_vde *vde)
    603{
    604	struct device *dev = vde->dev;
    605	int err;
    606
    607	/*
    608	 * At first reset memory client to avoid resetting VDE HW in the
    609	 * middle of DMA which could result into memory corruption or hang
    610	 * the whole system.
    611	 */
    612	err = reset_control_assert(vde->rst_mc);
    613	if (err)
    614		dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
    615
    616	err = reset_control_assert(vde->rst);
    617	if (err)
    618		dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);
    619
    620	pm_runtime_mark_last_busy(dev);
    621	pm_runtime_put_autosuspend(dev);
    622
    623	mutex_unlock(&vde->lock);
    624}
    625
    626static int tegra_vde_decode_end(struct tegra_vde *vde)
    627{
    628	unsigned int read_bytes, macroblocks_nb;
    629	struct device *dev = vde->dev;
    630	dma_addr_t bsev_ptr;
    631	long timeout;
    632	int ret;
    633
    634	timeout = wait_for_completion_interruptible_timeout(
    635			&vde->decode_completion, msecs_to_jiffies(1000));
    636	if (timeout == 0) {
    637		bsev_ptr = tegra_vde_readl(vde, vde->bsev, 0x10);
    638		macroblocks_nb = tegra_vde_readl(vde, vde->sxe, 0xC8) & 0x1FFF;
    639		read_bytes = bsev_ptr ? bsev_ptr - vde->bitstream_data_addr : 0;
    640
    641		dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks parsed\n",
    642			read_bytes, macroblocks_nb);
    643
    644		ret = -EIO;
    645	} else if (timeout < 0) {
    646		ret = timeout;
    647	} else {
    648		ret = 0;
    649	}
    650
    651	tegra_vde_decode_abort(vde);
    652
    653	return ret;
    654}
    655
    656static struct vb2_buffer *get_ref_buf(struct tegra_ctx *ctx,
    657				      struct vb2_v4l2_buffer *dst,
    658				      unsigned int dpb_idx)
    659{
    660	const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
    661	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
    662	int buf_idx = -1;
    663
    664	if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
    665		buf_idx = vb2_find_timestamp(cap_q,
    666					     dpb[dpb_idx].reference_ts, 0);
    667
    668	/*
    669	 * If a DPB entry is unused or invalid, address of current destination
    670	 * buffer is returned.
    671	 */
    672	if (buf_idx < 0)
    673		return &dst->vb2_buf;
    674
    675	return vb2_get_buffer(cap_q, buf_idx);
    676}
    677
    678static int tegra_vde_validate_vb_size(struct tegra_ctx *ctx,
    679				      struct vb2_buffer *vb,
    680				      unsigned int plane_id,
    681				      size_t min_size)
    682{
    683	u64 offset = vb->planes[plane_id].data_offset;
    684	struct device *dev = ctx->vde->dev;
    685
    686	if (offset + min_size > vb2_plane_size(vb, plane_id)) {
    687		dev_err(dev, "Too small plane[%u] size %lu @0x%llX, should be at least %zu\n",
    688			plane_id, vb2_plane_size(vb, plane_id), offset, min_size);
    689		return -EINVAL;
    690	}
    691
    692	return 0;
    693}
    694
    695static int tegra_vde_h264_setup_frame(struct tegra_ctx *ctx,
    696				      struct tegra_vde_h264_decoder_ctx *h264,
    697				      struct v4l2_h264_reflist_builder *b,
    698				      struct vb2_buffer *vb,
    699				      unsigned int ref_id,
    700				      unsigned int id)
    701{
    702	struct v4l2_pix_format_mplane *pixfmt = &ctx->decoded_fmt.fmt.pix_mp;
    703	struct tegra_m2m_buffer *tb = vb_to_tegra_buf(vb);
    704	struct tegra_ctx_h264 *h = &ctx->h264;
    705	struct tegra_vde *vde = ctx->vde;
    706	struct device *dev = vde->dev;
    707	unsigned int cstride, lstride;
    708	unsigned int flags = 0;
    709	size_t lsize, csize;
    710	int err, frame_num;
    711
    712	lsize = h264->pic_width_in_mbs * 16 * h264->pic_height_in_mbs * 16;
    713	csize = h264->pic_width_in_mbs *  8 * h264->pic_height_in_mbs *  8;
    714	lstride = pixfmt->plane_fmt[0].bytesperline;
    715	cstride = pixfmt->plane_fmt[1].bytesperline;
    716
    717	err = tegra_vde_validate_vb_size(ctx, vb, 0, lsize);
    718	if (err)
    719		return err;
    720
    721	err = tegra_vde_validate_vb_size(ctx, vb, 1, csize);
    722	if (err)
    723		return err;
    724
    725	err = tegra_vde_validate_vb_size(ctx, vb, 2, csize);
    726	if (err)
    727		return err;
    728
    729	if (!tb->aux || tb->aux->size < csize) {
    730		dev_err(dev, "Too small aux size %zd, should be at least %zu\n",
    731			tb->aux ? tb->aux->size : -1, csize);
    732		return -EINVAL;
    733	}
    734
    735	if (id == 0) {
    736		frame_num = h->decode_params->frame_num;
    737
    738		if (h->decode_params->nal_ref_idc)
    739			flags |= FLAG_REFERENCE;
    740	} else {
    741		frame_num = b->refs[ref_id].frame_num;
    742	}
    743
    744	if (tb->b_frame)
    745		flags |= FLAG_B_FRAME;
    746
    747	vde->frames[id].flags = flags;
    748	vde->frames[id].y_addr = tb->dma_addr[0];
    749	vde->frames[id].cb_addr = tb->dma_addr[1];
    750	vde->frames[id].cr_addr = tb->dma_addr[2];
    751	vde->frames[id].aux_addr = tb->aux->dma_addr;
    752	vde->frames[id].frame_num = frame_num & 0x7fffff;
    753	vde->frames[id].luma_atoms_pitch = lstride / VDE_ATOM;
    754	vde->frames[id].chroma_atoms_pitch = cstride / VDE_ATOM;
    755
    756	return 0;
    757}
    758
    759static int tegra_vde_h264_setup_frames(struct tegra_ctx *ctx,
    760				       struct tegra_vde_h264_decoder_ctx *h264)
    761{
    762	struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
    763	struct vb2_v4l2_buffer *dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
    764	const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
    765	struct tegra_m2m_buffer *tb = vb_to_tegra_buf(&dst->vb2_buf);
    766	struct tegra_ctx_h264 *h = &ctx->h264;
    767	struct v4l2_h264_reflist_builder b;
    768	struct v4l2_h264_reference *dpb_id;
    769	struct h264_reflists reflists;
    770	struct vb2_buffer *ref;
    771	unsigned int i;
    772	int err;
    773
    774	/*
    775	 * Tegra hardware requires information about frame's type, assuming
    776	 * that frame consists of the same type slices. Userspace must tag
    777	 * frame's type appropriately.
    778	 *
    779	 * Decoding of a non-uniform frames isn't supported by hardware and
    780	 * require software preprocessing that we don't implement. Decoding
    781	 * is expected to fail in this case. Such video streams are rare in
    782	 * practice, so not a big deal.
    783	 *
    784	 * If userspace doesn't tell us frame's type, then we will try decode
    785	 * as-is.
    786	 */
    787	v4l2_m2m_buf_copy_metadata(src, dst, true);
    788
    789	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME)
    790		tb->b_frame = true;
    791	else
    792		tb->b_frame = false;
    793
    794	err = tegra_vde_h264_setup_frame(ctx, h264, NULL, &dst->vb2_buf, 0,
    795					 h264->dpb_frames_nb++);
    796	if (err)
    797		return err;
    798
    799	if (!(h->decode_params->flags & (V4L2_H264_DECODE_PARAM_FLAG_PFRAME |
    800					 V4L2_H264_DECODE_PARAM_FLAG_BFRAME)))
    801		return 0;
    802
    803	v4l2_h264_init_reflist_builder(&b, h->decode_params, h->sps, dpb);
    804
    805	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME) {
    806		v4l2_h264_build_b_ref_lists(&b, reflists.b0, reflists.b1);
    807		dpb_id = reflists.b0;
    808	} else {
    809		v4l2_h264_build_p_ref_list(&b, reflists.p);
    810		dpb_id = reflists.p;
    811	}
    812
    813	for (i = 0; i < b.num_valid; i++) {
    814		int dpb_idx = dpb_id[i].index;
    815
    816		ref = get_ref_buf(ctx, dst, dpb_idx);
    817
    818		err = tegra_vde_h264_setup_frame(ctx, h264, &b, ref, dpb_idx,
    819						 h264->dpb_frames_nb++);
    820		if (err)
    821			return err;
    822
    823		if (b.refs[dpb_idx].top_field_order_cnt < b.cur_pic_order_count)
    824			h264->dpb_ref_frames_with_earlier_poc_nb++;
    825	}
    826
    827	return 0;
    828}
    829
    830static unsigned int to_tegra_vde_h264_level_idc(unsigned int level_idc)
    831{
    832	switch (level_idc) {
    833	case 11:
    834		return 2;
    835	case 12:
    836		return 3;
    837	case 13:
    838		return 4;
    839	case 20:
    840		return 5;
    841	case 21:
    842		return 6;
    843	case 22:
    844		return 7;
    845	case 30:
    846		return 8;
    847	case 31:
    848		return 9;
    849	case 32:
    850		return 10;
    851	case 40:
    852		return 11;
    853	case 41:
    854		return 12;
    855	case 42:
    856		return 13;
    857	case 50:
    858		return 14;
    859	default:
    860		break;
    861	}
    862
    863	return 15;
    864}
    865
    866static int tegra_vde_h264_setup_context(struct tegra_ctx *ctx,
    867					struct tegra_vde_h264_decoder_ctx *h264)
    868{
    869	struct tegra_ctx_h264 *h = &ctx->h264;
    870	struct tegra_vde *vde = ctx->vde;
    871	struct device *dev = vde->dev;
    872	int err;
    873
    874	memset(h264, 0, sizeof(*h264));
    875	memset(vde->frames, 0, sizeof(vde->frames));
    876
    877	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
    878	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_SPS);
    879	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_PPS);
    880
    881	/* CABAC unsupported by hardware, requires software preprocessing */
    882	if (h->pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
    883		return -EOPNOTSUPP;
    884
    885	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
    886		return -EOPNOTSUPP;
    887
    888	if (h->sps->profile_idc == 66)
    889		h264->baseline_profile = 1;
    890
    891	if (h->sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
    892		h264->direct_8x8_inference_flag = 1;
    893
    894	if (h->pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
    895		h264->constrained_intra_pred_flag = 1;
    896
    897	if (h->pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
    898		h264->deblocking_filter_control_present_flag = 1;
    899
    900	if (h->pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT)
    901		h264->pic_order_present_flag = 1;
    902
    903	h264->level_idc				= to_tegra_vde_h264_level_idc(h->sps->level_idc);
    904	h264->log2_max_pic_order_cnt_lsb	= h->sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
    905	h264->log2_max_frame_num		= h->sps->log2_max_frame_num_minus4 + 4;
    906	h264->pic_order_cnt_type		= h->sps->pic_order_cnt_type;
    907	h264->pic_width_in_mbs			= h->sps->pic_width_in_mbs_minus1 + 1;
    908	h264->pic_height_in_mbs			= h->sps->pic_height_in_map_units_minus1 + 1;
    909
    910	h264->num_ref_idx_l0_active_minus1	= h->pps->num_ref_idx_l0_default_active_minus1;
    911	h264->num_ref_idx_l1_active_minus1	= h->pps->num_ref_idx_l1_default_active_minus1;
    912	h264->chroma_qp_index_offset		= h->pps->chroma_qp_index_offset & 0x1f;
    913	h264->pic_init_qp			= h->pps->pic_init_qp_minus26 + 26;
    914
    915	err = tegra_vde_h264_setup_frames(ctx, h264);
    916	if (err)
    917		return err;
    918
    919	err = tegra_vde_validate_h264_ctx(dev, h264);
    920	if (err)
    921		return err;
    922
    923	return 0;
    924}
    925
    926int tegra_vde_h264_decode_run(struct tegra_ctx *ctx)
    927{
    928	struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
    929	struct tegra_m2m_buffer *bitstream = vb_to_tegra_buf(&src->vb2_buf);
    930	size_t bitstream_size = vb2_get_plane_payload(&src->vb2_buf, 0);
    931	struct tegra_vde_h264_decoder_ctx h264;
    932	struct tegra_vde *vde = ctx->vde;
    933	int err;
    934
    935	err = tegra_vde_h264_setup_context(ctx, &h264);
    936	if (err)
    937		return err;
    938
    939	err = tegra_vde_decode_begin(vde, &h264, vde->frames,
    940				     bitstream->dma_addr[0],
    941				     bitstream_size);
    942	if (err)
    943		return err;
    944
    945	return 0;
    946}
    947
    948int tegra_vde_h264_decode_wait(struct tegra_ctx *ctx)
    949{
    950	return tegra_vde_decode_end(ctx->vde);
    951}