cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

hva-h264.c (31332B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (C) STMicroelectronics SA 2015
      4 * Authors: Yannick Fertre <yannick.fertre@st.com>
      5 *          Hugues Fruchet <hugues.fruchet@st.com>
      6 */
      7
      8#include "hva.h"
      9#include "hva-hw.h"
     10
     11#define MAX_SPS_PPS_SIZE 128
     12
     13#define BITSTREAM_OFFSET_MASK 0x7F
     14
     15/* video max size*/
     16#define H264_MAX_SIZE_W 1920
     17#define H264_MAX_SIZE_H 1920
     18
     19/* macroBlocs number (width & height) */
     20#define MB_W(w) ((w + 0xF)  / 0x10)
     21#define MB_H(h) ((h + 0xF)  / 0x10)
     22
     23/* formula to get temporal or spatial data size */
     24#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)
     25
     26#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
     27#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
     28#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
     29#define SLICE_HEADER_SIZE (4 * 16)
     30#define BRC_DATA_SIZE (5 * 16)
     31
     32/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
     33#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)
     34
     35/*
     36 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
     37 * for deblocking with size=4*16*MBx*2
     38 */
     39#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)
     40
     41/* factor for bitrate and cpb buffer size max values if profile >= high */
     42#define H264_FACTOR_HIGH 1200
     43
     44/* factor for bitrate and cpb buffer size max values if profile < high */
     45#define H264_FACTOR_BASELINE 1000
     46
     47/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
     48#define H264_FILLER_DATA_SIZE 6
     49
/*
 * struct h264_profile - per-level limits used to clip encoder settings
 *
 * @level: V4L2 H.264 level identifier this entry describes
 * @max_mb_per_seconds: not referenced in the visible part of this file;
 *			presumably the max macroblock rate - TODO confirm
 * @max_frame_size: not referenced in the visible part of this file;
 *		    presumably the max frame size in macroblocks - TODO confirm
 * @max_bitrate: multiplied by H264_FACTOR_HIGH or H264_FACTOR_BASELINE to
 *		 obtain the bit rate ceiling in hva_h264_prepare_task()
 * @max_cpb_size: multiplied by H264_FACTOR_HIGH or H264_FACTOR_BASELINE to
 *		  obtain the cpb buffer size ceiling in hva_h264_prepare_task()
 * @min_comp_ratio: not referenced in the visible part of this file;
 *		    presumably the minimum compression ratio - TODO confirm
 */
struct h264_profile {
	enum v4l2_mpeg_video_h264_level level;
	u32 max_mb_per_seconds;
	u32 max_frame_size;
	u32 max_bitrate;
	u32 max_cpb_size;
	u32 min_comp_ratio;
};
     58
/*
 * Limits for each supported level, one entry per level from 1.0 to 5.1.
 * Fields are, in order: level, max_mb_per_seconds, max_frame_size,
 * max_bitrate, max_cpb_size, min_comp_ratio.
 *
 * hva_h264_prepare_task() indexes this table directly with the level
 * control value, so the entry order must follow the numeric order of
 * enum v4l2_mpeg_video_h264_level.
 * NOTE(review): presumably the level control is bounded to LEVEL_5_1 or
 * below elsewhere in the driver - confirm, no bound check is done here.
 */
static const struct h264_profile h264_infos_list[] = {
	{V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
	{V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
	{V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
	{V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
	{V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
};
     77
/*
 * Bit-rate control algorithm, written to the brc_type field of the task
 * descriptor. BRC_TYPE_NONE means constant QP (no bit-rate control).
 */
enum hva_brc_type {
	BRC_TYPE_NONE = 0,
	BRC_TYPE_CBR = 1,
	BRC_TYPE_VBR = 2,
	BRC_TYPE_VBR_LOW_DELAY = 3
};
     84
/*
 * Entropy coding mode, written to the entropy_coding_mode field of the
 * task descriptor.
 */
enum hva_entropy_coding_mode {
	CAVLC = 0,
	CABAC = 1
};
     89
/*
 * Picture coding type, written to the picture_coding_type field of the
 * task descriptor. The visible code only selects the I and P types.
 */
enum hva_picture_coding_type {
	PICTURE_CODING_TYPE_I = 0,
	PICTURE_CODING_TYPE_P = 1,
	PICTURE_CODING_TYPE_B = 2
};
     95
/*
 * Input picture format, written to the sampling_mode field of the task
 * descriptor. hva_h264_prepare_task() only selects SAMPLING_MODE_NV12 and
 * SAMPLING_MODE_NV21; any other source pixel format is rejected there.
 */
enum hva_h264_sampling_mode {
	SAMPLING_MODE_NV12 = 0,
	SAMPLING_MODE_UYVY = 1,
	SAMPLING_MODE_RGB3 = 3,
	SAMPLING_MODE_XRGB4 = 4,
	SAMPLING_MODE_NV21 = 8,
	SAMPLING_MODE_VYUY = 9,
	SAMPLING_MODE_BGR3 = 11,
	SAMPLING_MODE_XBGR4 = 12,
	SAMPLING_MODE_RGBX4 = 20,
	SAMPLING_MODE_BGRX4 = 28
};
    108
/*
 * NAL unit types. The value is written as the byte that follows the start
 * code when a NAL unit is generated on host side: the visible code emits
 * NALU_TYPE_SEI (hva_h264_fill_sei_nal) and NALU_TYPE_FILLER_DATA
 * (hva_h264_fill_data_nal).
 */
enum hva_h264_nalu_type {
	NALU_TYPE_UNKNOWN = 0,
	NALU_TYPE_SLICE = 1,
	NALU_TYPE_SLICE_DPA = 2,
	NALU_TYPE_SLICE_DPB = 3,
	NALU_TYPE_SLICE_DPC = 4,
	NALU_TYPE_SLICE_IDR = 5,
	NALU_TYPE_SEI = 6,
	NALU_TYPE_SPS = 7,
	NALU_TYPE_PPS = 8,
	NALU_TYPE_AU_DELIMITER = 9,
	NALU_TYPE_SEQ_END = 10,
	NALU_TYPE_STREAM_END = 11,
	NALU_TYPE_FILLER_DATA = 12,
	NALU_TYPE_SPS_EXT = 13,
	NALU_TYPE_PREFIX_UNIT = 14,
	NALU_TYPE_SUBSET_SPS = 15,
	NALU_TYPE_SLICE_AUX = 19,
	NALU_TYPE_SLICE_EXT = 20
};
    129
/*
 * SEI payload types. Only SEI_STEREO_VIDEO_INFO is generated by
 * hva_h264_fill_sei_nal(); the other types are rejected with -EINVAL.
 */
enum hva_h264_sei_payload_type {
	SEI_BUFFERING_PERIOD = 0,
	SEI_PICTURE_TIMING = 1,
	SEI_STEREO_VIDEO_INFO = 21,
	SEI_FRAME_PACKING_ARRANGEMENT = 45
};
    136
/*
 * struct hva_h264_stereo_video_sei - stereo video info SEI flags
 *
 * Each member holds a single flag (0 or 1). hva_h264_fill_sei_nal() packs
 * the flags into one payload byte, most significant bit first:
 * field_views_flag goes to bit 7, then either top_field_is_left_view_flag
 * (when field_views_flag is set) or current_frame_is_left_view_flag and
 * next_frame_is_second_view_flag, then the two *_self_contained_flag bits.
 */
struct hva_h264_stereo_video_sei {
	u8 field_views_flag;
	u8 top_field_is_left_view_flag;
	u8 current_frame_is_left_view_flag;
	u8 next_frame_is_second_view_flag;
	u8 left_view_self_contained_flag;
	u8 right_view_self_contained_flag;
};
    148
/*
 * struct hva_h264_td - task descriptor handed over to the HVA hardware
 *
 * The reserved1..reserved8 members are not described below; they are left
 * to zero by the memset() done in hva_h264_prepare_task().
 *
 * @frame_width: width in pixels of the buffer containing the input frame
 * @frame_height: height in pixels of the buffer containing the input frame
 * @frame_num: the parameter to be written in the slice header
 * @picture_coding_type: type I, P or B
 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
 * @first_picture_in_sequence: flag telling to encoder that this is the
 *			       first picture in a video sequence.
 *			       Used for VBR
 * @slice_size_type: 0 = no constraint to close the slice
 *		     1 = a slice is closed as soon as the slice_mb_size limit
 *			 is reached
 *		     2 = a slice is closed as soon as the slice_byte_size limit
 *			 is reached
 *		     3 = a slice is closed as soon as either the slice_byte_size
 *			 limit or the slice_mb_size limit is reached
 * @slice_mb_size: defines the slice size in number of macroblocks
 *		   (used when slice_size_type=1 or slice_size_type=3)
 * @ir_param_option: defines the number of macroblocks per frame to be
 *		     refreshed by AIR algorithm OR the refresh period
 *		     by CIR algorithm
 * @intra_refresh_type: enables the adaptive intra refresh algorithm.
 *			Disable=0 / Adaptive=1 and Cycle=2 as intra refresh
 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
 * @transform_mode: controls the use of 4x4/8x8 transform mode
 * @disable_deblocking_filter_idc:
 *		     0: specifies that all luma and chroma block edges of
 *			the slice are filtered.
 *		     1: specifies that deblocking is disabled for all block
 *			edges of the slice.
 *		     2: specifies that all luma and chroma block edges of
 *			the slice are filtered with exception of the block edges
 *			that coincide with slice boundaries
 * @slice_alpha_c0_offset_div2: to be written in slice header,
 *				controls deblocking
 * @slice_beta_offset_div2: to be written in slice header,
 *			    controls deblocking
 * @encoder_complexity: encoder complexity control (IME).
 *		     0 = I_16x16, P_16x16, Full ME Complexity
 *		     1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
 *		     2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
 *		     4 = I_16x16, P_16x16, Reduced ME Complexity
 *		     5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
 *		     6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
 * @chroma_qp_index_offset: coming from picture parameter set
 *			    (PPS see [H.264 STD] 7.4.2.2)
 * @entropy_coding_mode: entropy coding mode.
 *			 0 = CAVLC
 *			 1 = CABAC
 * @brc_type: selects the bit-rate control algorithm
 *		     0 = constant Qp, (no BRC)
 *		     1 = CBR
 *		     2 = VBR
 *		     3 = VBR low delay (see enum hva_brc_type)
 * @quant: Quantization param used in case of fix QP encoding (no BRC)
 * @non_vcl_nalu_size: size of non-VCL NALUs (SPS, PPS, filler),
 *		       used by BRC
 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
 * @bit_rate: target bitrate, for BRC
 * @qp_min: min QP threshold
 * @qp_max: max QP threshold
 * @framerate_num: target framerate numerator, used by BRC
 * @framerate_den: target framerate denominator, used by BRC
 * @delay: End-to-End Initial Delay
 * @strict_hrd_compliancy: flag for HRD compliancy (1)
 *			   May impact quality encoding
 * @addr_source_buffer: address of input frame buffer for current frame
 * @addr_fwd_ref_buffer: address of reference frame buffer
 * @addr_rec_buffer: address of reconstructed frame buffer
 * @addr_output_bitstream_start: output bitstream start address
 * @addr_output_bitstream_end: output bitstream end address
 * @addr_external_sw: address of external search window
 * @addr_lctx: address of context picture buffer
 * @addr_local_rec_buffer: address of local reconstructed buffer
 * @addr_spatial_context: address of spatial context buffer
 * @bitstream_offset: offset in bits between aligned bitstream start
 *		      address and first bit to be written by HVA.
 *		      Range value is [0..63]
 * @sampling_mode: Input picture format.
 *		     0: YUV420 semi_planar Interleaved
 *		     1: YUV422 raster Interleaved
 *		   (other values: see enum hva_h264_sampling_mode)
 * @addr_param_out: address of output parameters structure
 * @addr_scaling_matrix: address to the coefficient of
 *			 the inverse scaling matrix
 * @addr_scaling_matrix_dir: address to the coefficient of
 *			     the direct scaling matrix
 * @addr_cabac_context_buffer: address of cabac context buffer
 * @gmv_x: Input information about the horizontal global displacement of
 *	   the encoded frame versus the previous one
 * @gmv_y: Input information about the vertical global displacement of
 *	   the encoded frame versus the previous one
 * @window_width: width in pixels of the window to be encoded inside
 *		  the input frame
 * @window_height: height in pixels of the window to be encoded inside
 *		   the input frame
 * @window_horizontal_offset: horizontal offset in pels for input window
 *			      within input frame
 * @window_vertical_offset: vertical offset in pels for input window
 *			    within input frame
 * @addr_roi: Map of QP offset for the Region of Interest algorithm and
 *	      also used for Error map.
 *	      Bit 0-6 used for qp offset (value -64 to 63).
 *	      Bit 7 used to force intra
 * @addr_slice_header: address to slice header
 * @slice_header_size_in_bits: size in bits of the Slice header
 * @slice_header_offset0: Slice header offset where to insert
 *			  first_Mb_in_slice
 * @slice_header_offset1: Slice header offset where to insert
 *			  slice_qp_delta
 * @slice_header_offset2: Slice header offset where to insert
 *			  num_MBs_in_slice
 * @slice_synchro_enable: enable "slice ready" interrupt after each slice
 * @max_slice_number: Maximum number of slice in a frame
 *		      (0 is strictly forbidden)
 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *		      YUV for the Y component.
 *		      Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_u_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *		      YUV for the U (Cb) component.
 *		      U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
 *		      YUV for the V (Cr) component.
 *		      V = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
 * @slice_byte_size: maximum slice size in bytes
 *		     (used when slice_size_type=2 or slice_size_type=3)
 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
 *			 for the AIR algorithm
 * @brc_no_skip: Disable skipping in the Bitrate Controller
 * @addr_temporal_context: address of temporal context buffer
 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
 */
struct hva_h264_td {
	u16 frame_width;
	u16 frame_height;
	u32 frame_num;
	u16 picture_coding_type;
	u16 reserved1;
	u16 pic_order_cnt_type;
	u16 first_picture_in_sequence;
	u16 slice_size_type;
	u16 reserved2;
	u32 slice_mb_size;
	u16 ir_param_option;
	u16 intra_refresh_type;
	u16 use_constrained_intra_flag;
	u16 transform_mode;
	u16 disable_deblocking_filter_idc;
	s16 slice_alpha_c0_offset_div2;
	s16 slice_beta_offset_div2;
	u16 encoder_complexity;
	s16 chroma_qp_index_offset;
	u16 entropy_coding_mode;
	u16 brc_type;
	u16 quant;
	u32 non_vcl_nalu_size;
	u32 cpb_buffer_size;
	u32 bit_rate;
	u16 qp_min;
	u16 qp_max;
	u16 framerate_num;
	u16 framerate_den;
	u16 delay;
	u16 strict_hrd_compliancy;
	u32 addr_source_buffer;
	u32 addr_fwd_ref_buffer;
	u32 addr_rec_buffer;
	u32 addr_output_bitstream_start;
	u32 addr_output_bitstream_end;
	u32 addr_external_sw;
	u32 addr_lctx;
	u32 addr_local_rec_buffer;
	u32 addr_spatial_context;
	u16 bitstream_offset;
	u16 sampling_mode;
	u32 addr_param_out;
	u32 addr_scaling_matrix;
	u32 addr_scaling_matrix_dir;
	u32 addr_cabac_context_buffer;
	u32 reserved3;
	u32 reserved4;
	s16 gmv_x;
	s16 gmv_y;
	u16 window_width;
	u16 window_height;
	u16 window_horizontal_offset;
	u16 window_vertical_offset;
	u32 addr_roi;
	u32 addr_slice_header;
	u16 slice_header_size_in_bits;
	u16 slice_header_offset0;
	u16 slice_header_offset1;
	u16 slice_header_offset2;
	u32 reserved5;
	u32 reserved6;
	u16 reserved7;
	u16 reserved8;
	u16 slice_synchro_enable;
	u16 max_slice_number;
	u32 rgb2_yuv_y_coeff;
	u32 rgb2_yuv_u_coeff;
	u32 rgb2_yuv_v_coeff;
	u32 slice_byte_size;
	u16 max_air_intra_mb_nb;
	u16 brc_no_skip;
	u32 addr_temporal_context;
	u32 addr_brc_in_out_parameter;
};
    356
/*
 * struct hva_h264_slice_po - per-slice output parameters
 *
 * @slice_size: slice size
 * @slice_start_time: start time
 * @slice_end_time: stop time
 * @slice_num: slice number
 */
struct hva_h264_slice_po {
	u32 slice_size;
	u32 slice_start_time;
	u32 slice_end_time;
	u32 slice_num;
};
    371
/*
 * struct hva_h264_po - output parameters of an encoding task
 *
 * @bitstream_size: bitstream size
 * @dct_bitstream_size: dct bitstream size
 * @stuffing_bits: number of stuffing bits inserted by the encoder
 * @removal_time: removal time of current frame (nb of ticks 1/framerate)
 * @hvc_start_time: hvc start time
 * @hvc_stop_time: hvc stop time
 * @slice_count: slice count
 * @reserved0: reserved
 * @slice_params: per-slice output parameters (up to 16 entries)
 */
struct hva_h264_po {
	u32 bitstream_size;
	u32 dct_bitstream_size;
	u32 stuffing_bits;
	u32 removal_time;
	u32 hvc_start_time;
	u32 hvc_stop_time;
	u32 slice_count;
	u32 reserved0;
	struct hva_h264_slice_po slice_params[16];
};
    394
/*
 * struct hva_h264_task - task buffer layout
 *
 * @td: task descriptor (input parameters)
 * @po: output parameters; hva_h264_prepare_task() stores its address in
 *	td.addr_param_out using offsetof(struct hva_h264_task, po)
 */
struct hva_h264_task {
	struct hva_h264_td td;
	struct hva_h264_po po;
};
    399
/*
 * struct hva_h264_ctx - H.264 encoder private context
 *
 * @seq_info:  sequence information buffer; hva_h264_prepare_task() places
 *	       the spatial and temporal contexts, the BRC parameters and
 *	       the slice header inside it
 * @ref_frame: reference frame buffer
 * @rec_frame: reconstructed frame buffer
 * @task:      task descriptor
 */
struct hva_h264_ctx {
	struct hva_buffer *seq_info;
	struct hva_buffer *ref_frame;
	struct hva_buffer *rec_frame;
	struct hva_buffer *task;
};
    414
    415static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
    416				      u8 *slice_header_addr,
    417				      struct hva_controls *ctrls,
    418				      int frame_num,
    419				      u16 *header_size,
    420				      u16 *header_offset0,
    421				      u16 *header_offset1,
    422				      u16 *header_offset2)
    423{
    424	/*
    425	 * with this HVA hardware version, part of the slice header is computed
    426	 * on host and part by hardware.
    427	 * The part of host is precomputed and available through this array.
    428	 */
    429	struct device *dev = ctx_to_dev(pctx);
    430	int  cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
    431	static const unsigned char slice_header[] = {
    432		0x00, 0x00, 0x00, 0x01,
    433		0x41, 0x34, 0x07, 0x00
    434	};
    435	int idr_pic_id = frame_num % 2;
    436	enum hva_picture_coding_type type;
    437	u32 frame_order = frame_num % ctrls->gop_size;
    438
    439	if (!(frame_num % ctrls->gop_size))
    440		type = PICTURE_CODING_TYPE_I;
    441	else
    442		type = PICTURE_CODING_TYPE_P;
    443
    444	memcpy(slice_header_addr, slice_header, sizeof(slice_header));
    445
    446	*header_size = 56;
    447	*header_offset0 = 40;
    448	*header_offset1 = 13;
    449	*header_offset2 = 0;
    450
    451	if (type == PICTURE_CODING_TYPE_I) {
    452		slice_header_addr[4] = 0x65;
    453		slice_header_addr[5] = 0x11;
    454
    455		/* toggle the I frame */
    456		if ((frame_num / ctrls->gop_size) % 2) {
    457			*header_size += 4;
    458			*header_offset1 += 4;
    459			slice_header_addr[6] = 0x04;
    460			slice_header_addr[7] = 0x70;
    461
    462		} else {
    463			*header_size += 2;
    464			*header_offset1 += 2;
    465			slice_header_addr[6] = 0x09;
    466			slice_header_addr[7] = 0xC0;
    467		}
    468	} else {
    469		if (ctrls->entropy_mode == cabac) {
    470			*header_size += 1;
    471			*header_offset1 += 1;
    472			slice_header_addr[7] = 0x80;
    473		}
    474		/*
    475		 * update slice header with P frame order
    476		 * frame order is limited to 16 (coded on 4bits only)
    477		 */
    478		slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
    479		slice_header_addr[6] += ((frame_order & 0x03) << 6);
    480	}
    481
    482	dev_dbg(dev,
    483		"%s   %s slice header order %d idrPicId %d header size %d\n",
    484		pctx->name, __func__, frame_order, idr_pic_id, *header_size);
    485	return 0;
    486}
    487
    488static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
    489				  unsigned int stuffing_bytes, u8 *addr,
    490				  unsigned int stream_size, unsigned int *size)
    491{
    492	struct device *dev = ctx_to_dev(pctx);
    493	static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
    494
    495	dev_dbg(dev, "%s   %s stuffing bytes %d\n", pctx->name, __func__,
    496		stuffing_bytes);
    497
    498	if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
    499		dev_dbg(dev, "%s   %s too many stuffing bytes %d\n",
    500			pctx->name, __func__, stuffing_bytes);
    501		return 0;
    502	}
    503
    504	/* start code */
    505	memcpy(addr + *size, start, sizeof(start));
    506	*size += sizeof(start);
    507
    508	/* nal_unit_type */
    509	addr[*size] = NALU_TYPE_FILLER_DATA;
    510	*size += 1;
    511
    512	memset(addr + *size, 0xff, stuffing_bytes);
    513	*size += stuffing_bytes;
    514
    515	addr[*size] = 0x80;
    516	*size += 1;
    517
    518	return 0;
    519}
    520
    521static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
    522				 enum hva_h264_sei_payload_type type,
    523				 u8 *addr, u32 *size)
    524{
    525	struct device *dev = ctx_to_dev(pctx);
    526	static const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
    527	struct hva_h264_stereo_video_sei info;
    528	u8 offset = 7;
    529	u8 msg = 0;
    530
    531	/* start code */
    532	memcpy(addr + *size, start, sizeof(start));
    533	*size += sizeof(start);
    534
    535	/* nal_unit_type */
    536	addr[*size] = NALU_TYPE_SEI;
    537	*size += 1;
    538
    539	/* payload type */
    540	addr[*size] = type;
    541	*size += 1;
    542
    543	switch (type) {
    544	case SEI_STEREO_VIDEO_INFO:
    545		memset(&info, 0, sizeof(info));
    546
    547		/* set to top/bottom frame packing arrangement */
    548		info.field_views_flag = 1;
    549		info.top_field_is_left_view_flag = 1;
    550
    551		/* payload size */
    552		addr[*size] = 1;
    553		*size += 1;
    554
    555		/* payload */
    556		msg = info.field_views_flag << offset--;
    557
    558		if (info.field_views_flag) {
    559			msg |= info.top_field_is_left_view_flag <<
    560			       offset--;
    561		} else {
    562			msg |= info.current_frame_is_left_view_flag <<
    563			       offset--;
    564			msg |= info.next_frame_is_second_view_flag <<
    565			       offset--;
    566		}
    567		msg |= info.left_view_self_contained_flag << offset--;
    568		msg |= info.right_view_self_contained_flag << offset--;
    569
    570		addr[*size] = msg;
    571		*size += 1;
    572
    573		addr[*size] = 0x80;
    574		*size += 1;
    575
    576		return 0;
    577	case SEI_BUFFERING_PERIOD:
    578	case SEI_PICTURE_TIMING:
    579	case SEI_FRAME_PACKING_ARRANGEMENT:
    580	default:
    581		dev_err(dev, "%s   sei nal type not supported %d\n",
    582			pctx->name, type);
    583		return -EINVAL;
    584	}
    585}
    586
    587static int hva_h264_prepare_task(struct hva_ctx *pctx,
    588				 struct hva_h264_task *task,
    589				 struct hva_frame *frame,
    590				 struct hva_stream *stream)
    591{
    592	struct hva_dev *hva = ctx_to_hdev(pctx);
    593	struct device *dev = ctx_to_dev(pctx);
    594	struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
    595	struct hva_buffer *seq_info = ctx->seq_info;
    596	struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
    597	struct hva_buffer *loc_rec_frame = ctx->rec_frame;
    598	struct hva_h264_td *td = &task->td;
    599	struct hva_controls *ctrls = &pctx->ctrls;
    600	struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
    601	int cavlc =  V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
    602	u32 frame_num = pctx->stream_num;
    603	u32 addr_esram = hva->esram_addr;
    604	enum v4l2_mpeg_video_h264_level level;
    605	dma_addr_t paddr = 0;
    606	u8 *slice_header_vaddr;
    607	u32 frame_width = frame->info.aligned_width;
    608	u32 frame_height = frame->info.aligned_height;
    609	u32 max_cpb_buffer_size;
    610	unsigned int payload = stream->bytesused;
    611	u32 max_bitrate;
    612
    613	/* check width and height parameters */
    614	if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
    615	    (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
    616		dev_err(dev,
    617			"%s   width(%d) or height(%d) exceeds limits (%dx%d)\n",
    618			pctx->name, frame_width, frame_height,
    619			H264_MAX_SIZE_W, H264_MAX_SIZE_H);
    620		pctx->frame_errors++;
    621		return -EINVAL;
    622	}
    623
    624	level = ctrls->level;
    625
    626	memset(td, 0, sizeof(struct hva_h264_td));
    627
    628	td->frame_width = frame_width;
    629	td->frame_height = frame_height;
    630
    631	/* set frame alignment */
    632	td->window_width =  frame_width;
    633	td->window_height = frame_height;
    634	td->window_horizontal_offset = 0;
    635	td->window_vertical_offset = 0;
    636
    637	td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
    638
    639	/* pic_order_cnt_type hard coded to '2' as only I & P frames */
    640	td->pic_order_cnt_type = 2;
    641
    642	/* useConstrainedIntraFlag set to false for better coding efficiency */
    643	td->use_constrained_intra_flag = false;
    644	td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
    645			? BRC_TYPE_CBR : BRC_TYPE_VBR;
    646
    647	td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
    648				  CABAC;
    649
    650	td->bit_rate = ctrls->bitrate;
    651
    652	/* set framerate, framerate = 1 n/ time per frame */
    653	if (time_per_frame->numerator >= 536) {
    654		/*
    655		 * due to a hardware bug, framerate denominator can't exceed
    656		 * 536 (BRC overflow). Compute nearest framerate
    657		 */
    658		td->framerate_den = 1;
    659		td->framerate_num = (time_per_frame->denominator +
    660				    (time_per_frame->numerator >> 1) - 1) /
    661				    time_per_frame->numerator;
    662
    663		/*
    664		 * update bitrate to introduce a correction due to
    665		 * the new framerate
    666		 * new bitrate = (old bitrate * new framerate) / old framerate
    667		 */
    668		td->bit_rate /= time_per_frame->numerator;
    669		td->bit_rate *= time_per_frame->denominator;
    670		td->bit_rate /= td->framerate_num;
    671	} else {
    672		td->framerate_den = time_per_frame->numerator;
    673		td->framerate_num = time_per_frame->denominator;
    674	}
    675
    676	/* compute maximum bitrate depending on profile */
    677	if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
    678		max_bitrate = h264_infos_list[level].max_bitrate *
    679			      H264_FACTOR_HIGH;
    680	else
    681		max_bitrate = h264_infos_list[level].max_bitrate *
    682			      H264_FACTOR_BASELINE;
    683
    684	/* check if bitrate doesn't exceed max size */
    685	if (td->bit_rate > max_bitrate) {
    686		dev_dbg(dev,
    687			"%s   bitrate (%d) larger than level and profile allow, clip to %d\n",
    688			pctx->name, td->bit_rate, max_bitrate);
    689		td->bit_rate = max_bitrate;
    690	}
    691
    692	/* convert cpb_buffer_size in bits */
    693	td->cpb_buffer_size = ctrls->cpb_size * 8000;
    694
    695	/* compute maximum cpb buffer size depending on profile */
    696	if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
    697		max_cpb_buffer_size =
    698		    h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
    699	else
    700		max_cpb_buffer_size =
    701		    h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
    702
    703	/* check if cpb buffer size doesn't exceed max size */
    704	if (td->cpb_buffer_size > max_cpb_buffer_size) {
    705		dev_dbg(dev,
    706			"%s   cpb size larger than level %d allows, clip to %d\n",
    707			pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
    708		td->cpb_buffer_size = max_cpb_buffer_size;
    709	}
    710
    711	/* enable skipping in the Bitrate Controller */
    712	td->brc_no_skip = 0;
    713
    714	/* initial delay */
    715	if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
    716	    td->bit_rate)
    717		td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
    718	else
    719		td->delay = 0;
    720
    721	switch (frame->info.pixelformat) {
    722	case V4L2_PIX_FMT_NV12:
    723		td->sampling_mode = SAMPLING_MODE_NV12;
    724		break;
    725	case V4L2_PIX_FMT_NV21:
    726		td->sampling_mode = SAMPLING_MODE_NV21;
    727		break;
    728	default:
    729		dev_err(dev, "%s   invalid source pixel format\n",
    730			pctx->name);
    731		pctx->frame_errors++;
    732		return -EINVAL;
    733	}
    734
    735	/*
    736	 * fill matrix color converter (RGB to YUV)
    737	 * Y = 0,299 R + 0,587 G + 0,114 B
    738	 * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
    739	 * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
    740	 */
    741	td->rgb2_yuv_y_coeff = 0x12031008;
    742	td->rgb2_yuv_u_coeff = 0x800EF7FB;
    743	td->rgb2_yuv_v_coeff = 0x80FEF40E;
    744
    745	/* enable/disable transform mode */
    746	td->transform_mode = ctrls->dct8x8;
    747
    748	/* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
    749	td->encoder_complexity = 2;
    750
    751	/* quant fix to 28, default VBR value */
    752	td->quant = 28;
    753
    754	if (td->framerate_den == 0) {
    755		dev_err(dev, "%s   invalid framerate\n", pctx->name);
    756		pctx->frame_errors++;
    757		return -EINVAL;
    758	}
    759
    760	/* if automatic framerate, deactivate bitrate controller */
    761	if (td->framerate_num == 0)
    762		td->brc_type = 0;
    763
    764	/* compliancy fix to true */
    765	td->strict_hrd_compliancy = 1;
    766
    767	/* set minimum & maximum quantizers */
    768	td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
    769	td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
    770
    771	td->addr_source_buffer = frame->paddr;
    772	td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
    773	td->addr_rec_buffer = loc_rec_frame->paddr;
    774
    775	td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
    776
    777	td->addr_output_bitstream_start = (u32)stream->paddr;
    778	td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
    779			       BITSTREAM_OFFSET_MASK;
    780
    781	td->addr_param_out = (u32)ctx->task->paddr +
    782			     offsetof(struct hva_h264_task, po);
    783
    784	/* swap spatial and temporal context */
    785	if (frame_num % 2) {
    786		paddr = seq_info->paddr;
    787		td->addr_spatial_context =  ALIGN(paddr, 0x100);
    788		paddr = seq_info->paddr + DATA_SIZE(frame_width,
    789							frame_height);
    790		td->addr_temporal_context = ALIGN(paddr, 0x100);
    791	} else {
    792		paddr = seq_info->paddr;
    793		td->addr_temporal_context = ALIGN(paddr, 0x100);
    794		paddr = seq_info->paddr + DATA_SIZE(frame_width,
    795							frame_height);
    796		td->addr_spatial_context =  ALIGN(paddr, 0x100);
    797	}
    798
    799	paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
    800
    801	td->addr_brc_in_out_parameter =  ALIGN(paddr, 0x100);
    802
    803	paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
    804	td->addr_slice_header =  ALIGN(paddr, 0x100);
    805	td->addr_external_sw =  ALIGN(addr_esram, 0x100);
    806
    807	addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
    808	td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
    809
    810	addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
    811	td->addr_lctx = ALIGN(addr_esram, 0x100);
    812
    813	addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
    814	td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
    815
    816	if (!(frame_num % ctrls->gop_size)) {
    817		td->picture_coding_type = PICTURE_CODING_TYPE_I;
    818		stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
    819	} else {
    820		td->picture_coding_type = PICTURE_CODING_TYPE_P;
    821		stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
    822	}
    823
    824	/* fill the slice header part */
    825	slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
    826			     seq_info->paddr);
    827
    828	hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
    829				   &td->slice_header_size_in_bits,
    830				   &td->slice_header_offset0,
    831				   &td->slice_header_offset1,
    832				   &td->slice_header_offset2);
    833
    834	td->chroma_qp_index_offset = 2;
    835	td->slice_synchro_enable = 0;
    836	td->max_slice_number = 1;
    837
    838	/*
    839	 * check the sps/pps header size for key frame only
    840	 * sps/pps header was previously fill by libv4l
    841	 * during qbuf of stream buffer
    842	 */
    843	if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
    844	    (payload > MAX_SPS_PPS_SIZE)) {
    845		dev_err(dev, "%s   invalid sps/pps size %d\n", pctx->name,
    846			payload);
    847		pctx->frame_errors++;
    848		return -EINVAL;
    849	}
    850
    851	if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
    852		payload = 0;
    853
    854	/* add SEI nal (video stereo info) */
    855	if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
    856						   (u8 *)stream->vaddr,
    857						   &payload)) {
    858		dev_err(dev, "%s   fail to get SEI nal\n", pctx->name);
    859		pctx->frame_errors++;
    860		return -EINVAL;
    861	}
    862
    863	/* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
    864	td->non_vcl_nalu_size = payload * 8;
    865
    866	/* compute bitstream offset & new start address of bitstream */
    867	td->addr_output_bitstream_start += ((payload >> 4) << 4);
    868	td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
    869
    870	stream->bytesused = payload;
    871
    872	return 0;
    873}
    874
    875static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
    876{
    877	struct hva_h264_po *po = &task->po;
    878
    879	return po->bitstream_size;
    880}
    881
    882static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
    883{
    884	struct hva_h264_po *po = &task->po;
    885
    886	return po->stuffing_bits >> 3;
    887}
    888
    889static int hva_h264_open(struct hva_ctx *pctx)
    890{
    891	struct device *dev = ctx_to_dev(pctx);
    892	struct hva_h264_ctx *ctx;
    893	struct hva_dev *hva = ctx_to_hdev(pctx);
    894	u32 frame_width = pctx->frameinfo.aligned_width;
    895	u32 frame_height = pctx->frameinfo.aligned_height;
    896	u32 size;
    897	int ret;
    898
    899	/* check esram size necessary to encode a frame */
    900	size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
    901	       LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
    902	       CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
    903	       CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
    904
    905	if (hva->esram_size < size) {
    906		dev_err(dev, "%s   not enough esram (max:%d request:%d)\n",
    907			pctx->name, hva->esram_size, size);
    908		ret = -EINVAL;
    909		goto err;
    910	}
    911
    912	/* allocate context for codec */
    913	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
    914	if (!ctx) {
    915		ret = -ENOMEM;
    916		goto err;
    917	}
    918
    919	/* allocate sequence info buffer */
    920	ret = hva_mem_alloc(pctx,
    921			    2 * DATA_SIZE(frame_width, frame_height) +
    922			    SLICE_HEADER_SIZE +
    923			    BRC_DATA_SIZE,
    924			    "hva sequence info",
    925			    &ctx->seq_info);
    926	if (ret) {
    927		dev_err(dev,
    928			"%s   failed to allocate sequence info buffer\n",
    929			pctx->name);
    930		goto err_ctx;
    931	}
    932
    933	/* allocate reference frame buffer */
    934	ret = hva_mem_alloc(pctx,
    935			    frame_width * frame_height * 3 / 2,
    936			    "hva reference frame",
    937			    &ctx->ref_frame);
    938	if (ret) {
    939		dev_err(dev, "%s   failed to allocate reference frame buffer\n",
    940			pctx->name);
    941		goto err_seq_info;
    942	}
    943
    944	/* allocate reconstructed frame buffer */
    945	ret = hva_mem_alloc(pctx,
    946			    frame_width * frame_height * 3 / 2,
    947			    "hva reconstructed frame",
    948			    &ctx->rec_frame);
    949	if (ret) {
    950		dev_err(dev,
    951			"%s   failed to allocate reconstructed frame buffer\n",
    952			pctx->name);
    953		goto err_ref_frame;
    954	}
    955
    956	/* allocate task descriptor */
    957	ret = hva_mem_alloc(pctx,
    958			    sizeof(struct hva_h264_task),
    959			    "hva task descriptor",
    960			    &ctx->task);
    961	if (ret) {
    962		dev_err(dev,
    963			"%s   failed to allocate task descriptor\n",
    964			pctx->name);
    965		goto err_rec_frame;
    966	}
    967
    968	pctx->priv = (void *)ctx;
    969
    970	return 0;
    971
    972err_rec_frame:
    973	hva_mem_free(pctx, ctx->rec_frame);
    974err_ref_frame:
    975	hva_mem_free(pctx, ctx->ref_frame);
    976err_seq_info:
    977	hva_mem_free(pctx, ctx->seq_info);
    978err_ctx:
    979	devm_kfree(dev, ctx);
    980err:
    981	pctx->sys_errors++;
    982	return ret;
    983}
    984
    985static int hva_h264_close(struct hva_ctx *pctx)
    986{
    987	struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
    988	struct device *dev = ctx_to_dev(pctx);
    989
    990	if (ctx->seq_info)
    991		hva_mem_free(pctx, ctx->seq_info);
    992
    993	if (ctx->ref_frame)
    994		hva_mem_free(pctx, ctx->ref_frame);
    995
    996	if (ctx->rec_frame)
    997		hva_mem_free(pctx, ctx->rec_frame);
    998
    999	if (ctx->task)
   1000		hva_mem_free(pctx, ctx->task);
   1001
   1002	devm_kfree(dev, ctx);
   1003
   1004	return 0;
   1005}
   1006
   1007static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
   1008			   struct hva_stream *stream)
   1009{
   1010	struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
   1011	struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
   1012	u32 stuffing_bytes = 0;
   1013	int ret = 0;
   1014
   1015	ret = hva_h264_prepare_task(pctx, task, frame, stream);
   1016	if (ret)
   1017		goto err;
   1018
   1019	ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
   1020	if (ret)
   1021		goto err;
   1022
   1023	pctx->stream_num++;
   1024	stream->bytesused += hva_h264_get_stream_size(task);
   1025
   1026	stuffing_bytes = hva_h264_get_stuffing_bytes(task);
   1027
   1028	if (stuffing_bytes)
   1029		hva_h264_fill_data_nal(pctx, stuffing_bytes,
   1030				       (u8 *)stream->vaddr,
   1031				       stream->size,
   1032				       &stream->bytesused);
   1033
   1034	/* switch reference & reconstructed frame */
   1035	swap(ctx->ref_frame, ctx->rec_frame);
   1036
   1037	return 0;
   1038err:
   1039	stream->bytesused = 0;
   1040	return ret;
   1041}
   1042
   1043const struct hva_enc nv12h264enc = {
   1044	.name = "H264(NV12)",
   1045	.pixelformat = V4L2_PIX_FMT_NV12,
   1046	.streamformat = V4L2_PIX_FMT_H264,
   1047	.max_width = H264_MAX_SIZE_W,
   1048	.max_height = H264_MAX_SIZE_H,
   1049	.open = hva_h264_open,
   1050	.close = hva_h264_close,
   1051	.encode = hva_h264_encode,
   1052};
   1053
   1054const struct hva_enc nv21h264enc = {
   1055	.name = "H264(NV21)",
   1056	.pixelformat = V4L2_PIX_FMT_NV21,
   1057	.streamformat = V4L2_PIX_FMT_H264,
   1058	.max_width = H264_MAX_SIZE_W,
   1059	.max_height = H264_MAX_SIZE_H,
   1060	.open = hva_h264_open,
   1061	.close = hva_h264_close,
   1062	.encode = hva_h264_encode,
   1063};