cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

rc_calc_fpu.c (10951B)


      1/*
      2 * Copyright 2021 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 * Authors: AMD
     23 *
     24 */
     25
     26#include "rc_calc_fpu.h"
     27
     28#include "qp_tables.h"
     29#include "amdgpu_dm/dc_fpu.h"
     30
     31#define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
     32
     33#define MODE_SELECT(val444, val422, val420) \
     34	(cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
     35
     36
     37#define TABLE_CASE(mode, bpc, max)   case (table_hash(mode, BPC_##bpc, max)): \
     38	table = qp_table_##mode##_##bpc##bpc_##max; \
     39	table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
     40	break
     41
     42static int median3(int a, int b, int c)
     43{
     44	if (a > b)
     45		swap(a, b);
     46	if (b > c)
     47		swap(b, c);
     48	if (a > b)
     49		swap(b, c);
     50
     51	return b;
     52}
     53
     54static double dsc_roundf(double num)
     55{
     56	if (num < 0.0)
     57		num = num - 0.5;
     58	else
     59		num = num + 0.5;
     60
     61	return (int)(num);
     62}
     63
     64static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
     65		       enum max_min max_min, float bpp)
     66{
     67	int mode = MODE_SELECT(444, 422, 420);
     68	int sel = table_hash(mode, bpc, max_min);
     69	int table_size = 0;
     70	int index;
     71	const struct qp_entry *table = NULL;
     72
     73	// alias enum
     74	enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
     75	switch (sel) {
     76		TABLE_CASE(444,  8, max);
     77		TABLE_CASE(444,  8, min);
     78		TABLE_CASE(444, 10, max);
     79		TABLE_CASE(444, 10, min);
     80		TABLE_CASE(444, 12, max);
     81		TABLE_CASE(444, 12, min);
     82		TABLE_CASE(422,  8, max);
     83		TABLE_CASE(422,  8, min);
     84		TABLE_CASE(422, 10, max);
     85		TABLE_CASE(422, 10, min);
     86		TABLE_CASE(422, 12, max);
     87		TABLE_CASE(422, 12, min);
     88		TABLE_CASE(420,  8, max);
     89		TABLE_CASE(420,  8, min);
     90		TABLE_CASE(420, 10, max);
     91		TABLE_CASE(420, 10, min);
     92		TABLE_CASE(420, 12, max);
     93		TABLE_CASE(420, 12, min);
     94	}
     95
     96	if (!table)
     97		return;
     98
     99	index = (bpp - table[0].bpp) * 2;
    100
    101	/* requested size is bigger than the table */
    102	if (index >= table_size) {
    103		dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
    104		return;
    105	}
    106
    107	memcpy(qps, table[index].qps, sizeof(qp_set));
    108}
    109
    110static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
    111{
    112	int   *p = ofs;
    113
    114	if (mode == CM_444 || mode == CM_RGB) {
    115		*p++ = (bpp <=  6) ? (0) : ((((bpp >=  8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
    116		*p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
    117		*p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
    118		*p++ = (bpp <=  6) ? (-4) : ((((bpp >=  8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
    119		*p++ = (bpp <=  6) ? (-6) : ((((bpp >=  8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
    120		*p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
    121		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
    122		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
    123		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
    124		*p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
    125		*p++ = -10;
    126		*p++ = (bpp <=  6) ? (-12) : ((bpp >=  8) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2 / 2.0))));
    127		*p++ = -12;
    128		*p++ = -12;
    129		*p++ = -12;
    130	} else if (mode == CM_422) {
    131		*p++ = (bpp <=  8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp -  8) * (8 / 2.0))));
    132		*p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp -  8) * (8 / 2.0))));
    133		*p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    134		*p++ = (bpp <=  8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    135		*p++ = (bpp <=  8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    136		*p++ = (bpp <=  8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    137		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp -  8) * (6 / 2.0))));
    138		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp -  8) * (4 / 2.0))));
    139		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp -  8) * (2 / 2.0))));
    140		*p++ = (bpp <=  8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp -  8) * (2 / 2.0))));
    141		*p++ = -10;
    142		*p++ = (bpp <=  6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2.0 / 1))));
    143		*p++ = -12;
    144		*p++ = -12;
    145		*p++ = -12;
    146	} else {
    147		*p++ = (bpp <=  6) ? (2) : ((bpp >=  8) ? (10) : (2 + dsc_roundf((bpp -  6) * (8 / 2.0))));
    148		*p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (8) : (0 + dsc_roundf((bpp -  6) * (8 / 2.0))));
    149		*p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (6) : (0 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    150		*p++ = (bpp <=  6) ? (-2) : ((bpp >=  8) ? (4) : (-2 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    151		*p++ = (bpp <=  6) ? (-4) : ((bpp >=  8) ? (2) : (-4 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    152		*p++ = (bpp <=  6) ? (-6) : ((bpp >=  8) ? (0) : (-6 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    153		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-2) : (-8 + dsc_roundf((bpp -  6) * (6 / 2.0))));
    154		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-4) : (-8 + dsc_roundf((bpp -  6) * (4 / 2.0))));
    155		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-6) : (-8 + dsc_roundf((bpp -  6) * (2 / 2.0))));
    156		*p++ = (bpp <=  6) ? (-10) : ((bpp >=  8) ? (-8) : (-10 + dsc_roundf((bpp -  6) * (2 / 2.0))));
    157		*p++ = -10;
    158		*p++ = (bpp <=  4) ? (-12) : ((bpp >=  5) ? (-10) : (-12 + dsc_roundf((bpp -  4) * (2 / 1.0))));
    159		*p++ = -12;
    160		*p++ = -12;
    161		*p++ = -12;
    162	}
    163}
    164
    165void _do_calc_rc_params(struct rc_params *rc,
    166		enum colour_mode cm,
    167		enum bits_per_comp bpc,
    168		u16 drm_bpp,
    169		bool is_navite_422_or_420,
    170		int slice_width,
    171		int slice_height,
    172		int minor_version)
    173{
    174	float bpp;
    175	float bpp_group;
    176	float initial_xmit_delay_factor;
    177	int padding_pixels;
    178	int i;
    179
    180	dc_assert_fp_enabled();
    181
    182	bpp = ((float)drm_bpp / 16.0);
    183	/* in native_422 or native_420 modes, the bits_per_pixel is double the
    184	 * target bpp (the latter is what calc_rc_params expects)
    185	 */
    186	if (is_navite_422_or_420)
    187		bpp /= 2.0;
    188
    189	rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    190	rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    191
    192	bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);
    193
    194	switch (cm) {
    195	case CM_420:
    196		rc->initial_fullness_offset = (bpp >=  6) ? (2048) : ((bpp <=  4) ? (6144) : ((((bpp >  4) && (bpp <=  5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp -  5) * (3584)))));
    197		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
    198		rc->second_line_bpg_offset  = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
    199		break;
    200	case CM_422:
    201		rc->initial_fullness_offset = (bpp >=  8) ? (2048) : ((bpp <=  7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
    202		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
    203		rc->second_line_bpg_offset  = 0;
    204		break;
    205	case CM_444:
    206	case CM_RGB:
    207		rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <=  8) ? (6144) : ((((bpp >  8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
    208		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
    209		rc->second_line_bpg_offset  = 0;
    210		break;
    211	}
    212
    213	initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
    214	rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);
    215
    216	if (cm == CM_422 || cm == CM_420)
    217		slice_width /= 2;
    218
    219	padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
    220	if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
    221		if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
    222			rc->initial_xmit_delay++;
    223	}
    224
    225	rc->flatness_min_qp     = ((bpc == BPC_8) ?  (3) : ((bpc == BPC_10) ? (7)  : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    226	rc->flatness_max_qp     = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
    227	rc->flatness_det_thresh = 2 << (bpc - 8);
    228
    229	get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
    230	get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
    231	if (cm == CM_444 && minor_version == 1) {
    232		for (i = 0; i < QP_SET_SIZE; ++i) {
    233			rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
    234			rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
    235		}
    236	}
    237	get_ofs_set(rc->ofs, cm, bpp);
    238
    239	/* fixed parameters */
    240	rc->rc_model_size    = 8192;
    241	rc->rc_edge_factor   = 6;
    242	rc->rc_tgt_offset_hi = 3;
    243	rc->rc_tgt_offset_lo = 3;
    244
    245	rc->rc_buf_thresh[0] = 896;
    246	rc->rc_buf_thresh[1] = 1792;
    247	rc->rc_buf_thresh[2] = 2688;
    248	rc->rc_buf_thresh[3] = 3584;
    249	rc->rc_buf_thresh[4] = 4480;
    250	rc->rc_buf_thresh[5] = 5376;
    251	rc->rc_buf_thresh[6] = 6272;
    252	rc->rc_buf_thresh[7] = 6720;
    253	rc->rc_buf_thresh[8] = 7168;
    254	rc->rc_buf_thresh[9] = 7616;
    255	rc->rc_buf_thresh[10] = 7744;
    256	rc->rc_buf_thresh[11] = 7872;
    257	rc->rc_buf_thresh[12] = 8000;
    258	rc->rc_buf_thresh[13] = 8064;
    259}
    260