cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dcss-scaler.c (23331B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright 2019 NXP.
      4 *
      5 * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com>
      6 */
      7
      8#include <linux/device.h>
      9#include <linux/slab.h>
     10
     11#include "dcss-dev.h"
     12
/*
 * Scaler register map.
 *
 * The un-indented defines are register offsets; dcss_scaler_write() adds them
 * to the per-channel base offset. The indented defines that follow a register
 * appear to describe bit fields of that register (e.g. SCALER_EN/REPEAT_EN
 * are OR-ed into the value written to DCSS_SCALER_CTRL).
 */
#define DCSS_SCALER_CTRL			0x00
#define   SCALER_EN				BIT(0)
#define   REPEAT_EN				BIT(4)
#define   SCALE2MEM_EN				BIT(8)
#define   MEM2OFIFO_EN				BIT(12)
#define DCSS_SCALER_OFIFO_CTRL			0x04
#define   OFIFO_LOW_THRES_POS			0
#define   OFIFO_LOW_THRES_MASK			GENMASK(9, 0)
#define   OFIFO_HIGH_THRES_POS			16
#define   OFIFO_HIGH_THRES_MASK			GENMASK(25, 16)
#define   UNDERRUN_DETECT_CLR			BIT(26)
#define   LOW_THRES_DETECT_CLR			BIT(27)
#define   HIGH_THRES_DETECT_CLR			BIT(28)
#define   UNDERRUN_DETECT_EN			BIT(29)
#define   LOW_THRES_DETECT_EN			BIT(30)
#define   HIGH_THRES_DETECT_EN			BIT(31)
#define DCSS_SCALER_SDATA_CTRL			0x08
#define   YUV_EN				BIT(0)
#define   RTRAM_8LINES				BIT(1)
#define   Y_UV_BYTE_SWAP			BIT(4)
#define   A2R10G10B10_FORMAT_POS		8
#define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)
#define DCSS_SCALER_BIT_DEPTH			0x0C
#define   LUM_BIT_DEPTH_POS			0
#define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
#define   CHR_BIT_DEPTH_POS			4
#define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)
#define DCSS_SCALER_SRC_FORMAT			0x10
#define DCSS_SCALER_DST_FORMAT			0x14
#define   FORMAT_MASK				GENMASK(1, 0)
/* Resolution registers: each is written as (height - 1, width - 1). */
#define DCSS_SCALER_SRC_LUM_RES			0x18
#define DCSS_SCALER_SRC_CHR_RES			0x1C
#define DCSS_SCALER_DST_LUM_RES			0x20
#define DCSS_SCALER_DST_CHR_RES			0x24
#define   WIDTH_POS				0
#define   WIDTH_MASK				GENMASK(11, 0)
#define   HEIGHT_POS				16
#define   HEIGHT_MASK				GENMASK(27, 16)
/* Start phase / phase increment registers for luma and chroma. */
#define DCSS_SCALER_V_LUM_START			0x48
#define   V_START_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC			0x4C
#define   V_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START			0x50
#define   H_START_MASK				GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC			0x54
#define   H_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_CHR_START			0x58
#define DCSS_SCALER_V_CHR_INC			0x5C
#define DCSS_SCALER_H_CHR_START			0x60
#define DCSS_SCALER_H_CHR_INC			0x64
/* Base offsets of the filter coefficient banks. */
#define DCSS_SCALER_COEF_VLUM			0x80
#define DCSS_SCALER_COEF_HLUM			0x140
#define DCSS_SCALER_COEF_VCHR			0x200
#define DCSS_SCALER_COEF_HCHR			0x300
     67
/* Per-channel scaler state; struct dcss_scaler holds three of these. */
struct dcss_scaler_ch {
	/* ioremap()-ed mapping of this channel's register block */
	void __iomem *base_reg;
	/* channel base offset, added to register offsets for ctxld writes */
	u32 base_ofs;
	/* back-pointer to the owning scaler */
	struct dcss_scaler *scl;

	/* value written to DCSS_SCALER_SDATA_CTRL when the channel is enabled */
	u32 sdata_ctrl;
	/* value written to DCSS_SCALER_CTRL by dcss_scaler_write_sclctrl() */
	u32 scaler_ctrl;

	/* set when scaler_ctrl changed and still has to be written out */
	bool scaler_ctrl_chgd;

	/* chroma start phases saved by dcss_scaler_fractions_set() */
	u32 c_vstart;
	u32 c_hstart;

	/* selects the nearest-neighbor filter instead of the gaussian one */
	bool use_nn_interpolation;
};
     83
/* Scaler instance: context-loader handle plus the three scaler channels. */
struct dcss_scaler {
	/* device used for error reporting */
	struct device *dev;

	/* context loader through which register writes are queued */
	struct dcss_ctxld *ctxld;
	/* context id passed to the context loader on every write */
	u32 ctx_id;

	struct dcss_scaler_ch ch[3];
};
     92
     93/* scaler coefficients generator */
     94#define PSC_FRAC_BITS 30
     95#define PSC_FRAC_SCALE BIT(PSC_FRAC_BITS)
     96#define PSC_BITS_FOR_PHASE 4
     97#define PSC_NUM_PHASES 16
     98#define PSC_STORED_PHASES (PSC_NUM_PHASES / 2 + 1)
     99#define PSC_NUM_TAPS 7
    100#define PSC_NUM_TAPS_RGBA 5
    101#define PSC_COEFF_PRECISION 10
    102#define PSC_PHASE_FRACTION_BITS 13
    103#define PSC_PHASE_MASK (PSC_NUM_PHASES - 1)
    104#define PSC_Q_FRACTION 19
    105#define PSC_Q_ROUND_OFFSET (1 << (PSC_Q_FRACTION - 1))
    106
    107/**
    108 * mult_q() - Performs fixed-point multiplication.
    109 * @A: multiplier
    110 * @B: multiplicand
    111 */
    112static int mult_q(int A, int B)
    113{
    114	int result;
    115	s64 temp;
    116
    117	temp = (int64_t)A * (int64_t)B;
    118	temp += PSC_Q_ROUND_OFFSET;
    119	result = (int)(temp >> PSC_Q_FRACTION);
    120	return result;
    121}
    122
    123/**
    124 * div_q() - Performs fixed-point division.
    125 * @A: dividend
    126 * @B: divisor
    127 */
    128static int div_q(int A, int B)
    129{
    130	int result;
    131	s64 temp;
    132
    133	temp = (int64_t)A << PSC_Q_FRACTION;
    134	if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
    135		temp += B / 2;
    136	else
    137		temp -= B / 2;
    138
    139	result = (int)(temp / B);
    140	return result;
    141}
    142
    143/**
    144 * exp_approx_q() - Compute approximation to exp(x) function using Taylor
    145 *		    series.
    146 * @x: fixed-point argument of exp function
    147 */
    148static int exp_approx_q(int x)
    149{
    150	int sum = 1 << PSC_Q_FRACTION;
    151	int term = 1 << PSC_Q_FRACTION;
    152
    153	term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
    154	sum += term;
    155	term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
    156	sum += term;
    157	term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
    158	sum += term;
    159	term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
    160	sum += term;
    161
    162	return sum;
    163}
    164
/**
 * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
 * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
 * @use_5_taps: indicates whether to use 5 taps or 7 taps
 * @phase0_identity: when true, phase 0 is overwritten with a single unity
 *		     tap at the centre position before normalization
 * @coef: output filter coefficients; PSC_STORED_PHASES rows are written
 *
 * The gaussian samples are generated incrementally, walking outwards from the
 * centre of the filter, and are scattered into @coef by phase and tap index.
 * Each stored phase is then normalized so that its taps sum to
 * 1 << PSC_COEFF_PRECISION.
 */
static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
					bool phase0_identity,
					int coef[][PSC_NUM_TAPS])
{
	int sigma_q, g0_q, g1_q, g2_q;
	int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
	int mid;
	int phase;
	int i;
	int taps;

	/* in 5-tap mode the two outermost taps of every phase are zero */
	if (use_5_taps)
		for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
			coef[phase][0] = 0;
			coef[phase][PSC_NUM_TAPS - 1] = 0;
		}

	/* seed coefficient scanner */
	taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
	/* number of samples generated on each side of the centre */
	mid = (PSC_NUM_PHASES * taps) / 2 - 1;
	/* start position: resolves to phase 0, centre tap */
	phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
	tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
	tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;

	/* seed gaussian filter generator */
	/* sigma = 0.5 / fc (PSC_Q_ROUND_OFFSET is 0.5 in Q format) */
	sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
	g0_q = 1 << PSC_Q_FRACTION;
	/* g1 = exp(-0.5 / sigma^2), g2 = g1^2 */
	g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
				  mult_q(sigma_q, sigma_q)));
	g2_q = mult_q(g1_q, g1_q);
	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;

	for (i = 0; i < mid; i++) {
		phase_cnt++;
		tap_cnt1--;
		tap_cnt2++;

		/*
		 * Advance to the next sample: g0 is multiplied by g1, and g1
		 * by the constant g2, so no further exp() evaluation is needed.
		 */
		g0_q = mult_q(g0_q, g1_q);
		g1_q = mult_q(g1_q, g2_q);

		/* only phases 0..8 are stored; store both mirrored positions */
		if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
			tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
			coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
		}
		if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
			tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
			coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
		}
	}

	/* the position one step past the last generated sample is zeroed */
	phase_cnt++;
	tap_cnt1--;
	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;

	/* override phase 0 with identity filter if specified */
	if (phase0_identity)
		for (i = 0; i < PSC_NUM_TAPS; i++)
			coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
						(1 << PSC_COEFF_PRECISION) : 0;

	/* normalize coef */
	for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
		int sum = 0;
		s64 ll_temp;

		for (i = 0; i < PSC_NUM_TAPS; i++)
			sum += coef[phase][i];
		/*
		 * coef = round(coef * 2^PSC_COEFF_PRECISION / sum)
		 * NOTE(review): relies on sum being non-zero for every phase.
		 */
		for (i = 0; i < PSC_NUM_TAPS; i++) {
			ll_temp = coef[phase][i];
			ll_temp <<= PSC_COEFF_PRECISION;
			ll_temp += sum >> 1;
			ll_temp /= sum;
			coef[phase][i] = (int)ll_temp;
		}
	}
}
    247
    248static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps,
    249						int coef[][PSC_NUM_TAPS])
    250{
    251	int i, j;
    252
    253	for (i = 0; i < PSC_STORED_PHASES; i++)
    254		for (j = 0; j < PSC_NUM_TAPS; j++)
    255			coef[i][j] = j == PSC_NUM_TAPS >> 1 ?
    256						(1 << PSC_COEFF_PRECISION) : 0;
    257}
    258
    259/**
    260 * dcss_scaler_filter_design() - Compute filter coefficients using
    261 *				 Gaussian filter.
    262 * @src_length: length of input
    263 * @dst_length: length of output
    264 * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
    265 * @coef: output coefficients
    266 */
    267static void dcss_scaler_filter_design(int src_length, int dst_length,
    268				      bool use_5_taps, bool phase0_identity,
    269				      int coef[][PSC_NUM_TAPS],
    270				      bool nn_interpolation)
    271{
    272	int fc_q;
    273
    274	/* compute cutoff frequency */
    275	if (dst_length >= src_length)
    276		fc_q = div_q(1, PSC_NUM_PHASES);
    277	else
    278		fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
    279
    280	if (nn_interpolation)
    281		dcss_scaler_nearest_neighbor_filter(use_5_taps, coef);
    282	else
    283		/* compute gaussian filter coefficients */
    284		dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
    285}
    286
    287static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
    288{
    289	struct dcss_scaler *scl = ch->scl;
    290
    291	dcss_ctxld_write(scl->ctxld, scl->ctx_id, val, ch->base_ofs + ofs);
    292}
    293
    294static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
    295				   unsigned long scaler_base)
    296{
    297	struct dcss_scaler_ch *ch;
    298	int i;
    299
    300	for (i = 0; i < 3; i++) {
    301		ch = &scl->ch[i];
    302
    303		ch->base_ofs = scaler_base + i * 0x400;
    304
    305		ch->base_reg = ioremap(ch->base_ofs, SZ_4K);
    306		if (!ch->base_reg) {
    307			dev_err(scl->dev, "scaler: unable to remap ch base\n");
    308			return -ENOMEM;
    309		}
    310
    311		ch->scl = scl;
    312	}
    313
    314	return 0;
    315}
    316
    317int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
    318{
    319	struct dcss_scaler *scaler;
    320
    321	scaler = kzalloc(sizeof(*scaler), GFP_KERNEL);
    322	if (!scaler)
    323		return -ENOMEM;
    324
    325	dcss->scaler = scaler;
    326	scaler->dev = dcss->dev;
    327	scaler->ctxld = dcss->ctxld;
    328	scaler->ctx_id = CTX_SB_HP;
    329
    330	if (dcss_scaler_ch_init_all(scaler, scaler_base)) {
    331		int i;
    332
    333		for (i = 0; i < 3; i++) {
    334			if (scaler->ch[i].base_reg)
    335				iounmap(scaler->ch[i].base_reg);
    336		}
    337
    338		kfree(scaler);
    339
    340		return -ENOMEM;
    341	}
    342
    343	return 0;
    344}
    345
    346void dcss_scaler_exit(struct dcss_scaler *scl)
    347{
    348	int ch_no;
    349
    350	for (ch_no = 0; ch_no < 3; ch_no++) {
    351		struct dcss_scaler_ch *ch = &scl->ch[ch_no];
    352
    353		dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
    354
    355		if (ch->base_reg)
    356			iounmap(ch->base_reg);
    357	}
    358
    359	kfree(scl);
    360}
    361
    362void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
    363{
    364	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
    365	u32 scaler_ctrl;
    366
    367	scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
    368
    369	if (en)
    370		dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
    371
    372	if (ch->scaler_ctrl != scaler_ctrl)
    373		ch->scaler_ctrl_chgd = true;
    374
    375	ch->scaler_ctrl = scaler_ctrl;
    376}
    377
    378static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
    379{
    380	ch->sdata_ctrl &= ~YUV_EN;
    381	ch->sdata_ctrl |= en ? YUV_EN : 0;
    382}
    383
    384static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
    385{
    386	ch->sdata_ctrl &= ~RTRAM_8LINES;
    387	ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
    388}
    389
    390static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
    391{
    392	u32 val;
    393
    394	val = depth == 30 ? 2 : 0;
    395
    396	dcss_scaler_write(ch,
    397			  ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
    398			  ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
    399			  DCSS_SCALER_BIT_DEPTH);
    400}
    401
    402enum buffer_format {
    403	BUF_FMT_YUV420,
    404	BUF_FMT_YUV422,
    405	BUF_FMT_ARGB8888_YUV444,
    406};
    407
    408enum chroma_location {
    409	PSC_LOC_HORZ_0_VERT_1_OVER_4 = 0,
    410	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4 = 1,
    411	PSC_LOC_HORZ_0_VERT_0 = 2,
    412	PSC_LOC_HORZ_1_OVER_4_VERT_0 = 3,
    413	PSC_LOC_HORZ_0_VERT_1_OVER_2 = 4,
    414	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2 = 5
    415};
    416
    417static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
    418				   enum buffer_format src_fmt,
    419				   enum buffer_format dst_fmt)
    420{
    421	dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
    422	dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
    423}
    424
    425static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
    426				int src_xres, int src_yres,
    427				int dst_xres, int dst_yres,
    428				u32 pix_format, enum buffer_format dst_format)
    429{
    430	u32 lsrc_xres, lsrc_yres, csrc_xres, csrc_yres;
    431	u32 ldst_xres, ldst_yres, cdst_xres, cdst_yres;
    432	bool src_is_444 = true;
    433
    434	lsrc_xres = src_xres;
    435	csrc_xres = src_xres;
    436	lsrc_yres = src_yres;
    437	csrc_yres = src_yres;
    438	ldst_xres = dst_xres;
    439	cdst_xres = dst_xres;
    440	ldst_yres = dst_yres;
    441	cdst_yres = dst_yres;
    442
    443	if (pix_format == DRM_FORMAT_UYVY || pix_format == DRM_FORMAT_VYUY ||
    444	    pix_format == DRM_FORMAT_YUYV || pix_format == DRM_FORMAT_YVYU) {
    445		csrc_xres >>= 1;
    446		src_is_444 = false;
    447	} else if (pix_format == DRM_FORMAT_NV12 ||
    448		   pix_format == DRM_FORMAT_NV21) {
    449		csrc_xres >>= 1;
    450		csrc_yres >>= 1;
    451		src_is_444 = false;
    452	}
    453
    454	if (dst_format == BUF_FMT_YUV422)
    455		cdst_xres >>= 1;
    456
    457	/* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
    458	if (src_is_444 && dst_format == BUF_FMT_YUV422) {
    459		lsrc_yres--;
    460		csrc_yres--;
    461	}
    462
    463	dcss_scaler_write(ch, (((lsrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
    464			       (((lsrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
    465			  DCSS_SCALER_SRC_LUM_RES);
    466	dcss_scaler_write(ch, (((csrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
    467			       (((csrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
    468			  DCSS_SCALER_SRC_CHR_RES);
    469	dcss_scaler_write(ch, (((ldst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
    470			       (((ldst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
    471			  DCSS_SCALER_DST_LUM_RES);
    472	dcss_scaler_write(ch, (((cdst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
    473			       (((cdst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
    474			  DCSS_SCALER_DST_CHR_RES);
    475}
    476
    477#define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
    478#define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))
    479
    480struct dcss_scaler_factors {
    481	int downscale;
    482	int upscale;
    483};
    484
    485static const struct dcss_scaler_factors dcss_scaler_factors[] = {
    486	{3, 8}, {5, 8}, {5, 8},
    487};
    488
    489static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
    490				      int src_xres, int src_yres,
    491				      int dst_xres, int dst_yres,
    492				      u32 src_format, u32 dst_format,
    493				      enum chroma_location src_chroma_loc)
    494{
    495	int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres;
    496	u32 l_vinc, l_hinc, c_vinc, c_hinc;
    497	u32 c_vstart, c_hstart;
    498
    499	src_c_xres = src_xres;
    500	src_c_yres = src_yres;
    501	dst_c_xres = dst_xres;
    502	dst_c_yres = dst_yres;
    503
    504	c_vstart = 0;
    505	c_hstart = 0;
    506
    507	/* adjustments for source chroma location */
    508	if (src_format == BUF_FMT_YUV420) {
    509		/* vertical input chroma position adjustment */
    510		switch (src_chroma_loc) {
    511		case PSC_LOC_HORZ_0_VERT_1_OVER_4:
    512		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
    513			/*
    514			 * move chroma up to first luma line
    515			 * (1/4 chroma input line spacing)
    516			 */
    517			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
    518			break;
    519		case PSC_LOC_HORZ_0_VERT_1_OVER_2:
    520		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
    521			/*
    522			 * move chroma up to first luma line
    523			 * (1/2 chroma input line spacing)
    524			 */
    525			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1));
    526			break;
    527		default:
    528			break;
    529		}
    530		/* horizontal input chroma position adjustment */
    531		switch (src_chroma_loc) {
    532		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
    533		case PSC_LOC_HORZ_1_OVER_4_VERT_0:
    534		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
    535			/* move chroma left 1/4 chroma input sample spacing */
    536			c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
    537			break;
    538		default:
    539			break;
    540		}
    541	}
    542
    543	/* adjustments to chroma resolution */
    544	if (src_format == BUF_FMT_YUV420) {
    545		src_c_xres >>= 1;
    546		src_c_yres >>= 1;
    547	} else if (src_format == BUF_FMT_YUV422) {
    548		src_c_xres >>= 1;
    549	}
    550
    551	if (dst_format == BUF_FMT_YUV422)
    552		dst_c_xres >>= 1;
    553
    554	l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres;
    555	c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres;
    556	l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres;
    557	c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres;
    558
    559	/* save chroma start phase */
    560	ch->c_vstart = c_vstart;
    561	ch->c_hstart = c_hstart;
    562
    563	dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
    564	dcss_scaler_write(ch, l_vinc, DCSS_SCALER_V_LUM_INC);
    565
    566	dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
    567	dcss_scaler_write(ch, l_hinc, DCSS_SCALER_H_LUM_INC);
    568
    569	dcss_scaler_write(ch, c_vstart, DCSS_SCALER_V_CHR_START);
    570	dcss_scaler_write(ch, c_vinc, DCSS_SCALER_V_CHR_INC);
    571
    572	dcss_scaler_write(ch, c_hstart, DCSS_SCALER_H_CHR_START);
    573	dcss_scaler_write(ch, c_hinc, DCSS_SCALER_H_CHR_INC);
    574}
    575
    576int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
    577				   int *min, int *max)
    578{
    579	*min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
    580	*max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
    581
    582	return 0;
    583}
    584
    585static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
    586					   int base_addr,
    587					   int coef[][PSC_NUM_TAPS])
    588{
    589	int i, phase;
    590
    591	for (i = 0; i < PSC_STORED_PHASES; i++) {
    592		dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
    593				       (coef[i][2] & 0xfff) << 4  |
    594				       (coef[i][3] & 0xf00) >> 8),
    595				  base_addr + i * sizeof(u32));
    596		dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
    597				       (coef[i][4] & 0xfff) << 8  |
    598				       (coef[i][5] & 0xff0) >> 4),
    599				  base_addr + 0x40 + i * sizeof(u32));
    600		dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
    601				  base_addr + 0x80 + i * sizeof(u32));
    602	}
    603
    604	/* reverse both phase and tap orderings */
    605	for (phase = (PSC_NUM_PHASES >> 1) - 1;
    606			i < PSC_NUM_PHASES; i++, phase--) {
    607		dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
    608				       (coef[phase][4] & 0xfff) << 4  |
    609				       (coef[phase][3] & 0xf00) >> 8),
    610				  base_addr + i * sizeof(u32));
    611		dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
    612				       (coef[phase][2] & 0xfff) << 8  |
    613				       (coef[phase][1] & 0xff0) >> 4),
    614				  base_addr + 0x40 + i * sizeof(u32));
    615		dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
    616				  base_addr + 0x80 + i * sizeof(u32));
    617	}
    618}
    619
    620static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
    621					   int base_addr,
    622					   int coef[][PSC_NUM_TAPS])
    623{
    624	int i, phase;
    625
    626	for (i = 0; i < PSC_STORED_PHASES; i++) {
    627		dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
    628				       (coef[i][1] & 0xfff) << 4  |
    629				       (coef[i][2] & 0xf00) >> 8),
    630				  base_addr + i * sizeof(u32));
    631		dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
    632				       (coef[i][3] & 0xfff) << 8  |
    633				       (coef[i][4] & 0xff0) >> 4),
    634				  base_addr + 0x40 + i * sizeof(u32));
    635		dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
    636				       (coef[i][5] & 0xfff) << 12 |
    637				       (coef[i][6] & 0xfff)),
    638				  base_addr + 0x80 + i * sizeof(u32));
    639	}
    640
    641	/* reverse both phase and tap orderings */
    642	for (phase = (PSC_NUM_PHASES >> 1) - 1;
    643			i < PSC_NUM_PHASES; i++, phase--) {
    644		dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
    645				       (coef[phase][5] & 0xfff) << 4  |
    646				       (coef[phase][4] & 0xf00) >> 8),
    647				  base_addr + i * sizeof(u32));
    648		dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
    649				       (coef[phase][3] & 0xfff) << 8  |
    650				       (coef[phase][2] & 0xff0) >> 4),
    651				  base_addr + 0x40 + i * sizeof(u32));
    652		dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
    653				       (coef[phase][1] & 0xfff) << 12 |
    654				       (coef[phase][0] & 0xfff)),
    655				  base_addr + 0x80 + i * sizeof(u32));
    656	}
    657}
    658
    659static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
    660				     enum buffer_format src_format,
    661				     enum buffer_format dst_format,
    662				     bool use_5_taps,
    663				     int src_xres, int src_yres, int dst_xres,
    664				     int dst_yres)
    665{
    666	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
    667	bool program_5_taps = use_5_taps ||
    668			      (dst_format == BUF_FMT_YUV422 &&
    669			       src_format == BUF_FMT_ARGB8888_YUV444);
    670
    671	/* horizontal luma */
    672	dcss_scaler_filter_design(src_xres, dst_xres, false,
    673				  src_xres == dst_xres, coef,
    674				  ch->use_nn_interpolation);
    675	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
    676
    677	/* vertical luma */
    678	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
    679				  src_yres == dst_yres, coef,
    680				  ch->use_nn_interpolation);
    681
    682	if (program_5_taps)
    683		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
    684	else
    685		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
    686
    687	/* adjust chroma resolution */
    688	if (src_format != BUF_FMT_ARGB8888_YUV444)
    689		src_xres >>= 1;
    690	if (src_format == BUF_FMT_YUV420)
    691		src_yres >>= 1;
    692	if (dst_format != BUF_FMT_ARGB8888_YUV444)
    693		dst_xres >>= 1;
    694	if (dst_format == BUF_FMT_YUV420) /* should not happen */
    695		dst_yres >>= 1;
    696
    697	/* horizontal chroma */
    698	dcss_scaler_filter_design(src_xres, dst_xres, false,
    699				  (src_xres == dst_xres) && (ch->c_hstart == 0),
    700				  coef, ch->use_nn_interpolation);
    701
    702	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
    703
    704	/* vertical chroma */
    705	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
    706				  (src_yres == dst_yres) && (ch->c_vstart == 0),
    707				  coef, ch->use_nn_interpolation);
    708	if (program_5_taps)
    709		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
    710	else
    711		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
    712}
    713
    714static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
    715				     int src_xres, int src_yres, int dst_xres,
    716				     int dst_yres)
    717{
    718	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
    719
    720	/* horizontal RGB */
    721	dcss_scaler_filter_design(src_xres, dst_xres, false,
    722				  src_xres == dst_xres, coef,
    723				  ch->use_nn_interpolation);
    724	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
    725
    726	/* vertical RGB */
    727	dcss_scaler_filter_design(src_yres, dst_yres, false,
    728				  src_yres == dst_yres, coef,
    729				  ch->use_nn_interpolation);
    730	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
    731}
    732
    733static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
    734					const struct drm_format_info *format)
    735{
    736	u32 a2r10g10b10_format;
    737
    738	if (format->is_yuv)
    739		return;
    740
    741	ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
    742
    743	if (format->depth != 30)
    744		return;
    745
    746	switch (format->format) {
    747	case DRM_FORMAT_ARGB2101010:
    748	case DRM_FORMAT_XRGB2101010:
    749		a2r10g10b10_format = 0;
    750		break;
    751
    752	case DRM_FORMAT_ABGR2101010:
    753	case DRM_FORMAT_XBGR2101010:
    754		a2r10g10b10_format = 5;
    755		break;
    756
    757	case DRM_FORMAT_RGBA1010102:
    758	case DRM_FORMAT_RGBX1010102:
    759		a2r10g10b10_format = 6;
    760		break;
    761
    762	case DRM_FORMAT_BGRA1010102:
    763	case DRM_FORMAT_BGRX1010102:
    764		a2r10g10b10_format = 11;
    765		break;
    766
    767	default:
    768		a2r10g10b10_format = 0;
    769		break;
    770	}
    771
    772	ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
    773}
    774
    775void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
    776			    enum drm_scaling_filter scaling_filter)
    777{
    778	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
    779
    780	ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR;
    781}
    782
    783void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
    784		       const struct drm_format_info *format,
    785		       int src_xres, int src_yres, int dst_xres, int dst_yres,
    786		       u32 vrefresh_hz)
    787{
    788	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
    789	unsigned int pixel_depth = 0;
    790	bool rtr_8line_en = false;
    791	bool use_5_taps = false;
    792	enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
    793	enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
    794	u32 pix_format = format->format;
    795
    796	if (format->is_yuv) {
    797		dcss_scaler_yuv_enable(ch, true);
    798
    799		if (pix_format == DRM_FORMAT_NV12 ||
    800		    pix_format == DRM_FORMAT_NV21) {
    801			rtr_8line_en = true;
    802			src_format = BUF_FMT_YUV420;
    803		} else if (pix_format == DRM_FORMAT_UYVY ||
    804			   pix_format == DRM_FORMAT_VYUY ||
    805			   pix_format == DRM_FORMAT_YUYV ||
    806			   pix_format == DRM_FORMAT_YVYU) {
    807			src_format = BUF_FMT_YUV422;
    808		}
    809
    810		use_5_taps = !rtr_8line_en;
    811	} else {
    812		dcss_scaler_yuv_enable(ch, false);
    813
    814		pixel_depth = format->depth;
    815	}
    816
    817	dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
    818				  dst_yres, src_format, dst_format,
    819				  PSC_LOC_HORZ_0_VERT_1_OVER_4);
    820
    821	if (format->is_yuv)
    822		dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
    823					 use_5_taps, src_xres, src_yres,
    824					 dst_xres, dst_yres);
    825	else
    826		dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
    827					 dst_xres, dst_yres);
    828
    829	dcss_scaler_rtr_8lines_enable(ch, rtr_8line_en);
    830	dcss_scaler_bit_depth_set(ch, pixel_depth);
    831	dcss_scaler_set_rgb10_order(ch, format);
    832	dcss_scaler_format_set(ch, src_format, dst_format);
    833	dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
    834			    pix_format, dst_format);
    835}
    836
    837/* This function will be called from interrupt context. */
    838void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
    839{
    840	int chnum;
    841
    842	dcss_ctxld_assert_locked(scl->ctxld);
    843
    844	for (chnum = 0; chnum < 3; chnum++) {
    845		struct dcss_scaler_ch *ch = &scl->ch[chnum];
    846
    847		if (ch->scaler_ctrl_chgd) {
    848			dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
    849						 ch->scaler_ctrl,
    850						 ch->base_ofs +
    851						 DCSS_SCALER_CTRL);
    852			ch->scaler_ctrl_chgd = false;
    853		}
    854	}
    855}