cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

vc4_plane.c (46946B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2015 Broadcom
      4 */
      5
      6/**
      7 * DOC: VC4 plane module
      8 *
      9 * Each DRM plane is a layer of pixels being scanned out by the HVS.
     10 *
     11 * At atomic modeset check time, we compute the HVS display element
     12 * state that would be necessary for displaying the plane (giving us a
     13 * chance to figure out if a plane configuration is invalid), then at
     14 * atomic flush time the CRTC will ask us to write our element state
     15 * into the region of the HVS that it has allocated for us.
     16 */
     17
     18#include <drm/drm_atomic.h>
     19#include <drm/drm_atomic_helper.h>
     20#include <drm/drm_atomic_uapi.h>
     21#include <drm/drm_fb_cma_helper.h>
     22#include <drm/drm_fourcc.h>
     23#include <drm/drm_gem_atomic_helper.h>
     24#include <drm/drm_plane_helper.h>
     25
     26#include "uapi/drm/vc4_drm.h"
     27
     28#include "vc4_drv.h"
     29#include "vc4_regs.h"
     30
     31static const struct hvs_format {
     32	u32 drm; /* DRM_FORMAT_* */
     33	u32 hvs; /* HVS_FORMAT_* */
     34	u32 pixel_order;
     35	u32 pixel_order_hvs5;
     36	bool hvs5_only;
     37} hvs_formats[] = {
     38	{
     39		.drm = DRM_FORMAT_XRGB8888,
     40		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
     41		.pixel_order = HVS_PIXEL_ORDER_ABGR,
     42		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
     43	},
     44	{
     45		.drm = DRM_FORMAT_ARGB8888,
     46		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
     47		.pixel_order = HVS_PIXEL_ORDER_ABGR,
     48		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
     49	},
     50	{
     51		.drm = DRM_FORMAT_ABGR8888,
     52		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
     53		.pixel_order = HVS_PIXEL_ORDER_ARGB,
     54		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
     55	},
     56	{
     57		.drm = DRM_FORMAT_XBGR8888,
     58		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
     59		.pixel_order = HVS_PIXEL_ORDER_ARGB,
     60		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
     61	},
     62	{
     63		.drm = DRM_FORMAT_RGB565,
     64		.hvs = HVS_PIXEL_FORMAT_RGB565,
     65		.pixel_order = HVS_PIXEL_ORDER_XRGB,
     66	},
     67	{
     68		.drm = DRM_FORMAT_BGR565,
     69		.hvs = HVS_PIXEL_FORMAT_RGB565,
     70		.pixel_order = HVS_PIXEL_ORDER_XBGR,
     71	},
     72	{
     73		.drm = DRM_FORMAT_ARGB1555,
     74		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
     75		.pixel_order = HVS_PIXEL_ORDER_ABGR,
     76	},
     77	{
     78		.drm = DRM_FORMAT_XRGB1555,
     79		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
     80		.pixel_order = HVS_PIXEL_ORDER_ABGR,
     81	},
     82	{
     83		.drm = DRM_FORMAT_RGB888,
     84		.hvs = HVS_PIXEL_FORMAT_RGB888,
     85		.pixel_order = HVS_PIXEL_ORDER_XRGB,
     86	},
     87	{
     88		.drm = DRM_FORMAT_BGR888,
     89		.hvs = HVS_PIXEL_FORMAT_RGB888,
     90		.pixel_order = HVS_PIXEL_ORDER_XBGR,
     91	},
     92	{
     93		.drm = DRM_FORMAT_YUV422,
     94		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
     95		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
     96	},
     97	{
     98		.drm = DRM_FORMAT_YVU422,
     99		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
    100		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
    101	},
    102	{
    103		.drm = DRM_FORMAT_YUV420,
    104		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
    105		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
    106	},
    107	{
    108		.drm = DRM_FORMAT_YVU420,
    109		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
    110		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
    111	},
    112	{
    113		.drm = DRM_FORMAT_NV12,
    114		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
    115		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
    116	},
    117	{
    118		.drm = DRM_FORMAT_NV21,
    119		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
    120		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
    121	},
    122	{
    123		.drm = DRM_FORMAT_NV16,
    124		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
    125		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
    126	},
    127	{
    128		.drm = DRM_FORMAT_NV61,
    129		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
    130		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
    131	},
    132	{
    133		.drm = DRM_FORMAT_P030,
    134		.hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT,
    135		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
    136		.hvs5_only = true,
    137	},
    138};
    139
    140static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
    141{
    142	unsigned i;
    143
    144	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
    145		if (hvs_formats[i].drm == drm_format)
    146			return &hvs_formats[i];
    147	}
    148
    149	return NULL;
    150}
    151
    152static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
    153{
    154	if (dst == src)
    155		return VC4_SCALING_NONE;
    156	if (3 * dst >= 2 * src)
    157		return VC4_SCALING_PPF;
    158	else
    159		return VC4_SCALING_TPZ;
    160}
    161
    162static bool plane_enabled(struct drm_plane_state *state)
    163{
    164	return state->fb && !WARN_ON(!state->crtc);
    165}
    166
    167static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
    168{
    169	struct vc4_plane_state *vc4_state;
    170
    171	if (WARN_ON(!plane->state))
    172		return NULL;
    173
    174	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
    175	if (!vc4_state)
    176		return NULL;
    177
    178	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
    179	vc4_state->dlist_initialized = 0;
    180
    181	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
    182
    183	if (vc4_state->dlist) {
    184		vc4_state->dlist = kmemdup(vc4_state->dlist,
    185					   vc4_state->dlist_count * 4,
    186					   GFP_KERNEL);
    187		if (!vc4_state->dlist) {
    188			kfree(vc4_state);
    189			return NULL;
    190		}
    191		vc4_state->dlist_size = vc4_state->dlist_count;
    192	}
    193
    194	return &vc4_state->base;
    195}
    196
    197static void vc4_plane_destroy_state(struct drm_plane *plane,
    198				    struct drm_plane_state *state)
    199{
    200	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
    201	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    202
    203	if (drm_mm_node_allocated(&vc4_state->lbm)) {
    204		unsigned long irqflags;
    205
    206		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
    207		drm_mm_remove_node(&vc4_state->lbm);
    208		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
    209	}
    210
    211	kfree(vc4_state->dlist);
    212	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
    213	kfree(state);
    214}
    215
    216/* Called during init to allocate the plane's atomic state. */
    217static void vc4_plane_reset(struct drm_plane *plane)
    218{
    219	struct vc4_plane_state *vc4_state;
    220
    221	WARN_ON(plane->state);
    222
    223	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
    224	if (!vc4_state)
    225		return;
    226
    227	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
    228}
    229
    230static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
    231{
    232	if (vc4_state->dlist_count == vc4_state->dlist_size) {
    233		u32 new_size = max(4u, vc4_state->dlist_count * 2);
    234		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
    235
    236		if (!new_dlist)
    237			return;
    238		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
    239
    240		kfree(vc4_state->dlist);
    241		vc4_state->dlist = new_dlist;
    242		vc4_state->dlist_size = new_size;
    243	}
    244
    245	vc4_state->dlist_count++;
    246}
    247
    248static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
    249{
    250	unsigned int idx = vc4_state->dlist_count;
    251
    252	vc4_dlist_counter_increment(vc4_state);
    253	vc4_state->dlist[idx] = val;
    254}
    255
    256/* Returns the scl0/scl1 field based on whether the dimensions need to
    257 * be up/down/non-scaled.
    258 *
    259 * This is a replication of a table from the spec.
    260 */
    261static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
    262{
    263	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    264
    265	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
    266	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
    267		return SCALER_CTL0_SCL_H_PPF_V_PPF;
    268	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
    269		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
    270	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
    271		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
    272	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
    273		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
    274	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
    275		return SCALER_CTL0_SCL_H_PPF_V_NONE;
    276	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
    277		return SCALER_CTL0_SCL_H_NONE_V_PPF;
    278	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
    279		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
    280	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
    281		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
    282	default:
    283	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
    284		/* The unity case is independently handled by
    285		 * SCALER_CTL0_UNITY.
    286		 */
    287		return 0;
    288	}
    289}
    290
    291static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
    292{
    293	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
    294	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
    295	struct drm_crtc_state *crtc_state;
    296
    297	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
    298						   pstate->crtc);
    299
    300	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
    301	if (!left && !right && !top && !bottom)
    302		return 0;
    303
    304	if (left + right >= crtc_state->mode.hdisplay ||
    305	    top + bottom >= crtc_state->mode.vdisplay)
    306		return -EINVAL;
    307
    308	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
    309	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
    310					       adjhdisplay,
    311					       crtc_state->mode.hdisplay);
    312	vc4_pstate->crtc_x += left;
    313	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left)
    314		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left;
    315
    316	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
    317	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
    318					       adjvdisplay,
    319					       crtc_state->mode.vdisplay);
    320	vc4_pstate->crtc_y += top;
    321	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top)
    322		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top;
    323
    324	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
    325					       adjhdisplay,
    326					       crtc_state->mode.hdisplay);
    327	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
    328					       adjvdisplay,
    329					       crtc_state->mode.vdisplay);
    330
    331	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
    332		return -EINVAL;
    333
    334	return 0;
    335}
    336
    337static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
    338{
    339	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    340	struct drm_framebuffer *fb = state->fb;
    341	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
    342	u32 subpixel_src_mask = (1 << 16) - 1;
    343	int num_planes = fb->format->num_planes;
    344	struct drm_crtc_state *crtc_state;
    345	u32 h_subsample = fb->format->hsub;
    346	u32 v_subsample = fb->format->vsub;
    347	int i, ret;
    348
    349	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
    350							state->crtc);
    351	if (!crtc_state) {
    352		DRM_DEBUG_KMS("Invalid crtc state\n");
    353		return -EINVAL;
    354	}
    355
    356	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
    357						  INT_MAX, true, true);
    358	if (ret)
    359		return ret;
    360
    361	for (i = 0; i < num_planes; i++)
    362		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
    363
    364	/* We don't support subpixel source positioning for scaling. */
    365	if ((state->src.x1 & subpixel_src_mask) ||
    366	    (state->src.x2 & subpixel_src_mask) ||
    367	    (state->src.y1 & subpixel_src_mask) ||
    368	    (state->src.y2 & subpixel_src_mask)) {
    369		return -EINVAL;
    370	}
    371
    372	vc4_state->src_x = state->src.x1 >> 16;
    373	vc4_state->src_y = state->src.y1 >> 16;
    374	vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16;
    375	vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16;
    376
    377	vc4_state->crtc_x = state->dst.x1;
    378	vc4_state->crtc_y = state->dst.y1;
    379	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
    380	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
    381
    382	ret = vc4_plane_margins_adj(state);
    383	if (ret)
    384		return ret;
    385
    386	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
    387						       vc4_state->crtc_w);
    388	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
    389						       vc4_state->crtc_h);
    390
    391	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
    392			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);
    393
    394	if (num_planes > 1) {
    395		vc4_state->is_yuv = true;
    396
    397		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
    398		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
    399
    400		vc4_state->x_scaling[1] =
    401			vc4_get_scaling_mode(vc4_state->src_w[1],
    402					     vc4_state->crtc_w);
    403		vc4_state->y_scaling[1] =
    404			vc4_get_scaling_mode(vc4_state->src_h[1],
    405					     vc4_state->crtc_h);
    406
    407		/* YUV conversion requires that horizontal scaling be enabled
    408		 * on the UV plane even if vc4_get_scaling_mode() returned
    409		 * VC4_SCALING_NONE (which can happen when the down-scaling
    410		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
    411		 * case.
    412		 */
    413		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
    414			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
    415	} else {
    416		vc4_state->is_yuv = false;
    417		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
    418		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
    419	}
    420
    421	return 0;
    422}
    423
    424static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
    425{
    426	u32 scale, recip;
    427
    428	scale = (1 << 16) * src / dst;
    429
    430	/* The specs note that while the reciprocal would be defined
    431	 * as (1<<32)/scale, ~0 is close enough.
    432	 */
    433	recip = ~0 / scale;
    434
    435	vc4_dlist_write(vc4_state,
    436			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
    437			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
    438	vc4_dlist_write(vc4_state,
    439			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
    440}
    441
    442static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
    443{
    444	u32 scale = (1 << 16) * src / dst;
    445
    446	vc4_dlist_write(vc4_state,
    447			SCALER_PPF_AGC |
    448			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
    449			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
    450}
    451
    452static u32 vc4_lbm_size(struct drm_plane_state *state)
    453{
    454	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    455	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
    456	u32 pix_per_line;
    457	u32 lbm;
    458
    459	/* LBM is not needed when there's no vertical scaling. */
    460	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
    461	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
    462		return 0;
    463
    464	/*
    465	 * This can be further optimized in the RGB/YUV444 case if the PPF
    466	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
    467	 *
    468	 * It's not an issue though, since in that case since src_w[0] is going
    469	 * to be greater than or equal to crtc_w.
    470	 */
    471	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
    472		pix_per_line = vc4_state->crtc_w;
    473	else
    474		pix_per_line = vc4_state->src_w[0];
    475
    476	if (!vc4_state->is_yuv) {
    477		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
    478			lbm = pix_per_line * 8;
    479		else {
    480			/* In special cases, this multiplier might be 12. */
    481			lbm = pix_per_line * 16;
    482		}
    483	} else {
    484		/* There are cases for this going down to a multiplier
    485		 * of 2, but according to the firmware source, the
    486		 * table in the docs is somewhat wrong.
    487		 */
    488		lbm = pix_per_line * 16;
    489	}
    490
    491	/* Align it to 64 or 128 (hvs5) bytes */
    492	lbm = roundup(lbm, vc4->is_vc5 ? 128 : 64);
    493
    494	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
    495	lbm /= vc4->is_vc5 ? 4 : 2;
    496
    497	return lbm;
    498}
    499
    500static void vc4_write_scaling_parameters(struct drm_plane_state *state,
    501					 int channel)
    502{
    503	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    504
    505	/* Ch0 H-PPF Word 0: Scaling Parameters */
    506	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
    507		vc4_write_ppf(vc4_state,
    508			      vc4_state->src_w[channel], vc4_state->crtc_w);
    509	}
    510
    511	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
    512	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
    513		vc4_write_ppf(vc4_state,
    514			      vc4_state->src_h[channel], vc4_state->crtc_h);
    515		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
    516	}
    517
    518	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
    519	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
    520		vc4_write_tpz(vc4_state,
    521			      vc4_state->src_w[channel], vc4_state->crtc_w);
    522	}
    523
    524	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
    525	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
    526		vc4_write_tpz(vc4_state,
    527			      vc4_state->src_h[channel], vc4_state->crtc_h);
    528		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
    529	}
    530}
    531
    532static void vc4_plane_calc_load(struct drm_plane_state *state)
    533{
    534	unsigned int hvs_load_shift, vrefresh, i;
    535	struct drm_framebuffer *fb = state->fb;
    536	struct vc4_plane_state *vc4_state;
    537	struct drm_crtc_state *crtc_state;
    538	unsigned int vscale_factor;
    539
    540	vc4_state = to_vc4_plane_state(state);
    541	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
    542							state->crtc);
    543	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
    544
    545	/* The HVS is able to process 2 pixels/cycle when scaling the source,
    546	 * 4 pixels/cycle otherwise.
    547	 * Alpha blending step seems to be pipelined and it's always operating
    548	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
    549	 * scaler block.
    550	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
    551	 */
    552	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
    553	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
    554	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
    555	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
    556		hvs_load_shift = 1;
    557	else
    558		hvs_load_shift = 2;
    559
    560	vc4_state->membus_load = 0;
    561	vc4_state->hvs_load = 0;
    562	for (i = 0; i < fb->format->num_planes; i++) {
    563		/* Even if the bandwidth/plane required for a single frame is
    564		 *
    565		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
    566		 *
    567		 * when downscaling, we have to read more pixels per line in
    568		 * the time frame reserved for a single line, so the bandwidth
    569		 * demand can be punctually higher. To account for that, we
    570		 * calculate the down-scaling factor and multiply the plane
    571		 * load by this number. We're likely over-estimating the read
    572		 * demand, but that's better than under-estimating it.
    573		 */
    574		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
    575					     vc4_state->crtc_h);
    576		vc4_state->membus_load += vc4_state->src_w[i] *
    577					  vc4_state->src_h[i] * vscale_factor *
    578					  fb->format->cpp[i];
    579		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
    580	}
    581
    582	vc4_state->hvs_load *= vrefresh;
    583	vc4_state->hvs_load >>= hvs_load_shift;
    584	vc4_state->membus_load *= vrefresh;
    585}
    586
    587static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
    588{
    589	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
    590	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    591	unsigned long irqflags;
    592	u32 lbm_size;
    593
    594	lbm_size = vc4_lbm_size(state);
    595	if (!lbm_size)
    596		return 0;
    597
    598	if (WARN_ON(!vc4_state->lbm_offset))
    599		return -EINVAL;
    600
    601	/* Allocate the LBM memory that the HVS will use for temporary
    602	 * storage due to our scaling/format conversion.
    603	 */
    604	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
    605		int ret;
    606
    607		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
    608		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
    609						 &vc4_state->lbm,
    610						 lbm_size,
    611						 vc4->is_vc5 ? 64 : 32,
    612						 0, 0);
    613		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
    614
    615		if (ret)
    616			return ret;
    617	} else {
    618		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
    619	}
    620
    621	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
    622
    623	return 0;
    624}
    625
    626/*
    627 * The colorspace conversion matrices are held in 3 entries in the dlist.
    628 * Create an array of them, with entries for each full and limited mode, and
    629 * each supported colorspace.
    630 */
    631static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
    632	{
    633		/* Limited range */
    634		{
    635			/* BT601 */
    636			SCALER_CSC0_ITR_R_601_5,
    637			SCALER_CSC1_ITR_R_601_5,
    638			SCALER_CSC2_ITR_R_601_5,
    639		}, {
    640			/* BT709 */
    641			SCALER_CSC0_ITR_R_709_3,
    642			SCALER_CSC1_ITR_R_709_3,
    643			SCALER_CSC2_ITR_R_709_3,
    644		}, {
    645			/* BT2020 */
    646			SCALER_CSC0_ITR_R_2020,
    647			SCALER_CSC1_ITR_R_2020,
    648			SCALER_CSC2_ITR_R_2020,
    649		}
    650	}, {
    651		/* Full range */
    652		{
    653			/* JFIF */
    654			SCALER_CSC0_JPEG_JFIF,
    655			SCALER_CSC1_JPEG_JFIF,
    656			SCALER_CSC2_JPEG_JFIF,
    657		}, {
    658			/* BT709 */
    659			SCALER_CSC0_ITR_R_709_3_FR,
    660			SCALER_CSC1_ITR_R_709_3_FR,
    661			SCALER_CSC2_ITR_R_709_3_FR,
    662		}, {
    663			/* BT2020 */
    664			SCALER_CSC0_ITR_R_2020_FR,
    665			SCALER_CSC1_ITR_R_2020_FR,
    666			SCALER_CSC2_ITR_R_2020_FR,
    667		}
    668	}
    669};
    670
    671/* Writes out a full display list for an active plane to the plane's
    672 * private dlist state.
    673 */
    674static int vc4_plane_mode_set(struct drm_plane *plane,
    675			      struct drm_plane_state *state)
    676{
    677	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
    678	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
    679	struct drm_framebuffer *fb = state->fb;
    680	u32 ctl0_offset = vc4_state->dlist_count;
    681	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
    682	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
    683	int num_planes = fb->format->num_planes;
    684	u32 h_subsample = fb->format->hsub;
    685	u32 v_subsample = fb->format->vsub;
    686	bool mix_plane_alpha;
    687	bool covers_screen;
    688	u32 scl0, scl1, pitch0;
    689	u32 tiling, src_y;
    690	u32 hvs_format = format->hvs;
    691	unsigned int rotation;
    692	int ret, i;
    693
    694	if (vc4_state->dlist_initialized)
    695		return 0;
    696
    697	ret = vc4_plane_setup_clipping_and_scaling(state);
    698	if (ret)
    699		return ret;
    700
    701	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
    702	 * and 4:4:4, scl1 should be set to scl0 so both channels of
    703	 * the scaler do the same thing.  For YUV, the Y plane needs
    704	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
    705	 * the scl fields here.
    706	 */
    707	if (num_planes == 1) {
    708		scl0 = vc4_get_scl_field(state, 0);
    709		scl1 = scl0;
    710	} else {
    711		scl0 = vc4_get_scl_field(state, 1);
    712		scl1 = vc4_get_scl_field(state, 0);
    713	}
    714
    715	rotation = drm_rotation_simplify(state->rotation,
    716					 DRM_MODE_ROTATE_0 |
    717					 DRM_MODE_REFLECT_X |
    718					 DRM_MODE_REFLECT_Y);
    719
    720	/* We must point to the last line when Y reflection is enabled. */
    721	src_y = vc4_state->src_y;
    722	if (rotation & DRM_MODE_REFLECT_Y)
    723		src_y += vc4_state->src_h[0] - 1;
    724
    725	switch (base_format_mod) {
    726	case DRM_FORMAT_MOD_LINEAR:
    727		tiling = SCALER_CTL0_TILING_LINEAR;
    728		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
    729
    730		/* Adjust the base pointer to the first pixel to be scanned
    731		 * out.
    732		 */
    733		for (i = 0; i < num_planes; i++) {
    734			vc4_state->offsets[i] += src_y /
    735						 (i ? v_subsample : 1) *
    736						 fb->pitches[i];
    737
    738			vc4_state->offsets[i] += vc4_state->src_x /
    739						 (i ? h_subsample : 1) *
    740						 fb->format->cpp[i];
    741		}
    742
    743		break;
    744
    745	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
    746		u32 tile_size_shift = 12; /* T tiles are 4kb */
    747		/* Whole-tile offsets, mostly for setting the pitch. */
    748		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
    749		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
    750		u32 tile_w_mask = (1 << tile_w_shift) - 1;
    751		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
    752		 * the height (in pixels) of a 4k tile.
    753		 */
    754		u32 tile_h_mask = (2 << tile_h_shift) - 1;
    755		/* For T-tiled, the FB pitch is "how many bytes from one row to
    756		 * the next, such that
    757		 *
    758		 *	pitch * tile_h == tile_size * tiles_per_row
    759		 */
    760		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
    761		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
    762		u32 tiles_r = tiles_w - tiles_l;
    763		u32 tiles_t = src_y >> tile_h_shift;
    764		/* Intra-tile offsets, which modify the base address (the
    765		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
    766		 * base address).
    767		 */
    768		u32 tile_y = (src_y >> 4) & 1;
    769		u32 subtile_y = (src_y >> 2) & 3;
    770		u32 utile_y = src_y & 3;
    771		u32 x_off = vc4_state->src_x & tile_w_mask;
    772		u32 y_off = src_y & tile_h_mask;
    773
    774		/* When Y reflection is requested we must set the
    775		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
    776		 * after the initial one should be fetched in descending order,
    777		 * which makes sense since we start from the last line and go
    778		 * backward.
    779		 * Don't know why we need y_off = max_y_off - y_off, but it's
    780		 * definitely required (I guess it's also related to the "going
    781		 * backward" situation).
    782		 */
    783		if (rotation & DRM_MODE_REFLECT_Y) {
    784			y_off = tile_h_mask - y_off;
    785			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
    786		} else {
    787			pitch0 = 0;
    788		}
    789
    790		tiling = SCALER_CTL0_TILING_256B_OR_T;
    791		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
    792			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
    793			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
    794			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
    795		vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
    796		vc4_state->offsets[0] += subtile_y << 8;
    797		vc4_state->offsets[0] += utile_y << 4;
    798
    799		/* Rows of tiles alternate left-to-right and right-to-left. */
    800		if (tiles_t & 1) {
    801			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
    802			vc4_state->offsets[0] += (tiles_w - tiles_l) <<
    803						 tile_size_shift;
    804			vc4_state->offsets[0] -= (1 + !tile_y) << 10;
    805		} else {
    806			vc4_state->offsets[0] += tiles_l << tile_size_shift;
    807			vc4_state->offsets[0] += tile_y << 10;
    808		}
    809
    810		break;
    811	}
    812
    813	case DRM_FORMAT_MOD_BROADCOM_SAND64:
    814	case DRM_FORMAT_MOD_BROADCOM_SAND128:
    815	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
    816		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
    817
    818		if (param > SCALER_TILE_HEIGHT_MASK) {
    819			DRM_DEBUG_KMS("SAND height too large (%d)\n",
    820				      param);
    821			return -EINVAL;
    822		}
    823
    824		if (fb->format->format == DRM_FORMAT_P030) {
    825			hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
    826			tiling = SCALER_CTL0_TILING_128B;
    827		} else {
    828			hvs_format = HVS_PIXEL_FORMAT_H264;
    829
    830			switch (base_format_mod) {
    831			case DRM_FORMAT_MOD_BROADCOM_SAND64:
    832				tiling = SCALER_CTL0_TILING_64B;
    833				break;
    834			case DRM_FORMAT_MOD_BROADCOM_SAND128:
    835				tiling = SCALER_CTL0_TILING_128B;
    836				break;
    837			case DRM_FORMAT_MOD_BROADCOM_SAND256:
    838				tiling = SCALER_CTL0_TILING_256B_OR_T;
    839				break;
    840			default:
    841				return -EINVAL;
    842			}
    843		}
    844
    845		/* Adjust the base pointer to the first pixel to be scanned
    846		 * out.
    847		 *
    848		 * For P030, y_ptr [31:4] is the 128bit word for the start pixel
    849		 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
    850		 * word that should be taken as the first pixel.
    851		 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
    852		 * element within the 128bit word, eg for pixel 3 the value
    853		 * should be 6.
    854		 */
    855		for (i = 0; i < num_planes; i++) {
    856			u32 tile_w, tile, x_off, pix_per_tile;
    857
    858			if (fb->format->format == DRM_FORMAT_P030) {
    859				/*
    860				 * Spec says: bits [31:4] of the given address
    861				 * should point to the 128-bit word containing
    862				 * the desired starting pixel, and bits[3:0]
    863				 * should be between 0 and 11, indicating which
    864				 * of the 12-pixels in that 128-bit word is the
    865				 * first pixel to be used
    866				 */
    867				u32 remaining_pixels = vc4_state->src_x % 96;
    868				u32 aligned = remaining_pixels / 12;
    869				u32 last_bits = remaining_pixels % 12;
    870
    871				x_off = aligned * 16 + last_bits;
    872				tile_w = 128;
    873				pix_per_tile = 96;
    874			} else {
    875				switch (base_format_mod) {
    876				case DRM_FORMAT_MOD_BROADCOM_SAND64:
    877					tile_w = 64;
    878					break;
    879				case DRM_FORMAT_MOD_BROADCOM_SAND128:
    880					tile_w = 128;
    881					break;
    882				case DRM_FORMAT_MOD_BROADCOM_SAND256:
    883					tile_w = 256;
    884					break;
    885				default:
    886					return -EINVAL;
    887				}
    888				pix_per_tile = tile_w / fb->format->cpp[0];
    889				x_off = (vc4_state->src_x % pix_per_tile) /
    890					(i ? h_subsample : 1) *
    891					fb->format->cpp[i];
    892			}
    893
    894			tile = vc4_state->src_x / pix_per_tile;
    895
    896			vc4_state->offsets[i] += param * tile_w * tile;
    897			vc4_state->offsets[i] += src_y /
    898						 (i ? v_subsample : 1) *
    899						 tile_w;
    900			vc4_state->offsets[i] += x_off & ~(i ? 1 : 0);
    901		}
    902
    903		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
    904		break;
    905	}
    906
    907	default:
    908		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
    909			      (long long)fb->modifier);
    910		return -EINVAL;
    911	}
    912
    913	/* Don't waste cycles mixing with plane alpha if the set alpha
    914	 * is opaque or there is no per-pixel alpha information.
    915	 * In any case we use the alpha property value as the fixed alpha.
    916	 */
    917	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
    918			  fb->format->has_alpha;
    919
    920	if (!vc4->is_vc5) {
    921	/* Control word */
    922		vc4_dlist_write(vc4_state,
    923				SCALER_CTL0_VALID |
    924				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
    925				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
    926				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
    927				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
    928				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
    929				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
    930				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
    931				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
    932				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
    933
    934		/* Position Word 0: Image Positions and Alpha Value */
    935		vc4_state->pos0_offset = vc4_state->dlist_count;
    936		vc4_dlist_write(vc4_state,
    937				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
    938				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
    939				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
    940
    941		/* Position Word 1: Scaled Image Dimensions. */
    942		if (!vc4_state->is_unity) {
    943			vc4_dlist_write(vc4_state,
    944					VC4_SET_FIELD(vc4_state->crtc_w,
    945						      SCALER_POS1_SCL_WIDTH) |
    946					VC4_SET_FIELD(vc4_state->crtc_h,
    947						      SCALER_POS1_SCL_HEIGHT));
    948		}
    949
    950		/* Position Word 2: Source Image Size, Alpha */
    951		vc4_state->pos2_offset = vc4_state->dlist_count;
    952		vc4_dlist_write(vc4_state,
    953				VC4_SET_FIELD(fb->format->has_alpha ?
    954					      SCALER_POS2_ALPHA_MODE_PIPELINE :
    955					      SCALER_POS2_ALPHA_MODE_FIXED,
    956					      SCALER_POS2_ALPHA_MODE) |
    957				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
    958				(fb->format->has_alpha ?
    959						SCALER_POS2_ALPHA_PREMULT : 0) |
    960				VC4_SET_FIELD(vc4_state->src_w[0],
    961					      SCALER_POS2_WIDTH) |
    962				VC4_SET_FIELD(vc4_state->src_h[0],
    963					      SCALER_POS2_HEIGHT));
    964
    965		/* Position Word 3: Context.  Written by the HVS. */
    966		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
    967
    968	} else {
    969		u32 hvs_pixel_order = format->pixel_order;
    970
    971		if (format->pixel_order_hvs5)
    972			hvs_pixel_order = format->pixel_order_hvs5;
    973
    974		/* Control word */
    975		vc4_dlist_write(vc4_state,
    976				SCALER_CTL0_VALID |
    977				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
    978				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
    979				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
    980				(vc4_state->is_unity ?
    981						SCALER5_CTL0_UNITY : 0) |
    982				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
    983				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
    984				SCALER5_CTL0_ALPHA_EXPAND |
    985				SCALER5_CTL0_RGB_EXPAND);
    986
    987		/* Position Word 0: Image Positions and Alpha Value */
    988		vc4_state->pos0_offset = vc4_state->dlist_count;
    989		vc4_dlist_write(vc4_state,
    990				(rotation & DRM_MODE_REFLECT_Y ?
    991						SCALER5_POS0_VFLIP : 0) |
    992				VC4_SET_FIELD(vc4_state->crtc_x,
    993					      SCALER_POS0_START_X) |
    994				(rotation & DRM_MODE_REFLECT_X ?
    995					      SCALER5_POS0_HFLIP : 0) |
    996				VC4_SET_FIELD(vc4_state->crtc_y,
    997					      SCALER5_POS0_START_Y)
    998			       );
    999
   1000		/* Control Word 2 */
   1001		vc4_dlist_write(vc4_state,
   1002				VC4_SET_FIELD(state->alpha >> 4,
   1003					      SCALER5_CTL2_ALPHA) |
   1004				(fb->format->has_alpha ?
   1005					SCALER5_CTL2_ALPHA_PREMULT : 0) |
   1006				(mix_plane_alpha ?
   1007					SCALER5_CTL2_ALPHA_MIX : 0) |
   1008				VC4_SET_FIELD(fb->format->has_alpha ?
   1009				      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
   1010				      SCALER5_CTL2_ALPHA_MODE_FIXED,
   1011				      SCALER5_CTL2_ALPHA_MODE)
   1012			       );
   1013
   1014		/* Position Word 1: Scaled Image Dimensions. */
   1015		if (!vc4_state->is_unity) {
   1016			vc4_dlist_write(vc4_state,
   1017					VC4_SET_FIELD(vc4_state->crtc_w,
   1018						      SCALER5_POS1_SCL_WIDTH) |
   1019					VC4_SET_FIELD(vc4_state->crtc_h,
   1020						      SCALER5_POS1_SCL_HEIGHT));
   1021		}
   1022
   1023		/* Position Word 2: Source Image Size */
   1024		vc4_state->pos2_offset = vc4_state->dlist_count;
   1025		vc4_dlist_write(vc4_state,
   1026				VC4_SET_FIELD(vc4_state->src_w[0],
   1027					      SCALER5_POS2_WIDTH) |
   1028				VC4_SET_FIELD(vc4_state->src_h[0],
   1029					      SCALER5_POS2_HEIGHT));
   1030
   1031		/* Position Word 3: Context.  Written by the HVS. */
   1032		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
   1033	}
   1034
   1035
   1036	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
   1037	 *
   1038	 * The pointers may be any byte address.
   1039	 */
   1040	vc4_state->ptr0_offset = vc4_state->dlist_count;
   1041	for (i = 0; i < num_planes; i++)
   1042		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
   1043
   1044	/* Pointer Context Word 0/1/2: Written by the HVS */
   1045	for (i = 0; i < num_planes; i++)
   1046		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
   1047
   1048	/* Pitch word 0 */
   1049	vc4_dlist_write(vc4_state, pitch0);
   1050
   1051	/* Pitch word 1/2 */
   1052	for (i = 1; i < num_planes; i++) {
   1053		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
   1054		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
   1055			vc4_dlist_write(vc4_state,
   1056					VC4_SET_FIELD(fb->pitches[i],
   1057						      SCALER_SRC_PITCH));
   1058		} else {
   1059			vc4_dlist_write(vc4_state, pitch0);
   1060		}
   1061	}
   1062
   1063	/* Colorspace conversion words */
   1064	if (vc4_state->is_yuv) {
   1065		enum drm_color_encoding color_encoding = state->color_encoding;
   1066		enum drm_color_range color_range = state->color_range;
   1067		const u32 *ccm;
   1068
   1069		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
   1070			color_encoding = DRM_COLOR_YCBCR_BT601;
   1071		if (color_range >= DRM_COLOR_RANGE_MAX)
   1072			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
   1073
   1074		ccm = colorspace_coeffs[color_range][color_encoding];
   1075
   1076		vc4_dlist_write(vc4_state, ccm[0]);
   1077		vc4_dlist_write(vc4_state, ccm[1]);
   1078		vc4_dlist_write(vc4_state, ccm[2]);
   1079	}
   1080
   1081	vc4_state->lbm_offset = 0;
   1082
   1083	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
   1084	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
   1085	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
   1086	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
   1087		/* Reserve a slot for the LBM Base Address. The real value will
   1088		 * be set when calling vc4_plane_allocate_lbm().
   1089		 */
   1090		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
   1091		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
   1092			vc4_state->lbm_offset = vc4_state->dlist_count;
   1093			vc4_dlist_counter_increment(vc4_state);
   1094		}
   1095
   1096		if (num_planes > 1) {
   1097			/* Emit Cb/Cr as channel 0 and Y as channel
   1098			 * 1. This matches how we set up scl0/scl1
   1099			 * above.
   1100			 */
   1101			vc4_write_scaling_parameters(state, 1);
   1102		}
   1103		vc4_write_scaling_parameters(state, 0);
   1104
   1105		/* If any PPF setup was done, then all the kernel
   1106		 * pointers get uploaded.
   1107		 */
   1108		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
   1109		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
   1110		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
   1111		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
   1112			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
   1113						   SCALER_PPF_KERNEL_OFFSET);
   1114
   1115			/* HPPF plane 0 */
   1116			vc4_dlist_write(vc4_state, kernel);
   1117			/* VPPF plane 0 */
   1118			vc4_dlist_write(vc4_state, kernel);
   1119			/* HPPF plane 1 */
   1120			vc4_dlist_write(vc4_state, kernel);
   1121			/* VPPF plane 1 */
   1122			vc4_dlist_write(vc4_state, kernel);
   1123		}
   1124	}
   1125
   1126	vc4_state->dlist[ctl0_offset] |=
   1127		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
   1128
   1129	/* crtc_* are already clipped coordinates. */
   1130	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
   1131			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
   1132			vc4_state->crtc_h == state->crtc->mode.vdisplay;
   1133	/* Background fill might be necessary when the plane has per-pixel
   1134	 * alpha content or a non-opaque plane alpha and could blend from the
   1135	 * background or does not cover the entire screen.
   1136	 */
   1137	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
   1138				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
   1139
   1140	/* Flag the dlist as initialized to avoid checking it twice in case
   1141	 * the async update check already called vc4_plane_mode_set() and
   1142	 * decided to fallback to sync update because async update was not
   1143	 * possible.
   1144	 */
   1145	vc4_state->dlist_initialized = 1;
   1146
   1147	vc4_plane_calc_load(state);
   1148
   1149	return 0;
   1150}
   1151
   1152/* If a modeset involves changing the setup of a plane, the atomic
   1153 * infrastructure will call this to validate a proposed plane setup.
   1154 * However, if a plane isn't getting updated, this (and the
   1155 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
   1156 * compute the dlist here and have all active plane dlists get updated
   1157 * in the CRTC's flush.
   1158 */
   1159static int vc4_plane_atomic_check(struct drm_plane *plane,
   1160				  struct drm_atomic_state *state)
   1161{
   1162	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
   1163										 plane);
   1164	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
   1165	int ret;
   1166
   1167	vc4_state->dlist_count = 0;
   1168
   1169	if (!plane_enabled(new_plane_state))
   1170		return 0;
   1171
   1172	ret = vc4_plane_mode_set(plane, new_plane_state);
   1173	if (ret)
   1174		return ret;
   1175
   1176	return vc4_plane_allocate_lbm(new_plane_state);
   1177}
   1178
/* Atomic update hook for the plane.  Deliberately empty: see the
 * comment in the body for where the dlist upload happens instead.
 */
static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}
   1188
   1189u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
   1190{
   1191	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
   1192	int i;
   1193
   1194	vc4_state->hw_dlist = dlist;
   1195
   1196	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
   1197	for (i = 0; i < vc4_state->dlist_count; i++)
   1198		writel(vc4_state->dlist[i], &dlist[i]);
   1199
   1200	return vc4_state->dlist_count;
   1201}
   1202
   1203u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
   1204{
   1205	const struct vc4_plane_state *vc4_state =
   1206		container_of(state, typeof(*vc4_state), base);
   1207
   1208	return vc4_state->dlist_count;
   1209}
   1210
   1211/* Updates the plane to immediately (well, once the FIFO needs
   1212 * refilling) scan out from at a new framebuffer.
   1213 */
   1214void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
   1215{
   1216	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
   1217	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
   1218	uint32_t addr;
   1219
   1220	/* We're skipping the address adjustment for negative origin,
   1221	 * because this is only called on the primary plane.
   1222	 */
   1223	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
   1224	addr = bo->paddr + fb->offsets[0];
   1225
   1226	/* Write the new address into the hardware immediately.  The
   1227	 * scanout will start from this address as soon as the FIFO
   1228	 * needs to refill with pixels.
   1229	 */
   1230	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
   1231
   1232	/* Also update the CPU-side dlist copy, so that any later
   1233	 * atomic updates that don't do a new modeset on our plane
   1234	 * also use our updated address.
   1235	 */
   1236	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
   1237}
   1238
   1239static void vc4_plane_atomic_async_update(struct drm_plane *plane,
   1240					  struct drm_atomic_state *state)
   1241{
   1242	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
   1243										 plane);
   1244	struct vc4_plane_state *vc4_state, *new_vc4_state;
   1245
   1246	swap(plane->state->fb, new_plane_state->fb);
   1247	plane->state->crtc_x = new_plane_state->crtc_x;
   1248	plane->state->crtc_y = new_plane_state->crtc_y;
   1249	plane->state->crtc_w = new_plane_state->crtc_w;
   1250	plane->state->crtc_h = new_plane_state->crtc_h;
   1251	plane->state->src_x = new_plane_state->src_x;
   1252	plane->state->src_y = new_plane_state->src_y;
   1253	plane->state->src_w = new_plane_state->src_w;
   1254	plane->state->src_h = new_plane_state->src_h;
   1255	plane->state->alpha = new_plane_state->alpha;
   1256	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
   1257	plane->state->rotation = new_plane_state->rotation;
   1258	plane->state->zpos = new_plane_state->zpos;
   1259	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
   1260	plane->state->color_encoding = new_plane_state->color_encoding;
   1261	plane->state->color_range = new_plane_state->color_range;
   1262	plane->state->src = new_plane_state->src;
   1263	plane->state->dst = new_plane_state->dst;
   1264	plane->state->visible = new_plane_state->visible;
   1265
   1266	new_vc4_state = to_vc4_plane_state(new_plane_state);
   1267	vc4_state = to_vc4_plane_state(plane->state);
   1268
   1269	vc4_state->crtc_x = new_vc4_state->crtc_x;
   1270	vc4_state->crtc_y = new_vc4_state->crtc_y;
   1271	vc4_state->crtc_h = new_vc4_state->crtc_h;
   1272	vc4_state->crtc_w = new_vc4_state->crtc_w;
   1273	vc4_state->src_x = new_vc4_state->src_x;
   1274	vc4_state->src_y = new_vc4_state->src_y;
   1275	memcpy(vc4_state->src_w, new_vc4_state->src_w,
   1276	       sizeof(vc4_state->src_w));
   1277	memcpy(vc4_state->src_h, new_vc4_state->src_h,
   1278	       sizeof(vc4_state->src_h));
   1279	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
   1280	       sizeof(vc4_state->x_scaling));
   1281	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
   1282	       sizeof(vc4_state->y_scaling));
   1283	vc4_state->is_unity = new_vc4_state->is_unity;
   1284	vc4_state->is_yuv = new_vc4_state->is_yuv;
   1285	memcpy(vc4_state->offsets, new_vc4_state->offsets,
   1286	       sizeof(vc4_state->offsets));
   1287	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
   1288
   1289	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
   1290	vc4_state->dlist[vc4_state->pos0_offset] =
   1291		new_vc4_state->dlist[vc4_state->pos0_offset];
   1292	vc4_state->dlist[vc4_state->pos2_offset] =
   1293		new_vc4_state->dlist[vc4_state->pos2_offset];
   1294	vc4_state->dlist[vc4_state->ptr0_offset] =
   1295		new_vc4_state->dlist[vc4_state->ptr0_offset];
   1296
   1297	/* Note that we can't just call vc4_plane_write_dlist()
   1298	 * because that would smash the context data that the HVS is
   1299	 * currently using.
   1300	 */
   1301	writel(vc4_state->dlist[vc4_state->pos0_offset],
   1302	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
   1303	writel(vc4_state->dlist[vc4_state->pos2_offset],
   1304	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
   1305	writel(vc4_state->dlist[vc4_state->ptr0_offset],
   1306	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
   1307}
   1308
   1309static int vc4_plane_atomic_async_check(struct drm_plane *plane,
   1310					struct drm_atomic_state *state)
   1311{
   1312	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
   1313										 plane);
   1314	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
   1315	int ret;
   1316	u32 i;
   1317
   1318	ret = vc4_plane_mode_set(plane, new_plane_state);
   1319	if (ret)
   1320		return ret;
   1321
   1322	old_vc4_state = to_vc4_plane_state(plane->state);
   1323	new_vc4_state = to_vc4_plane_state(new_plane_state);
   1324
   1325	if (!new_vc4_state->hw_dlist)
   1326		return -EINVAL;
   1327
   1328	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
   1329	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
   1330	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
   1331	    old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
   1332	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
   1333		return -EINVAL;
   1334
   1335	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
   1336	 * if anything else has changed, fallback to a sync update.
   1337	 */
   1338	for (i = 0; i < new_vc4_state->dlist_count; i++) {
   1339		if (i == new_vc4_state->pos0_offset ||
   1340		    i == new_vc4_state->pos2_offset ||
   1341		    i == new_vc4_state->ptr0_offset ||
   1342		    (new_vc4_state->lbm_offset &&
   1343		     i == new_vc4_state->lbm_offset))
   1344			continue;
   1345
   1346		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
   1347			return -EINVAL;
   1348	}
   1349
   1350	return 0;
   1351}
   1352
   1353static int vc4_prepare_fb(struct drm_plane *plane,
   1354			  struct drm_plane_state *state)
   1355{
   1356	struct vc4_bo *bo;
   1357
   1358	if (!state->fb)
   1359		return 0;
   1360
   1361	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
   1362
   1363	drm_gem_plane_helper_prepare_fb(plane, state);
   1364
   1365	if (plane->state->fb == state->fb)
   1366		return 0;
   1367
   1368	return vc4_bo_inc_usecnt(bo);
   1369}
   1370
   1371static void vc4_cleanup_fb(struct drm_plane *plane,
   1372			   struct drm_plane_state *state)
   1373{
   1374	struct vc4_bo *bo;
   1375
   1376	if (plane->state->fb == state->fb || !state->fb)
   1377		return;
   1378
   1379	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
   1380	vc4_bo_dec_usecnt(bo);
   1381}
   1382
   1383static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
   1384	.atomic_check = vc4_plane_atomic_check,
   1385	.atomic_update = vc4_plane_atomic_update,
   1386	.prepare_fb = vc4_prepare_fb,
   1387	.cleanup_fb = vc4_cleanup_fb,
   1388	.atomic_async_check = vc4_plane_atomic_async_check,
   1389	.atomic_async_update = vc4_plane_atomic_async_update,
   1390};
   1391
   1392static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
   1393	.atomic_check = vc4_plane_atomic_check,
   1394	.atomic_update = vc4_plane_atomic_update,
   1395	.atomic_async_check = vc4_plane_atomic_async_check,
   1396	.atomic_async_update = vc4_plane_atomic_async_update,
   1397};
   1398
   1399static bool vc4_format_mod_supported(struct drm_plane *plane,
   1400				     uint32_t format,
   1401				     uint64_t modifier)
   1402{
   1403	/* Support T_TILING for RGB formats only. */
   1404	switch (format) {
   1405	case DRM_FORMAT_XRGB8888:
   1406	case DRM_FORMAT_ARGB8888:
   1407	case DRM_FORMAT_ABGR8888:
   1408	case DRM_FORMAT_XBGR8888:
   1409	case DRM_FORMAT_RGB565:
   1410	case DRM_FORMAT_BGR565:
   1411	case DRM_FORMAT_ARGB1555:
   1412	case DRM_FORMAT_XRGB1555:
   1413		switch (fourcc_mod_broadcom_mod(modifier)) {
   1414		case DRM_FORMAT_MOD_LINEAR:
   1415		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
   1416			return true;
   1417		default:
   1418			return false;
   1419		}
   1420	case DRM_FORMAT_NV12:
   1421	case DRM_FORMAT_NV21:
   1422		switch (fourcc_mod_broadcom_mod(modifier)) {
   1423		case DRM_FORMAT_MOD_LINEAR:
   1424		case DRM_FORMAT_MOD_BROADCOM_SAND64:
   1425		case DRM_FORMAT_MOD_BROADCOM_SAND128:
   1426		case DRM_FORMAT_MOD_BROADCOM_SAND256:
   1427			return true;
   1428		default:
   1429			return false;
   1430		}
   1431	case DRM_FORMAT_P030:
   1432		switch (fourcc_mod_broadcom_mod(modifier)) {
   1433		case DRM_FORMAT_MOD_BROADCOM_SAND128:
   1434			return true;
   1435		default:
   1436			return false;
   1437		}
   1438	case DRM_FORMAT_RGBX1010102:
   1439	case DRM_FORMAT_BGRX1010102:
   1440	case DRM_FORMAT_RGBA1010102:
   1441	case DRM_FORMAT_BGRA1010102:
   1442	case DRM_FORMAT_YUV422:
   1443	case DRM_FORMAT_YVU422:
   1444	case DRM_FORMAT_YUV420:
   1445	case DRM_FORMAT_YVU420:
   1446	case DRM_FORMAT_NV16:
   1447	case DRM_FORMAT_NV61:
   1448	default:
   1449		return (modifier == DRM_FORMAT_MOD_LINEAR);
   1450	}
   1451}
   1452
   1453static const struct drm_plane_funcs vc4_plane_funcs = {
   1454	.update_plane = drm_atomic_helper_update_plane,
   1455	.disable_plane = drm_atomic_helper_disable_plane,
   1456	.destroy = drm_plane_cleanup,
   1457	.set_property = NULL,
   1458	.reset = vc4_plane_reset,
   1459	.atomic_duplicate_state = vc4_plane_duplicate_state,
   1460	.atomic_destroy_state = vc4_plane_destroy_state,
   1461	.format_mod_supported = vc4_format_mod_supported,
   1462};
   1463
   1464struct drm_plane *vc4_plane_init(struct drm_device *dev,
   1465				 enum drm_plane_type type)
   1466{
   1467	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1468	struct drm_plane *plane = NULL;
   1469	struct vc4_plane *vc4_plane;
   1470	u32 formats[ARRAY_SIZE(hvs_formats)];
   1471	int num_formats = 0;
   1472	int ret = 0;
   1473	unsigned i;
   1474	static const uint64_t modifiers[] = {
   1475		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
   1476		DRM_FORMAT_MOD_BROADCOM_SAND128,
   1477		DRM_FORMAT_MOD_BROADCOM_SAND64,
   1478		DRM_FORMAT_MOD_BROADCOM_SAND256,
   1479		DRM_FORMAT_MOD_LINEAR,
   1480		DRM_FORMAT_MOD_INVALID
   1481	};
   1482
   1483	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
   1484				 GFP_KERNEL);
   1485	if (!vc4_plane)
   1486		return ERR_PTR(-ENOMEM);
   1487
   1488	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
   1489		if (!hvs_formats[i].hvs5_only || vc4->is_vc5) {
   1490			formats[num_formats] = hvs_formats[i].drm;
   1491			num_formats++;
   1492		}
   1493	}
   1494
   1495	plane = &vc4_plane->base;
   1496	ret = drm_universal_plane_init(dev, plane, 0,
   1497				       &vc4_plane_funcs,
   1498				       formats, num_formats,
   1499				       modifiers, type, NULL);
   1500	if (ret)
   1501		return ERR_PTR(ret);
   1502
   1503	if (vc4->is_vc5)
   1504		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
   1505	else
   1506		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
   1507
   1508	drm_plane_create_alpha_property(plane);
   1509	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
   1510					   DRM_MODE_ROTATE_0 |
   1511					   DRM_MODE_ROTATE_180 |
   1512					   DRM_MODE_REFLECT_X |
   1513					   DRM_MODE_REFLECT_Y);
   1514
   1515	drm_plane_create_color_properties(plane,
   1516					  BIT(DRM_COLOR_YCBCR_BT601) |
   1517					  BIT(DRM_COLOR_YCBCR_BT709) |
   1518					  BIT(DRM_COLOR_YCBCR_BT2020),
   1519					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
   1520					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
   1521					  DRM_COLOR_YCBCR_BT709,
   1522					  DRM_COLOR_YCBCR_LIMITED_RANGE);
   1523
   1524	return plane;
   1525}
   1526
   1527int vc4_plane_create_additional_planes(struct drm_device *drm)
   1528{
   1529	struct drm_plane *cursor_plane;
   1530	struct drm_crtc *crtc;
   1531	unsigned int i;
   1532
   1533	/* Set up some arbitrary number of planes.  We're not limited
   1534	 * by a set number of physical registers, just the space in
   1535	 * the HVS (16k) and how small an plane can be (28 bytes).
   1536	 * However, each plane we set up takes up some memory, and
   1537	 * increases the cost of looping over planes, which atomic
   1538	 * modesetting does quite a bit.  As a result, we pick a
   1539	 * modest number of planes to expose, that should hopefully
   1540	 * still cover any sane usecase.
   1541	 */
   1542	for (i = 0; i < 16; i++) {
   1543		struct drm_plane *plane =
   1544			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
   1545
   1546		if (IS_ERR(plane))
   1547			continue;
   1548
   1549		plane->possible_crtcs =
   1550			GENMASK(drm->mode_config.num_crtc - 1, 0);
   1551	}
   1552
   1553	drm_for_each_crtc(crtc, drm) {
   1554		/* Set up the legacy cursor after overlay initialization,
   1555		 * since we overlay planes on the CRTC in the order they were
   1556		 * initialized.
   1557		 */
   1558		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
   1559		if (!IS_ERR(cursor_plane)) {
   1560			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
   1561			crtc->cursor = cursor_plane;
   1562		}
   1563	}
   1564
   1565	return 0;
   1566}