cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vc4_validate.c (26642B)


/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Command list validator for VC4.
 *
 * Since the VC4 has no IOMMU between it and system memory, a user
 * with access to execute command lists could escalate privilege by
 * overwriting system memory (drawing to it as a framebuffer) or
 * reading system memory it shouldn't (reading it as a vertex buffer
 * or index buffer).
 *
 * We validate binner command lists to ensure that all accesses are
 * within the bounds of the GEM objects referenced by the submitted
 * job.  The validator explicitly whitelists packets, and looks at the
 * offsets in any address fields to make sure they're contained within
 * the BOs they reference.
 *
 * Note that because CL validation is already reading the
 * user-submitted CL and writing the validated copy out to the memory
 * that the GPU will actually read, this is also where GEM relocation
 * processing (turning BO references into actual addresses for the GPU
 * to use) happens.
 */
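
/* A rough sketch of how this file gets used (simplified; the actual
 * submit plumbing lives in vc4_gem.c): userspace hands the driver a
 * binner CL through the VC4 submit ioctl, the driver copies it into
 * kernel memory at exec->bin_u, and vc4_validate_bin_cl() below walks
 * it packet by packet, emitting the validated, relocated copy into a
 * GPU-visible buffer:
 *
 *	user bin CL  ->  exec->bin_u (untrusted copy)
 *	exec->bin_u  ->  validated CL in exec->exec_bo (what V3D reads)
 */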

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_packet.h"

#define VALIDATE_ARGS \
	struct vc4_exec_info *exec,			\
	void *validated,				\
	void *untrusted

/** Return the width in pixels of a 64-byte microtile. */
static uint32_t
utile_width(int cpp)
{
	switch (cpp) {
	case 1:
	case 2:
		return 8;
	case 4:
		return 4;
	case 8:
		return 2;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}

/** Return the height in pixels of a 64-byte microtile. */
static uint32_t
utile_height(int cpp)
{
	switch (cpp) {
	case 1:
		return 8;
	case 2:
	case 4:
	case 8:
		return 4;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}
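
/* Worked example for the two helpers above: at cpp == 4 (e.g. RGBA8888)
 * a microtile is 4 pixels wide by 4 pixels tall, and 4 * 4 * 4 bytes
 * per pixel == 64 bytes, matching the fixed 64-byte microtile size.
 * The other cpp values factor 64 the same way: 8x8x1, 8x4x2, 2x4x8.
 */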

/**
 * size_is_lt() - Returns whether a miplevel of the given size will
 * use the lineartile (LT) tiling layout rather than the normal T
 * tiling layout.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	return (width <= 4 * utile_width(cpp) ||
		height <= 4 * utile_height(cpp));
}
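
/* Example: at cpp == 4 the microtile is 4x4, so 4 * utile_width() ==
 * 4 * utile_height() == 16, and any miplevel that is 16 pixels or
 * narrower (or 16 pixels or shorter) uses the LT layout, while larger
 * levels stay T-tiled.
 */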

struct drm_gem_cma_object *
vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
	struct vc4_dev *vc4 = exec->dev;
	struct drm_gem_cma_object *obj;
	struct vc4_bo *bo;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return NULL;

	if (hindex >= exec->bo_count) {
		DRM_DEBUG("BO index %d greater than BO count %d\n",
			  hindex, exec->bo_count);
		return NULL;
	}
	obj = exec->bo[hindex];
	bo = to_vc4_bo(&obj->base);

	if (bo->validated_shader) {
		DRM_DEBUG("Trying to use shader BO as something other than "
			  "a shader\n");
		return NULL;
	}

	return obj;
}

static struct drm_gem_cma_object *
vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
{
	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}

static bool
validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
	/* Note that the untrusted pointer passed to these functions is
	 * incremented past the packet byte.
	 */
	return (untrusted - 1 == exec->bin_u + pos);
}

static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t attribute_count = pointer_bits & 7;
	bool extended = pointer_bits & 8;

	if (attribute_count == 0)
		attribute_count = 8;

	if (extended)
		return 100 + attribute_count * 4;
	else
		return 36 + attribute_count * 8;
}
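
/* Example: pointer_bits == 0x3 encodes three attributes with the
 * extended flag (bit 3) clear, giving 36 + 3 * 8 == 60 bytes;
 * pointer_bits == 0x8 encodes eight attributes (0 means 8) in the
 * extended format, giving 100 + 8 * 4 == 132 bytes.
 */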
    161
    162bool
    163vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
    164		   uint32_t offset, uint8_t tiling_format,
    165		   uint32_t width, uint32_t height, uint8_t cpp)
    166{
    167	struct vc4_dev *vc4 = exec->dev;
    168	uint32_t aligned_width, aligned_height, stride, size;
    169	uint32_t utile_w = utile_width(cpp);
    170	uint32_t utile_h = utile_height(cpp);
    171
    172	if (WARN_ON_ONCE(vc4->is_vc5))
    173		return false;
    174
    175	/* The shaded vertex format stores signed 12.4 fixed point
    176	 * (-2048,2047) offsets from the viewport center, so we should
    177	 * never have a render target larger than 4096.  The texture
    178	 * unit can only sample from 2048x2048, so it's even more
    179	 * restricted.  This lets us avoid worrying about overflow in
    180	 * our math.
    181	 */
    182	if (width > 4096 || height > 4096) {
    183		DRM_DEBUG("Surface dimensions (%d,%d) too large",
    184			  width, height);
    185		return false;
    186	}
    187
    188	switch (tiling_format) {
    189	case VC4_TILING_FORMAT_LINEAR:
    190		aligned_width = round_up(width, utile_w);
    191		aligned_height = height;
    192		break;
    193	case VC4_TILING_FORMAT_T:
    194		aligned_width = round_up(width, utile_w * 8);
    195		aligned_height = round_up(height, utile_h * 8);
    196		break;
    197	case VC4_TILING_FORMAT_LT:
    198		aligned_width = round_up(width, utile_w);
    199		aligned_height = round_up(height, utile_h);
    200		break;
    201	default:
    202		DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
    203		return false;
    204	}
    205
    206	stride = aligned_width * cpp;
    207	size = stride * aligned_height;
    208
    209	if (size + offset < size ||
    210	    size + offset > fbo->base.size) {
    211		DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
    212			  width, height,
    213			  aligned_width, aligned_height,
    214			  size, offset, fbo->base.size);
    215		return false;
    216	}
    217
    218	return true;
    219}
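
/* A note on the wraparound check above: since width and height are
 * capped at 4096, size is at most 4096 * 4096 * 8 == 128 MiB and can
 * never overflow a u32 on its own; "size + offset < size" therefore
 * specifically catches a huge user-supplied offset (e.g. 0xffffffff)
 * wrapping the sum around.
 */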

static int
validate_flush(VALIDATE_ARGS)
{
	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
		DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
		return -EINVAL;
	}
	exec->found_flush = true;

	return 0;
}

static int
validate_start_tile_binning(VALIDATE_ARGS)
{
	if (exec->found_start_tile_binning_packet) {
		DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
		return -EINVAL;
	}
	exec->found_start_tile_binning_packet = true;

	if (!exec->found_tile_binning_mode_config_packet) {
		DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
		return -EINVAL;
	}

	return 0;
}

static int
validate_increment_semaphore(VALIDATE_ARGS)
{
	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
		DRM_DEBUG("Bin CL must end with "
			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
		return -EINVAL;
	}
	exec->found_increment_semaphore_packet = true;

	return 0;
}

static int
validate_indexed_prim_list(VALIDATE_ARGS)
{
	struct drm_gem_cma_object *ib;
	uint32_t length = *(uint32_t *)(untrusted + 1);
	uint32_t offset = *(uint32_t *)(untrusted + 5);
	uint32_t max_index = *(uint32_t *)(untrusted + 9);
	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
	struct vc4_shader_state *shader_state;

	/* Make sure a shader state record has been set up before any
	 * primitives try to use it.
	 */
	if (exec->shader_state_count == 0) {
		DRM_DEBUG("shader state must precede primitives\n");
		return -EINVAL;
	}
	shader_state = &exec->shader_state[exec->shader_state_count - 1];

	if (max_index > shader_state->max_index)
		shader_state->max_index = max_index;

	ib = vc4_use_handle(exec, 0);
	if (!ib)
		return -EINVAL;

	exec->bin_dep_seqno = max(exec->bin_dep_seqno,
				  to_vc4_bo(&ib->base)->write_seqno);

	if (offset > ib->base.size ||
	    (ib->base.size - offset) / index_size < length) {
		DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
			  offset, length, index_size, ib->base.size);
		return -EINVAL;
	}

	*(uint32_t *)(validated + 5) = ib->paddr + offset;

	return 0;
}
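
/* Example of the IB bounds check above: for a 64-byte index buffer
 * with offset == 4 and 16-bit indices (index_size == 2), at most
 * (64 - 4) / 2 == 30 indices fit, so length == 31 is rejected while
 * length == 30 passes.
 */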
    301
    302static int
    303validate_gl_array_primitive(VALIDATE_ARGS)
    304{
    305	uint32_t length = *(uint32_t *)(untrusted + 1);
    306	uint32_t base_index = *(uint32_t *)(untrusted + 5);
    307	uint32_t max_index;
    308	struct vc4_shader_state *shader_state;
    309
    310	/* Check overflow condition */
    311	if (exec->shader_state_count == 0) {
    312		DRM_DEBUG("shader state must precede primitives\n");
    313		return -EINVAL;
    314	}
    315	shader_state = &exec->shader_state[exec->shader_state_count - 1];
    316
    317	if (length + base_index < length) {
    318		DRM_DEBUG("primitive vertex count overflow\n");
    319		return -EINVAL;
    320	}
    321	max_index = length + base_index - 1;
    322
    323	if (max_index > shader_state->max_index)
    324		shader_state->max_index = max_index;
    325
    326	return 0;
    327}
    328
    329static int
    330validate_gl_shader_state(VALIDATE_ARGS)
    331{
    332	uint32_t i = exec->shader_state_count++;
    333
    334	if (i >= exec->shader_state_size) {
    335		DRM_DEBUG("More requests for shader states than declared\n");
    336		return -EINVAL;
    337	}
    338
    339	exec->shader_state[i].addr = *(uint32_t *)untrusted;
    340	exec->shader_state[i].max_index = 0;
    341
    342	if (exec->shader_state[i].addr & ~0xf) {
    343		DRM_DEBUG("high bits set in GL shader rec reference\n");
    344		return -EINVAL;
    345	}
    346
    347	*(uint32_t *)validated = (exec->shader_rec_p +
    348				  exec->shader_state[i].addr);
    349
    350	exec->shader_rec_p +=
    351		roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
    352
    353	return 0;
    354}
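
/* The packet's 4-byte payload is nominally the shader rec address,
 * but only its low four bits (the attribute count and extended-format
 * flag, see gl_shader_rec_size() above) may be set by userspace: the
 * real address is composed here from the kernel-chosen shader_rec_p.
 */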

static int
validate_tile_binning_config(VALIDATE_ARGS)
{
	struct drm_device *dev = exec->exec_bo->base.dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint8_t flags;
	uint32_t tile_state_size;
	uint32_t tile_count, bin_addr;
	int bin_slot;

	if (exec->found_tile_binning_mode_config_packet) {
		DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
		return -EINVAL;
	}
	exec->found_tile_binning_mode_config_packet = true;

	exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
	exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
	tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
	flags = *(uint8_t *)(untrusted + 14);

	if (exec->bin_tiles_x == 0 ||
	    exec->bin_tiles_y == 0) {
		DRM_DEBUG("Tile binning config of %dx%d too small\n",
			  exec->bin_tiles_x, exec->bin_tiles_y);
		return -EINVAL;
	}

	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
		DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
		return -EINVAL;
	}

	bin_slot = vc4_v3d_get_bin_slot(vc4);
	if (bin_slot < 0) {
		if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
			DRM_ERROR("Failed to allocate binner memory: %d\n",
				  bin_slot);
		}
		return bin_slot;
	}

	/* The slot we allocated will only be used by this job, and is
	 * free when the job completes rendering.
	 */
	exec->bin_slots |= BIT(bin_slot);
	bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size;

	/* The tile state data array is 48 bytes per tile, and we put it at
	 * the start of a BO containing both it and the tile alloc.
	 */
	tile_state_size = 48 * tile_count;

	/* Since the tile alloc array will follow us, align. */
	exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);

	*(uint8_t *)(validated + 14) =
		((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
			    VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
		 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
			       VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
			       VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));

	/* tile alloc address. */
	*(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
	/* tile alloc size. */
	*(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
					exec->tile_alloc_offset);
	/* tile state address. */
	*(uint32_t *)(validated + 8) = bin_addr;

	return 0;
}
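
/* Resulting layout of the binner slot programmed above, e.g. for a
 * 10x8 tile config: the tile state array (48 * 80 == 3840 bytes)
 * starts at bin_addr, the tile alloc array starts at the next
 * 4096-byte boundary (bin_addr + 4096), and the remainder of the slot
 * is reported as the tile alloc size.
 */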

static int
validate_gem_handles(VALIDATE_ARGS)
{
	memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
	return 0;
}

#define VC4_DEFINE_PACKET(packet, func) \
	[packet] = { packet ## _SIZE, #packet, func }

static const struct cmd_info {
	uint16_t len;
	const char *name;
	int (*func)(struct vc4_exec_info *exec, void *validated,
		    void *untrusted);
} cmd_info[] = {
	VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
			  validate_start_tile_binning),
	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
			  validate_increment_semaphore),

	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
			  validate_indexed_prim_list),
	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
			  validate_gl_array_primitive),

	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),

	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),

	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
	/* Note: The docs say this was also 105, but it was 106 in the
	 * initial userland code drop.
	 */
	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),

	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
			  validate_tile_binning_config),

	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
};
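
/* cmd_info is indexed directly by the packet's opcode byte.  The
 * designated initializers leave every unlisted opcode as a zeroed
 * entry with a NULL name, which vc4_validate_bin_cl() below treats as
 * an invalid packet; a NULL func (e.g. VC4_PACKET_NOP) means the
 * packet is copied through with no checks beyond its length.
 */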

int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t len = exec->args->bin_cl_size;
	uint32_t dst_offset = 0;
	uint32_t src_offset = 0;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	while (src_offset < len) {
		void *dst_pkt = validated + dst_offset;
		void *src_pkt = unvalidated + src_offset;
		u8 cmd = *(uint8_t *)src_pkt;
		const struct cmd_info *info;

		if (cmd >= ARRAY_SIZE(cmd_info)) {
			DRM_DEBUG("0x%08x: packet %d out of bounds\n",
				  src_offset, cmd);
			return -EINVAL;
		}

		info = &cmd_info[cmd];
		if (!info->name) {
			DRM_DEBUG("0x%08x: packet %d invalid\n",
				  src_offset, cmd);
			return -EINVAL;
		}

		if (src_offset + info->len > len) {
			DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
				  "exceeds bounds (0x%08x)\n",
				  src_offset, cmd, info->name, info->len,
				  src_offset + len);
			return -EINVAL;
		}

		if (cmd != VC4_PACKET_GEM_HANDLES)
			memcpy(dst_pkt, src_pkt, info->len);

		if (info->func && info->func(exec,
					     dst_pkt + 1,
					     src_pkt + 1)) {
			DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
				  src_offset, cmd, info->name);
			return -EINVAL;
		}

		src_offset += info->len;
		/* GEM handle loading doesn't produce HW packets. */
		if (cmd != VC4_PACKET_GEM_HANDLES)
			dst_offset += info->len;

		/* When the CL hits halt, it'll stop reading anything else. */
		if (cmd == VC4_PACKET_HALT)
			break;
	}

	exec->ct0ea = exec->ct0ca + dst_offset;

	if (!exec->found_start_tile_binning_packet) {
		DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
		return -EINVAL;
	}

	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
	 * semaphore is used to trigger the render CL to start up, and the
	 * FLUSH is what caps the bin lists with
	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
	 * render CL when they get called to) and actually triggers the queued
	 * semaphore increment.
	 */
	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
		DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
			  "VC4_PACKET_FLUSH\n");
		return -EINVAL;
	}

	return 0;
}
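
/* A minimal bin CL that satisfies the checks above would look roughly
 * like this (ordering enforced by the validators; contents illustrative):
 *
 *	VC4_PACKET_TILE_BINNING_MODE_CONFIG
 *	VC4_PACKET_START_TILE_BINNING
 *	... state setup and primitive packets ...
 *	VC4_PACKET_INCREMENT_SEMAPHORE	(at bin_cl_size - 2)
 *	VC4_PACKET_FLUSH		(at bin_cl_size - 1)
 *
 * Both trailing packets are a single byte, which is how the
 * validate_bin_pos() checks can pin them to the last two bytes of the
 * CL.
 */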

static bool
reloc_tex(struct vc4_exec_info *exec,
	  void *uniform_data_u,
	  struct vc4_texture_sample_info *sample,
	  uint32_t texture_handle_index, bool is_cs)
{
	struct drm_gem_cma_object *tex;
	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
	uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
	uint32_t p2 = (sample->p_offset[2] != ~0 ?
		       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
	uint32_t p3 = (sample->p_offset[3] != ~0 ?
		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
	uint32_t cpp, tiling_format, utile_w, utile_h;
	uint32_t i;
	uint32_t cube_map_stride = 0;
	enum vc4_texture_data_type type;

	tex = vc4_use_bo(exec, texture_handle_index);
	if (!tex)
		return false;

	if (sample->is_direct) {
		uint32_t remaining_size = tex->base.size - p0;

		if (p0 > tex->base.size - 4) {
			DRM_DEBUG("UBO offset greater than UBO size\n");
			goto fail;
		}
		if (p1 > remaining_size - 4) {
			DRM_DEBUG("UBO clamp would allow reads "
				  "outside of UBO\n");
			goto fail;
		}
		*validated_p0 = tex->paddr + p0;
		return true;
	}

	if (width == 0)
		width = 2048;
	if (height == 0)
		height = 2048;

	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
			if (cube_map_stride) {
				DRM_DEBUG("Cube map stride set twice\n");
				goto fail;
			}

			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
		}
		if (!cube_map_stride) {
			DRM_DEBUG("Cube map stride not set\n");
			goto fail;
		}
	}

	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));

	switch (type) {
	case VC4_TEXTURE_TYPE_RGBA8888:
	case VC4_TEXTURE_TYPE_RGBX8888:
	case VC4_TEXTURE_TYPE_RGBA32R:
		cpp = 4;
		break;
	case VC4_TEXTURE_TYPE_RGBA4444:
	case VC4_TEXTURE_TYPE_RGBA5551:
	case VC4_TEXTURE_TYPE_RGB565:
	case VC4_TEXTURE_TYPE_LUMALPHA:
	case VC4_TEXTURE_TYPE_S16F:
	case VC4_TEXTURE_TYPE_S16:
		cpp = 2;
		break;
	case VC4_TEXTURE_TYPE_LUMINANCE:
	case VC4_TEXTURE_TYPE_ALPHA:
	case VC4_TEXTURE_TYPE_S8:
		cpp = 1;
		break;
	case VC4_TEXTURE_TYPE_ETC1:
		/* ETC1 is arranged as 64-bit blocks, where each block is 4x4
		 * pixels.
		 */
		cpp = 8;
		width = (width + 3) >> 2;
		height = (height + 3) >> 2;
		break;
	case VC4_TEXTURE_TYPE_BW1:
	case VC4_TEXTURE_TYPE_A4:
	case VC4_TEXTURE_TYPE_A1:
	case VC4_TEXTURE_TYPE_RGBA64:
	case VC4_TEXTURE_TYPE_YUV422R:
	default:
		DRM_DEBUG("Texture format %d unsupported\n", type);
		goto fail;
	}
	utile_w = utile_width(cpp);
	utile_h = utile_height(cpp);

	if (type == VC4_TEXTURE_TYPE_RGBA32R) {
		tiling_format = VC4_TILING_FORMAT_LINEAR;
	} else {
		if (size_is_lt(width, height, cpp))
			tiling_format = VC4_TILING_FORMAT_LT;
		else
			tiling_format = VC4_TILING_FORMAT_T;
	}

	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
				tiling_format, width, height, cpp)) {
		goto fail;
	}

	/* The mipmap levels are stored before the base of the texture.  Make
	 * sure there is actually space in the BO.
	 */
	for (i = 1; i <= miplevels; i++) {
		uint32_t level_width = max(width >> i, 1u);
		uint32_t level_height = max(height >> i, 1u);
		uint32_t aligned_width, aligned_height;
		uint32_t level_size;

		/* Once the levels get small enough, they drop from T to LT. */
		if (tiling_format == VC4_TILING_FORMAT_T &&
		    size_is_lt(level_width, level_height, cpp)) {
			tiling_format = VC4_TILING_FORMAT_LT;
		}

		switch (tiling_format) {
		case VC4_TILING_FORMAT_T:
			aligned_width = round_up(level_width, utile_w * 8);
			aligned_height = round_up(level_height, utile_h * 8);
			break;
		case VC4_TILING_FORMAT_LT:
			aligned_width = round_up(level_width, utile_w);
			aligned_height = round_up(level_height, utile_h);
			break;
		default:
			aligned_width = round_up(level_width, utile_w);
			aligned_height = level_height;
			break;
		}

		level_size = aligned_width * cpp * aligned_height;

		if (offset < level_size) {
			DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
				  "overflowed buffer bounds (offset %d)\n",
				  i, level_width, level_height,
				  aligned_width, aligned_height,
				  level_size, offset);
			goto fail;
		}

		offset -= level_size;
	}

	*validated_p0 = tex->paddr + p0;

	if (is_cs) {
		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
					  to_vc4_bo(&tex->base)->write_seqno);
	}

	return true;
 fail:
	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
	return false;
}
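
/* Example of the miplevel walk above: a 256x256 RGBA8888 texture with
 * miplevels == 2 keeps its level 0 data at "offset" within the BO,
 * level 1 (128x128) directly below it, and level 2 (64x64) below
 * that; each iteration subtracts the level's aligned size from offset
 * and fails if the level would start before the beginning of the BO.
 */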

static int
validate_gl_shader_rec(struct drm_device *dev,
		       struct vc4_exec_info *exec,
		       struct vc4_shader_state *state)
{
	uint32_t *src_handles;
	void *pkt_u, *pkt_v;
	static const uint32_t shader_reloc_offsets[] = {
		4, /* fs */
		16, /* vs */
		28, /* cs */
	};
	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
	struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
	uint32_t nr_attributes, nr_relocs, packet_size;
	int i;

	nr_attributes = state->addr & 0x7;
	if (nr_attributes == 0)
		nr_attributes = 8;
	packet_size = gl_shader_rec_size(state->addr);

	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
	if (nr_relocs * 4 > exec->shader_rec_size) {
		DRM_DEBUG("overflowed shader recs reading %d handles "
			  "from %d bytes left\n",
			  nr_relocs, exec->shader_rec_size);
		return -EINVAL;
	}
	src_handles = exec->shader_rec_u;
	exec->shader_rec_u += nr_relocs * 4;
	exec->shader_rec_size -= nr_relocs * 4;

	if (packet_size > exec->shader_rec_size) {
		DRM_DEBUG("overflowed shader recs copying %db packet "
			  "from %d bytes left\n",
			  packet_size, exec->shader_rec_size);
		return -EINVAL;
	}
	pkt_u = exec->shader_rec_u;
	pkt_v = exec->shader_rec_v;
	memcpy(pkt_v, pkt_u, packet_size);
	exec->shader_rec_u += packet_size;
	/* Shader recs have to be aligned to 16 bytes (due to the attribute
	 * flags being in the low bytes), so round the next validated shader
	 * rec address up.  This should be safe, since we've got so many
	 * relocations in a shader rec packet.
	 */
	BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
	exec->shader_rec_v += roundup(packet_size, 16);
	exec->shader_rec_size -= packet_size;

	for (i = 0; i < shader_reloc_count; i++) {
		if (src_handles[i] > exec->bo_count) {
			DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
			return -EINVAL;
		}

		bo[i] = exec->bo[src_handles[i]];
		if (!bo[i])
			return -EINVAL;
	}
	for (i = shader_reloc_count; i < nr_relocs; i++) {
		bo[i] = vc4_use_bo(exec, src_handles[i]);
		if (!bo[i])
			return -EINVAL;
	}

	if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
	    to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
		DRM_DEBUG("Thread mode of CL and FS do not match\n");
		return -EINVAL;
	}

	if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
	    to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
		DRM_DEBUG("cs and vs cannot be threaded\n");
		return -EINVAL;
	}

	for (i = 0; i < shader_reloc_count; i++) {
		struct vc4_validated_shader_info *validated_shader;
		uint32_t o = shader_reloc_offsets[i];
		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
		uint32_t *texture_handles_u;
		void *uniform_data_u;
		uint32_t tex, uni;

		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;

		if (src_offset != 0) {
			DRM_DEBUG("Shaders must be at offset 0 of "
				  "the BO.\n");
			return -EINVAL;
		}

		validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
		if (!validated_shader)
			return -EINVAL;

		if (validated_shader->uniforms_src_size >
		    exec->uniforms_size) {
			DRM_DEBUG("Uniforms src buffer overflow\n");
			return -EINVAL;
		}

		texture_handles_u = exec->uniforms_u;
		uniform_data_u = (texture_handles_u +
				  validated_shader->num_texture_samples);

		memcpy(exec->uniforms_v, uniform_data_u,
		       validated_shader->uniforms_size);

		for (tex = 0;
		     tex < validated_shader->num_texture_samples;
		     tex++) {
			if (!reloc_tex(exec,
				       uniform_data_u,
				       &validated_shader->texture_samples[tex],
				       texture_handles_u[tex],
				       i == 2)) {
				return -EINVAL;
			}
		}

		/* Fill in the uniform slots that need this shader's
		 * start-of-uniforms address (used for resetting the uniform
		 * stream in the presence of control flow).
		 */
		for (uni = 0;
		     uni < validated_shader->num_uniform_addr_offsets;
		     uni++) {
			uint32_t o = validated_shader->uniform_addr_offsets[uni];
			((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
		}

		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;

		exec->uniforms_u += validated_shader->uniforms_src_size;
		exec->uniforms_v += validated_shader->uniforms_size;
		exec->uniforms_p += validated_shader->uniforms_size;
	}

	for (i = 0; i < nr_attributes; i++) {
		struct drm_gem_cma_object *vbo =
			bo[ARRAY_SIZE(shader_reloc_offsets) + i];
		uint32_t o = 36 + i * 8;
		uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
		uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
		uint32_t max_index;

		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
					  to_vc4_bo(&vbo->base)->write_seqno);

		if (state->addr & 0x8)
			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;

		if (vbo->base.size < offset ||
		    vbo->base.size - offset < attr_size) {
			DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
				  offset, attr_size, vbo->base.size);
			return -EINVAL;
		}

		if (stride != 0) {
			max_index = ((vbo->base.size - offset - attr_size) /
				     stride);
			if (state->max_index > max_index) {
				DRM_DEBUG("primitives use index %d out of "
					  "supplied %d\n",
					  state->max_index, max_index);
				return -EINVAL;
			}
		}

		*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
	}

	return 0;
}
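
/* Layout of a (non-extended) GL shader rec implied by the offsets
 * above: a 16-bit flags word at byte 0, the fs/vs/cs code addresses
 * at bytes 4/16/28 with each shader's uniforms address at +4 behind
 * it, then one 8-byte attribute record per attribute at 36 + i * 8
 * holding the attribute's address, its size minus one, and its
 * stride.
 */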

int
vc4_validate_shader_recs(struct drm_device *dev,
			 struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t i;
	int ret = 0;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	for (i = 0; i < exec->shader_state_count; i++) {
		ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
		if (ret)
			return ret;
	}

	return ret;
}