vc4_gem.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
vc4_gem.c (36075B)
      1/*
      2 * Copyright © 2014 Broadcom
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 */
     23
     24#include <linux/module.h>
     25#include <linux/platform_device.h>
     26#include <linux/pm_runtime.h>
     27#include <linux/device.h>
     28#include <linux/io.h>
     29#include <linux/sched/signal.h>
     30#include <linux/dma-fence-array.h>
     31
     32#include <drm/drm_syncobj.h>
     33
     34#include "uapi/drm/vc4_drm.h"
     35#include "vc4_drv.h"
     36#include "vc4_regs.h"
     37#include "vc4_trace.h"
     38
     39static void
     40vc4_queue_hangcheck(struct drm_device *dev)
     41{
     42	struct vc4_dev *vc4 = to_vc4_dev(dev);
     43
     44	mod_timer(&vc4->hangcheck.timer,
     45		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
     46}
     47
/* Kernel-side record of a GPU hang, stored in vc4->hang_state until
 * userspace retrieves it through the get-hang-state ioctl.
 */
struct vc4_hang_state {
	/* Register dump and BO count in the layout copied back to userspace. */
	struct drm_vc4_get_hang_state user_state;

	/* NOTE(review): the code visible in this file uses
	 * user_state.bo_count rather than this member -- confirm whether
	 * it is still needed.
	 */
	u32 bo_count;
	/* Array of user_state.bo_count GEM objects, each holding a reference. */
	struct drm_gem_object **bo;
};
     54
     55static void
     56vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
     57{
     58	unsigned int i;
     59
     60	for (i = 0; i < state->user_state.bo_count; i++)
     61		drm_gem_object_put(state->bo[i]);
     62
     63	kfree(state);
     64}
     65
     66int
     67vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
     68			 struct drm_file *file_priv)
     69{
     70	struct drm_vc4_get_hang_state *get_state = data;
     71	struct drm_vc4_get_hang_state_bo *bo_state;
     72	struct vc4_hang_state *kernel_state;
     73	struct drm_vc4_get_hang_state *state;
     74	struct vc4_dev *vc4 = to_vc4_dev(dev);
     75	unsigned long irqflags;
     76	u32 i;
     77	int ret = 0;
     78
     79	if (WARN_ON_ONCE(vc4->is_vc5))
     80		return -ENODEV;
     81
     82	if (!vc4->v3d) {
     83		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
     84		return -ENODEV;
     85	}
     86
     87	spin_lock_irqsave(&vc4->job_lock, irqflags);
     88	kernel_state = vc4->hang_state;
     89	if (!kernel_state) {
     90		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
     91		return -ENOENT;
     92	}
     93	state = &kernel_state->user_state;
     94
     95	/* If the user's array isn't big enough, just return the
     96	 * required array size.
     97	 */
     98	if (get_state->bo_count < state->bo_count) {
     99		get_state->bo_count = state->bo_count;
    100		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    101		return 0;
    102	}
    103
    104	vc4->hang_state = NULL;
    105	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    106
    107	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
    108	state->bo = get_state->bo;
    109	memcpy(get_state, state, sizeof(*state));
    110
    111	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
    112	if (!bo_state) {
    113		ret = -ENOMEM;
    114		goto err_free;
    115	}
    116
    117	for (i = 0; i < state->bo_count; i++) {
    118		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
    119		u32 handle;
    120
    121		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
    122					    &handle);
    123
    124		if (ret) {
    125			state->bo_count = i;
    126			goto err_delete_handle;
    127		}
    128		bo_state[i].handle = handle;
    129		bo_state[i].paddr = vc4_bo->base.paddr;
    130		bo_state[i].size = vc4_bo->base.base.size;
    131	}
    132
    133	if (copy_to_user(u64_to_user_ptr(get_state->bo),
    134			 bo_state,
    135			 state->bo_count * sizeof(*bo_state)))
    136		ret = -EFAULT;
    137
    138err_delete_handle:
    139	if (ret) {
    140		for (i = 0; i < state->bo_count; i++)
    141			drm_gem_handle_delete(file_priv, bo_state[i].handle);
    142	}
    143
    144err_free:
    145	vc4_free_hang_state(dev, kernel_state);
    146	kfree(bo_state);
    147
    148	return ret;
    149}
    150
    151static void
    152vc4_save_hang_state(struct drm_device *dev)
    153{
    154	struct vc4_dev *vc4 = to_vc4_dev(dev);
    155	struct drm_vc4_get_hang_state *state;
    156	struct vc4_hang_state *kernel_state;
    157	struct vc4_exec_info *exec[2];
    158	struct vc4_bo *bo;
    159	unsigned long irqflags;
    160	unsigned int i, j, k, unref_list_count;
    161
    162	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
    163	if (!kernel_state)
    164		return;
    165
    166	state = &kernel_state->user_state;
    167
    168	spin_lock_irqsave(&vc4->job_lock, irqflags);
    169	exec[0] = vc4_first_bin_job(vc4);
    170	exec[1] = vc4_first_render_job(vc4);
    171	if (!exec[0] && !exec[1]) {
    172		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    173		return;
    174	}
    175
    176	/* Get the bos from both binner and renderer into hang state. */
    177	state->bo_count = 0;
    178	for (i = 0; i < 2; i++) {
    179		if (!exec[i])
    180			continue;
    181
    182		unref_list_count = 0;
    183		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
    184			unref_list_count++;
    185		state->bo_count += exec[i]->bo_count + unref_list_count;
    186	}
    187
    188	kernel_state->bo = kcalloc(state->bo_count,
    189				   sizeof(*kernel_state->bo), GFP_ATOMIC);
    190
    191	if (!kernel_state->bo) {
    192		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    193		return;
    194	}
    195
    196	k = 0;
    197	for (i = 0; i < 2; i++) {
    198		if (!exec[i])
    199			continue;
    200
    201		for (j = 0; j < exec[i]->bo_count; j++) {
    202			bo = to_vc4_bo(&exec[i]->bo[j]->base);
    203
    204			/* Retain BOs just in case they were marked purgeable.
    205			 * This prevents the BO from being purged before
    206			 * someone had a chance to dump the hang state.
    207			 */
    208			WARN_ON(!refcount_read(&bo->usecnt));
    209			refcount_inc(&bo->usecnt);
    210			drm_gem_object_get(&exec[i]->bo[j]->base);
    211			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
    212		}
    213
    214		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
    215			/* No need to retain BOs coming from the ->unref_list
    216			 * because they are naturally unpurgeable.
    217			 */
    218			drm_gem_object_get(&bo->base.base);
    219			kernel_state->bo[k++] = &bo->base.base;
    220		}
    221	}
    222
    223	WARN_ON_ONCE(k != state->bo_count);
    224
    225	if (exec[0])
    226		state->start_bin = exec[0]->ct0ca;
    227	if (exec[1])
    228		state->start_render = exec[1]->ct1ca;
    229
    230	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    231
    232	state->ct0ca = V3D_READ(V3D_CTNCA(0));
    233	state->ct0ea = V3D_READ(V3D_CTNEA(0));
    234
    235	state->ct1ca = V3D_READ(V3D_CTNCA(1));
    236	state->ct1ea = V3D_READ(V3D_CTNEA(1));
    237
    238	state->ct0cs = V3D_READ(V3D_CTNCS(0));
    239	state->ct1cs = V3D_READ(V3D_CTNCS(1));
    240
    241	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
    242	state->ct1ra0 = V3D_READ(V3D_CT01RA0);
    243
    244	state->bpca = V3D_READ(V3D_BPCA);
    245	state->bpcs = V3D_READ(V3D_BPCS);
    246	state->bpoa = V3D_READ(V3D_BPOA);
    247	state->bpos = V3D_READ(V3D_BPOS);
    248
    249	state->vpmbase = V3D_READ(V3D_VPMBASE);
    250
    251	state->dbge = V3D_READ(V3D_DBGE);
    252	state->fdbgo = V3D_READ(V3D_FDBGO);
    253	state->fdbgb = V3D_READ(V3D_FDBGB);
    254	state->fdbgr = V3D_READ(V3D_FDBGR);
    255	state->fdbgs = V3D_READ(V3D_FDBGS);
    256	state->errstat = V3D_READ(V3D_ERRSTAT);
    257
    258	/* We need to turn purgeable BOs into unpurgeable ones so that
    259	 * userspace has a chance to dump the hang state before the kernel
    260	 * decides to purge those BOs.
    261	 * Note that BO consistency at dump time cannot be guaranteed. For
    262	 * example, if the owner of these BOs decides to re-use them or mark
    263	 * them purgeable again there's nothing we can do to prevent it.
    264	 */
    265	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
    266		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
    267
    268		if (bo->madv == __VC4_MADV_NOTSUPP)
    269			continue;
    270
    271		mutex_lock(&bo->madv_lock);
    272		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
    273			bo->madv = VC4_MADV_WILLNEED;
    274		refcount_dec(&bo->usecnt);
    275		mutex_unlock(&bo->madv_lock);
    276	}
    277
    278	spin_lock_irqsave(&vc4->job_lock, irqflags);
    279	if (vc4->hang_state) {
    280		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    281		vc4_free_hang_state(dev, kernel_state);
    282	} else {
    283		vc4->hang_state = kernel_state;
    284		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    285	}
    286}
    287
    288static void
    289vc4_reset(struct drm_device *dev)
    290{
    291	struct vc4_dev *vc4 = to_vc4_dev(dev);
    292
    293	DRM_INFO("Resetting GPU.\n");
    294
    295	mutex_lock(&vc4->power_lock);
    296	if (vc4->power_refcount) {
    297		/* Power the device off and back on the by dropping the
    298		 * reference on runtime PM.
    299		 */
    300		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
    301		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
    302	}
    303	mutex_unlock(&vc4->power_lock);
    304
    305	vc4_irq_reset(dev);
    306
    307	/* Rearm the hangcheck -- another job might have been waiting
    308	 * for our hung one to get kicked off, and vc4_irq_reset()
    309	 * would have started it.
    310	 */
    311	vc4_queue_hangcheck(dev);
    312}
    313
    314static void
    315vc4_reset_work(struct work_struct *work)
    316{
    317	struct vc4_dev *vc4 =
    318		container_of(work, struct vc4_dev, hangcheck.reset_work);
    319
    320	vc4_save_hang_state(&vc4->base);
    321
    322	vc4_reset(&vc4->base);
    323}
    324
    325static void
    326vc4_hangcheck_elapsed(struct timer_list *t)
    327{
    328	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
    329	struct drm_device *dev = &vc4->base;
    330	uint32_t ct0ca, ct1ca;
    331	unsigned long irqflags;
    332	struct vc4_exec_info *bin_exec, *render_exec;
    333
    334	spin_lock_irqsave(&vc4->job_lock, irqflags);
    335
    336	bin_exec = vc4_first_bin_job(vc4);
    337	render_exec = vc4_first_render_job(vc4);
    338
    339	/* If idle, we can stop watching for hangs. */
    340	if (!bin_exec && !render_exec) {
    341		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    342		return;
    343	}
    344
    345	ct0ca = V3D_READ(V3D_CTNCA(0));
    346	ct1ca = V3D_READ(V3D_CTNCA(1));
    347
    348	/* If we've made any progress in execution, rearm the timer
    349	 * and wait.
    350	 */
    351	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
    352	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
    353		if (bin_exec)
    354			bin_exec->last_ct0ca = ct0ca;
    355		if (render_exec)
    356			render_exec->last_ct1ca = ct1ca;
    357		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    358		vc4_queue_hangcheck(dev);
    359		return;
    360	}
    361
    362	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    363
    364	/* We've gone too long with no progress, reset.  This has to
    365	 * be done from a work struct, since resetting can sleep and
    366	 * this timer hook isn't allowed to.
    367	 */
    368	schedule_work(&vc4->hangcheck.reset_work);
    369}
    370
    371static void
    372submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
    373{
    374	struct vc4_dev *vc4 = to_vc4_dev(dev);
    375
    376	/* Set the current and end address of the control list.
    377	 * Writing the end register is what starts the job.
    378	 */
    379	V3D_WRITE(V3D_CTNCA(thread), start);
    380	V3D_WRITE(V3D_CTNEA(thread), end);
    381}
    382
/**
 * vc4_wait_for_seqno() - Sleep until a job sequence number has finished.
 * @dev: DRM device
 * @seqno: sequence number to wait for
 * @timeout_ns: maximum time to wait in nanoseconds; 0 only polls, and
 *	~0ull waits without a timeout
 * @interruptible: whether a pending signal may end the wait
 *
 * Return: 0 once vc4->finished_seqno has reached @seqno, -ETIME if the
 * timeout expired first (or @timeout_ns was 0 and the job isn't done),
 * -ERESTARTSYS if an interruptible wait was ended by a signal, or
 * -ENODEV on VC5 hardware.
 */
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (WARN_ON_ONCE(vc4->is_vc5))
		return -ENODEV;

	/* Fast path: already done, no need to touch the wait queue. */
	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		/* Queue ourselves before re-checking the condition so a
		 * wakeup between the check and the sleep is not lost.
		 */
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			/* ~0ull means wait with no timeout. */
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}
    433
    434static void
    435vc4_flush_caches(struct drm_device *dev)
    436{
    437	struct vc4_dev *vc4 = to_vc4_dev(dev);
    438
    439	/* Flush the GPU L2 caches.  These caches sit on top of system
    440	 * L3 (the 128kb or so shared with the CPU), and are
    441	 * non-allocating in the L3.
    442	 */
    443	V3D_WRITE(V3D_L2CACTL,
    444		  V3D_L2CACTL_L2CCLR);
    445
    446	V3D_WRITE(V3D_SLCACTL,
    447		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
    448		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
    449		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
    450		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
    451}
    452
    453static void
    454vc4_flush_texture_caches(struct drm_device *dev)
    455{
    456	struct vc4_dev *vc4 = to_vc4_dev(dev);
    457
    458	V3D_WRITE(V3D_L2CACTL,
    459		  V3D_L2CACTL_L2CCLR);
    460
    461	V3D_WRITE(V3D_SLCACTL,
    462		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
    463		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC));
    464}
    465
    466/* Sets the registers for the next job to be actually be executed in
    467 * the hardware.
    468 *
    469 * The job_lock should be held during this.
    470 */
    471void
    472vc4_submit_next_bin_job(struct drm_device *dev)
    473{
    474	struct vc4_dev *vc4 = to_vc4_dev(dev);
    475	struct vc4_exec_info *exec;
    476
    477	if (WARN_ON_ONCE(vc4->is_vc5))
    478		return;
    479
    480again:
    481	exec = vc4_first_bin_job(vc4);
    482	if (!exec)
    483		return;
    484
    485	vc4_flush_caches(dev);
    486
    487	/* Only start the perfmon if it was not already started by a previous
    488	 * job.
    489	 */
    490	if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
    491		vc4_perfmon_start(vc4, exec->perfmon);
    492
    493	/* Either put the job in the binner if it uses the binner, or
    494	 * immediately move it to the to-be-rendered queue.
    495	 */
    496	if (exec->ct0ca != exec->ct0ea) {
    497		trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca,
    498				    exec->ct0ea);
    499		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
    500	} else {
    501		struct vc4_exec_info *next;
    502
    503		vc4_move_job_to_render(dev, exec);
    504		next = vc4_first_bin_job(vc4);
    505
    506		/* We can't start the next bin job if the previous job had a
    507		 * different perfmon instance attached to it. The same goes
    508		 * if one of them had a perfmon attached to it and the other
    509		 * one doesn't.
    510		 */
    511		if (next && next->perfmon == exec->perfmon)
    512			goto again;
    513	}
    514}
    515
    516void
    517vc4_submit_next_render_job(struct drm_device *dev)
    518{
    519	struct vc4_dev *vc4 = to_vc4_dev(dev);
    520	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
    521
    522	if (!exec)
    523		return;
    524
    525	if (WARN_ON_ONCE(vc4->is_vc5))
    526		return;
    527
    528	/* A previous RCL may have written to one of our textures, and
    529	 * our full cache flush at bin time may have occurred before
    530	 * that RCL completed.  Flush the texture cache now, but not
    531	 * the instructions or uniforms (since we don't write those
    532	 * from an RCL).
    533	 */
    534	vc4_flush_texture_caches(dev);
    535
    536	trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea);
    537	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
    538}
    539
    540void
    541vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
    542{
    543	struct vc4_dev *vc4 = to_vc4_dev(dev);
    544	bool was_empty = list_empty(&vc4->render_job_list);
    545
    546	if (WARN_ON_ONCE(vc4->is_vc5))
    547		return;
    548
    549	list_move_tail(&exec->head, &vc4->render_job_list);
    550	if (was_empty)
    551		vc4_submit_next_render_job(dev);
    552}
    553
    554static void
    555vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
    556{
    557	struct vc4_bo *bo;
    558	unsigned i;
    559
    560	for (i = 0; i < exec->bo_count; i++) {
    561		bo = to_vc4_bo(&exec->bo[i]->base);
    562		bo->seqno = seqno;
    563
    564		dma_resv_add_fence(bo->base.base.resv, exec->fence,
    565				   DMA_RESV_USAGE_READ);
    566	}
    567
    568	list_for_each_entry(bo, &exec->unref_list, unref_head) {
    569		bo->seqno = seqno;
    570	}
    571
    572	for (i = 0; i < exec->rcl_write_bo_count; i++) {
    573		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
    574		bo->write_seqno = seqno;
    575
    576		dma_resv_add_fence(bo->base.base.resv, exec->fence,
    577				   DMA_RESV_USAGE_WRITE);
    578	}
    579}
    580
    581static void
    582vc4_unlock_bo_reservations(struct drm_device *dev,
    583			   struct vc4_exec_info *exec,
    584			   struct ww_acquire_ctx *acquire_ctx)
    585{
    586	int i;
    587
    588	for (i = 0; i < exec->bo_count; i++) {
    589		struct drm_gem_object *bo = &exec->bo[i]->base;
    590
    591		dma_resv_unlock(bo->resv);
    592	}
    593
    594	ww_acquire_fini(acquire_ctx);
    595}
    596
/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list).  They're entirely private
 * to vc4, so we don't attach dma-buf fences to them.
 *
 * On success every BO in exec->bo[] is locked and has one fence slot
 * reserved; the caller releases them with vc4_unlock_bo_reservations().
 * Returns 0 on success or a negative error code, in which case no BO is
 * left locked.
 */
static int
vc4_lock_bo_reservations(struct drm_device *dev,
			 struct vc4_exec_info *exec,
			 struct ww_acquire_ctx *acquire_ctx)
{
	/* Index of the BO that made the previous pass back off, or -1. */
	int contended_lock = -1;
	int i, ret;
	struct drm_gem_object *bo;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	/* After an -EDEADLK back-off, take the contended lock first
	 * through the slow path before retrying the others.
	 */
	if (contended_lock != -1) {
		bo = &exec->bo[contended_lock]->base;
		ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx);
		if (ret) {
			/* NOTE(review): error paths here end the context with
			 * ww_acquire_done() only, while the unlock helper uses
			 * ww_acquire_fini() -- confirm this is intended.
			 */
			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < exec->bo_count; i++) {
		/* Already held from the slow path above. */
		if (i == contended_lock)
			continue;

		bo = &exec->bo[i]->base;

		ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx);
		if (ret) {
			int j;

			/* Release everything at indices below i; this also
			 * covers the contended BO when its index is below i.
			 */
			for (j = 0; j < i; j++) {
				bo = &exec->bo[j]->base;
				dma_resv_unlock(bo->resv);
			}

			/* The contended BO was locked before the loop; if
			 * its index is i or above, the loop above missed it.
			 */
			if (contended_lock != -1 && contended_lock >= i) {
				bo = &exec->bo[contended_lock]->base;

				dma_resv_unlock(bo->resv);
			}

			/* Deadlock avoidance asked us to back off: retry
			 * with this BO taken first.
			 */
			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < exec->bo_count; i++) {
		bo = &exec->bo[i]->base;

		ret = dma_resv_reserve_fences(bo->resv, 1);
		if (ret) {
			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
			return ret;
		}
	}

	return 0;
}
    673
/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 *
 * The BO reservations locked under @acquire_ctx are released here once
 * the job's fence has been attached to them.  If @out_sync is non-NULL,
 * its fence is replaced with the job's fence.
 *
 * Returns 0 on success or -ENOMEM if the fence cannot be allocated; in
 * the failure case the reservations are still locked.
 */
static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
		 struct ww_acquire_ctx *acquire_ctx,
		 struct drm_syncobj *out_sync)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *renderjob;
	uint64_t seqno;
	unsigned long irqflags;
	struct vc4_fence *fence;

	/* Allocate before taking job_lock: GFP_KERNEL may sleep. */
	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	fence->dev = dev;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	/* Sequence numbers are handed out under job_lock. */
	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;

	/* job_lock also serves as the fence's lock. */
	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
	fence->seqno = exec->seqno;
	exec->fence = &fence->base;

	if (out_sync)
		drm_syncobj_replace_fence(out_sync, exec->fence);

	vc4_update_bo_seqnos(exec, seqno);

	/* The fences are attached, so the reservations can be dropped. */
	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no bin job was executing and if the render job (if any) has the
	 * same perfmon as our job attached to it (or if both jobs don't have
	 * perfmon activated), then kick ours off.  Otherwise, it'll get
	 * started when the previous job's flush/render done interrupt occurs.
	 */
	renderjob = vc4_first_render_job(vc4);
	if (vc4_first_bin_job(vc4) == exec &&
	    (!renderjob || renderjob->perfmon == exec->perfmon)) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return 0;
}
    734
/**
 * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @exec: V3D job being set up
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * On success each entry of exec->bo[] holds a GEM reference and a usecnt
 * reference.  On failure everything acquired here is released and
 * exec->bo is left NULL.
 *
 * Return: 0 on success, -EINVAL if the job has no BOs or a handle does
 * not resolve, -ENOMEM on allocation failure, -EFAULT if the handle array
 * cannot be copied in, or the error from vc4_bo_inc_usecnt().
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	/* Zeroed so the unwind code can stop at the first NULL entry. */
	exec->bo = kvmalloc_array(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *),
				    GFP_KERNEL | __GFP_ZERO);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	/* Resolve every handle and take a reference while holding
	 * table_lock.
	 */
	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			break;
		}

		drm_gem_object_get(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

	if (ret)
		goto fail_put_bo;

	for (i = 0; i < exec->bo_count; i++) {
		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
		if (ret)
			goto fail_dec_usecnt;
	}

	kvfree(handles);
	return 0;

fail_dec_usecnt:
	/* Decrease usecnt on acquired objects.
	 * We cannot rely on  vc4_complete_exec() to release resources here,
	 * because vc4_complete_exec() has no information about which BO has
	 * had its ->usecnt incremented.
	 * To make things easier we just free everything explicitly and set
	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
	 * step.
	 */
	/* i indexes the BO whose increment failed; undo only those before it. */
	for (i-- ; i >= 0; i--)
		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));

fail_put_bo:
	/* Release any reference to acquired objects. */
	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
		drm_gem_object_put(&exec->bo[i]->base);

fail:
	kvfree(handles);
	kvfree(exec->bo);
	exec->bo = NULL;
	return ret;
}
    839
/* Copies the user's bin CL, shader records and uniforms into a temporary
 * buffer, allocates the job's exec BO, and validates the copied data
 * against it.
 *
 * Layout, used both in the temporary buffer and in the exec BO:
 *   bin CL at offset 0, shader records at the next 16-byte boundary,
 *   uniforms right after the shader records.
 * The temporary buffer additionally holds shader_rec_count entries of
 * struct vc4_shader_state after the uniforms.
 *
 * Returns 0 on success or a negative error code.  The exec BO, once
 * created, is put on exec->unref_list and is not released here on failure.
 */
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	/* All sizes come from userspace; each check catches a 32-bit
	 * wrap-around in one of the sums computed above.
	 */
	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
					  sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_DEBUG("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	/* NOTE(review): temp is freed on every exit path, including success,
	 * so these exec->*_u and exec->shader_state pointers are stale once
	 * this function returns -- presumably they are only used by the
	 * validation calls below; confirm.
	 */
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	/* Putting the BO on the unref_list hands its reference to the job. */
	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	/* _v addresses are CPU mappings of the exec BO, _p are the
	 * matching bus addresses.
	 */
	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	if (exec->found_tile_binning_mode_config_packet) {
		ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used);
		if (ret)
			goto fail;
	}

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
	return ret;
}
    959
    960static void
    961vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
    962{
    963	struct vc4_dev *vc4 = to_vc4_dev(dev);
    964	unsigned long irqflags;
    965	unsigned i;
    966
    967	/* If we got force-completed because of GPU reset rather than
    968	 * through our IRQ handler, signal the fence now.
    969	 */
    970	if (exec->fence) {
    971		dma_fence_signal(exec->fence);
    972		dma_fence_put(exec->fence);
    973	}
    974
    975	if (exec->bo) {
    976		for (i = 0; i < exec->bo_count; i++) {
    977			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
    978
    979			vc4_bo_dec_usecnt(bo);
    980			drm_gem_object_put(&exec->bo[i]->base);
    981		}
    982		kvfree(exec->bo);
    983	}
    984
    985	while (!list_empty(&exec->unref_list)) {
    986		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
    987						     struct vc4_bo, unref_head);
    988		list_del(&bo->unref_head);
    989		drm_gem_object_put(&bo->base.base);
    990	}
    991
    992	/* Free up the allocation of any bin slots we used. */
    993	spin_lock_irqsave(&vc4->job_lock, irqflags);
    994	vc4->bin_alloc_used &= ~exec->bin_slots;
    995	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
    996
    997	/* Release the reference on the binner BO if needed. */
    998	if (exec->bin_bo_used)
    999		vc4_v3d_bin_bo_put(vc4);
   1000
   1001	/* Release the reference we had on the perf monitor. */
   1002	vc4_perfmon_put(exec->perfmon);
   1003
   1004	vc4_v3d_pm_put(vc4);
   1005
   1006	kfree(exec);
   1007}
   1008
   1009void
   1010vc4_job_handle_completed(struct vc4_dev *vc4)
   1011{
   1012	unsigned long irqflags;
   1013	struct vc4_seqno_cb *cb, *cb_temp;
   1014
   1015	if (WARN_ON_ONCE(vc4->is_vc5))
   1016		return;
   1017
   1018	spin_lock_irqsave(&vc4->job_lock, irqflags);
   1019	while (!list_empty(&vc4->job_done_list)) {
   1020		struct vc4_exec_info *exec =
   1021			list_first_entry(&vc4->job_done_list,
   1022					 struct vc4_exec_info, head);
   1023		list_del(&exec->head);
   1024
   1025		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
   1026		vc4_complete_exec(&vc4->base, exec);
   1027		spin_lock_irqsave(&vc4->job_lock, irqflags);
   1028	}
   1029
   1030	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
   1031		if (cb->seqno <= vc4->finished_seqno) {
   1032			list_del_init(&cb->work.entry);
   1033			schedule_work(&cb->work);
   1034		}
   1035	}
   1036
   1037	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
   1038}
   1039
   1040static void vc4_seqno_cb_work(struct work_struct *work)
   1041{
   1042	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
   1043
   1044	cb->func(cb);
   1045}
   1046
   1047int vc4_queue_seqno_cb(struct drm_device *dev,
   1048		       struct vc4_seqno_cb *cb, uint64_t seqno,
   1049		       void (*func)(struct vc4_seqno_cb *cb))
   1050{
   1051	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1052	unsigned long irqflags;
   1053
   1054	if (WARN_ON_ONCE(vc4->is_vc5))
   1055		return -ENODEV;
   1056
   1057	cb->func = func;
   1058	INIT_WORK(&cb->work, vc4_seqno_cb_work);
   1059
   1060	spin_lock_irqsave(&vc4->job_lock, irqflags);
   1061	if (seqno > vc4->finished_seqno) {
   1062		cb->seqno = seqno;
   1063		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
   1064	} else {
   1065		schedule_work(&cb->work);
   1066	}
   1067	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
   1068
   1069	return 0;
   1070}
   1071
   1072/* Scheduled when any job has been completed, this walks the list of
   1073 * jobs that had completed and unrefs their BOs and frees their exec
   1074 * structs.
   1075 */
   1076static void
   1077vc4_job_done_work(struct work_struct *work)
   1078{
   1079	struct vc4_dev *vc4 =
   1080		container_of(work, struct vc4_dev, job_done_work);
   1081
   1082	vc4_job_handle_completed(vc4);
   1083}
   1084
   1085static int
   1086vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
   1087				uint64_t seqno,
   1088				uint64_t *timeout_ns)
   1089{
   1090	unsigned long start = jiffies;
   1091	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
   1092
   1093	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
   1094		uint64_t delta = jiffies_to_nsecs(jiffies - start);
   1095
   1096		if (*timeout_ns >= delta)
   1097			*timeout_ns -= delta;
   1098	}
   1099
   1100	return ret;
   1101}
   1102
   1103int
   1104vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
   1105		     struct drm_file *file_priv)
   1106{
   1107	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1108	struct drm_vc4_wait_seqno *args = data;
   1109
   1110	if (WARN_ON_ONCE(vc4->is_vc5))
   1111		return -ENODEV;
   1112
   1113	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
   1114					       &args->timeout_ns);
   1115}
   1116
   1117int
   1118vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
   1119		  struct drm_file *file_priv)
   1120{
   1121	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1122	int ret;
   1123	struct drm_vc4_wait_bo *args = data;
   1124	struct drm_gem_object *gem_obj;
   1125	struct vc4_bo *bo;
   1126
   1127	if (WARN_ON_ONCE(vc4->is_vc5))
   1128		return -ENODEV;
   1129
   1130	if (args->pad != 0)
   1131		return -EINVAL;
   1132
   1133	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
   1134	if (!gem_obj) {
   1135		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
   1136		return -EINVAL;
   1137	}
   1138	bo = to_vc4_bo(gem_obj);
   1139
   1140	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
   1141					      &args->timeout_ns);
   1142
   1143	drm_gem_object_put(gem_obj);
   1144	return ret;
   1145}
   1146
   1147/**
   1148 * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
   1149 * @dev: DRM device
   1150 * @data: ioctl argument
   1151 * @file_priv: DRM file for this fd
   1152 *
   1153 * This is the main entrypoint for userspace to submit a 3D frame to
   1154 * the GPU.  Userspace provides the binner command list (if
   1155 * applicable), and the kernel sets up the render command list to draw
   1156 * to the framebuffer described in the ioctl, using the command lists
   1157 * that the 3D engine's binner will produce.
   1158 */
   1159int
   1160vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
   1161		    struct drm_file *file_priv)
   1162{
   1163	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1164	struct vc4_file *vc4file = file_priv->driver_priv;
   1165	struct drm_vc4_submit_cl *args = data;
   1166	struct drm_syncobj *out_sync = NULL;
   1167	struct vc4_exec_info *exec;
   1168	struct ww_acquire_ctx acquire_ctx;
   1169	struct dma_fence *in_fence;
   1170	int ret = 0;
   1171
   1172	trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size,
   1173				  args->shader_rec_size,
   1174				  args->bo_handle_count);
   1175
   1176	if (WARN_ON_ONCE(vc4->is_vc5))
   1177		return -ENODEV;
   1178
   1179	if (!vc4->v3d) {
   1180		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
   1181		return -ENODEV;
   1182	}
   1183
   1184	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
   1185			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
   1186			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
   1187			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
   1188		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
   1189		return -EINVAL;
   1190	}
   1191
   1192	if (args->pad2 != 0) {
   1193		DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
   1194		return -EINVAL;
   1195	}
   1196
   1197	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
   1198	if (!exec) {
   1199		DRM_ERROR("malloc failure on exec struct\n");
   1200		return -ENOMEM;
   1201	}
   1202	exec->dev = vc4;
   1203
   1204	ret = vc4_v3d_pm_get(vc4);
   1205	if (ret) {
   1206		kfree(exec);
   1207		return ret;
   1208	}
   1209
   1210	exec->args = args;
   1211	INIT_LIST_HEAD(&exec->unref_list);
   1212
   1213	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
   1214	if (ret)
   1215		goto fail;
   1216
   1217	if (args->perfmonid) {
   1218		exec->perfmon = vc4_perfmon_find(vc4file,
   1219						 args->perfmonid);
   1220		if (!exec->perfmon) {
   1221			ret = -ENOENT;
   1222			goto fail;
   1223		}
   1224	}
   1225
   1226	if (args->in_sync) {
   1227		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
   1228					     0, 0, &in_fence);
   1229		if (ret)
   1230			goto fail;
   1231
   1232		/* When the fence (or fence array) is exclusively from our
   1233		 * context we can skip the wait since jobs are executed in
   1234		 * order of their submission through this ioctl and this can
   1235		 * only have fences from a prior job.
   1236		 */
   1237		if (!dma_fence_match_context(in_fence,
   1238					     vc4->dma_fence_context)) {
   1239			ret = dma_fence_wait(in_fence, true);
   1240			if (ret) {
   1241				dma_fence_put(in_fence);
   1242				goto fail;
   1243			}
   1244		}
   1245
   1246		dma_fence_put(in_fence);
   1247	}
   1248
   1249	if (exec->args->bin_cl_size != 0) {
   1250		ret = vc4_get_bcl(dev, exec);
   1251		if (ret)
   1252			goto fail;
   1253	} else {
   1254		exec->ct0ca = 0;
   1255		exec->ct0ea = 0;
   1256	}
   1257
   1258	ret = vc4_get_rcl(dev, exec);
   1259	if (ret)
   1260		goto fail;
   1261
   1262	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
   1263	if (ret)
   1264		goto fail;
   1265
   1266	if (args->out_sync) {
   1267		out_sync = drm_syncobj_find(file_priv, args->out_sync);
   1268		if (!out_sync) {
   1269			ret = -EINVAL;
   1270			goto fail;
   1271		}
   1272
   1273		/* We replace the fence in out_sync in vc4_queue_submit since
   1274		 * the render job could execute immediately after that call.
   1275		 * If it finishes before our ioctl processing resumes the
   1276		 * render job fence could already have been freed.
   1277		 */
   1278	}
   1279
   1280	/* Clear this out of the struct we'll be putting in the queue,
   1281	 * since it's part of our stack.
   1282	 */
   1283	exec->args = NULL;
   1284
   1285	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
   1286
   1287	/* The syncobj isn't part of the exec data and we need to free our
   1288	 * reference even if job submission failed.
   1289	 */
   1290	if (out_sync)
   1291		drm_syncobj_put(out_sync);
   1292
   1293	if (ret)
   1294		goto fail;
   1295
   1296	/* Return the seqno for our job. */
   1297	args->seqno = vc4->emit_seqno;
   1298
   1299	return 0;
   1300
   1301fail:
   1302	vc4_complete_exec(&vc4->base, exec);
   1303
   1304	return ret;
   1305}
   1306
   1307static void vc4_gem_destroy(struct drm_device *dev, void *unused);
   1308int vc4_gem_init(struct drm_device *dev)
   1309{
   1310	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1311
   1312	if (WARN_ON_ONCE(vc4->is_vc5))
   1313		return -ENODEV;
   1314
   1315	vc4->dma_fence_context = dma_fence_context_alloc(1);
   1316
   1317	INIT_LIST_HEAD(&vc4->bin_job_list);
   1318	INIT_LIST_HEAD(&vc4->render_job_list);
   1319	INIT_LIST_HEAD(&vc4->job_done_list);
   1320	INIT_LIST_HEAD(&vc4->seqno_cb_list);
   1321	spin_lock_init(&vc4->job_lock);
   1322
   1323	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
   1324	timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);
   1325
   1326	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
   1327
   1328	mutex_init(&vc4->power_lock);
   1329
   1330	INIT_LIST_HEAD(&vc4->purgeable.list);
   1331	mutex_init(&vc4->purgeable.lock);
   1332
   1333	return drmm_add_action_or_reset(dev, vc4_gem_destroy, NULL);
   1334}
   1335
   1336static void vc4_gem_destroy(struct drm_device *dev, void *unused)
   1337{
   1338	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1339
   1340	/* Waiting for exec to finish would need to be done before
   1341	 * unregistering V3D.
   1342	 */
   1343	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
   1344
   1345	/* V3D should already have disabled its interrupt and cleared
   1346	 * the overflow allocation registers.  Now free the object.
   1347	 */
   1348	if (vc4->bin_bo) {
   1349		drm_gem_object_put(&vc4->bin_bo->base.base);
   1350		vc4->bin_bo = NULL;
   1351	}
   1352
   1353	if (vc4->hang_state)
   1354		vc4_free_hang_state(dev, vc4->hang_state);
   1355}
   1356
   1357int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
   1358			  struct drm_file *file_priv)
   1359{
   1360	struct vc4_dev *vc4 = to_vc4_dev(dev);
   1361	struct drm_vc4_gem_madvise *args = data;
   1362	struct drm_gem_object *gem_obj;
   1363	struct vc4_bo *bo;
   1364	int ret;
   1365
   1366	if (WARN_ON_ONCE(vc4->is_vc5))
   1367		return -ENODEV;
   1368
   1369	switch (args->madv) {
   1370	case VC4_MADV_DONTNEED:
   1371	case VC4_MADV_WILLNEED:
   1372		break;
   1373	default:
   1374		return -EINVAL;
   1375	}
   1376
   1377	if (args->pad != 0)
   1378		return -EINVAL;
   1379
   1380	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
   1381	if (!gem_obj) {
   1382		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
   1383		return -ENOENT;
   1384	}
   1385
   1386	bo = to_vc4_bo(gem_obj);
   1387
   1388	/* Only BOs exposed to userspace can be purged. */
   1389	if (bo->madv == __VC4_MADV_NOTSUPP) {
   1390		DRM_DEBUG("madvise not supported on this BO\n");
   1391		ret = -EINVAL;
   1392		goto out_put_gem;
   1393	}
   1394
   1395	/* Not sure it's safe to purge imported BOs. Let's just assume it's
   1396	 * not until proven otherwise.
   1397	 */
   1398	if (gem_obj->import_attach) {
   1399		DRM_DEBUG("madvise not supported on imported BOs\n");
   1400		ret = -EINVAL;
   1401		goto out_put_gem;
   1402	}
   1403
   1404	mutex_lock(&bo->madv_lock);
   1405
   1406	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
   1407	    !refcount_read(&bo->usecnt)) {
   1408		/* If the BO is about to be marked as purgeable, is not used
   1409		 * and is not already purgeable or purged, add it to the
   1410		 * purgeable list.
   1411		 */
   1412		vc4_bo_add_to_purgeable_pool(bo);
   1413	} else if (args->madv == VC4_MADV_WILLNEED &&
   1414		   bo->madv == VC4_MADV_DONTNEED &&
   1415		   !refcount_read(&bo->usecnt)) {
   1416		/* The BO has not been purged yet, just remove it from
   1417		 * the purgeable list.
   1418		 */
   1419		vc4_bo_remove_from_purgeable_pool(bo);
   1420	}
   1421
   1422	/* Save the purged state. */
   1423	args->retained = bo->madv != __VC4_MADV_PURGED;
   1424
   1425	/* Update internal madv state only if the bo was not purged. */
   1426	if (bo->madv != __VC4_MADV_PURGED)
   1427		bo->madv = args->madv;
   1428
   1429	mutex_unlock(&bo->madv_lock);
   1430
   1431	ret = 0;
   1432
   1433out_put_gem:
   1434	drm_gem_object_put(gem_obj);
   1435
   1436	return ret;
   1437}