i915_gem_execbuffer.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
i915_gem_execbuffer.c (92929B)
      1/*
      2 * SPDX-License-Identifier: MIT
      3 *
      4 * Copyright © 2008,2010 Intel Corporation
      5 */
      6
      7#include <linux/dma-resv.h>
      8#include <linux/highmem.h>
      9#include <linux/intel-iommu.h>
     10#include <linux/sync_file.h>
     11#include <linux/uaccess.h>
     12
     13#include <drm/drm_syncobj.h>
     14
     15#include "display/intel_frontbuffer.h"
     16
     17#include "gem/i915_gem_ioctls.h"
     18#include "gt/intel_context.h"
     19#include "gt/intel_gpu_commands.h"
     20#include "gt/intel_gt.h"
     21#include "gt/intel_gt_buffer_pool.h"
     22#include "gt/intel_gt_pm.h"
     23#include "gt/intel_ring.h"
     24
     25#include "pxp/intel_pxp.h"
     26
     27#include "i915_cmd_parser.h"
     28#include "i915_drv.h"
     29#include "i915_file_private.h"
     30#include "i915_gem_clflush.h"
     31#include "i915_gem_context.h"
     32#include "i915_gem_evict.h"
     33#include "i915_gem_ioctls.h"
     34#include "i915_trace.h"
     35#include "i915_user_extensions.h"
     36
     37struct eb_vma {
     38	struct i915_vma *vma;
     39	unsigned int flags;
     40
     41	/** This vma's place in the execbuf reservation list */
     42	struct drm_i915_gem_exec_object2 *exec;
     43	struct list_head bind_link;
     44	struct list_head reloc_link;
     45
     46	struct hlist_node node;
     47	u32 handle;
     48};
     49
     50enum {
     51	FORCE_CPU_RELOC = 1,
     52	FORCE_GTT_RELOC,
     53	FORCE_GPU_RELOC,
     54#define DBG_FORCE_RELOC 0 /* choose one of the above! */
     55};
     56
     57/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
     58#define __EXEC_OBJECT_HAS_PIN		BIT(30)
     59#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
     60#define __EXEC_OBJECT_USERPTR_INIT	BIT(28)
     61#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
     62#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
     63#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above + */
     64#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
     65
     66#define __EXEC_HAS_RELOC	BIT(31)
     67#define __EXEC_ENGINE_PINNED	BIT(30)
     68#define __EXEC_USERPTR_USED	BIT(29)
     69#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
     70#define UPDATE			PIN_OFFSET_FIXED
     71
     72#define BATCH_OFFSET_BIAS (256*1024)
     73
     74#define __I915_EXEC_ILLEGAL_FLAGS \
     75	(__I915_EXEC_UNKNOWN_FLAGS | \
     76	 I915_EXEC_CONSTANTS_MASK  | \
     77	 I915_EXEC_RESOURCE_STREAMER)
     78
     79/* Catch emission of unexpected errors for CI! */
     80#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
     81#undef EINVAL
     82#define EINVAL ({ \
     83	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
     84	22; \
     85})
     86#endif
     87
     88/**
     89 * DOC: User command execution
     90 *
     91 * Userspace submits commands to be executed on the GPU as an instruction
     92 * stream within a GEM object we call a batchbuffer. This instructions may
     93 * refer to other GEM objects containing auxiliary state such as kernels,
     94 * samplers, render targets and even secondary batchbuffers. Userspace does
     95 * not know where in the GPU memory these objects reside and so before the
     96 * batchbuffer is passed to the GPU for execution, those addresses in the
     97 * batchbuffer and auxiliary objects are updated. This is known as relocation,
     98 * or patching. To try and avoid having to relocate each object on the next
     99 * execution, userspace is told the location of those objects in this pass,
    100 * but this remains just a hint as the kernel may choose a new location for
    101 * any object in the future.
    102 *
    103 * At the level of talking to the hardware, submitting a batchbuffer for the
    104 * GPU to execute is to add content to a buffer from which the HW
    105 * command streamer is reading.
    106 *
    107 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
    108 *    Execlists, this command is not placed on the same buffer as the
    109 *    remaining items.
    110 *
    111 * 2. Add a command to invalidate caches to the buffer.
    112 *
    113 * 3. Add a batchbuffer start command to the buffer; the start command is
    114 *    essentially a token together with the GPU address of the batchbuffer
    115 *    to be executed.
    116 *
    117 * 4. Add a pipeline flush to the buffer.
    118 *
    119 * 5. Add a memory write command to the buffer to record when the GPU
    120 *    is done executing the batchbuffer. The memory write writes the
    121 *    global sequence number of the request, ``i915_request::global_seqno``;
    122 *    the i915 driver uses the current value in the register to determine
    123 *    if the GPU has completed the batchbuffer.
    124 *
    125 * 6. Add a user interrupt command to the buffer. This command instructs
    126 *    the GPU to issue an interrupt when the command, pipeline flush and
    127 *    memory write are completed.
    128 *
    129 * 7. Inform the hardware of the additional commands added to the buffer
    130 *    (by updating the tail pointer).
    131 *
    132 * Processing an execbuf ioctl is conceptually split up into a few phases.
    133 *
    134 * 1. Validation - Ensure all the pointers, handles and flags are valid.
    135 * 2. Reservation - Assign GPU address space for every object
    136 * 3. Relocation - Update any addresses to point to the final locations
    137 * 4. Serialisation - Order the request with respect to its dependencies
    138 * 5. Construction - Construct a request to execute the batchbuffer
    139 * 6. Submission (at some point in the future execution)
    140 *
    141 * Reserving resources for the execbuf is the most complicated phase. We
    142 * neither want to have to migrate the object in the address space, nor do
    143 * we want to have to update any relocations pointing to this object. Ideally,
    144 * we want to leave the object where it is and for all the existing relocations
    145 * to match. If the object is given a new address, or if userspace thinks the
    146 * object is elsewhere, we have to parse all the relocation entries and update
    147 * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that
    148 * all the target addresses in all of its objects match the value in the
    149 * relocation entries and that they all match the presumed offsets given by the
    150 * list of execbuffer objects. Using this knowledge, we know that if we haven't
    151 * moved any buffers, all the relocation entries are valid and we can skip
    152 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
    153 * hang.) The requirement for using I915_EXEC_NO_RELOC are:
    154 *
    155 *      The addresses written in the objects must match the corresponding
    156 *      reloc.presumed_offset which in turn must match the corresponding
    157 *      execobject.offset.
    158 *
    159 *      Any render targets written to in the batch must be flagged with
    160 *      EXEC_OBJECT_WRITE.
    161 *
    162 *      To avoid stalling, execobject.offset should match the current
    163 *      address of that object within the active context.
    164 *
    165 * The reservation is done is multiple phases. First we try and keep any
    166 * object already bound in its current location - so as long as meets the
    167 * constraints imposed by the new execbuffer. Any object left unbound after the
    168 * first pass is then fitted into any available idle space. If an object does
    169 * not fit, all objects are removed from the reservation and the process rerun
    170 * after sorting the objects into a priority order (more difficult to fit
    171 * objects are tried first). Failing that, the entire VM is cleared and we try
    172 * to fit the execbuf once last time before concluding that it simply will not
    173 * fit.
    174 *
    175 * A small complication to all of this is that we allow userspace not only to
    176 * specify an alignment and a size for the object in the address space, but
    177 * we also allow userspace to specify the exact offset. This objects are
    178 * simpler to place (the location is known a priori) all we have to do is make
    179 * sure the space is available.
    180 *
    181 * Once all the objects are in place, patching up the buried pointers to point
    182 * to the final locations is a fairly simple job of walking over the relocation
    183 * entry arrays, looking up the right address and rewriting the value into
    184 * the object. Simple! ... The relocation entries are stored in user memory
    185 * and so to access them we have to copy them into a local buffer. That copy
    186 * has to avoid taking any pagefaults as they may lead back to a GEM object
    187 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
    188 * the relocation into multiple passes. First we try to do everything within an
    189 * atomic context (avoid the pagefaults) which requires that we never wait. If
    190 * we detect that we may wait, or if we need to fault, then we have to fallback
    191 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
    192 * bells yet?) Dropping the mutex means that we lose all the state we have
    193 * built up so far for the execbuf and we must reset any global data. However,
    194 * we do leave the objects pinned in their final locations - which is a
    195 * potential issue for concurrent execbufs. Once we have left the mutex, we can
    196 * allocate and copy all the relocation entries into a large array at our
    197 * leisure, reacquire the mutex, reclaim all the objects and other state and
    198 * then proceed to update any incorrect addresses with the objects.
    199 *
    200 * As we process the relocation entries, we maintain a record of whether the
    201 * object is being written to. Using NORELOC, we expect userspace to provide
    202 * this information instead. We also check whether we can skip the relocation
    203 * by comparing the expected value inside the relocation entry with the target's
    204 * final address. If they differ, we have to map the current object and rewrite
    205 * the 4 or 8 byte pointer within.
    206 *
    207 * Serialising an execbuf is quite simple according to the rules of the GEM
    208 * ABI. Execution within each context is ordered by the order of submission.
    209 * Writes to any GEM object are in order of submission and are exclusive. Reads
    210 * from a GEM object are unordered with respect to other reads, but ordered by
    211 * writes. A write submitted after a read cannot occur before the read, and
    212 * similarly any read submitted after a write cannot occur before the write.
    213 * Writes are ordered between engines such that only one write occurs at any
    214 * time (completing any reads beforehand) - using semaphores where available
    215 * and CPU serialisation otherwise. Other GEM access obey the same rules, any
    216 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
    217 * reads before starting, and any read (either using set-domain or pread) must
    218 * flush all GPU writes before starting. (Note we only employ a barrier before,
    219 * we currently rely on userspace not concurrently starting a new execution
    220 * whilst reading or writing to an object. This may be an advantage or not
    221 * depending on how much you trust userspace not to shoot themselves in the
    222 * foot.) Serialisation may just result in the request being inserted into
    223 * a DAG awaiting its turn, but most simple is to wait on the CPU until
    224 * all dependencies are resolved.
    225 *
    226 * After all of that, is just a matter of closing the request and handing it to
    227 * the hardware (well, leaving it in a queue to be executed). However, we also
    228 * offer the ability for batchbuffers to be run with elevated privileges so
    229 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
    230 * Before any batch is given extra privileges we first must check that it
    231 * contains no nefarious instructions, we check that each instruction is from
    232 * our whitelist and all registers are also from an allowed list. We first
    233 * copy the user's batchbuffer to a shadow (so that the user doesn't have
    234 * access to it, either by the CPU or GPU as we scan it) and then parse each
    235 * instruction. If everything is ok, we set a flag telling the hardware to run
    236 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
    237 */
    238
    239struct eb_fence {
    240	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
    241	struct dma_fence *dma_fence;
    242	u64 value;
    243	struct dma_fence_chain *chain_fence;
    244};
    245
    246struct i915_execbuffer {
    247	struct drm_i915_private *i915; /** i915 backpointer */
    248	struct drm_file *file; /** per-file lookup tables and limits */
    249	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
    250	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
    251	struct eb_vma *vma;
    252
    253	struct intel_gt *gt; /* gt for the execbuf */
    254	struct intel_context *context; /* logical state for the request */
    255	struct i915_gem_context *gem_context; /** caller's context */
    256
    257	/** our requests to build */
    258	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
    259	/** identity of the batch obj/vma */
    260	struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
    261	struct i915_vma *trampoline; /** trampoline used for chaining */
    262
    263	/** used for excl fence in dma_resv objects when > 1 BB submitted */
    264	struct dma_fence *composite_fence;
    265
    266	/** actual size of execobj[] as we may extend it for the cmdparser */
    267	unsigned int buffer_count;
    268
    269	/* number of batches in execbuf IOCTL */
    270	unsigned int num_batches;
    271
    272	/** list of vma not yet bound during reservation phase */
    273	struct list_head unbound;
    274
    275	/** list of vma that have execobj.relocation_count */
    276	struct list_head relocs;
    277
    278	struct i915_gem_ww_ctx ww;
    279
    280	/**
    281	 * Track the most recently used object for relocations, as we
    282	 * frequently have to perform multiple relocations within the same
    283	 * obj/page
    284	 */
    285	struct reloc_cache {
    286		struct drm_mm_node node; /** temporary GTT binding */
    287		unsigned long vaddr; /** Current kmap address */
    288		unsigned long page; /** Currently mapped page index */
    289		unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
    290		bool use_64bit_reloc : 1;
    291		bool has_llc : 1;
    292		bool has_fence : 1;
    293		bool needs_unfenced : 1;
    294	} reloc_cache;
    295
    296	u64 invalid_flags; /** Set of execobj.flags that are invalid */
    297
    298	/** Length of batch within object */
    299	u64 batch_len[MAX_ENGINE_INSTANCE + 1];
    300	u32 batch_start_offset; /** Location within object of batch */
    301	u32 batch_flags; /** Flags composed for emit_bb_start() */
    302	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
    303
    304	/**
    305	 * Indicate either the size of the hastable used to resolve
    306	 * relocation handles, or if negative that we are using a direct
    307	 * index into the execobj[].
    308	 */
    309	int lut_size;
    310	struct hlist_head *buckets; /** ht for relocation handles */
    311
    312	struct eb_fence *fences;
    313	unsigned long num_fences;
    314#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
    315	struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
    316#endif
    317};
    318
    319static int eb_parse(struct i915_execbuffer *eb);
    320static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
    321static void eb_unpin_engine(struct i915_execbuffer *eb);
    322static void eb_capture_release(struct i915_execbuffer *eb);
    323
    324static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
    325{
    326	return intel_engine_requires_cmd_parser(eb->context->engine) ||
    327		(intel_engine_using_cmd_parser(eb->context->engine) &&
    328		 eb->args->batch_len);
    329}
    330
    331static int eb_create(struct i915_execbuffer *eb)
    332{
    333	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
    334		unsigned int size = 1 + ilog2(eb->buffer_count);
    335
    336		/*
    337		 * Without a 1:1 association between relocation handles and
    338		 * the execobject[] index, we instead create a hashtable.
    339		 * We size it dynamically based on available memory, starting
    340		 * first with 1:1 assocative hash and scaling back until
    341		 * the allocation succeeds.
    342		 *
    343		 * Later on we use a positive lut_size to indicate we are
    344		 * using this hashtable, and a negative value to indicate a
    345		 * direct lookup.
    346		 */
    347		do {
    348			gfp_t flags;
    349
    350			/* While we can still reduce the allocation size, don't
    351			 * raise a warning and allow the allocation to fail.
    352			 * On the last pass though, we want to try as hard
    353			 * as possible to perform the allocation and warn
    354			 * if it fails.
    355			 */
    356			flags = GFP_KERNEL;
    357			if (size > 1)
    358				flags |= __GFP_NORETRY | __GFP_NOWARN;
    359
    360			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
    361					      flags);
    362			if (eb->buckets)
    363				break;
    364		} while (--size);
    365
    366		if (unlikely(!size))
    367			return -ENOMEM;
    368
    369		eb->lut_size = size;
    370	} else {
    371		eb->lut_size = -eb->buffer_count;
    372	}
    373
    374	return 0;
    375}
    376
    377static bool
    378eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
    379		 const struct i915_vma *vma,
    380		 unsigned int flags)
    381{
    382	if (vma->node.size < entry->pad_to_size)
    383		return true;
    384
    385	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
    386		return true;
    387
    388	if (flags & EXEC_OBJECT_PINNED &&
    389	    vma->node.start != entry->offset)
    390		return true;
    391
    392	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
    393	    vma->node.start < BATCH_OFFSET_BIAS)
    394		return true;
    395
    396	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
    397	    (vma->node.start + vma->node.size + 4095) >> 32)
    398		return true;
    399
    400	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
    401	    !i915_vma_is_map_and_fenceable(vma))
    402		return true;
    403
    404	return false;
    405}
    406
    407static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
    408			unsigned int exec_flags)
    409{
    410	u64 pin_flags = 0;
    411
    412	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
    413		pin_flags |= PIN_GLOBAL;
    414
    415	/*
    416	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
    417	 * limit address to the first 4GBs for unflagged objects.
    418	 */
    419	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
    420		pin_flags |= PIN_ZONE_4G;
    421
    422	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
    423		pin_flags |= PIN_MAPPABLE;
    424
    425	if (exec_flags & EXEC_OBJECT_PINNED)
    426		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
    427	else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
    428		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
    429
    430	return pin_flags;
    431}
    432
    433static inline int
    434eb_pin_vma(struct i915_execbuffer *eb,
    435	   const struct drm_i915_gem_exec_object2 *entry,
    436	   struct eb_vma *ev)
    437{
    438	struct i915_vma *vma = ev->vma;
    439	u64 pin_flags;
    440	int err;
    441
    442	if (vma->node.size)
    443		pin_flags = vma->node.start;
    444	else
    445		pin_flags = entry->offset & PIN_OFFSET_MASK;
    446
    447	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
    448	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
    449		pin_flags |= PIN_GLOBAL;
    450
    451	/* Attempt to reuse the current location if available */
    452	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
    453	if (err == -EDEADLK)
    454		return err;
    455
    456	if (unlikely(err)) {
    457		if (entry->flags & EXEC_OBJECT_PINNED)
    458			return err;
    459
    460		/* Failing that pick any _free_ space if suitable */
    461		err = i915_vma_pin_ww(vma, &eb->ww,
    462					     entry->pad_to_size,
    463					     entry->alignment,
    464					     eb_pin_flags(entry, ev->flags) |
    465					     PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
    466		if (unlikely(err))
    467			return err;
    468	}
    469
    470	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
    471		err = i915_vma_pin_fence(vma);
    472		if (unlikely(err))
    473			return err;
    474
    475		if (vma->fence)
    476			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
    477	}
    478
    479	ev->flags |= __EXEC_OBJECT_HAS_PIN;
    480	if (eb_vma_misplaced(entry, vma, ev->flags))
    481		return -EBADSLT;
    482
    483	return 0;
    484}
    485
    486static inline void
    487eb_unreserve_vma(struct eb_vma *ev)
    488{
    489	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
    490		__i915_vma_unpin_fence(ev->vma);
    491
    492	ev->flags &= ~__EXEC_OBJECT_RESERVED;
    493}
    494
    495static int
    496eb_validate_vma(struct i915_execbuffer *eb,
    497		struct drm_i915_gem_exec_object2 *entry,
    498		struct i915_vma *vma)
    499{
    500	/* Relocations are disallowed for all platforms after TGL-LP.  This
    501	 * also covers all platforms with local memory.
    502	 */
    503	if (entry->relocation_count &&
    504	    GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915))
    505		return -EINVAL;
    506
    507	if (unlikely(entry->flags & eb->invalid_flags))
    508		return -EINVAL;
    509
    510	if (unlikely(entry->alignment &&
    511		     !is_power_of_2_u64(entry->alignment)))
    512		return -EINVAL;
    513
    514	/*
    515	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
    516	 * any non-page-aligned or non-canonical addresses.
    517	 */
    518	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
    519		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
    520		return -EINVAL;
    521
    522	/* pad_to_size was once a reserved field, so sanitize it */
    523	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
    524		if (unlikely(offset_in_page(entry->pad_to_size)))
    525			return -EINVAL;
    526	} else {
    527		entry->pad_to_size = 0;
    528	}
    529	/*
    530	 * From drm_mm perspective address space is continuous,
    531	 * so from this point we're always using non-canonical
    532	 * form internally.
    533	 */
    534	entry->offset = gen8_noncanonical_addr(entry->offset);
    535
    536	if (!eb->reloc_cache.has_fence) {
    537		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
    538	} else {
    539		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
    540		     eb->reloc_cache.needs_unfenced) &&
    541		    i915_gem_object_is_tiled(vma->obj))
    542			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
    543	}
    544
    545	return 0;
    546}
    547
    548static inline bool
    549is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
    550{
    551	return eb->args->flags & I915_EXEC_BATCH_FIRST ?
    552		buffer_idx < eb->num_batches :
    553		buffer_idx >= eb->args->buffer_count - eb->num_batches;
    554}
    555
    556static int
    557eb_add_vma(struct i915_execbuffer *eb,
    558	   unsigned int *current_batch,
    559	   unsigned int i,
    560	   struct i915_vma *vma)
    561{
    562	struct drm_i915_private *i915 = eb->i915;
    563	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
    564	struct eb_vma *ev = &eb->vma[i];
    565
    566	ev->vma = vma;
    567	ev->exec = entry;
    568	ev->flags = entry->flags;
    569
    570	if (eb->lut_size > 0) {
    571		ev->handle = entry->handle;
    572		hlist_add_head(&ev->node,
    573			       &eb->buckets[hash_32(entry->handle,
    574						    eb->lut_size)]);
    575	}
    576
    577	if (entry->relocation_count)
    578		list_add_tail(&ev->reloc_link, &eb->relocs);
    579
    580	/*
    581	 * SNA is doing fancy tricks with compressing batch buffers, which leads
    582	 * to negative relocation deltas. Usually that works out ok since the
    583	 * relocate address is still positive, except when the batch is placed
    584	 * very low in the GTT. Ensure this doesn't happen.
    585	 *
    586	 * Note that actual hangs have only been observed on gen7, but for
    587	 * paranoia do it everywhere.
    588	 */
    589	if (is_batch_buffer(eb, i)) {
    590		if (entry->relocation_count &&
    591		    !(ev->flags & EXEC_OBJECT_PINNED))
    592			ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
    593		if (eb->reloc_cache.has_fence)
    594			ev->flags |= EXEC_OBJECT_NEEDS_FENCE;
    595
    596		eb->batches[*current_batch] = ev;
    597
    598		if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) {
    599			drm_dbg(&i915->drm,
    600				"Attempting to use self-modifying batch buffer\n");
    601			return -EINVAL;
    602		}
    603
    604		if (range_overflows_t(u64,
    605				      eb->batch_start_offset,
    606				      eb->args->batch_len,
    607				      ev->vma->size)) {
    608			drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
    609			return -EINVAL;
    610		}
    611
    612		if (eb->args->batch_len == 0)
    613			eb->batch_len[*current_batch] = ev->vma->size -
    614				eb->batch_start_offset;
    615		else
    616			eb->batch_len[*current_batch] = eb->args->batch_len;
    617		if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */
    618			drm_dbg(&i915->drm, "Invalid batch length\n");
    619			return -EINVAL;
    620		}
    621
    622		++*current_batch;
    623	}
    624
    625	return 0;
    626}
    627
    628static inline int use_cpu_reloc(const struct reloc_cache *cache,
    629				const struct drm_i915_gem_object *obj)
    630{
    631	if (!i915_gem_object_has_struct_page(obj))
    632		return false;
    633
    634	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
    635		return true;
    636
    637	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
    638		return false;
    639
    640	return (cache->has_llc ||
    641		obj->cache_dirty ||
    642		obj->cache_level != I915_CACHE_NONE);
    643}
    644
    645static int eb_reserve_vma(struct i915_execbuffer *eb,
    646			  struct eb_vma *ev,
    647			  u64 pin_flags)
    648{
    649	struct drm_i915_gem_exec_object2 *entry = ev->exec;
    650	struct i915_vma *vma = ev->vma;
    651	int err;
    652
    653	if (drm_mm_node_allocated(&vma->node) &&
    654	    eb_vma_misplaced(entry, vma, ev->flags)) {
    655		err = i915_vma_unbind(vma);
    656		if (err)
    657			return err;
    658	}
    659
    660	err = i915_vma_pin_ww(vma, &eb->ww,
    661			   entry->pad_to_size, entry->alignment,
    662			   eb_pin_flags(entry, ev->flags) | pin_flags);
    663	if (err)
    664		return err;
    665
    666	if (entry->offset != vma->node.start) {
    667		entry->offset = vma->node.start | UPDATE;
    668		eb->args->flags |= __EXEC_HAS_RELOC;
    669	}
    670
    671	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
    672		err = i915_vma_pin_fence(vma);
    673		if (unlikely(err))
    674			return err;
    675
    676		if (vma->fence)
    677			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
    678	}
    679
    680	ev->flags |= __EXEC_OBJECT_HAS_PIN;
    681	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
    682
    683	return 0;
    684}
    685
    686static bool eb_unbind(struct i915_execbuffer *eb, bool force)
    687{
    688	const unsigned int count = eb->buffer_count;
    689	unsigned int i;
    690	struct list_head last;
    691	bool unpinned = false;
    692
    693	/* Resort *all* the objects into priority order */
    694	INIT_LIST_HEAD(&eb->unbound);
    695	INIT_LIST_HEAD(&last);
    696
    697	for (i = 0; i < count; i++) {
    698		struct eb_vma *ev = &eb->vma[i];
    699		unsigned int flags = ev->flags;
    700
    701		if (!force && flags & EXEC_OBJECT_PINNED &&
    702		    flags & __EXEC_OBJECT_HAS_PIN)
    703			continue;
    704
    705		unpinned = true;
    706		eb_unreserve_vma(ev);
    707
    708		if (flags & EXEC_OBJECT_PINNED)
    709			/* Pinned must have their slot */
    710			list_add(&ev->bind_link, &eb->unbound);
    711		else if (flags & __EXEC_OBJECT_NEEDS_MAP)
    712			/* Map require the lowest 256MiB (aperture) */
    713			list_add_tail(&ev->bind_link, &eb->unbound);
    714		else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
    715			/* Prioritise 4GiB region for restricted bo */
    716			list_add(&ev->bind_link, &last);
    717		else
    718			list_add_tail(&ev->bind_link, &last);
    719	}
    720
    721	list_splice_tail(&last, &eb->unbound);
    722	return unpinned;
    723}
    724
    725static int eb_reserve(struct i915_execbuffer *eb)
    726{
    727	struct eb_vma *ev;
    728	unsigned int pass;
    729	int err = 0;
    730	bool unpinned;
    731
    732	/*
    733	 * Attempt to pin all of the buffers into the GTT.
    734	 * This is done in 2 phases:
    735	 *
    736	 * 1. Unbind all objects that do not match the GTT constraints for
    737	 *    the execbuffer (fenceable, mappable, alignment etc).
    738	 * 2. Bind new objects.
    739	 *
    740	 * This avoid unnecessary unbinding of later objects in order to make
    741	 * room for the earlier objects *unless* we need to defragment.
    742	 *
    743	 * Defragmenting is skipped if all objects are pinned at a fixed location.
    744	 */
    745	for (pass = 0; pass <= 2; pass++) {
    746		int pin_flags = PIN_USER | PIN_VALIDATE;
    747
    748		if (pass == 0)
    749			pin_flags |= PIN_NONBLOCK;
    750
    751		if (pass >= 1)
    752			unpinned = eb_unbind(eb, pass == 2);
    753
    754		if (pass == 2) {
    755			err = mutex_lock_interruptible(&eb->context->vm->mutex);
    756			if (!err) {
    757				err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
    758				mutex_unlock(&eb->context->vm->mutex);
    759			}
    760			if (err)
    761				return err;
    762		}
    763
    764		list_for_each_entry(ev, &eb->unbound, bind_link) {
    765			err = eb_reserve_vma(eb, ev, pin_flags);
    766			if (err)
    767				break;
    768		}
    769
    770		if (err != -ENOSPC)
    771			break;
    772	}
    773
    774	return err;
    775}
    776
    777static int eb_select_context(struct i915_execbuffer *eb)
    778{
    779	struct i915_gem_context *ctx;
    780
    781	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
    782	if (unlikely(IS_ERR(ctx)))
    783		return PTR_ERR(ctx);
    784
    785	eb->gem_context = ctx;
    786	if (i915_gem_context_has_full_ppgtt(ctx))
    787		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
    788
    789	return 0;
    790}
    791
    792static int __eb_add_lut(struct i915_execbuffer *eb,
    793			u32 handle, struct i915_vma *vma)
    794{
    795	struct i915_gem_context *ctx = eb->gem_context;
    796	struct i915_lut_handle *lut;
    797	int err;
    798
    799	lut = i915_lut_handle_alloc();
    800	if (unlikely(!lut))
    801		return -ENOMEM;
    802
    803	i915_vma_get(vma);
    804	if (!atomic_fetch_inc(&vma->open_count))
    805		i915_vma_reopen(vma);
    806	lut->handle = handle;
    807	lut->ctx = ctx;
    808
    809	/* Check that the context hasn't been closed in the meantime */
    810	err = -EINTR;
    811	if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
    812		if (likely(!i915_gem_context_is_closed(ctx)))
    813			err = radix_tree_insert(&ctx->handles_vma, handle, vma);
    814		else
    815			err = -ENOENT;
    816		if (err == 0) { /* And nor has this handle */
    817			struct drm_i915_gem_object *obj = vma->obj;
    818
    819			spin_lock(&obj->lut_lock);
    820			if (idr_find(&eb->file->object_idr, handle) == obj) {
    821				list_add(&lut->obj_link, &obj->lut_list);
    822			} else {
    823				radix_tree_delete(&ctx->handles_vma, handle);
    824				err = -ENOENT;
    825			}
    826			spin_unlock(&obj->lut_lock);
    827		}
    828		mutex_unlock(&ctx->lut_mutex);
    829	}
    830	if (unlikely(err))
    831		goto err;
    832
    833	return 0;
    834
    835err:
    836	i915_vma_close(vma);
    837	i915_vma_put(vma);
    838	i915_lut_handle_free(lut);
    839	return err;
    840}
    841
    842static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
    843{
    844	struct i915_address_space *vm = eb->context->vm;
    845
    846	do {
    847		struct drm_i915_gem_object *obj;
    848		struct i915_vma *vma;
    849		int err;
    850
    851		rcu_read_lock();
    852		vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
    853		if (likely(vma && vma->vm == vm))
    854			vma = i915_vma_tryget(vma);
    855		rcu_read_unlock();
    856		if (likely(vma))
    857			return vma;
    858
    859		obj = i915_gem_object_lookup(eb->file, handle);
    860		if (unlikely(!obj))
    861			return ERR_PTR(-ENOENT);
    862
    863		/*
    864		 * If the user has opted-in for protected-object tracking, make
    865		 * sure the object encryption can be used.
    866		 * We only need to do this when the object is first used with
    867		 * this context, because the context itself will be banned when
    868		 * the protected objects become invalid.
    869		 */
    870		if (i915_gem_context_uses_protected_content(eb->gem_context) &&
    871		    i915_gem_object_is_protected(obj)) {
    872			err = intel_pxp_key_check(&vm->gt->pxp, obj, true);
    873			if (err) {
    874				i915_gem_object_put(obj);
    875				return ERR_PTR(err);
    876			}
    877		}
    878
    879		vma = i915_vma_instance(obj, vm, NULL);
    880		if (IS_ERR(vma)) {
    881			i915_gem_object_put(obj);
    882			return vma;
    883		}
    884
    885		err = __eb_add_lut(eb, handle, vma);
    886		if (likely(!err))
    887			return vma;
    888
    889		i915_gem_object_put(obj);
    890		if (err != -EEXIST)
    891			return ERR_PTR(err);
    892	} while (1);
    893}
    894
    895static int eb_lookup_vmas(struct i915_execbuffer *eb)
    896{
    897	unsigned int i, current_batch = 0;
    898	int err = 0;
    899
    900	INIT_LIST_HEAD(&eb->relocs);
    901
    902	for (i = 0; i < eb->buffer_count; i++) {
    903		struct i915_vma *vma;
    904
    905		vma = eb_lookup_vma(eb, eb->exec[i].handle);
    906		if (IS_ERR(vma)) {
    907			err = PTR_ERR(vma);
    908			goto err;
    909		}
    910
    911		err = eb_validate_vma(eb, &eb->exec[i], vma);
    912		if (unlikely(err)) {
    913			i915_vma_put(vma);
    914			goto err;
    915		}
    916
    917		err = eb_add_vma(eb, &current_batch, i, vma);
    918		if (err)
    919			return err;
    920
    921		if (i915_gem_object_is_userptr(vma->obj)) {
    922			err = i915_gem_object_userptr_submit_init(vma->obj);
    923			if (err) {
    924				if (i + 1 < eb->buffer_count) {
    925					/*
    926					 * Execbuffer code expects last vma entry to be NULL,
    927					 * since we already initialized this entry,
    928					 * set the next value to NULL or we mess up
    929					 * cleanup handling.
    930					 */
    931					eb->vma[i + 1].vma = NULL;
    932				}
    933
    934				return err;
    935			}
    936
    937			eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
    938			eb->args->flags |= __EXEC_USERPTR_USED;
    939		}
    940	}
    941
    942	return 0;
    943
    944err:
    945	eb->vma[i].vma = NULL;
    946	return err;
    947}
    948
    949static int eb_lock_vmas(struct i915_execbuffer *eb)
    950{
    951	unsigned int i;
    952	int err;
    953
    954	for (i = 0; i < eb->buffer_count; i++) {
    955		struct eb_vma *ev = &eb->vma[i];
    956		struct i915_vma *vma = ev->vma;
    957
    958		err = i915_gem_object_lock(vma->obj, &eb->ww);
    959		if (err)
    960			return err;
    961	}
    962
    963	return 0;
    964}
    965
    966static int eb_validate_vmas(struct i915_execbuffer *eb)
    967{
    968	unsigned int i;
    969	int err;
    970
    971	INIT_LIST_HEAD(&eb->unbound);
    972
    973	err = eb_lock_vmas(eb);
    974	if (err)
    975		return err;
    976
    977	for (i = 0; i < eb->buffer_count; i++) {
    978		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
    979		struct eb_vma *ev = &eb->vma[i];
    980		struct i915_vma *vma = ev->vma;
    981
    982		err = eb_pin_vma(eb, entry, ev);
    983		if (err == -EDEADLK)
    984			return err;
    985
    986		if (!err) {
    987			if (entry->offset != vma->node.start) {
    988				entry->offset = vma->node.start | UPDATE;
    989				eb->args->flags |= __EXEC_HAS_RELOC;
    990			}
    991		} else {
    992			eb_unreserve_vma(ev);
    993
    994			list_add_tail(&ev->bind_link, &eb->unbound);
    995			if (drm_mm_node_allocated(&vma->node)) {
    996				err = i915_vma_unbind(vma);
    997				if (err)
    998					return err;
    999			}
   1000		}
   1001
   1002		/* Reserve enough slots to accommodate composite fences */
   1003		err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
   1004		if (err)
   1005			return err;
   1006
   1007		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
   1008			   eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
   1009	}
   1010
   1011	if (!list_empty(&eb->unbound))
   1012		return eb_reserve(eb);
   1013
   1014	return 0;
   1015}
   1016
   1017static struct eb_vma *
   1018eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
   1019{
   1020	if (eb->lut_size < 0) {
   1021		if (handle >= -eb->lut_size)
   1022			return NULL;
   1023		return &eb->vma[handle];
   1024	} else {
   1025		struct hlist_head *head;
   1026		struct eb_vma *ev;
   1027
   1028		head = &eb->buckets[hash_32(handle, eb->lut_size)];
   1029		hlist_for_each_entry(ev, head, node) {
   1030			if (ev->handle == handle)
   1031				return ev;
   1032		}
   1033		return NULL;
   1034	}
   1035}
   1036
   1037static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
   1038{
   1039	const unsigned int count = eb->buffer_count;
   1040	unsigned int i;
   1041
   1042	for (i = 0; i < count; i++) {
   1043		struct eb_vma *ev = &eb->vma[i];
   1044		struct i915_vma *vma = ev->vma;
   1045
   1046		if (!vma)
   1047			break;
   1048
   1049		eb_unreserve_vma(ev);
   1050
   1051		if (final)
   1052			i915_vma_put(vma);
   1053	}
   1054
   1055	eb_capture_release(eb);
   1056	eb_unpin_engine(eb);
   1057}
   1058
   1059static void eb_destroy(const struct i915_execbuffer *eb)
   1060{
   1061	if (eb->lut_size > 0)
   1062		kfree(eb->buckets);
   1063}
   1064
   1065static inline u64
   1066relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
   1067		  const struct i915_vma *target)
   1068{
   1069	return gen8_canonical_addr((int)reloc->delta + target->node.start);
   1070}
   1071
   1072static void reloc_cache_init(struct reloc_cache *cache,
   1073			     struct drm_i915_private *i915)
   1074{
   1075	cache->page = -1;
   1076	cache->vaddr = 0;
   1077	/* Must be a variable in the struct to allow GCC to unroll. */
   1078	cache->graphics_ver = GRAPHICS_VER(i915);
   1079	cache->has_llc = HAS_LLC(i915);
   1080	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
   1081	cache->has_fence = cache->graphics_ver < 4;
   1082	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
   1083	cache->node.flags = 0;
   1084}
   1085
   1086static inline void *unmask_page(unsigned long p)
   1087{
   1088	return (void *)(uintptr_t)(p & PAGE_MASK);
   1089}
   1090
   1091static inline unsigned int unmask_flags(unsigned long p)
   1092{
   1093	return p & ~PAGE_MASK;
   1094}
   1095
   1096#define KMAP 0x4 /* after CLFLUSH_FLAGS */
   1097
   1098static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
   1099{
   1100	struct drm_i915_private *i915 =
   1101		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
   1102	return to_gt(i915)->ggtt;
   1103}
   1104
   1105static void reloc_cache_unmap(struct reloc_cache *cache)
   1106{
   1107	void *vaddr;
   1108
   1109	if (!cache->vaddr)
   1110		return;
   1111
   1112	vaddr = unmask_page(cache->vaddr);
   1113	if (cache->vaddr & KMAP)
   1114		kunmap_atomic(vaddr);
   1115	else
   1116		io_mapping_unmap_atomic((void __iomem *)vaddr);
   1117}
   1118
   1119static void reloc_cache_remap(struct reloc_cache *cache,
   1120			      struct drm_i915_gem_object *obj)
   1121{
   1122	void *vaddr;
   1123
   1124	if (!cache->vaddr)
   1125		return;
   1126
   1127	if (cache->vaddr & KMAP) {
   1128		struct page *page = i915_gem_object_get_page(obj, cache->page);
   1129
   1130		vaddr = kmap_atomic(page);
   1131		cache->vaddr = unmask_flags(cache->vaddr) |
   1132			(unsigned long)vaddr;
   1133	} else {
   1134		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
   1135		unsigned long offset;
   1136
   1137		offset = cache->node.start;
   1138		if (!drm_mm_node_allocated(&cache->node))
   1139			offset += cache->page << PAGE_SHIFT;
   1140
   1141		cache->vaddr = (unsigned long)
   1142			io_mapping_map_atomic_wc(&ggtt->iomap, offset);
   1143	}
   1144}
   1145
   1146static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
   1147{
   1148	void *vaddr;
   1149
   1150	if (!cache->vaddr)
   1151		return;
   1152
   1153	vaddr = unmask_page(cache->vaddr);
   1154	if (cache->vaddr & KMAP) {
   1155		struct drm_i915_gem_object *obj =
   1156			(struct drm_i915_gem_object *)cache->node.mm;
   1157		if (cache->vaddr & CLFLUSH_AFTER)
   1158			mb();
   1159
   1160		kunmap_atomic(vaddr);
   1161		i915_gem_object_finish_access(obj);
   1162	} else {
   1163		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
   1164
   1165		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
   1166		io_mapping_unmap_atomic((void __iomem *)vaddr);
   1167
   1168		if (drm_mm_node_allocated(&cache->node)) {
   1169			ggtt->vm.clear_range(&ggtt->vm,
   1170					     cache->node.start,
   1171					     cache->node.size);
   1172			mutex_lock(&ggtt->vm.mutex);
   1173			drm_mm_remove_node(&cache->node);
   1174			mutex_unlock(&ggtt->vm.mutex);
   1175		} else {
   1176			i915_vma_unpin((struct i915_vma *)cache->node.mm);
   1177		}
   1178	}
   1179
   1180	cache->vaddr = 0;
   1181	cache->page = -1;
   1182}
   1183
   1184static void *reloc_kmap(struct drm_i915_gem_object *obj,
   1185			struct reloc_cache *cache,
   1186			unsigned long pageno)
   1187{
   1188	void *vaddr;
   1189	struct page *page;
   1190
   1191	if (cache->vaddr) {
   1192		kunmap_atomic(unmask_page(cache->vaddr));
   1193	} else {
   1194		unsigned int flushes;
   1195		int err;
   1196
   1197		err = i915_gem_object_prepare_write(obj, &flushes);
   1198		if (err)
   1199			return ERR_PTR(err);
   1200
   1201		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
   1202		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
   1203
   1204		cache->vaddr = flushes | KMAP;
   1205		cache->node.mm = (void *)obj;
   1206		if (flushes)
   1207			mb();
   1208	}
   1209
   1210	page = i915_gem_object_get_page(obj, pageno);
   1211	if (!obj->mm.dirty)
   1212		set_page_dirty(page);
   1213
   1214	vaddr = kmap_atomic(page);
   1215	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
   1216	cache->page = pageno;
   1217
   1218	return vaddr;
   1219}
   1220
   1221static void *reloc_iomap(struct i915_vma *batch,
   1222			 struct i915_execbuffer *eb,
   1223			 unsigned long page)
   1224{
   1225	struct drm_i915_gem_object *obj = batch->obj;
   1226	struct reloc_cache *cache = &eb->reloc_cache;
   1227	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
   1228	unsigned long offset;
   1229	void *vaddr;
   1230
   1231	if (cache->vaddr) {
   1232		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
   1233		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
   1234	} else {
   1235		struct i915_vma *vma = ERR_PTR(-ENODEV);
   1236		int err;
   1237
   1238		if (i915_gem_object_is_tiled(obj))
   1239			return ERR_PTR(-EINVAL);
   1240
   1241		if (use_cpu_reloc(cache, obj))
   1242			return NULL;
   1243
   1244		err = i915_gem_object_set_to_gtt_domain(obj, true);
   1245		if (err)
   1246			return ERR_PTR(err);
   1247
   1248		/*
   1249		 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
   1250		 * VMA from the object list because we no longer pin.
   1251		 *
   1252		 * Only attempt to pin the batch buffer to ggtt if the current batch
   1253		 * is not inside ggtt, or the batch buffer is not misplaced.
   1254		 */
   1255		if (!i915_is_ggtt(batch->vm) ||
   1256		    !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
   1257			vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
   1258							  PIN_MAPPABLE |
   1259							  PIN_NONBLOCK /* NOWARN */ |
   1260							  PIN_NOEVICT);
   1261		}
   1262
   1263		if (vma == ERR_PTR(-EDEADLK))
   1264			return vma;
   1265
   1266		if (IS_ERR(vma)) {
   1267			memset(&cache->node, 0, sizeof(cache->node));
   1268			mutex_lock(&ggtt->vm.mutex);
   1269			err = drm_mm_insert_node_in_range
   1270				(&ggtt->vm.mm, &cache->node,
   1271				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
   1272				 0, ggtt->mappable_end,
   1273				 DRM_MM_INSERT_LOW);
   1274			mutex_unlock(&ggtt->vm.mutex);
   1275			if (err) /* no inactive aperture space, use cpu reloc */
   1276				return NULL;
   1277		} else {
   1278			cache->node.start = vma->node.start;
   1279			cache->node.mm = (void *)vma;
   1280		}
   1281	}
   1282
   1283	offset = cache->node.start;
   1284	if (drm_mm_node_allocated(&cache->node)) {
   1285		ggtt->vm.insert_page(&ggtt->vm,
   1286				     i915_gem_object_get_dma_address(obj, page),
   1287				     offset, I915_CACHE_NONE, 0);
   1288	} else {
   1289		offset += page << PAGE_SHIFT;
   1290	}
   1291
   1292	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
   1293							 offset);
   1294	cache->page = page;
   1295	cache->vaddr = (unsigned long)vaddr;
   1296
   1297	return vaddr;
   1298}
   1299
   1300static void *reloc_vaddr(struct i915_vma *vma,
   1301			 struct i915_execbuffer *eb,
   1302			 unsigned long page)
   1303{
   1304	struct reloc_cache *cache = &eb->reloc_cache;
   1305	void *vaddr;
   1306
   1307	if (cache->page == page) {
   1308		vaddr = unmask_page(cache->vaddr);
   1309	} else {
   1310		vaddr = NULL;
   1311		if ((cache->vaddr & KMAP) == 0)
   1312			vaddr = reloc_iomap(vma, eb, page);
   1313		if (!vaddr)
   1314			vaddr = reloc_kmap(vma->obj, cache, page);
   1315	}
   1316
   1317	return vaddr;
   1318}
   1319
   1320static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
   1321{
   1322	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
   1323		if (flushes & CLFLUSH_BEFORE)
   1324			drm_clflush_virt_range(addr, sizeof(*addr));
   1325
   1326		*addr = value;
   1327
   1328		/*
   1329		 * Writes to the same cacheline are serialised by the CPU
   1330		 * (including clflush). On the write path, we only require
   1331		 * that it hits memory in an orderly fashion and place
   1332		 * mb barriers at the start and end of the relocation phase
   1333		 * to ensure ordering of clflush wrt to the system.
   1334		 */
   1335		if (flushes & CLFLUSH_AFTER)
   1336			drm_clflush_virt_range(addr, sizeof(*addr));
   1337	} else
   1338		*addr = value;
   1339}
   1340
   1341static u64
   1342relocate_entry(struct i915_vma *vma,
   1343	       const struct drm_i915_gem_relocation_entry *reloc,
   1344	       struct i915_execbuffer *eb,
   1345	       const struct i915_vma *target)
   1346{
   1347	u64 target_addr = relocation_target(reloc, target);
   1348	u64 offset = reloc->offset;
   1349	bool wide = eb->reloc_cache.use_64bit_reloc;
   1350	void *vaddr;
   1351
   1352repeat:
   1353	vaddr = reloc_vaddr(vma, eb,
   1354			    offset >> PAGE_SHIFT);
   1355	if (IS_ERR(vaddr))
   1356		return PTR_ERR(vaddr);
   1357
   1358	GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
   1359	clflush_write32(vaddr + offset_in_page(offset),
   1360			lower_32_bits(target_addr),
   1361			eb->reloc_cache.vaddr);
   1362
   1363	if (wide) {
   1364		offset += sizeof(u32);
   1365		target_addr >>= 32;
   1366		wide = false;
   1367		goto repeat;
   1368	}
   1369
   1370	return target->node.start | UPDATE;
   1371}
   1372
   1373static u64
   1374eb_relocate_entry(struct i915_execbuffer *eb,
   1375		  struct eb_vma *ev,
   1376		  const struct drm_i915_gem_relocation_entry *reloc)
   1377{
   1378	struct drm_i915_private *i915 = eb->i915;
   1379	struct eb_vma *target;
   1380	int err;
   1381
   1382	/* we've already hold a reference to all valid objects */
   1383	target = eb_get_vma(eb, reloc->target_handle);
   1384	if (unlikely(!target))
   1385		return -ENOENT;
   1386
   1387	/* Validate that the target is in a valid r/w GPU domain */
   1388	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
   1389		drm_dbg(&i915->drm, "reloc with multiple write domains: "
   1390			  "target %d offset %d "
   1391			  "read %08x write %08x",
   1392			  reloc->target_handle,
   1393			  (int) reloc->offset,
   1394			  reloc->read_domains,
   1395			  reloc->write_domain);
   1396		return -EINVAL;
   1397	}
   1398	if (unlikely((reloc->write_domain | reloc->read_domains)
   1399		     & ~I915_GEM_GPU_DOMAINS)) {
   1400		drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
   1401			  "target %d offset %d "
   1402			  "read %08x write %08x",
   1403			  reloc->target_handle,
   1404			  (int) reloc->offset,
   1405			  reloc->read_domains,
   1406			  reloc->write_domain);
   1407		return -EINVAL;
   1408	}
   1409
   1410	if (reloc->write_domain) {
   1411		target->flags |= EXEC_OBJECT_WRITE;
   1412
   1413		/*
   1414		 * Sandybridge PPGTT errata: We need a global gtt mapping
   1415		 * for MI and pipe_control writes because the gpu doesn't
   1416		 * properly redirect them through the ppgtt for non_secure
   1417		 * batchbuffers.
   1418		 */
   1419		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
   1420		    GRAPHICS_VER(eb->i915) == 6 &&
   1421		    !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
   1422			struct i915_vma *vma = target->vma;
   1423
   1424			reloc_cache_unmap(&eb->reloc_cache);
   1425			mutex_lock(&vma->vm->mutex);
   1426			err = i915_vma_bind(target->vma,
   1427					    target->vma->obj->cache_level,
   1428					    PIN_GLOBAL, NULL, NULL);
   1429			mutex_unlock(&vma->vm->mutex);
   1430			reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
   1431			if (err)
   1432				return err;
   1433		}
   1434	}
   1435
   1436	/*
   1437	 * If the relocation already has the right value in it, no
   1438	 * more work needs to be done.
   1439	 */
   1440	if (!DBG_FORCE_RELOC &&
   1441	    gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
   1442		return 0;
   1443
   1444	/* Check that the relocation address is valid... */
   1445	if (unlikely(reloc->offset >
   1446		     ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
   1447		drm_dbg(&i915->drm, "Relocation beyond object bounds: "
   1448			  "target %d offset %d size %d.\n",
   1449			  reloc->target_handle,
   1450			  (int)reloc->offset,
   1451			  (int)ev->vma->size);
   1452		return -EINVAL;
   1453	}
   1454	if (unlikely(reloc->offset & 3)) {
   1455		drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "
   1456			  "target %d offset %d.\n",
   1457			  reloc->target_handle,
   1458			  (int)reloc->offset);
   1459		return -EINVAL;
   1460	}
   1461
   1462	/*
   1463	 * If we write into the object, we need to force the synchronisation
   1464	 * barrier, either with an asynchronous clflush or if we executed the
   1465	 * patching using the GPU (though that should be serialised by the
   1466	 * timeline). To be completely sure, and since we are required to
   1467	 * do relocations we are already stalling, disable the user's opt
   1468	 * out of our synchronisation.
   1469	 */
   1470	ev->flags &= ~EXEC_OBJECT_ASYNC;
   1471
   1472	/* and update the user's relocation entry */
   1473	return relocate_entry(ev->vma, reloc, eb, target->vma);
   1474}
   1475
   1476static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
   1477{
   1478#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
   1479	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
   1480	const struct drm_i915_gem_exec_object2 *entry = ev->exec;
   1481	struct drm_i915_gem_relocation_entry __user *urelocs =
   1482		u64_to_user_ptr(entry->relocs_ptr);
   1483	unsigned long remain = entry->relocation_count;
   1484
   1485	if (unlikely(remain > N_RELOC(ULONG_MAX)))
   1486		return -EINVAL;
   1487
   1488	/*
   1489	 * We must check that the entire relocation array is safe
   1490	 * to read. However, if the array is not writable the user loses
   1491	 * the updated relocation values.
   1492	 */
   1493	if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
   1494		return -EFAULT;
   1495
   1496	do {
   1497		struct drm_i915_gem_relocation_entry *r = stack;
   1498		unsigned int count =
   1499			min_t(unsigned long, remain, ARRAY_SIZE(stack));
   1500		unsigned int copied;
   1501
   1502		/*
   1503		 * This is the fast path and we cannot handle a pagefault
   1504		 * whilst holding the struct mutex lest the user pass in the
   1505		 * relocations contained within a mmaped bo. For in such a case
   1506		 * we, the page fault handler would call i915_gem_fault() and
   1507		 * we would try to acquire the struct mutex again. Obviously
   1508		 * this is bad and so lockdep complains vehemently.
   1509		 */
   1510		pagefault_disable();
   1511		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
   1512		pagefault_enable();
   1513		if (unlikely(copied)) {
   1514			remain = -EFAULT;
   1515			goto out;
   1516		}
   1517
   1518		remain -= count;
   1519		do {
   1520			u64 offset = eb_relocate_entry(eb, ev, r);
   1521
   1522			if (likely(offset == 0)) {
   1523			} else if ((s64)offset < 0) {
   1524				remain = (int)offset;
   1525				goto out;
   1526			} else {
   1527				/*
   1528				 * Note that reporting an error now
   1529				 * leaves everything in an inconsistent
   1530				 * state as we have *already* changed
   1531				 * the relocation value inside the
   1532				 * object. As we have not changed the
   1533				 * reloc.presumed_offset or will not
   1534				 * change the execobject.offset, on the
   1535				 * call we may not rewrite the value
   1536				 * inside the object, leaving it
   1537				 * dangling and causing a GPU hang. Unless
   1538				 * userspace dynamically rebuilds the
   1539				 * relocations on each execbuf rather than
   1540				 * presume a static tree.
   1541				 *
   1542				 * We did previously check if the relocations
   1543				 * were writable (access_ok), an error now
   1544				 * would be a strange race with mprotect,
   1545				 * having already demonstrated that we
   1546				 * can read from this userspace address.
   1547				 */
   1548				offset = gen8_canonical_addr(offset & ~UPDATE);
   1549				__put_user(offset,
   1550					   &urelocs[r - stack].presumed_offset);
   1551			}
   1552		} while (r++, --count);
   1553		urelocs += ARRAY_SIZE(stack);
   1554	} while (remain);
   1555out:
   1556	reloc_cache_reset(&eb->reloc_cache, eb);
   1557	return remain;
   1558}
   1559
   1560static int
   1561eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
   1562{
   1563	const struct drm_i915_gem_exec_object2 *entry = ev->exec;
   1564	struct drm_i915_gem_relocation_entry *relocs =
   1565		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
   1566	unsigned int i;
   1567	int err;
   1568
   1569	for (i = 0; i < entry->relocation_count; i++) {
   1570		u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
   1571
   1572		if ((s64)offset < 0) {
   1573			err = (int)offset;
   1574			goto err;
   1575		}
   1576	}
   1577	err = 0;
   1578err:
   1579	reloc_cache_reset(&eb->reloc_cache, eb);
   1580	return err;
   1581}
   1582
   1583static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
   1584{
   1585	const char __user *addr, *end;
   1586	unsigned long size;
   1587	char __maybe_unused c;
   1588
   1589	size = entry->relocation_count;
   1590	if (size == 0)
   1591		return 0;
   1592
   1593	if (size > N_RELOC(ULONG_MAX))
   1594		return -EINVAL;
   1595
   1596	addr = u64_to_user_ptr(entry->relocs_ptr);
   1597	size *= sizeof(struct drm_i915_gem_relocation_entry);
   1598	if (!access_ok(addr, size))
   1599		return -EFAULT;
   1600
   1601	end = addr + size;
   1602	for (; addr < end; addr += PAGE_SIZE) {
   1603		int err = __get_user(c, addr);
   1604		if (err)
   1605			return err;
   1606	}
   1607	return __get_user(c, end - 1);
   1608}
   1609
   1610static int eb_copy_relocations(const struct i915_execbuffer *eb)
   1611{
   1612	struct drm_i915_gem_relocation_entry *relocs;
   1613	const unsigned int count = eb->buffer_count;
   1614	unsigned int i;
   1615	int err;
   1616
   1617	for (i = 0; i < count; i++) {
   1618		const unsigned int nreloc = eb->exec[i].relocation_count;
   1619		struct drm_i915_gem_relocation_entry __user *urelocs;
   1620		unsigned long size;
   1621		unsigned long copied;
   1622
   1623		if (nreloc == 0)
   1624			continue;
   1625
   1626		err = check_relocations(&eb->exec[i]);
   1627		if (err)
   1628			goto err;
   1629
   1630		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
   1631		size = nreloc * sizeof(*relocs);
   1632
   1633		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
   1634		if (!relocs) {
   1635			err = -ENOMEM;
   1636			goto err;
   1637		}
   1638
   1639		/* copy_from_user is limited to < 4GiB */
   1640		copied = 0;
   1641		do {
   1642			unsigned int len =
   1643				min_t(u64, BIT_ULL(31), size - copied);
   1644
   1645			if (__copy_from_user((char *)relocs + copied,
   1646					     (char __user *)urelocs + copied,
   1647					     len))
   1648				goto end;
   1649
   1650			copied += len;
   1651		} while (copied < size);
   1652
   1653		/*
   1654		 * As we do not update the known relocation offsets after
   1655		 * relocating (due to the complexities in lock handling),
   1656		 * we need to mark them as invalid now so that we force the
   1657		 * relocation processing next time. Just in case the target
   1658		 * object is evicted and then rebound into its old
   1659		 * presumed_offset before the next execbuffer - if that
   1660		 * happened we would make the mistake of assuming that the
   1661		 * relocations were valid.
   1662		 */
   1663		if (!user_access_begin(urelocs, size))
   1664			goto end;
   1665
   1666		for (copied = 0; copied < nreloc; copied++)
   1667			unsafe_put_user(-1,
   1668					&urelocs[copied].presumed_offset,
   1669					end_user);
   1670		user_access_end();
   1671
   1672		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
   1673	}
   1674
   1675	return 0;
   1676
   1677end_user:
   1678	user_access_end();
   1679end:
   1680	kvfree(relocs);
   1681	err = -EFAULT;
   1682err:
   1683	while (i--) {
   1684		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
   1685		if (eb->exec[i].relocation_count)
   1686			kvfree(relocs);
   1687	}
   1688	return err;
   1689}
   1690
   1691static int eb_prefault_relocations(const struct i915_execbuffer *eb)
   1692{
   1693	const unsigned int count = eb->buffer_count;
   1694	unsigned int i;
   1695
   1696	for (i = 0; i < count; i++) {
   1697		int err;
   1698
   1699		err = check_relocations(&eb->exec[i]);
   1700		if (err)
   1701			return err;
   1702	}
   1703
   1704	return 0;
   1705}
   1706
   1707static int eb_reinit_userptr(struct i915_execbuffer *eb)
   1708{
   1709	const unsigned int count = eb->buffer_count;
   1710	unsigned int i;
   1711	int ret;
   1712
   1713	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
   1714		return 0;
   1715
   1716	for (i = 0; i < count; i++) {
   1717		struct eb_vma *ev = &eb->vma[i];
   1718
   1719		if (!i915_gem_object_is_userptr(ev->vma->obj))
   1720			continue;
   1721
   1722		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
   1723		if (ret)
   1724			return ret;
   1725
   1726		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
   1727	}
   1728
   1729	return 0;
   1730}
   1731
   1732static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
   1733{
   1734	bool have_copy = false;
   1735	struct eb_vma *ev;
   1736	int err = 0;
   1737
   1738repeat:
   1739	if (signal_pending(current)) {
   1740		err = -ERESTARTSYS;
   1741		goto out;
   1742	}
   1743
   1744	/* We may process another execbuffer during the unlock... */
   1745	eb_release_vmas(eb, false);
   1746	i915_gem_ww_ctx_fini(&eb->ww);
   1747
   1748	/*
   1749	 * We take 3 passes through the slowpatch.
   1750	 *
   1751	 * 1 - we try to just prefault all the user relocation entries and
   1752	 * then attempt to reuse the atomic pagefault disabled fast path again.
   1753	 *
   1754	 * 2 - we copy the user entries to a local buffer here outside of the
   1755	 * local and allow ourselves to wait upon any rendering before
   1756	 * relocations
   1757	 *
   1758	 * 3 - we already have a local copy of the relocation entries, but
   1759	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
   1760	 */
   1761	if (!err) {
   1762		err = eb_prefault_relocations(eb);
   1763	} else if (!have_copy) {
   1764		err = eb_copy_relocations(eb);
   1765		have_copy = err == 0;
   1766	} else {
   1767		cond_resched();
   1768		err = 0;
   1769	}
   1770
   1771	if (!err)
   1772		err = eb_reinit_userptr(eb);
   1773
   1774	i915_gem_ww_ctx_init(&eb->ww, true);
   1775	if (err)
   1776		goto out;
   1777
   1778	/* reacquire the objects */
   1779repeat_validate:
   1780	err = eb_pin_engine(eb, false);
   1781	if (err)
   1782		goto err;
   1783
   1784	err = eb_validate_vmas(eb);
   1785	if (err)
   1786		goto err;
   1787
   1788	GEM_BUG_ON(!eb->batches[0]);
   1789
   1790	list_for_each_entry(ev, &eb->relocs, reloc_link) {
   1791		if (!have_copy) {
   1792			err = eb_relocate_vma(eb, ev);
   1793			if (err)
   1794				break;
   1795		} else {
   1796			err = eb_relocate_vma_slow(eb, ev);
   1797			if (err)
   1798				break;
   1799		}
   1800	}
   1801
   1802	if (err == -EDEADLK)
   1803		goto err;
   1804
   1805	if (err && !have_copy)
   1806		goto repeat;
   1807
   1808	if (err)
   1809		goto err;
   1810
   1811	/* as last step, parse the command buffer */
   1812	err = eb_parse(eb);
   1813	if (err)
   1814		goto err;
   1815
   1816	/*
   1817	 * Leave the user relocations as are, this is the painfully slow path,
   1818	 * and we want to avoid the complication of dropping the lock whilst
   1819	 * having buffers reserved in the aperture and so causing spurious
   1820	 * ENOSPC for random operations.
   1821	 */
   1822
   1823err:
   1824	if (err == -EDEADLK) {
   1825		eb_release_vmas(eb, false);
   1826		err = i915_gem_ww_ctx_backoff(&eb->ww);
   1827		if (!err)
   1828			goto repeat_validate;
   1829	}
   1830
   1831	if (err == -EAGAIN)
   1832		goto repeat;
   1833
   1834out:
   1835	if (have_copy) {
   1836		const unsigned int count = eb->buffer_count;
   1837		unsigned int i;
   1838
   1839		for (i = 0; i < count; i++) {
   1840			const struct drm_i915_gem_exec_object2 *entry =
   1841				&eb->exec[i];
   1842			struct drm_i915_gem_relocation_entry *relocs;
   1843
   1844			if (!entry->relocation_count)
   1845				continue;
   1846
   1847			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
   1848			kvfree(relocs);
   1849		}
   1850	}
   1851
   1852	return err;
   1853}
   1854
   1855static int eb_relocate_parse(struct i915_execbuffer *eb)
   1856{
   1857	int err;
   1858	bool throttle = true;
   1859
   1860retry:
   1861	err = eb_pin_engine(eb, throttle);
   1862	if (err) {
   1863		if (err != -EDEADLK)
   1864			return err;
   1865
   1866		goto err;
   1867	}
   1868
   1869	/* only throttle once, even if we didn't need to throttle */
   1870	throttle = false;
   1871
   1872	err = eb_validate_vmas(eb);
   1873	if (err == -EAGAIN)
   1874		goto slow;
   1875	else if (err)
   1876		goto err;
   1877
   1878	/* The objects are in their final locations, apply the relocations. */
   1879	if (eb->args->flags & __EXEC_HAS_RELOC) {
   1880		struct eb_vma *ev;
   1881
   1882		list_for_each_entry(ev, &eb->relocs, reloc_link) {
   1883			err = eb_relocate_vma(eb, ev);
   1884			if (err)
   1885				break;
   1886		}
   1887
   1888		if (err == -EDEADLK)
   1889			goto err;
   1890		else if (err)
   1891			goto slow;
   1892	}
   1893
   1894	if (!err)
   1895		err = eb_parse(eb);
   1896
   1897err:
   1898	if (err == -EDEADLK) {
   1899		eb_release_vmas(eb, false);
   1900		err = i915_gem_ww_ctx_backoff(&eb->ww);
   1901		if (!err)
   1902			goto retry;
   1903	}
   1904
   1905	return err;
   1906
   1907slow:
   1908	err = eb_relocate_parse_slow(eb);
   1909	if (err)
   1910		/*
   1911		 * If the user expects the execobject.offset and
   1912		 * reloc.presumed_offset to be an exact match,
   1913		 * as for using NO_RELOC, then we cannot update
   1914		 * the execobject.offset until we have completed
   1915		 * relocation.
   1916		 */
   1917		eb->args->flags &= ~__EXEC_HAS_RELOC;
   1918
   1919	return err;
   1920}
   1921
   1922/*
   1923 * Using two helper loops for the order of which requests / batches are created
   1924 * and added the to backend. Requests are created in order from the parent to
   1925 * the last child. Requests are added in the reverse order, from the last child
   1926 * to parent. This is done for locking reasons as the timeline lock is acquired
   1927 * during request creation and released when the request is added to the
   1928 * backend. To make lockdep happy (see intel_context_timeline_lock) this must be
   1929 * the ordering.
   1930 */
   1931#define for_each_batch_create_order(_eb, _i) \
   1932	for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i))
   1933#define for_each_batch_add_order(_eb, _i) \
   1934	BUILD_BUG_ON(!typecheck(int, _i)); \
   1935	for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
   1936
   1937static struct i915_request *
   1938eb_find_first_request_added(struct i915_execbuffer *eb)
   1939{
   1940	int i;
   1941
   1942	for_each_batch_add_order(eb, i)
   1943		if (eb->requests[i])
   1944			return eb->requests[i];
   1945
   1946	GEM_BUG_ON("Request not found");
   1947
   1948	return NULL;
   1949}
   1950
   1951#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
   1952
   1953/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
   1954static void eb_capture_stage(struct i915_execbuffer *eb)
   1955{
   1956	const unsigned int count = eb->buffer_count;
   1957	unsigned int i = count, j;
   1958
   1959	while (i--) {
   1960		struct eb_vma *ev = &eb->vma[i];
   1961		struct i915_vma *vma = ev->vma;
   1962		unsigned int flags = ev->flags;
   1963
   1964		if (!(flags & EXEC_OBJECT_CAPTURE))
   1965			continue;
   1966
   1967		for_each_batch_create_order(eb, j) {
   1968			struct i915_capture_list *capture;
   1969
   1970			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
   1971			if (!capture)
   1972				continue;
   1973
   1974			capture->next = eb->capture_lists[j];
   1975			capture->vma_res = i915_vma_resource_get(vma->resource);
   1976			eb->capture_lists[j] = capture;
   1977		}
   1978	}
   1979}
   1980
   1981/* Commit once we're in the critical path */
   1982static void eb_capture_commit(struct i915_execbuffer *eb)
   1983{
   1984	unsigned int j;
   1985
   1986	for_each_batch_create_order(eb, j) {
   1987		struct i915_request *rq = eb->requests[j];
   1988
   1989		if (!rq)
   1990			break;
   1991
   1992		rq->capture_list = eb->capture_lists[j];
   1993		eb->capture_lists[j] = NULL;
   1994	}
   1995}
   1996
   1997/*
   1998 * Release anything that didn't get committed due to errors.
   1999 * The capture_list will otherwise be freed at request retire.
   2000 */
   2001static void eb_capture_release(struct i915_execbuffer *eb)
   2002{
   2003	unsigned int j;
   2004
   2005	for_each_batch_create_order(eb, j) {
   2006		if (eb->capture_lists[j]) {
   2007			i915_request_free_capture_list(eb->capture_lists[j]);
   2008			eb->capture_lists[j] = NULL;
   2009		}
   2010	}
   2011}
   2012
   2013static void eb_capture_list_clear(struct i915_execbuffer *eb)
   2014{
   2015	memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
   2016}
   2017
   2018#else
   2019
   2020static void eb_capture_stage(struct i915_execbuffer *eb)
   2021{
   2022}
   2023
   2024static void eb_capture_commit(struct i915_execbuffer *eb)
   2025{
   2026}
   2027
   2028static void eb_capture_release(struct i915_execbuffer *eb)
   2029{
   2030}
   2031
   2032static void eb_capture_list_clear(struct i915_execbuffer *eb)
   2033{
   2034}
   2035
   2036#endif
   2037
   2038static int eb_move_to_gpu(struct i915_execbuffer *eb)
   2039{
   2040	const unsigned int count = eb->buffer_count;
   2041	unsigned int i = count;
   2042	int err = 0, j;
   2043
   2044	while (i--) {
   2045		struct eb_vma *ev = &eb->vma[i];
   2046		struct i915_vma *vma = ev->vma;
   2047		unsigned int flags = ev->flags;
   2048		struct drm_i915_gem_object *obj = vma->obj;
   2049
   2050		assert_vma_held(vma);
   2051
   2052		/*
   2053		 * If the GPU is not _reading_ through the CPU cache, we need
   2054		 * to make sure that any writes (both previous GPU writes from
   2055		 * before a change in snooping levels and normal CPU writes)
   2056		 * caught in that cache are flushed to main memory.
   2057		 *
   2058		 * We want to say
   2059		 *   obj->cache_dirty &&
   2060		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
   2061		 * but gcc's optimiser doesn't handle that as well and emits
   2062		 * two jumps instead of one. Maybe one day...
   2063		 *
   2064		 * FIXME: There is also sync flushing in set_pages(), which
   2065		 * serves a different purpose(some of the time at least).
   2066		 *
   2067		 * We should consider:
   2068		 *
   2069		 *   1. Rip out the async flush code.
   2070		 *
   2071		 *   2. Or make the sync flushing use the async clflush path
   2072		 *   using mandatory fences underneath. Currently the below
   2073		 *   async flush happens after we bind the object.
   2074		 */
   2075		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
   2076			if (i915_gem_clflush_object(obj, 0))
   2077				flags &= ~EXEC_OBJECT_ASYNC;
   2078		}
   2079
   2080		/* We only need to await on the first request */
   2081		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
   2082			err = i915_request_await_object
   2083				(eb_find_first_request_added(eb), obj,
   2084				 flags & EXEC_OBJECT_WRITE);
   2085		}
   2086
   2087		for_each_batch_add_order(eb, j) {
   2088			if (err)
   2089				break;
   2090			if (!eb->requests[j])
   2091				continue;
   2092
   2093			err = _i915_vma_move_to_active(vma, eb->requests[j],
   2094						       j ? NULL :
   2095						       eb->composite_fence ?
   2096						       eb->composite_fence :
   2097						       &eb->requests[j]->fence,
   2098						       flags | __EXEC_OBJECT_NO_RESERVE);
   2099		}
   2100	}
   2101
   2102#ifdef CONFIG_MMU_NOTIFIER
   2103	if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
   2104		read_lock(&eb->i915->mm.notifier_lock);
   2105
   2106		/*
   2107		 * count is always at least 1, otherwise __EXEC_USERPTR_USED
   2108		 * could not have been set
   2109		 */
   2110		for (i = 0; i < count; i++) {
   2111			struct eb_vma *ev = &eb->vma[i];
   2112			struct drm_i915_gem_object *obj = ev->vma->obj;
   2113
   2114			if (!i915_gem_object_is_userptr(obj))
   2115				continue;
   2116
   2117			err = i915_gem_object_userptr_submit_done(obj);
   2118			if (err)
   2119				break;
   2120		}
   2121
   2122		read_unlock(&eb->i915->mm.notifier_lock);
   2123	}
   2124#endif
   2125
   2126	if (unlikely(err))
   2127		goto err_skip;
   2128
   2129	/* Unconditionally flush any chipset caches (for streaming writes). */
   2130	intel_gt_chipset_flush(eb->gt);
   2131	eb_capture_commit(eb);
   2132
   2133	return 0;
   2134
   2135err_skip:
   2136	for_each_batch_create_order(eb, j) {
   2137		if (!eb->requests[j])
   2138			break;
   2139
   2140		i915_request_set_error_once(eb->requests[j], err);
   2141	}
   2142	return err;
   2143}
   2144
   2145static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
   2146{
   2147	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
   2148		return -EINVAL;
   2149
   2150	/* Kernel clipping was a DRI1 misfeature */
   2151	if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
   2152			     I915_EXEC_USE_EXTENSIONS))) {
   2153		if (exec->num_cliprects || exec->cliprects_ptr)
   2154			return -EINVAL;
   2155	}
   2156
   2157	if (exec->DR4 == 0xffffffff) {
   2158		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
   2159		exec->DR4 = 0;
   2160	}
   2161	if (exec->DR1 || exec->DR4)
   2162		return -EINVAL;
   2163
   2164	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
   2165		return -EINVAL;
   2166
   2167	return 0;
   2168}
   2169
   2170static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
   2171{
   2172	u32 *cs;
   2173	int i;
   2174
   2175	if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) {
   2176		drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
   2177		return -EINVAL;
   2178	}
   2179
   2180	cs = intel_ring_begin(rq, 4 * 2 + 2);
   2181	if (IS_ERR(cs))
   2182		return PTR_ERR(cs);
   2183
   2184	*cs++ = MI_LOAD_REGISTER_IMM(4);
   2185	for (i = 0; i < 4; i++) {
   2186		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
   2187		*cs++ = 0;
   2188	}
   2189	*cs++ = MI_NOOP;
   2190	intel_ring_advance(rq, cs);
   2191
   2192	return 0;
   2193}
   2194
   2195static struct i915_vma *
   2196shadow_batch_pin(struct i915_execbuffer *eb,
   2197		 struct drm_i915_gem_object *obj,
   2198		 struct i915_address_space *vm,
   2199		 unsigned int flags)
   2200{
   2201	struct i915_vma *vma;
   2202	int err;
   2203
   2204	vma = i915_vma_instance(obj, vm, NULL);
   2205	if (IS_ERR(vma))
   2206		return vma;
   2207
   2208	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
   2209	if (err)
   2210		return ERR_PTR(err);
   2211
   2212	return vma;
   2213}
   2214
   2215static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
   2216{
   2217	/*
   2218	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
   2219	 * batch" bit. Hence we need to pin secure batches into the global gtt.
   2220	 * hsw should have this fixed, but bdw mucks it up again. */
   2221	if (eb->batch_flags & I915_DISPATCH_SECURE)
   2222		return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
   2223
   2224	return NULL;
   2225}
   2226
   2227static int eb_parse(struct i915_execbuffer *eb)
   2228{
   2229	struct drm_i915_private *i915 = eb->i915;
   2230	struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
   2231	struct i915_vma *shadow, *trampoline, *batch;
   2232	unsigned long len;
   2233	int err;
   2234
   2235	if (!eb_use_cmdparser(eb)) {
   2236		batch = eb_dispatch_secure(eb, eb->batches[0]->vma);
   2237		if (IS_ERR(batch))
   2238			return PTR_ERR(batch);
   2239
   2240		goto secure_batch;
   2241	}
   2242
   2243	if (intel_context_is_parallel(eb->context))
   2244		return -EINVAL;
   2245
   2246	len = eb->batch_len[0];
   2247	if (!CMDPARSER_USES_GGTT(eb->i915)) {
   2248		/*
   2249		 * ppGTT backed shadow buffers must be mapped RO, to prevent
   2250		 * post-scan tampering
   2251		 */
   2252		if (!eb->context->vm->has_read_only) {
   2253			drm_dbg(&i915->drm,
   2254				"Cannot prevent post-scan tampering without RO capable vm\n");
   2255			return -EINVAL;
   2256		}
   2257	} else {
   2258		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
   2259	}
   2260	if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */
   2261		return -EINVAL;
   2262
   2263	if (!pool) {
   2264		pool = intel_gt_get_buffer_pool(eb->gt, len,
   2265						I915_MAP_WB);
   2266		if (IS_ERR(pool))
   2267			return PTR_ERR(pool);
   2268		eb->batch_pool = pool;
   2269	}
   2270
   2271	err = i915_gem_object_lock(pool->obj, &eb->ww);
   2272	if (err)
   2273		return err;
   2274
   2275	shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
   2276	if (IS_ERR(shadow))
   2277		return PTR_ERR(shadow);
   2278
   2279	intel_gt_buffer_pool_mark_used(pool);
   2280	i915_gem_object_set_readonly(shadow->obj);
   2281	shadow->private = pool;
   2282
   2283	trampoline = NULL;
   2284	if (CMDPARSER_USES_GGTT(eb->i915)) {
   2285		trampoline = shadow;
   2286
   2287		shadow = shadow_batch_pin(eb, pool->obj,
   2288					  &eb->gt->ggtt->vm,
   2289					  PIN_GLOBAL);
   2290		if (IS_ERR(shadow))
   2291			return PTR_ERR(shadow);
   2292
   2293		shadow->private = pool;
   2294
   2295		eb->batch_flags |= I915_DISPATCH_SECURE;
   2296	}
   2297
   2298	batch = eb_dispatch_secure(eb, shadow);
   2299	if (IS_ERR(batch))
   2300		return PTR_ERR(batch);
   2301
   2302	err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
   2303	if (err)
   2304		return err;
   2305
   2306	err = intel_engine_cmd_parser(eb->context->engine,
   2307				      eb->batches[0]->vma,
   2308				      eb->batch_start_offset,
   2309				      eb->batch_len[0],
   2310				      shadow, trampoline);
   2311	if (err)
   2312		return err;
   2313
   2314	eb->batches[0] = &eb->vma[eb->buffer_count++];
   2315	eb->batches[0]->vma = i915_vma_get(shadow);
   2316	eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
   2317
   2318	eb->trampoline = trampoline;
   2319	eb->batch_start_offset = 0;
   2320
   2321secure_batch:
   2322	if (batch) {
   2323		if (intel_context_is_parallel(eb->context))
   2324			return -EINVAL;
   2325
   2326		eb->batches[0] = &eb->vma[eb->buffer_count++];
   2327		eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
   2328		eb->batches[0]->vma = i915_vma_get(batch);
   2329	}
   2330	return 0;
   2331}
   2332
   2333static int eb_request_submit(struct i915_execbuffer *eb,
   2334			     struct i915_request *rq,
   2335			     struct i915_vma *batch,
   2336			     u64 batch_len)
   2337{
   2338	int err;
   2339
   2340	if (intel_context_nopreempt(rq->context))
   2341		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
   2342
   2343	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
   2344		err = i915_reset_gen7_sol_offsets(rq);
   2345		if (err)
   2346			return err;
   2347	}
   2348
   2349	/*
   2350	 * After we completed waiting for other engines (using HW semaphores)
   2351	 * then we can signal that this request/batch is ready to run. This
   2352	 * allows us to determine if the batch is still waiting on the GPU
   2353	 * or actually running by checking the breadcrumb.
   2354	 */
   2355	if (rq->context->engine->emit_init_breadcrumb) {
   2356		err = rq->context->engine->emit_init_breadcrumb(rq);
   2357		if (err)
   2358			return err;
   2359	}
   2360
   2361	err = rq->context->engine->emit_bb_start(rq,
   2362						 batch->node.start +
   2363						 eb->batch_start_offset,
   2364						 batch_len,
   2365						 eb->batch_flags);
   2366	if (err)
   2367		return err;
   2368
   2369	if (eb->trampoline) {
   2370		GEM_BUG_ON(intel_context_is_parallel(rq->context));
   2371		GEM_BUG_ON(eb->batch_start_offset);
   2372		err = rq->context->engine->emit_bb_start(rq,
   2373							 eb->trampoline->node.start +
   2374							 batch_len, 0, 0);
   2375		if (err)
   2376			return err;
   2377	}
   2378
   2379	return 0;
   2380}
   2381
   2382static int eb_submit(struct i915_execbuffer *eb)
   2383{
   2384	unsigned int i;
   2385	int err;
   2386
   2387	err = eb_move_to_gpu(eb);
   2388
   2389	for_each_batch_create_order(eb, i) {
   2390		if (!eb->requests[i])
   2391			break;
   2392
   2393		trace_i915_request_queue(eb->requests[i], eb->batch_flags);
   2394		if (!err)
   2395			err = eb_request_submit(eb, eb->requests[i],
   2396						eb->batches[i]->vma,
   2397						eb->batch_len[i]);
   2398	}
   2399
   2400	return err;
   2401}
   2402
   2403static int num_vcs_engines(struct drm_i915_private *i915)
   2404{
   2405	return hweight_long(VDBOX_MASK(to_gt(i915)));
   2406}
   2407
   2408/*
   2409 * Find one BSD ring to dispatch the corresponding BSD command.
   2410 * The engine index is returned.
   2411 */
   2412static unsigned int
   2413gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
   2414			 struct drm_file *file)
   2415{
   2416	struct drm_i915_file_private *file_priv = file->driver_priv;
   2417
   2418	/* Check whether the file_priv has already selected one ring. */
   2419	if ((int)file_priv->bsd_engine < 0)
   2420		file_priv->bsd_engine =
   2421			get_random_int() % num_vcs_engines(dev_priv);
   2422
   2423	return file_priv->bsd_engine;
   2424}
   2425
   2426static const enum intel_engine_id user_ring_map[] = {
   2427	[I915_EXEC_DEFAULT]	= RCS0,
   2428	[I915_EXEC_RENDER]	= RCS0,
   2429	[I915_EXEC_BLT]		= BCS0,
   2430	[I915_EXEC_BSD]		= VCS0,
   2431	[I915_EXEC_VEBOX]	= VECS0
   2432};
   2433
   2434static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
   2435{
   2436	struct intel_ring *ring = ce->ring;
   2437	struct intel_timeline *tl = ce->timeline;
   2438	struct i915_request *rq;
   2439
   2440	/*
   2441	 * Completely unscientific finger-in-the-air estimates for suitable
   2442	 * maximum user request size (to avoid blocking) and then backoff.
   2443	 */
   2444	if (intel_ring_update_space(ring) >= PAGE_SIZE)
   2445		return NULL;
   2446
   2447	/*
   2448	 * Find a request that after waiting upon, there will be at least half
   2449	 * the ring available. The hysteresis allows us to compete for the
   2450	 * shared ring and should mean that we sleep less often prior to
   2451	 * claiming our resources, but not so long that the ring completely
   2452	 * drains before we can submit our next request.
   2453	 */
   2454	list_for_each_entry(rq, &tl->requests, link) {
   2455		if (rq->ring != ring)
   2456			continue;
   2457
   2458		if (__intel_ring_space(rq->postfix,
   2459				       ring->emit, ring->size) > ring->size / 2)
   2460			break;
   2461	}
   2462	if (&rq->link == &tl->requests)
   2463		return NULL; /* weird, we will check again later for real */
   2464
   2465	return i915_request_get(rq);
   2466}
   2467
   2468static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
   2469			   bool throttle)
   2470{
   2471	struct intel_timeline *tl;
   2472	struct i915_request *rq = NULL;
   2473
   2474	/*
   2475	 * Take a local wakeref for preparing to dispatch the execbuf as
   2476	 * we expect to access the hardware fairly frequently in the
   2477	 * process, and require the engine to be kept awake between accesses.
   2478	 * Upon dispatch, we acquire another prolonged wakeref that we hold
   2479	 * until the timeline is idle, which in turn releases the wakeref
   2480	 * taken on the engine, and the parent device.
   2481	 */
   2482	tl = intel_context_timeline_lock(ce);
   2483	if (IS_ERR(tl))
   2484		return PTR_ERR(tl);
   2485
   2486	intel_context_enter(ce);
   2487	if (throttle)
   2488		rq = eb_throttle(eb, ce);
   2489	intel_context_timeline_unlock(tl);
   2490
   2491	if (rq) {
   2492		bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
   2493		long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;
   2494
   2495		if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
   2496				      timeout) < 0) {
   2497			i915_request_put(rq);
   2498
   2499			/*
   2500			 * Error path, cannot use intel_context_timeline_lock as
   2501			 * that is user interruptable and this clean up step
   2502			 * must be done.
   2503			 */
   2504			mutex_lock(&ce->timeline->mutex);
   2505			intel_context_exit(ce);
   2506			mutex_unlock(&ce->timeline->mutex);
   2507
   2508			if (nonblock)
   2509				return -EWOULDBLOCK;
   2510			else
   2511				return -EINTR;
   2512		}
   2513		i915_request_put(rq);
   2514	}
   2515
   2516	return 0;
   2517}
   2518
   2519static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
   2520{
   2521	struct intel_context *ce = eb->context, *child;
   2522	int err;
   2523	int i = 0, j = 0;
   2524
   2525	GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
   2526
   2527	if (unlikely(intel_context_is_banned(ce)))
   2528		return -EIO;
   2529
   2530	/*
   2531	 * Pinning the contexts may generate requests in order to acquire
   2532	 * GGTT space, so do this first before we reserve a seqno for
   2533	 * ourselves.
   2534	 */
   2535	err = intel_context_pin_ww(ce, &eb->ww);
   2536	if (err)
   2537		return err;
   2538	for_each_child(ce, child) {
   2539		err = intel_context_pin_ww(child, &eb->ww);
   2540		GEM_BUG_ON(err);	/* perma-pinned should incr a counter */
   2541	}
   2542
   2543	for_each_child(ce, child) {
   2544		err = eb_pin_timeline(eb, child, throttle);
   2545		if (err)
   2546			goto unwind;
   2547		++i;
   2548	}
   2549	err = eb_pin_timeline(eb, ce, throttle);
   2550	if (err)
   2551		goto unwind;
   2552
   2553	eb->args->flags |= __EXEC_ENGINE_PINNED;
   2554	return 0;
   2555
   2556unwind:
   2557	for_each_child(ce, child) {
   2558		if (j++ < i) {
   2559			mutex_lock(&child->timeline->mutex);
   2560			intel_context_exit(child);
   2561			mutex_unlock(&child->timeline->mutex);
   2562		}
   2563	}
   2564	for_each_child(ce, child)
   2565		intel_context_unpin(child);
   2566	intel_context_unpin(ce);
   2567	return err;
   2568}
   2569
   2570static void eb_unpin_engine(struct i915_execbuffer *eb)
   2571{
   2572	struct intel_context *ce = eb->context, *child;
   2573
   2574	if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
   2575		return;
   2576
   2577	eb->args->flags &= ~__EXEC_ENGINE_PINNED;
   2578
   2579	for_each_child(ce, child) {
   2580		mutex_lock(&child->timeline->mutex);
   2581		intel_context_exit(child);
   2582		mutex_unlock(&child->timeline->mutex);
   2583
   2584		intel_context_unpin(child);
   2585	}
   2586
   2587	mutex_lock(&ce->timeline->mutex);
   2588	intel_context_exit(ce);
   2589	mutex_unlock(&ce->timeline->mutex);
   2590
   2591	intel_context_unpin(ce);
   2592}
   2593
   2594static unsigned int
   2595eb_select_legacy_ring(struct i915_execbuffer *eb)
   2596{
   2597	struct drm_i915_private *i915 = eb->i915;
   2598	struct drm_i915_gem_execbuffer2 *args = eb->args;
   2599	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
   2600
   2601	if (user_ring_id != I915_EXEC_BSD &&
   2602	    (args->flags & I915_EXEC_BSD_MASK)) {
   2603		drm_dbg(&i915->drm,
   2604			"execbuf with non bsd ring but with invalid "
   2605			"bsd dispatch flags: %d\n", (int)(args->flags));
   2606		return -1;
   2607	}
   2608
   2609	if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
   2610		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
   2611
   2612		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
   2613			bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
   2614		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
   2615			   bsd_idx <= I915_EXEC_BSD_RING2) {
   2616			bsd_idx >>= I915_EXEC_BSD_SHIFT;
   2617			bsd_idx--;
   2618		} else {
   2619			drm_dbg(&i915->drm,
   2620				"execbuf with unknown bsd ring: %u\n",
   2621				bsd_idx);
   2622			return -1;
   2623		}
   2624
   2625		return _VCS(bsd_idx);
   2626	}
   2627
   2628	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
   2629		drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",
   2630			user_ring_id);
   2631		return -1;
   2632	}
   2633
   2634	return user_ring_map[user_ring_id];
   2635}
   2636
   2637static int
   2638eb_select_engine(struct i915_execbuffer *eb)
   2639{
   2640	struct intel_context *ce, *child;
   2641	unsigned int idx;
   2642	int err;
   2643
   2644	if (i915_gem_context_user_engines(eb->gem_context))
   2645		idx = eb->args->flags & I915_EXEC_RING_MASK;
   2646	else
   2647		idx = eb_select_legacy_ring(eb);
   2648
   2649	ce = i915_gem_context_get_engine(eb->gem_context, idx);
   2650	if (IS_ERR(ce))
   2651		return PTR_ERR(ce);
   2652
   2653	if (intel_context_is_parallel(ce)) {
   2654		if (eb->buffer_count < ce->parallel.number_children + 1) {
   2655			intel_context_put(ce);
   2656			return -EINVAL;
   2657		}
   2658		if (eb->batch_start_offset || eb->args->batch_len) {
   2659			intel_context_put(ce);
   2660			return -EINVAL;
   2661		}
   2662	}
   2663	eb->num_batches = ce->parallel.number_children + 1;
   2664
   2665	for_each_child(ce, child)
   2666		intel_context_get(child);
   2667	intel_gt_pm_get(ce->engine->gt);
   2668
   2669	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
   2670		err = intel_context_alloc_state(ce);
   2671		if (err)
   2672			goto err;
   2673	}
   2674	for_each_child(ce, child) {
   2675		if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) {
   2676			err = intel_context_alloc_state(child);
   2677			if (err)
   2678				goto err;
   2679		}
   2680	}
   2681
   2682	/*
   2683	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
   2684	 * EIO if the GPU is already wedged.
   2685	 */
   2686	err = intel_gt_terminally_wedged(ce->engine->gt);
   2687	if (err)
   2688		goto err;
   2689
   2690	if (!i915_vm_tryget(ce->vm)) {
   2691		err = -ENOENT;
   2692		goto err;
   2693	}
   2694
   2695	eb->context = ce;
   2696	eb->gt = ce->engine->gt;
   2697
   2698	/*
   2699	 * Make sure engine pool stays alive even if we call intel_context_put
   2700	 * during ww handling. The pool is destroyed when last pm reference
   2701	 * is dropped, which breaks our -EDEADLK handling.
   2702	 */
   2703	return err;
   2704
   2705err:
   2706	intel_gt_pm_put(ce->engine->gt);
   2707	for_each_child(ce, child)
   2708		intel_context_put(child);
   2709	intel_context_put(ce);
   2710	return err;
   2711}
   2712
   2713static void
   2714eb_put_engine(struct i915_execbuffer *eb)
   2715{
   2716	struct intel_context *child;
   2717
   2718	i915_vm_put(eb->context->vm);
   2719	intel_gt_pm_put(eb->gt);
   2720	for_each_child(eb->context, child)
   2721		intel_context_put(child);
   2722	intel_context_put(eb->context);
   2723}
   2724
   2725static void
   2726__free_fence_array(struct eb_fence *fences, unsigned int n)
   2727{
   2728	while (n--) {
   2729		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
   2730		dma_fence_put(fences[n].dma_fence);
   2731		dma_fence_chain_free(fences[n].chain_fence);
   2732	}
   2733	kvfree(fences);
   2734}
   2735
   2736static int
   2737add_timeline_fence_array(struct i915_execbuffer *eb,
   2738			 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
   2739{
   2740	struct drm_i915_gem_exec_fence __user *user_fences;
   2741	u64 __user *user_values;
   2742	struct eb_fence *f;
   2743	u64 nfences;
   2744	int err = 0;
   2745
   2746	nfences = timeline_fences->fence_count;
   2747	if (!nfences)
   2748		return 0;
   2749
   2750	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
   2751	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
   2752	if (nfences > min_t(unsigned long,
   2753			    ULONG_MAX / sizeof(*user_fences),
   2754			    SIZE_MAX / sizeof(*f)) - eb->num_fences)
   2755		return -EINVAL;
   2756
   2757	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
   2758	if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
   2759		return -EFAULT;
   2760
   2761	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
   2762	if (!access_ok(user_values, nfences * sizeof(*user_values)))
   2763		return -EFAULT;
   2764
   2765	f = krealloc(eb->fences,
   2766		     (eb->num_fences + nfences) * sizeof(*f),
   2767		     __GFP_NOWARN | GFP_KERNEL);
   2768	if (!f)
   2769		return -ENOMEM;
   2770
   2771	eb->fences = f;
   2772	f += eb->num_fences;
   2773
   2774	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
   2775		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
   2776
   2777	while (nfences--) {
   2778		struct drm_i915_gem_exec_fence user_fence;
   2779		struct drm_syncobj *syncobj;
   2780		struct dma_fence *fence = NULL;
   2781		u64 point;
   2782
   2783		if (__copy_from_user(&user_fence,
   2784				     user_fences++,
   2785				     sizeof(user_fence)))
   2786			return -EFAULT;
   2787
   2788		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
   2789			return -EINVAL;
   2790
   2791		if (__get_user(point, user_values++))
   2792			return -EFAULT;
   2793
   2794		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
   2795		if (!syncobj) {
   2796			DRM_DEBUG("Invalid syncobj handle provided\n");
   2797			return -ENOENT;
   2798		}
   2799
   2800		fence = drm_syncobj_fence_get(syncobj);
   2801
   2802		if (!fence && user_fence.flags &&
   2803		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
   2804			DRM_DEBUG("Syncobj handle has no fence\n");
   2805			drm_syncobj_put(syncobj);
   2806			return -EINVAL;
   2807		}
   2808
   2809		if (fence)
   2810			err = dma_fence_chain_find_seqno(&fence, point);
   2811
   2812		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
   2813			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
   2814			dma_fence_put(fence);
   2815			drm_syncobj_put(syncobj);
   2816			return err;
   2817		}
   2818
   2819		/*
   2820		 * A point might have been signaled already and
   2821		 * garbage collected from the timeline. In this case
   2822		 * just ignore the point and carry on.
   2823		 */
   2824		if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
   2825			drm_syncobj_put(syncobj);
   2826			continue;
   2827		}
   2828
   2829		/*
   2830		 * For timeline syncobjs we need to preallocate chains for
   2831		 * later signaling.
   2832		 */
   2833		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
   2834			/*
   2835			 * Waiting and signaling the same point (when point !=
   2836			 * 0) would break the timeline.
   2837			 */
   2838			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
   2839				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
   2840				dma_fence_put(fence);
   2841				drm_syncobj_put(syncobj);
   2842				return -EINVAL;
   2843			}
   2844
   2845			f->chain_fence = dma_fence_chain_alloc();
   2846			if (!f->chain_fence) {
   2847				drm_syncobj_put(syncobj);
   2848				dma_fence_put(fence);
   2849				return -ENOMEM;
   2850			}
   2851		} else {
   2852			f->chain_fence = NULL;
   2853		}
   2854
   2855		f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
   2856		f->dma_fence = fence;
   2857		f->value = point;
   2858		f++;
   2859		eb->num_fences++;
   2860	}
   2861
   2862	return 0;
   2863}
   2864
   2865static int add_fence_array(struct i915_execbuffer *eb)
   2866{
   2867	struct drm_i915_gem_execbuffer2 *args = eb->args;
   2868	struct drm_i915_gem_exec_fence __user *user;
   2869	unsigned long num_fences = args->num_cliprects;
   2870	struct eb_fence *f;
   2871
   2872	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
   2873		return 0;
   2874
   2875	if (!num_fences)
   2876		return 0;
   2877
   2878	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
   2879	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
   2880	if (num_fences > min_t(unsigned long,
   2881			       ULONG_MAX / sizeof(*user),
   2882			       SIZE_MAX / sizeof(*f) - eb->num_fences))
   2883		return -EINVAL;
   2884
   2885	user = u64_to_user_ptr(args->cliprects_ptr);
   2886	if (!access_ok(user, num_fences * sizeof(*user)))
   2887		return -EFAULT;
   2888
   2889	f = krealloc(eb->fences,
   2890		     (eb->num_fences + num_fences) * sizeof(*f),
   2891		     __GFP_NOWARN | GFP_KERNEL);
   2892	if (!f)
   2893		return -ENOMEM;
   2894
   2895	eb->fences = f;
   2896	f += eb->num_fences;
   2897	while (num_fences--) {
   2898		struct drm_i915_gem_exec_fence user_fence;
   2899		struct drm_syncobj *syncobj;
   2900		struct dma_fence *fence = NULL;
   2901
   2902		if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
   2903			return -EFAULT;
   2904
   2905		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
   2906			return -EINVAL;
   2907
   2908		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
   2909		if (!syncobj) {
   2910			DRM_DEBUG("Invalid syncobj handle provided\n");
   2911			return -ENOENT;
   2912		}
   2913
   2914		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
   2915			fence = drm_syncobj_fence_get(syncobj);
   2916			if (!fence) {
   2917				DRM_DEBUG("Syncobj handle has no fence\n");
   2918				drm_syncobj_put(syncobj);
   2919				return -EINVAL;
   2920			}
   2921		}
   2922
   2923		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
   2924			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
   2925
   2926		f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
   2927		f->dma_fence = fence;
   2928		f->value = 0;
   2929		f->chain_fence = NULL;
   2930		f++;
   2931		eb->num_fences++;
   2932	}
   2933
   2934	return 0;
   2935}
   2936
   2937static void put_fence_array(struct eb_fence *fences, int num_fences)
   2938{
   2939	if (fences)
   2940		__free_fence_array(fences, num_fences);
   2941}
   2942
   2943static int
   2944await_fence_array(struct i915_execbuffer *eb,
   2945		  struct i915_request *rq)
   2946{
   2947	unsigned int n;
   2948	int err;
   2949
   2950	for (n = 0; n < eb->num_fences; n++) {
   2951		struct drm_syncobj *syncobj;
   2952		unsigned int flags;
   2953
   2954		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
   2955
   2956		if (!eb->fences[n].dma_fence)
   2957			continue;
   2958
   2959		err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence);
   2960		if (err < 0)
   2961			return err;
   2962	}
   2963
   2964	return 0;
   2965}
   2966
   2967static void signal_fence_array(const struct i915_execbuffer *eb,
   2968			       struct dma_fence * const fence)
   2969{
   2970	unsigned int n;
   2971
   2972	for (n = 0; n < eb->num_fences; n++) {
   2973		struct drm_syncobj *syncobj;
   2974		unsigned int flags;
   2975
   2976		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
   2977		if (!(flags & I915_EXEC_FENCE_SIGNAL))
   2978			continue;
   2979
   2980		if (eb->fences[n].chain_fence) {
   2981			drm_syncobj_add_point(syncobj,
   2982					      eb->fences[n].chain_fence,
   2983					      fence,
   2984					      eb->fences[n].value);
   2985			/*
   2986			 * The chain's ownership is transferred to the
   2987			 * timeline.
   2988			 */
   2989			eb->fences[n].chain_fence = NULL;
   2990		} else {
   2991			drm_syncobj_replace_fence(syncobj, fence);
   2992		}
   2993	}
   2994}
   2995
   2996static int
   2997parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
   2998{
   2999	struct i915_execbuffer *eb = data;
   3000	struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
   3001
   3002	if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
   3003		return -EFAULT;
   3004
   3005	return add_timeline_fence_array(eb, &timeline_fences);
   3006}
   3007
   3008static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
   3009{
   3010	struct i915_request *rq, *rn;
   3011
   3012	list_for_each_entry_safe(rq, rn, &tl->requests, link)
   3013		if (rq == end || !i915_request_retire(rq))
   3014			break;
   3015}
   3016
   3017static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
   3018			  int err, bool last_parallel)
   3019{
   3020	struct intel_timeline * const tl = i915_request_timeline(rq);
   3021	struct i915_sched_attr attr = {};
   3022	struct i915_request *prev;
   3023
   3024	lockdep_assert_held(&tl->mutex);
   3025	lockdep_unpin_lock(&tl->mutex, rq->cookie);
   3026
   3027	trace_i915_request_add(rq);
   3028
   3029	prev = __i915_request_commit(rq);
   3030
   3031	/* Check that the context wasn't destroyed before submission */
   3032	if (likely(!intel_context_is_closed(eb->context))) {
   3033		attr = eb->gem_context->sched;
   3034	} else {
   3035		/* Serialise with context_close via the add_to_timeline */
   3036		i915_request_set_error_once(rq, -ENOENT);
   3037		__i915_request_skip(rq);
   3038		err = -ENOENT; /* override any transient errors */
   3039	}
   3040
   3041	if (intel_context_is_parallel(eb->context)) {
   3042		if (err) {
   3043			__i915_request_skip(rq);
   3044			set_bit(I915_FENCE_FLAG_SKIP_PARALLEL,
   3045				&rq->fence.flags);
   3046		}
   3047		if (last_parallel)
   3048			set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
   3049				&rq->fence.flags);
   3050	}
   3051
   3052	__i915_request_queue(rq, &attr);
   3053
   3054	/* Try to clean up the client's timeline after submitting the request */
   3055	if (prev)
   3056		retire_requests(tl, prev);
   3057
   3058	mutex_unlock(&tl->mutex);
   3059
   3060	return err;
   3061}
   3062
   3063static int eb_requests_add(struct i915_execbuffer *eb, int err)
   3064{
   3065	int i;
   3066
   3067	/*
   3068	 * We iterate in reverse order of creation to release timeline mutexes in
   3069	 * same order.
   3070	 */
   3071	for_each_batch_add_order(eb, i) {
   3072		struct i915_request *rq = eb->requests[i];
   3073
   3074		if (!rq)
   3075			continue;
   3076		err |= eb_request_add(eb, rq, err, i == 0);
   3077	}
   3078
   3079	return err;
   3080}
   3081
   3082static const i915_user_extension_fn execbuf_extensions[] = {
   3083	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
   3084};
   3085
   3086static int
   3087parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
   3088			  struct i915_execbuffer *eb)
   3089{
   3090	if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
   3091		return 0;
   3092
   3093	/* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
   3094	 * have another flag also using it at the same time.
   3095	 */
   3096	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
   3097		return -EINVAL;
   3098
   3099	if (args->num_cliprects != 0)
   3100		return -EINVAL;
   3101
   3102	return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
   3103				    execbuf_extensions,
   3104				    ARRAY_SIZE(execbuf_extensions),
   3105				    eb);
   3106}
   3107
   3108static void eb_requests_get(struct i915_execbuffer *eb)
   3109{
   3110	unsigned int i;
   3111
   3112	for_each_batch_create_order(eb, i) {
   3113		if (!eb->requests[i])
   3114			break;
   3115
   3116		i915_request_get(eb->requests[i]);
   3117	}
   3118}
   3119
   3120static void eb_requests_put(struct i915_execbuffer *eb)
   3121{
   3122	unsigned int i;
   3123
   3124	for_each_batch_create_order(eb, i) {
   3125		if (!eb->requests[i])
   3126			break;
   3127
   3128		i915_request_put(eb->requests[i]);
   3129	}
   3130}
   3131
   3132static struct sync_file *
   3133eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
   3134{
   3135	struct sync_file *out_fence = NULL;
   3136	struct dma_fence_array *fence_array;
   3137	struct dma_fence **fences;
   3138	unsigned int i;
   3139
   3140	GEM_BUG_ON(!intel_context_is_parent(eb->context));
   3141
   3142	fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL);
   3143	if (!fences)
   3144		return ERR_PTR(-ENOMEM);
   3145
   3146	for_each_batch_create_order(eb, i) {
   3147		fences[i] = &eb->requests[i]->fence;
   3148		__set_bit(I915_FENCE_FLAG_COMPOSITE,
   3149			  &eb->requests[i]->fence.flags);
   3150	}
   3151
   3152	fence_array = dma_fence_array_create(eb->num_batches,
   3153					     fences,
   3154					     eb->context->parallel.fence_context,
   3155					     eb->context->parallel.seqno++,
   3156					     false);
   3157	if (!fence_array) {
   3158		kfree(fences);
   3159		return ERR_PTR(-ENOMEM);
   3160	}
   3161
   3162	/* Move ownership to the dma_fence_array created above */
   3163	for_each_batch_create_order(eb, i)
   3164		dma_fence_get(fences[i]);
   3165
   3166	if (out_fence_fd != -1) {
   3167		out_fence = sync_file_create(&fence_array->base);
   3168		/* sync_file now owns fence_arry, drop creation ref */
   3169		dma_fence_put(&fence_array->base);
   3170		if (!out_fence)
   3171			return ERR_PTR(-ENOMEM);
   3172	}
   3173
   3174	eb->composite_fence = &fence_array->base;
   3175
   3176	return out_fence;
   3177}
   3178
   3179static struct sync_file *
   3180eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq,
   3181	      struct dma_fence *in_fence, int out_fence_fd)
   3182{
   3183	struct sync_file *out_fence = NULL;
   3184	int err;
   3185
   3186	if (unlikely(eb->gem_context->syncobj)) {
   3187		struct dma_fence *fence;
   3188
   3189		fence = drm_syncobj_fence_get(eb->gem_context->syncobj);
   3190		err = i915_request_await_dma_fence(rq, fence);
   3191		dma_fence_put(fence);
   3192		if (err)
   3193			return ERR_PTR(err);
   3194	}
   3195
   3196	if (in_fence) {
   3197		if (eb->args->flags & I915_EXEC_FENCE_SUBMIT)
   3198			err = i915_request_await_execution(rq, in_fence);
   3199		else
   3200			err = i915_request_await_dma_fence(rq, in_fence);
   3201		if (err < 0)
   3202			return ERR_PTR(err);
   3203	}
   3204
   3205	if (eb->fences) {
   3206		err = await_fence_array(eb, rq);
   3207		if (err)
   3208			return ERR_PTR(err);
   3209	}
   3210
   3211	if (intel_context_is_parallel(eb->context)) {
   3212		out_fence = eb_composite_fence_create(eb, out_fence_fd);
   3213		if (IS_ERR(out_fence))
   3214			return ERR_PTR(-ENOMEM);
   3215	} else if (out_fence_fd != -1) {
   3216		out_fence = sync_file_create(&rq->fence);
   3217		if (!out_fence)
   3218			return ERR_PTR(-ENOMEM);
   3219	}
   3220
   3221	return out_fence;
   3222}
   3223
   3224static struct intel_context *
   3225eb_find_context(struct i915_execbuffer *eb, unsigned int context_number)
   3226{
   3227	struct intel_context *child;
   3228
   3229	if (likely(context_number == 0))
   3230		return eb->context;
   3231
   3232	for_each_child(eb->context, child)
   3233		if (!--context_number)
   3234			return child;
   3235
   3236	GEM_BUG_ON("Context not found");
   3237
   3238	return NULL;
   3239}
   3240
   3241static struct sync_file *
   3242eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
   3243		   int out_fence_fd)
   3244{
   3245	struct sync_file *out_fence = NULL;
   3246	unsigned int i;
   3247
   3248	for_each_batch_create_order(eb, i) {
   3249		/* Allocate a request for this batch buffer nice and early. */
   3250		eb->requests[i] = i915_request_create(eb_find_context(eb, i));
   3251		if (IS_ERR(eb->requests[i])) {
   3252			out_fence = ERR_CAST(eb->requests[i]);
   3253			eb->requests[i] = NULL;
   3254			return out_fence;
   3255		}
   3256
   3257		/*
   3258		 * Only the first request added (committed to backend) has to
   3259		 * take the in fences into account as all subsequent requests
   3260		 * will have fences inserted inbetween them.
   3261		 */
   3262		if (i + 1 == eb->num_batches) {
   3263			out_fence = eb_fences_add(eb, eb->requests[i],
   3264						  in_fence, out_fence_fd);
   3265			if (IS_ERR(out_fence))
   3266				return out_fence;
   3267		}
   3268
   3269		/*
   3270		 * Not really on stack, but we don't want to call
   3271		 * kfree on the batch_snapshot when we put it, so use the
   3272		 * _onstack interface.
   3273		 */
   3274		if (eb->batches[i]->vma)
   3275			eb->requests[i]->batch_res =
   3276				i915_vma_resource_get(eb->batches[i]->vma->resource);
   3277		if (eb->batch_pool) {
   3278			GEM_BUG_ON(intel_context_is_parallel(eb->context));
   3279			intel_gt_buffer_pool_mark_active(eb->batch_pool,
   3280							 eb->requests[i]);
   3281		}
   3282	}
   3283
   3284	return out_fence;
   3285}
   3286
   3287static int
   3288i915_gem_do_execbuffer(struct drm_device *dev,
   3289		       struct drm_file *file,
   3290		       struct drm_i915_gem_execbuffer2 *args,
   3291		       struct drm_i915_gem_exec_object2 *exec)
   3292{
   3293	struct drm_i915_private *i915 = to_i915(dev);
   3294	struct i915_execbuffer eb;
   3295	struct dma_fence *in_fence = NULL;
   3296	struct sync_file *out_fence = NULL;
   3297	int out_fence_fd = -1;
   3298	int err;
   3299
   3300	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
   3301	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
   3302		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
   3303
   3304	eb.i915 = i915;
   3305	eb.file = file;
   3306	eb.args = args;
   3307	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
   3308		args->flags |= __EXEC_HAS_RELOC;
   3309
   3310	eb.exec = exec;
   3311	eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
   3312	eb.vma[0].vma = NULL;
   3313	eb.batch_pool = NULL;
   3314
   3315	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
   3316	reloc_cache_init(&eb.reloc_cache, eb.i915);
   3317
   3318	eb.buffer_count = args->buffer_count;
   3319	eb.batch_start_offset = args->batch_start_offset;
   3320	eb.trampoline = NULL;
   3321
   3322	eb.fences = NULL;
   3323	eb.num_fences = 0;
   3324
   3325	eb_capture_list_clear(&eb);
   3326
   3327	memset(eb.requests, 0, sizeof(struct i915_request *) *
   3328	       ARRAY_SIZE(eb.requests));
   3329	eb.composite_fence = NULL;
   3330
   3331	eb.batch_flags = 0;
   3332	if (args->flags & I915_EXEC_SECURE) {
   3333		if (GRAPHICS_VER(i915) >= 11)
   3334			return -ENODEV;
   3335
   3336		/* Return -EPERM to trigger fallback code on old binaries. */
   3337		if (!HAS_SECURE_BATCHES(i915))
   3338			return -EPERM;
   3339
   3340		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
   3341			return -EPERM;
   3342
   3343		eb.batch_flags |= I915_DISPATCH_SECURE;
   3344	}
   3345	if (args->flags & I915_EXEC_IS_PINNED)
   3346		eb.batch_flags |= I915_DISPATCH_PINNED;
   3347
   3348	err = parse_execbuf2_extensions(args, &eb);
   3349	if (err)
   3350		goto err_ext;
   3351
   3352	err = add_fence_array(&eb);
   3353	if (err)
   3354		goto err_ext;
   3355
   3356#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
   3357	if (args->flags & IN_FENCES) {
   3358		if ((args->flags & IN_FENCES) == IN_FENCES)
   3359			return -EINVAL;
   3360
   3361		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
   3362		if (!in_fence) {
   3363			err = -EINVAL;
   3364			goto err_ext;
   3365		}
   3366	}
   3367#undef IN_FENCES
   3368
   3369	if (args->flags & I915_EXEC_FENCE_OUT) {
   3370		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
   3371		if (out_fence_fd < 0) {
   3372			err = out_fence_fd;
   3373			goto err_in_fence;
   3374		}
   3375	}
   3376
   3377	err = eb_create(&eb);
   3378	if (err)
   3379		goto err_out_fence;
   3380
   3381	GEM_BUG_ON(!eb.lut_size);
   3382
   3383	err = eb_select_context(&eb);
   3384	if (unlikely(err))
   3385		goto err_destroy;
   3386
   3387	err = eb_select_engine(&eb);
   3388	if (unlikely(err))
   3389		goto err_context;
   3390
   3391	err = eb_lookup_vmas(&eb);
   3392	if (err) {
   3393		eb_release_vmas(&eb, true);
   3394		goto err_engine;
   3395	}
   3396
   3397	i915_gem_ww_ctx_init(&eb.ww, true);
   3398
   3399	err = eb_relocate_parse(&eb);
   3400	if (err) {
   3401		/*
   3402		 * If the user expects the execobject.offset and
   3403		 * reloc.presumed_offset to be an exact match,
   3404		 * as for using NO_RELOC, then we cannot update
   3405		 * the execobject.offset until we have completed
   3406		 * relocation.
   3407		 */
   3408		args->flags &= ~__EXEC_HAS_RELOC;
   3409		goto err_vma;
   3410	}
   3411
   3412	ww_acquire_done(&eb.ww.ctx);
   3413	eb_capture_stage(&eb);
   3414
   3415	out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
   3416	if (IS_ERR(out_fence)) {
   3417		err = PTR_ERR(out_fence);
   3418		out_fence = NULL;
   3419		if (eb.requests[0])
   3420			goto err_request;
   3421		else
   3422			goto err_vma;
   3423	}
   3424
   3425	err = eb_submit(&eb);
   3426
   3427err_request:
   3428	eb_requests_get(&eb);
   3429	err = eb_requests_add(&eb, err);
   3430
   3431	if (eb.fences)
   3432		signal_fence_array(&eb, eb.composite_fence ?
   3433				   eb.composite_fence :
   3434				   &eb.requests[0]->fence);
   3435
   3436	if (out_fence) {
   3437		if (err == 0) {
   3438			fd_install(out_fence_fd, out_fence->file);
   3439			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
   3440			args->rsvd2 |= (u64)out_fence_fd << 32;
   3441			out_fence_fd = -1;
   3442		} else {
   3443			fput(out_fence->file);
   3444		}
   3445	}
   3446
   3447	if (unlikely(eb.gem_context->syncobj)) {
   3448		drm_syncobj_replace_fence(eb.gem_context->syncobj,
   3449					  eb.composite_fence ?
   3450					  eb.composite_fence :
   3451					  &eb.requests[0]->fence);
   3452	}
   3453
   3454	if (!out_fence && eb.composite_fence)
   3455		dma_fence_put(eb.composite_fence);
   3456
   3457	eb_requests_put(&eb);
   3458
   3459err_vma:
   3460	eb_release_vmas(&eb, true);
   3461	WARN_ON(err == -EDEADLK);
   3462	i915_gem_ww_ctx_fini(&eb.ww);
   3463
   3464	if (eb.batch_pool)
   3465		intel_gt_buffer_pool_put(eb.batch_pool);
   3466err_engine:
   3467	eb_put_engine(&eb);
   3468err_context:
   3469	i915_gem_context_put(eb.gem_context);
   3470err_destroy:
   3471	eb_destroy(&eb);
   3472err_out_fence:
   3473	if (out_fence_fd != -1)
   3474		put_unused_fd(out_fence_fd);
   3475err_in_fence:
   3476	dma_fence_put(in_fence);
   3477err_ext:
   3478	put_fence_array(eb.fences, eb.num_fences);
   3479	return err;
   3480}
   3481
   3482static size_t eb_element_size(void)
   3483{
   3484	return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
   3485}
   3486
   3487static bool check_buffer_count(size_t count)
   3488{
   3489	const size_t sz = eb_element_size();
   3490
   3491	/*
   3492	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
   3493	 * array size (see eb_create()). Otherwise, we can accept an array as
   3494	 * large as can be addressed (though use large arrays at your peril)!
   3495	 */
   3496
   3497	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
   3498}
   3499
   3500int
   3501i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
   3502			   struct drm_file *file)
   3503{
   3504	struct drm_i915_private *i915 = to_i915(dev);
   3505	struct drm_i915_gem_execbuffer2 *args = data;
   3506	struct drm_i915_gem_exec_object2 *exec2_list;
   3507	const size_t count = args->buffer_count;
   3508	int err;
   3509
   3510	if (!check_buffer_count(count)) {
   3511		drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
   3512		return -EINVAL;
   3513	}
   3514
   3515	err = i915_gem_check_execbuffer(args);
   3516	if (err)
   3517		return err;
   3518
   3519	/* Allocate extra slots for use by the command parser */
   3520	exec2_list = kvmalloc_array(count + 2, eb_element_size(),
   3521				    __GFP_NOWARN | GFP_KERNEL);
   3522	if (exec2_list == NULL) {
   3523		drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
   3524			count);
   3525		return -ENOMEM;
   3526	}
   3527	if (copy_from_user(exec2_list,
   3528			   u64_to_user_ptr(args->buffers_ptr),
   3529			   sizeof(*exec2_list) * count)) {
   3530		drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count);
   3531		kvfree(exec2_list);
   3532		return -EFAULT;
   3533	}
   3534
   3535	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
   3536
   3537	/*
   3538	 * Now that we have begun execution of the batchbuffer, we ignore
   3539	 * any new error after this point. Also given that we have already
   3540	 * updated the associated relocations, we try to write out the current
   3541	 * object locations irrespective of any error.
   3542	 */
   3543	if (args->flags & __EXEC_HAS_RELOC) {
   3544		struct drm_i915_gem_exec_object2 __user *user_exec_list =
   3545			u64_to_user_ptr(args->buffers_ptr);
   3546		unsigned int i;
   3547
   3548		/* Copy the new buffer offsets back to the user's exec list. */
   3549		/*
   3550		 * Note: count * sizeof(*user_exec_list) does not overflow,
   3551		 * because we checked 'count' in check_buffer_count().
   3552		 *
   3553		 * And this range already got effectively checked earlier
   3554		 * when we did the "copy_from_user()" above.
   3555		 */
   3556		if (!user_write_access_begin(user_exec_list,
   3557					     count * sizeof(*user_exec_list)))
   3558			goto end;
   3559
   3560		for (i = 0; i < args->buffer_count; i++) {
   3561			if (!(exec2_list[i].offset & UPDATE))
   3562				continue;
   3563
   3564			exec2_list[i].offset =
   3565				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
   3566			unsafe_put_user(exec2_list[i].offset,
   3567					&user_exec_list[i].offset,
   3568					end_user);
   3569		}
   3570end_user:
   3571		user_write_access_end();
   3572end:;
   3573	}
   3574
   3575	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
   3576	kvfree(exec2_list);
   3577	return err;
   3578}