cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

i915_gem_domain.c (19713B)


/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

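/*
 * Does a GPU (render) write need a clflush before the data is visible
 * elsewhere? Only on integrated parts, and only when the object uses a
 * cacheable (neither uncached nor write-through) cache level.
 */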
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

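/*
 * Does a CPU write need to be clflushed before the GPU or display can use
 * the object? Not if a flush is already pending (cache_dirty), not on
 * discrete parts, and not for write-coherent objects unless they are
 * currently used as a framebuffer by the display engine.
 */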
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

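/*
 * Flush writes pending in the object's current write domain, if it is one of
 * @flush_domains, so they become visible to other domains, then clear
 * obj->write_domain.
 */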
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

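/*
 * Flush pending writes for an object that is being scanned out as a
 * framebuffer, taking the object lock around the flush; a no-op for
 * non-framebuffer objects.
 */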
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

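/* As above, but the caller must already hold the object lock. */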
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

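/*
 * GET_CACHING ioctl: report the object's current caching mode (cached,
 * display/write-through or none) back to userspace. Not available on
 * discrete GPUs.
 */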
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

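/*
 * SET_CACHING ioctl: translate the caching mode requested by userspace into
 * an i915_cache_level and apply it via i915_gem_object_set_cache_level().
 * Not available on discrete GPUs; proxy objects are rejected, except that
 * userptr objects may be (re)set to cached.
 */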
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However, for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages; iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

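/*
 * Write-side counterpart of i915_gem_object_prepare_read(): pin the pages,
 * wait for all GPU access and report via needs_clflush whether cachelines
 * must be flushed before and/or after the CPU write.
 */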
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}