i915_gem_shrinker.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
i915_gem_shrinker.c (17707B)
      1/*
      2 * SPDX-License-Identifier: MIT
      3 *
      4 * Copyright © 2008-2015 Intel Corporation
      5 */
      6
      7#include <linux/oom.h>
      8#include <linux/sched/mm.h>
      9#include <linux/shmem_fs.h>
     10#include <linux/slab.h>
     11#include <linux/swap.h>
     12#include <linux/pci.h>
     13#include <linux/dma-buf.h>
     14#include <linux/vmalloc.h>
     15
     16#include "gt/intel_gt_requests.h"
     17
     18#include "i915_trace.h"
     19
     20static bool swap_available(void)
     21{
     22	return get_nr_swap_pages() > 0;
     23}
     24
     25static bool can_release_pages(struct drm_i915_gem_object *obj)
     26{
     27	/* Consider only shrinkable ojects. */
     28	if (!i915_gem_object_is_shrinkable(obj))
     29		return false;
     30
     31	/*
     32	 * We can only return physical pages to the system if we can either
     33	 * discard the contents (because the user has marked them as being
     34	 * purgeable) or if we can move their contents out to swap.
     35	 */
     36	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
     37}
     38
     39static int drop_pages(struct drm_i915_gem_object *obj,
     40		       unsigned long shrink, bool trylock_vm)
     41{
     42	unsigned long flags;
     43
     44	flags = 0;
     45	if (shrink & I915_SHRINK_ACTIVE)
     46		flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
     47	if (!(shrink & I915_SHRINK_BOUND))
     48		flags |= I915_GEM_OBJECT_UNBIND_TEST;
     49	if (trylock_vm)
     50		flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;
     51
     52	if (i915_gem_object_unbind(obj, flags) == 0)
     53		return true;
     54
     55	return false;
     56}
     57
     58static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
     59{
     60	if (obj->ops->shrink) {
     61		unsigned int shrink_flags = 0;
     62
     63		if (!(flags & I915_SHRINK_ACTIVE))
     64			shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
     65
     66		if (flags & I915_SHRINK_WRITEBACK)
     67			shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK;
     68
     69		return obj->ops->shrink(obj, shrink_flags);
     70	}
     71
     72	return 0;
     73}
     74
     75/**
     76 * i915_gem_shrink - Shrink buffer object caches
     77 * @ww: i915 gem ww acquire ctx, or NULL
     78 * @i915: i915 device
     79 * @target: amount of memory to make available, in pages
     80 * @nr_scanned: optional output for number of pages scanned (incremental)
     81 * @shrink: control flags for selecting cache types
     82 *
     83 * This function is the main interface to the shrinker. It will try to release
     84 * up to @target pages of main memory backing storage from buffer objects.
     85 * Selection of the specific caches can be done with @flags. This is e.g. useful
     86 * when purgeable objects should be removed from caches preferentially.
     87 *
     88 * Note that it's not guaranteed that released amount is actually available as
     89 * free system memory - the pages might still be in-used to due to other reasons
     90 * (like cpu mmaps) or the mm core has reused them before we could grab them.
     91 * Therefore code that needs to explicitly shrink buffer objects caches (e.g. to
     92 * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
     93 *
     94 * Also note that any kind of pinning (both per-vma address space pins and
     95 * backing storage pins at the buffer object level) result in the shrinker code
     96 * having to skip the object.
     97 *
     98 * Returns:
     99 * The number of pages of backing storage actually released.
    100 */
    101unsigned long
    102i915_gem_shrink(struct i915_gem_ww_ctx *ww,
    103		struct drm_i915_private *i915,
    104		unsigned long target,
    105		unsigned long *nr_scanned,
    106		unsigned int shrink)
    107{
    108	const struct {
    109		struct list_head *list;
    110		unsigned int bit;
    111	} phases[] = {
    112		{ &i915->mm.purge_list, ~0u },
    113		{
    114			&i915->mm.shrink_list,
    115			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
    116		},
    117		{ NULL, 0 },
    118	}, *phase;
    119	intel_wakeref_t wakeref = 0;
    120	unsigned long count = 0;
    121	unsigned long scanned = 0;
    122	int err = 0;
    123
    124	/* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */
    125	bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);
    126
    127	trace_i915_gem_shrink(i915, target, shrink);
    128
    129	/*
    130	 * Unbinding of objects will require HW access; Let us not wake the
    131	 * device just to recover a little memory. If absolutely necessary,
    132	 * we will force the wake during oom-notifier.
    133	 */
    134	if (shrink & I915_SHRINK_BOUND) {
    135		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
    136		if (!wakeref)
    137			shrink &= ~I915_SHRINK_BOUND;
    138	}
    139
    140	/*
    141	 * When shrinking the active list, we should also consider active
    142	 * contexts. Active contexts are pinned until they are retired, and
    143	 * so can not be simply unbound to retire and unpin their pages. To
    144	 * shrink the contexts, we must wait until the gpu is idle and
    145	 * completed its switch to the kernel context. In short, we do
    146	 * not have a good mechanism for idling a specific context, but
    147	 * what we can do is give them a kick so that we do not keep idle
    148	 * contexts around longer than is necessary.
    149	 */
    150	if (shrink & I915_SHRINK_ACTIVE)
    151		/* Retire requests to unpin all idle contexts */
    152		intel_gt_retire_requests(to_gt(i915));
    153
    154	/*
    155	 * As we may completely rewrite the (un)bound list whilst unbinding
    156	 * (due to retiring requests) we have to strictly process only
    157	 * one element of the list at the time, and recheck the list
    158	 * on every iteration.
    159	 *
    160	 * In particular, we must hold a reference whilst removing the
    161	 * object as we may end up waiting for and/or retiring the objects.
    162	 * This might release the final reference (held by the active list)
    163	 * and result in the object being freed from under us. This is
    164	 * similar to the precautions the eviction code must take whilst
    165	 * removing objects.
    166	 *
    167	 * Also note that although these lists do not hold a reference to
    168	 * the object we can safely grab one here: The final object
    169	 * unreferencing and the bound_list are both protected by the
    170	 * dev->struct_mutex and so we won't ever be able to observe an
    171	 * object on the bound_list with a reference count equals 0.
    172	 */
    173	for (phase = phases; phase->list; phase++) {
    174		struct list_head still_in_list;
    175		struct drm_i915_gem_object *obj;
    176		unsigned long flags;
    177
    178		if ((shrink & phase->bit) == 0)
    179			continue;
    180
    181		INIT_LIST_HEAD(&still_in_list);
    182
    183		/*
    184		 * We serialize our access to unreferenced objects through
    185		 * the use of the struct_mutex. While the objects are not
    186		 * yet freed (due to RCU then a workqueue) we still want
    187		 * to be able to shrink their pages, so they remain on
    188		 * the unbound/bound list until actually freed.
    189		 */
    190		spin_lock_irqsave(&i915->mm.obj_lock, flags);
    191		while (count < target &&
    192		       (obj = list_first_entry_or_null(phase->list,
    193						       typeof(*obj),
    194						       mm.link))) {
    195			list_move_tail(&obj->mm.link, &still_in_list);
    196
    197			if (shrink & I915_SHRINK_VMAPS &&
    198			    !is_vmalloc_addr(obj->mm.mapping))
    199				continue;
    200
    201			if (!(shrink & I915_SHRINK_ACTIVE) &&
    202			    i915_gem_object_is_framebuffer(obj))
    203				continue;
    204
    205			if (!can_release_pages(obj))
    206				continue;
    207
    208			if (!kref_get_unless_zero(&obj->base.refcount))
    209				continue;
    210
    211			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
    212
    213			/* May arrive from get_pages on another bo */
    214			if (!ww) {
    215				if (!i915_gem_object_trylock(obj, NULL))
    216					goto skip;
    217			} else {
    218				err = i915_gem_object_lock(obj, ww);
    219				if (err)
    220					goto skip;
    221			}
    222
    223			if (drop_pages(obj, shrink, trylock_vm) &&
    224			    !__i915_gem_object_put_pages(obj) &&
    225			    !try_to_writeback(obj, shrink))
    226				count += obj->base.size >> PAGE_SHIFT;
    227
    228			if (!ww)
    229				i915_gem_object_unlock(obj);
    230
    231			scanned += obj->base.size >> PAGE_SHIFT;
    232skip:
    233			i915_gem_object_put(obj);
    234
    235			spin_lock_irqsave(&i915->mm.obj_lock, flags);
    236			if (err)
    237				break;
    238		}
    239		list_splice_tail(&still_in_list, phase->list);
    240		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
    241		if (err)
    242			break;
    243	}
    244
    245	if (shrink & I915_SHRINK_BOUND)
    246		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
    247
    248	if (err)
    249		return err;
    250
    251	if (nr_scanned)
    252		*nr_scanned += scanned;
    253	return count;
    254}
    255
    256/**
    257 * i915_gem_shrink_all - Shrink buffer object caches completely
    258 * @i915: i915 device
    259 *
    260 * This is a simple wraper around i915_gem_shrink() to aggressively shrink all
    261 * caches completely. It also first waits for and retires all outstanding
    262 * requests to also be able to release backing storage for active objects.
    263 *
    264 * This should only be used in code to intentionally quiescent the gpu or as a
    265 * last-ditch effort when memory seems to have run out.
    266 *
    267 * Returns:
    268 * The number of pages of backing storage actually released.
    269 */
    270unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
    271{
    272	intel_wakeref_t wakeref;
    273	unsigned long freed = 0;
    274
    275	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
    276		freed = i915_gem_shrink(NULL, i915, -1UL, NULL,
    277					I915_SHRINK_BOUND |
    278					I915_SHRINK_UNBOUND);
    279	}
    280
    281	return freed;
    282}
    283
    284static unsigned long
    285i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
    286{
    287	struct drm_i915_private *i915 =
    288		container_of(shrinker, struct drm_i915_private, mm.shrinker);
    289	unsigned long num_objects;
    290	unsigned long count;
    291
    292	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
    293	num_objects = READ_ONCE(i915->mm.shrink_count);
    294
    295	/*
    296	 * Update our preferred vmscan batch size for the next pass.
    297	 * Our rough guess for an effective batch size is roughly 2
    298	 * available GEM objects worth of pages. That is we don't want
    299	 * the shrinker to fire, until it is worth the cost of freeing an
    300	 * entire GEM object.
    301	 */
    302	if (num_objects) {
    303		unsigned long avg = 2 * count / num_objects;
    304
    305		i915->mm.shrinker.batch =
    306			max((i915->mm.shrinker.batch + avg) >> 1,
    307			    128ul /* default SHRINK_BATCH */);
    308	}
    309
    310	return count;
    311}
    312
    313static unsigned long
    314i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
    315{
    316	struct drm_i915_private *i915 =
    317		container_of(shrinker, struct drm_i915_private, mm.shrinker);
    318	unsigned long freed;
    319
    320	sc->nr_scanned = 0;
    321
    322	freed = i915_gem_shrink(NULL, i915,
    323				sc->nr_to_scan,
    324				&sc->nr_scanned,
    325				I915_SHRINK_BOUND |
    326				I915_SHRINK_UNBOUND);
    327	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
    328		intel_wakeref_t wakeref;
    329
    330		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
    331			freed += i915_gem_shrink(NULL, i915,
    332						 sc->nr_to_scan - sc->nr_scanned,
    333						 &sc->nr_scanned,
    334						 I915_SHRINK_ACTIVE |
    335						 I915_SHRINK_BOUND |
    336						 I915_SHRINK_UNBOUND |
    337						 I915_SHRINK_WRITEBACK);
    338		}
    339	}
    340
    341	return sc->nr_scanned ? freed : SHRINK_STOP;
    342}
    343
    344static int
    345i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
    346{
    347	struct drm_i915_private *i915 =
    348		container_of(nb, struct drm_i915_private, mm.oom_notifier);
    349	struct drm_i915_gem_object *obj;
    350	unsigned long unevictable, available, freed_pages;
    351	intel_wakeref_t wakeref;
    352	unsigned long flags;
    353
    354	freed_pages = 0;
    355	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
    356		freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
    357					       I915_SHRINK_BOUND |
    358					       I915_SHRINK_UNBOUND |
    359					       I915_SHRINK_WRITEBACK);
    360
    361	/* Because we may be allocating inside our own driver, we cannot
    362	 * assert that there are no objects with pinned pages that are not
    363	 * being pointed to by hardware.
    364	 */
    365	available = unevictable = 0;
    366	spin_lock_irqsave(&i915->mm.obj_lock, flags);
    367	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
    368		if (!can_release_pages(obj))
    369			unevictable += obj->base.size >> PAGE_SHIFT;
    370		else
    371			available += obj->base.size >> PAGE_SHIFT;
    372	}
    373	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
    374
    375	if (freed_pages || available)
    376		pr_info("Purging GPU memory, %lu pages freed, "
    377			"%lu pages still pinned, %lu pages left available.\n",
    378			freed_pages, unevictable, available);
    379
    380	*(unsigned long *)ptr += freed_pages;
    381	return NOTIFY_DONE;
    382}
    383
    384static int
    385i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
    386{
    387	struct drm_i915_private *i915 =
    388		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
    389	struct i915_vma *vma, *next;
    390	unsigned long freed_pages = 0;
    391	intel_wakeref_t wakeref;
    392
    393	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
    394		freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
    395					       I915_SHRINK_BOUND |
    396					       I915_SHRINK_UNBOUND |
    397					       I915_SHRINK_VMAPS);
    398
    399	/* We also want to clear any cached iomaps as they wrap vmap */
    400	mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
    401	list_for_each_entry_safe(vma, next,
    402				 &to_gt(i915)->ggtt->vm.bound_list, vm_link) {
    403		unsigned long count = vma->node.size >> PAGE_SHIFT;
    404		struct drm_i915_gem_object *obj = vma->obj;
    405
    406		if (!vma->iomap || i915_vma_is_active(vma))
    407			continue;
    408
    409		if (!i915_gem_object_trylock(obj, NULL))
    410			continue;
    411
    412		if (__i915_vma_unbind(vma) == 0)
    413			freed_pages += count;
    414
    415		i915_gem_object_unlock(obj);
    416	}
    417	mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
    418
    419	*(unsigned long *)ptr += freed_pages;
    420	return NOTIFY_DONE;
    421}
    422
    423void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
    424{
    425	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
    426	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
    427	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
    428	i915->mm.shrinker.batch = 4096;
    429	drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker));
    430
    431	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
    432	drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier));
    433
    434	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
    435	drm_WARN_ON(&i915->drm,
    436		    register_vmap_purge_notifier(&i915->mm.vmap_notifier));
    437}
    438
    439void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
    440{
    441	drm_WARN_ON(&i915->drm,
    442		    unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
    443	drm_WARN_ON(&i915->drm,
    444		    unregister_oom_notifier(&i915->mm.oom_notifier));
    445	unregister_shrinker(&i915->mm.shrinker);
    446}
    447
    448void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
    449				    struct mutex *mutex)
    450{
    451	if (!IS_ENABLED(CONFIG_LOCKDEP))
    452		return;
    453
    454	fs_reclaim_acquire(GFP_KERNEL);
    455
    456	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
    457	mutex_release(&mutex->dep_map, _RET_IP_);
    458
    459	fs_reclaim_release(GFP_KERNEL);
    460}
    461
    462#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
    463
    464/**
    465 * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
    466 * default all object types that support shrinking(see IS_SHRINKABLE), will also
    467 * make the object visible to the shrinker after allocating the system memory
    468 * pages.
    469 * @obj: The GEM object.
    470 *
    471 * This is typically used for special kernel internal objects that can't be
    472 * easily processed by the shrinker, like if they are perma-pinned.
    473 */
    474void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
    475{
    476	struct drm_i915_private *i915 = obj_to_i915(obj);
    477	unsigned long flags;
    478
    479	/*
    480	 * We can only be called while the pages are pinned or when
    481	 * the pages are released. If pinned, we should only be called
    482	 * from a single caller under controlled conditions; and on release
    483	 * only one caller may release us. Neither the two may cross.
    484	 */
    485	if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
    486		return;
    487
    488	spin_lock_irqsave(&i915->mm.obj_lock, flags);
    489	if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
    490	    !list_empty(&obj->mm.link)) {
    491		list_del_init(&obj->mm.link);
    492		i915->mm.shrink_count--;
    493		i915->mm.shrink_memory -= obj->base.size;
    494	}
    495	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
    496}
    497
    498static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
    499					       struct list_head *head)
    500{
    501	struct drm_i915_private *i915 = obj_to_i915(obj);
    502	unsigned long flags;
    503
    504	if (!i915_gem_object_is_shrinkable(obj))
    505		return;
    506
    507	if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
    508		return;
    509
    510	spin_lock_irqsave(&i915->mm.obj_lock, flags);
    511	GEM_BUG_ON(!kref_read(&obj->base.refcount));
    512	if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
    513		GEM_BUG_ON(!list_empty(&obj->mm.link));
    514
    515		list_add_tail(&obj->mm.link, head);
    516		i915->mm.shrink_count++;
    517		i915->mm.shrink_memory += obj->base.size;
    518
    519	}
    520	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
    521}
    522
    523/**
    524 * __i915_gem_object_make_shrinkable - Move the object to the tail of the
    525 * shrinkable list. Objects on this list might be swapped out. Used with
    526 * WILLNEED objects.
    527 * @obj: The GEM object.
    528 *
    529 * DO NOT USE. This is intended to be called on very special objects that don't
    530 * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
    531 * underneath.
    532 */
    533void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
    534{
    535	___i915_gem_object_make_shrinkable(obj,
    536					   &obj_to_i915(obj)->mm.shrink_list);
    537}
    538
    539/**
    540 * __i915_gem_object_make_purgeable - Move the object to the tail of the
    541 * purgeable list. Objects on this list might be swapped out. Used with
    542 * DONTNEED objects.
    543 * @obj: The GEM object.
    544 *
    545 * DO NOT USE. This is intended to be called on very special objects that don't
    546 * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
    547 * underneath.
    548 */
    549void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
    550{
    551	___i915_gem_object_make_shrinkable(obj,
    552					   &obj_to_i915(obj)->mm.purge_list);
    553}
    554
    555/**
    556 * i915_gem_object_make_shrinkable - Move the object to the tail of the
    557 * shrinkable list. Objects on this list might be swapped out. Used with
    558 * WILLNEED objects.
    559 * @obj: The GEM object.
    560 *
    561 * MUST only be called on objects which have backing pages.
    562 *
    563 * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
    564 */
    565void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
    566{
    567	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
    568	__i915_gem_object_make_shrinkable(obj);
    569}
    570
    571/**
    572 * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
    573 * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
    574 * shrinker will attempt to discard the backing pages, instead of trying to swap
    575 * them out.
    576 * @obj: The GEM object.
    577 *
    578 * MUST only be called on objects which have backing pages.
    579 *
    580 * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
    581 */
    582void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
    583{
    584	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
    585	__i915_gem_object_make_purgeable(obj);
    586}