cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel_memory_region.c (31253B)


      1// SPDX-License-Identifier: MIT
      2/*
      3 * Copyright © 2019 Intel Corporation
      4 */
      5
      6#include <linux/prime_numbers.h>
      7#include <linux/sort.h>
      8
      9#include <drm/drm_buddy.h>
     10
     11#include "../i915_selftest.h"
     12
     13#include "mock_drm.h"
     14#include "mock_gem_device.h"
     15#include "mock_region.h"
     16
     17#include "gem/i915_gem_context.h"
     18#include "gem/i915_gem_lmem.h"
     19#include "gem/i915_gem_region.h"
     20#include "gem/i915_gem_ttm.h"
     21#include "gem/selftests/igt_gem_utils.h"
     22#include "gem/selftests/mock_context.h"
     23#include "gt/intel_engine_pm.h"
     24#include "gt/intel_engine_user.h"
     25#include "gt/intel_gt.h"
     26#include "gt/intel_migrate.h"
     27#include "i915_memcpy.h"
     28#include "i915_ttm_buddy_manager.h"
     29#include "selftests/igt_flush_test.h"
     30#include "selftests/i915_random.h"
     31
     32static void close_objects(struct intel_memory_region *mem,
     33			  struct list_head *objects)
     34{
     35	struct drm_i915_private *i915 = mem->i915;
     36	struct drm_i915_gem_object *obj, *on;
     37
     38	list_for_each_entry_safe(obj, on, objects, st_link) {
     39		i915_gem_object_lock(obj, NULL);
     40		if (i915_gem_object_has_pinned_pages(obj))
     41			i915_gem_object_unpin_pages(obj);
     42		/* No polluting the memory region between tests */
     43		__i915_gem_object_put_pages(obj);
     44		i915_gem_object_unlock(obj);
     45		list_del(&obj->st_link);
     46		i915_gem_object_put(obj);
     47	}
     48
     49	cond_resched();
     50
     51	i915_gem_drain_freed_objects(i915);
     52}
     53
     54static int igt_mock_fill(void *arg)
     55{
     56	struct intel_memory_region *mem = arg;
     57	resource_size_t total = resource_size(&mem->region);
     58	resource_size_t page_size;
     59	resource_size_t rem;
     60	unsigned long max_pages;
     61	unsigned long page_num;
     62	LIST_HEAD(objects);
     63	int err = 0;
     64
     65	page_size = PAGE_SIZE;
     66	max_pages = div64_u64(total, page_size);
     67	rem = total;
     68
     69	for_each_prime_number_from(page_num, 1, max_pages) {
     70		resource_size_t size = page_num * page_size;
     71		struct drm_i915_gem_object *obj;
     72
     73		obj = i915_gem_object_create_region(mem, size, 0, 0);
     74		if (IS_ERR(obj)) {
     75			err = PTR_ERR(obj);
     76			break;
     77		}
     78
     79		err = i915_gem_object_pin_pages_unlocked(obj);
     80		if (err) {
     81			i915_gem_object_put(obj);
     82			break;
     83		}
     84
     85		list_add(&obj->st_link, &objects);
     86		rem -= size;
     87	}
     88
     89	if (err == -ENOMEM)
     90		err = 0;
     91	if (err == -ENXIO) {
     92		if (page_num * page_size <= rem) {
     93			pr_err("%s failed, space still left in region\n",
     94			       __func__);
     95			err = -EINVAL;
     96		} else {
     97			err = 0;
     98		}
     99	}
    100
    101	close_objects(mem, &objects);
    102
    103	return err;
    104}
    105
    106static struct drm_i915_gem_object *
    107igt_object_create(struct intel_memory_region *mem,
    108		  struct list_head *objects,
    109		  u64 size,
    110		  unsigned int flags)
    111{
    112	struct drm_i915_gem_object *obj;
    113	int err;
    114
    115	obj = i915_gem_object_create_region(mem, size, 0, flags);
    116	if (IS_ERR(obj))
    117		return obj;
    118
    119	err = i915_gem_object_pin_pages_unlocked(obj);
    120	if (err)
    121		goto put;
    122
    123	list_add(&obj->st_link, objects);
    124	return obj;
    125
    126put:
    127	i915_gem_object_put(obj);
    128	return ERR_PTR(err);
    129}
    130
    131static void igt_object_release(struct drm_i915_gem_object *obj)
    132{
    133	i915_gem_object_lock(obj, NULL);
    134	i915_gem_object_unpin_pages(obj);
    135	__i915_gem_object_put_pages(obj);
    136	i915_gem_object_unlock(obj);
    137	list_del(&obj->st_link);
    138	i915_gem_object_put(obj);
    139}
    140
    141static bool is_contiguous(struct drm_i915_gem_object *obj)
    142{
    143	struct scatterlist *sg;
    144	dma_addr_t addr = -1;
    145
    146	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
    147		if (addr != -1 && sg_dma_address(sg) != addr)
    148			return false;
    149
    150		addr = sg_dma_address(sg) + sg_dma_len(sg);
    151	}
    152
    153	return true;
    154}
    155
    156static int igt_mock_reserve(void *arg)
    157{
    158	struct intel_memory_region *mem = arg;
    159	struct drm_i915_private *i915 = mem->i915;
    160	resource_size_t avail = resource_size(&mem->region);
    161	struct drm_i915_gem_object *obj;
    162	const u32 chunk_size = SZ_32M;
    163	u32 i, offset, count, *order;
    164	u64 allocated, cur_avail;
    165	I915_RND_STATE(prng);
    166	LIST_HEAD(objects);
    167	int err = 0;
    168
    169	count = avail / chunk_size;
    170	order = i915_random_order(count, &prng);
    171	if (!order)
    172		return 0;
    173
    174	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
    175	if (IS_ERR(mem)) {
    176		pr_err("failed to create memory region\n");
    177		err = PTR_ERR(mem);
    178		goto out_free_order;
    179	}
    180
    181	/* Reserve a bunch of ranges within the region */
    182	for (i = 0; i < count; ++i) {
    183		u64 start = order[i] * chunk_size;
    184		u64 size = i915_prandom_u32_max_state(chunk_size, &prng);
    185
    186		/* Allow for some really big holes */
    187		if (!size)
    188			continue;
    189
    190		size = round_up(size, PAGE_SIZE);
    191		offset = igt_random_offset(&prng, 0, chunk_size, size,
    192					   PAGE_SIZE);
    193
    194		err = intel_memory_region_reserve(mem, start + offset, size);
    195		if (err) {
     196			pr_err("%s failed to reserve range\n", __func__);
    197			goto out_close;
    198		}
    199
    200		/* XXX: maybe sanity check the block range here? */
    201		avail -= size;
    202	}
    203
    204	/* Try to see if we can allocate from the remaining space */
    205	allocated = 0;
    206	cur_avail = avail;
    207	do {
    208		u32 size = i915_prandom_u32_max_state(cur_avail, &prng);
    209
    210		size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
    211		obj = igt_object_create(mem, &objects, size, 0);
    212		if (IS_ERR(obj)) {
    213			if (PTR_ERR(obj) == -ENXIO)
    214				break;
    215
    216			err = PTR_ERR(obj);
    217			goto out_close;
    218		}
    219		cur_avail -= size;
    220		allocated += size;
    221	} while (1);
    222
    223	if (allocated != avail) {
     224		pr_err("%s mismatch between allocation and free space\n", __func__);
    225		err = -EINVAL;
    226	}
    227
    228out_close:
    229	close_objects(mem, &objects);
    230	intel_memory_region_destroy(mem);
    231out_free_order:
    232	kfree(order);
    233	return err;
    234}
    235
    236static int igt_mock_contiguous(void *arg)
    237{
    238	struct intel_memory_region *mem = arg;
    239	struct drm_i915_gem_object *obj;
    240	unsigned long n_objects;
    241	LIST_HEAD(objects);
    242	LIST_HEAD(holes);
    243	I915_RND_STATE(prng);
    244	resource_size_t total;
    245	resource_size_t min;
    246	u64 target;
    247	int err = 0;
    248
    249	total = resource_size(&mem->region);
    250
    251	/* Min size */
    252	obj = igt_object_create(mem, &objects, PAGE_SIZE,
    253				I915_BO_ALLOC_CONTIGUOUS);
    254	if (IS_ERR(obj))
    255		return PTR_ERR(obj);
    256
    257	if (!is_contiguous(obj)) {
    258		pr_err("%s min object spans disjoint sg entries\n", __func__);
    259		err = -EINVAL;
    260		goto err_close_objects;
    261	}
    262
    263	igt_object_release(obj);
    264
    265	/* Max size */
    266	obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
    267	if (IS_ERR(obj))
    268		return PTR_ERR(obj);
    269
    270	if (!is_contiguous(obj)) {
    271		pr_err("%s max object spans disjoint sg entries\n", __func__);
    272		err = -EINVAL;
    273		goto err_close_objects;
    274	}
    275
    276	igt_object_release(obj);
    277
    278	/* Internal fragmentation should not bleed into the object size */
    279	target = i915_prandom_u64_state(&prng);
    280	div64_u64_rem(target, total, &target);
    281	target = round_up(target, PAGE_SIZE);
    282	target = max_t(u64, PAGE_SIZE, target);
    283
    284	obj = igt_object_create(mem, &objects, target,
    285				I915_BO_ALLOC_CONTIGUOUS);
    286	if (IS_ERR(obj))
    287		return PTR_ERR(obj);
    288
    289	if (obj->base.size != target) {
    290		pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
    291		       obj->base.size, target);
    292		err = -EINVAL;
    293		goto err_close_objects;
    294	}
    295
    296	if (!is_contiguous(obj)) {
    297		pr_err("%s object spans disjoint sg entries\n", __func__);
    298		err = -EINVAL;
    299		goto err_close_objects;
    300	}
    301
    302	igt_object_release(obj);
    303
    304	/*
    305	 * Try to fragment the address space, such that half of it is free, but
    306	 * the max contiguous block size is SZ_64K.
    307	 */
    308
    309	target = SZ_64K;
    310	n_objects = div64_u64(total, target);
    311
    312	while (n_objects--) {
    313		struct list_head *list;
    314
    315		if (n_objects % 2)
    316			list = &holes;
    317		else
    318			list = &objects;
    319
    320		obj = igt_object_create(mem, list, target,
    321					I915_BO_ALLOC_CONTIGUOUS);
    322		if (IS_ERR(obj)) {
    323			err = PTR_ERR(obj);
    324			goto err_close_objects;
    325		}
    326	}
    327
    328	close_objects(mem, &holes);
    329
    330	min = target;
    331	target = total >> 1;
    332
    333	/* Make sure we can still allocate all the fragmented space */
    334	obj = igt_object_create(mem, &objects, target, 0);
    335	if (IS_ERR(obj)) {
    336		err = PTR_ERR(obj);
    337		goto err_close_objects;
    338	}
    339
    340	igt_object_release(obj);
    341
    342	/*
    343	 * Even though we have enough free space, we don't have a big enough
    344	 * contiguous block. Make sure that holds true.
    345	 */
    346
    347	do {
    348		bool should_fail = target > min;
    349
    350		obj = igt_object_create(mem, &objects, target,
    351					I915_BO_ALLOC_CONTIGUOUS);
    352		if (should_fail != IS_ERR(obj)) {
    353			pr_err("%s target allocation(%llx) mismatch\n",
    354			       __func__, target);
    355			err = -EINVAL;
    356			goto err_close_objects;
    357		}
    358
    359		target >>= 1;
    360	} while (target >= PAGE_SIZE);
    361
    362err_close_objects:
    363	list_splice_tail(&holes, &objects);
    364	close_objects(mem, &objects);
    365	return err;
    366}
    367
    368static int igt_mock_splintered_region(void *arg)
    369{
    370	struct intel_memory_region *mem = arg;
    371	struct drm_i915_private *i915 = mem->i915;
    372	struct i915_ttm_buddy_resource *res;
    373	struct drm_i915_gem_object *obj;
    374	struct drm_buddy *mm;
    375	unsigned int expected_order;
    376	LIST_HEAD(objects);
    377	u64 size;
    378	int err = 0;
    379
    380	/*
    381	 * Sanity check we can still allocate everything even if the
     382	 * mm.max_order != mm.size, i.e. our starting address space size is not a
    383	 * power-of-two.
    384	 */
    385
    386	size = (SZ_4G - 1) & PAGE_MASK;
    387	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
    388	if (IS_ERR(mem))
    389		return PTR_ERR(mem);
    390
    391	obj = igt_object_create(mem, &objects, size, 0);
    392	if (IS_ERR(obj)) {
    393		err = PTR_ERR(obj);
    394		goto out_close;
    395	}
    396
    397	res = to_ttm_buddy_resource(obj->mm.res);
    398	mm = res->mm;
    399	if (mm->size != size) {
    400		pr_err("%s size mismatch(%llu != %llu)\n",
    401		       __func__, mm->size, size);
    402		err = -EINVAL;
    403		goto out_put;
    404	}
    405
    406	expected_order = get_order(rounddown_pow_of_two(size));
    407	if (mm->max_order != expected_order) {
    408		pr_err("%s order mismatch(%u != %u)\n",
    409		       __func__, mm->max_order, expected_order);
    410		err = -EINVAL;
    411		goto out_put;
    412	}
    413
    414	close_objects(mem, &objects);
    415
    416	/*
     417	 * While we should be able to allocate everything without any flag
     418	 * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are
     419	 * actually limited to the largest power-of-two for the region size, i.e.
    420	 * max_order, due to the inner workings of the buddy allocator. So make
    421	 * sure that does indeed hold true.
    422	 */
    423
    424	obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS);
    425	if (!IS_ERR(obj)) {
    426		pr_err("%s too large contiguous allocation was not rejected\n",
    427		       __func__);
    428		err = -EINVAL;
    429		goto out_close;
    430	}
    431
    432	obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size),
    433				I915_BO_ALLOC_CONTIGUOUS);
    434	if (IS_ERR(obj)) {
    435		pr_err("%s largest possible contiguous allocation failed\n",
    436		       __func__);
    437		err = PTR_ERR(obj);
    438		goto out_close;
    439	}
    440
    441out_close:
    442	close_objects(mem, &objects);
    443out_put:
    444	intel_memory_region_destroy(mem);
    445	return err;
    446}
    447
    448#ifndef SZ_8G
    449#define SZ_8G BIT_ULL(33)
    450#endif
    451
    452static int igt_mock_max_segment(void *arg)
    453{
    454	const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE);
    455	struct intel_memory_region *mem = arg;
    456	struct drm_i915_private *i915 = mem->i915;
    457	struct i915_ttm_buddy_resource *res;
    458	struct drm_i915_gem_object *obj;
    459	struct drm_buddy_block *block;
    460	struct drm_buddy *mm;
    461	struct list_head *blocks;
    462	struct scatterlist *sg;
    463	LIST_HEAD(objects);
    464	u64 size;
    465	int err = 0;
    466
    467	/*
    468	 * While we may create very large contiguous blocks, we may need
    469	 * to break those down for consumption elsewhere. In particular,
     470	 * dma-mapping with scatterlist elements has an implicit limit of
    471	 * UINT_MAX on each element.
    472	 */
    473
    474	size = SZ_8G;
    475	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
    476	if (IS_ERR(mem))
    477		return PTR_ERR(mem);
    478
    479	obj = igt_object_create(mem, &objects, size, 0);
    480	if (IS_ERR(obj)) {
    481		err = PTR_ERR(obj);
    482		goto out_put;
    483	}
    484
    485	res = to_ttm_buddy_resource(obj->mm.res);
    486	blocks = &res->blocks;
    487	mm = res->mm;
    488	size = 0;
    489	list_for_each_entry(block, blocks, link) {
    490		if (drm_buddy_block_size(mm, block) > size)
    491			size = drm_buddy_block_size(mm, block);
    492	}
    493	if (size < max_segment) {
    494		pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
    495		       __func__, max_segment, size);
    496		err = -EINVAL;
    497		goto out_close;
    498	}
    499
    500	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
    501		if (sg->length > max_segment) {
    502			pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
    503			       __func__, sg->length, max_segment);
    504			err = -EINVAL;
    505			goto out_close;
    506		}
    507	}
    508
    509out_close:
    510	close_objects(mem, &objects);
    511out_put:
    512	intel_memory_region_destroy(mem);
    513	return err;
    514}
    515
    516static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
    517{
    518	struct intel_memory_region *mr = obj->mm.region;
    519	struct i915_ttm_buddy_resource *bman_res =
    520		to_ttm_buddy_resource(obj->mm.res);
    521	struct drm_buddy *mm = bman_res->mm;
    522	struct drm_buddy_block *block;
    523	u64 total;
    524
    525	total = 0;
    526	list_for_each_entry(block, &bman_res->blocks, link) {
    527		u64 start = drm_buddy_block_offset(block);
    528		u64 end = start + drm_buddy_block_size(mm, block);
    529
    530		if (start < mr->io_size)
    531			total += min_t(u64, end, mr->io_size) - start;
    532	}
    533
    534	return total;
    535}
    536
    537static int igt_mock_io_size(void *arg)
    538{
    539	struct intel_memory_region *mr = arg;
    540	struct drm_i915_private *i915 = mr->i915;
    541	struct drm_i915_gem_object *obj;
    542	u64 mappable_theft_total;
    543	u64 io_size;
    544	u64 total;
    545	u64 ps;
    546	u64 rem;
    547	u64 size;
    548	I915_RND_STATE(prng);
    549	LIST_HEAD(objects);
    550	int err = 0;
    551
    552	ps = SZ_4K;
    553	if (i915_prandom_u64_state(&prng) & 1)
    554		ps = SZ_64K; /* For something like DG2 */
    555
    556	div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
    557	total = round_down(total, ps);
    558	total = max_t(u64, total, SZ_1G);
    559
    560	div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
    561	io_size = round_down(io_size, ps);
    562	io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */
    563
    564	pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
    565		__func__, ps, io_size, total);
    566
    567	mr = mock_region_create(i915, 0, total, ps, 0, io_size);
    568	if (IS_ERR(mr)) {
    569		err = PTR_ERR(mr);
    570		goto out_err;
    571	}
    572
    573	mappable_theft_total = 0;
    574	rem = total - io_size;
    575	do {
    576		div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
    577		size = round_down(size, ps);
    578		size = max(size, ps);
    579
    580		obj = igt_object_create(mr, &objects, size,
    581					I915_BO_ALLOC_GPU_ONLY);
    582		if (IS_ERR(obj)) {
    583			pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
    584			       __func__, rem, size);
    585			err = PTR_ERR(obj);
    586			goto out_close;
    587		}
    588
    589		mappable_theft_total += igt_object_mappable_total(obj);
    590		rem -= size;
    591	} while (rem);
    592
    593	pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
    594		__func__,
    595		(u64)mappable_theft_total >> 20,
    596		(u64)io_size >> 20,
    597		(u64)total >> 20);
    598
    599	/*
    600	 * Even if we allocate all of the non-mappable portion, we should still
    601	 * be able to dip into the mappable portion.
    602	 */
    603	obj = igt_object_create(mr, &objects, io_size,
    604				I915_BO_ALLOC_GPU_ONLY);
    605	if (IS_ERR(obj)) {
    606		pr_err("%s allocation unexpectedly failed\n", __func__);
    607		err = PTR_ERR(obj);
    608		goto out_close;
    609	}
    610
    611	close_objects(mr, &objects);
    612
    613	rem = io_size;
    614	do {
    615		div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
    616		size = round_down(size, ps);
    617		size = max(size, ps);
    618
    619		obj = igt_object_create(mr, &objects, size, 0);
    620		if (IS_ERR(obj)) {
    621			pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
    622			       __func__, rem, size);
    623			err = PTR_ERR(obj);
    624			goto out_close;
    625		}
    626
    627		if (igt_object_mappable_total(obj) != size) {
    628			pr_err("%s allocation is not mappable(size=%llx)\n",
    629			       __func__, size);
    630			err = -EINVAL;
    631			goto out_close;
    632		}
    633		rem -= size;
    634	} while (rem);
    635
    636	/*
    637	 * We assume CPU access is required by default, which should result in a
    638	 * failure here, even though the non-mappable portion is free.
    639	 */
    640	obj = igt_object_create(mr, &objects, ps, 0);
    641	if (!IS_ERR(obj)) {
    642		pr_err("%s allocation unexpectedly succeeded\n", __func__);
    643		err = -EINVAL;
    644		goto out_close;
    645	}
    646
    647out_close:
    648	close_objects(mr, &objects);
    649	intel_memory_region_destroy(mr);
    650out_err:
    651	if (err == -ENOMEM)
    652		err = 0;
    653
    654	return err;
    655}
    656
    657static int igt_gpu_write_dw(struct intel_context *ce,
    658			    struct i915_vma *vma,
    659			    u32 dword,
    660			    u32 value)
    661{
    662	return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
    663			       vma->size >> PAGE_SHIFT, value);
    664}
    665
    666static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
    667{
    668	unsigned long n = obj->base.size >> PAGE_SHIFT;
    669	u32 *ptr;
    670	int err;
    671
    672	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
    673	if (err)
    674		return err;
    675
    676	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
    677	if (IS_ERR(ptr))
    678		return PTR_ERR(ptr);
    679
    680	ptr += dword;
    681	while (n--) {
    682		if (*ptr != val) {
    683			pr_err("base[%u]=%08x, val=%08x\n",
    684			       dword, *ptr, val);
    685			err = -EINVAL;
    686			break;
    687		}
    688
    689		ptr += PAGE_SIZE / sizeof(*ptr);
    690	}
    691
    692	i915_gem_object_unpin_map(obj);
    693	return err;
    694}
    695
    696static int igt_gpu_write(struct i915_gem_context *ctx,
    697			 struct drm_i915_gem_object *obj)
    698{
    699	struct i915_gem_engines *engines;
    700	struct i915_gem_engines_iter it;
    701	struct i915_address_space *vm;
    702	struct intel_context *ce;
    703	I915_RND_STATE(prng);
    704	IGT_TIMEOUT(end_time);
    705	unsigned int count;
    706	struct i915_vma *vma;
    707	int *order;
    708	int i, n;
    709	int err = 0;
    710
    711	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
    712
    713	n = 0;
    714	count = 0;
    715	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
    716		count++;
    717		if (!intel_engine_can_store_dword(ce->engine))
    718			continue;
    719
    720		vm = ce->vm;
    721		n++;
    722	}
    723	i915_gem_context_unlock_engines(ctx);
    724	if (!n)
    725		return 0;
    726
    727	order = i915_random_order(count * count, &prng);
    728	if (!order)
    729		return -ENOMEM;
    730
    731	vma = i915_vma_instance(obj, vm, NULL);
    732	if (IS_ERR(vma)) {
    733		err = PTR_ERR(vma);
    734		goto out_free;
    735	}
    736
    737	err = i915_vma_pin(vma, 0, 0, PIN_USER);
    738	if (err)
    739		goto out_free;
    740
    741	i = 0;
    742	engines = i915_gem_context_lock_engines(ctx);
    743	do {
    744		u32 rng = prandom_u32_state(&prng);
    745		u32 dword = offset_in_page(rng) / 4;
    746
    747		ce = engines->engines[order[i] % engines->num_engines];
    748		i = (i + 1) % (count * count);
    749		if (!ce || !intel_engine_can_store_dword(ce->engine))
    750			continue;
    751
    752		err = igt_gpu_write_dw(ce, vma, dword, rng);
    753		if (err)
    754			break;
    755
    756		i915_gem_object_lock(obj, NULL);
    757		err = igt_cpu_check(obj, dword, rng);
    758		i915_gem_object_unlock(obj);
    759		if (err)
    760			break;
    761	} while (!__igt_timeout(end_time, NULL));
    762	i915_gem_context_unlock_engines(ctx);
    763
    764out_free:
    765	kfree(order);
    766
    767	if (err == -ENOMEM)
    768		err = 0;
    769
    770	return err;
    771}
    772
    773static int igt_lmem_create(void *arg)
    774{
    775	struct drm_i915_private *i915 = arg;
    776	struct drm_i915_gem_object *obj;
    777	int err = 0;
    778
    779	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
    780	if (IS_ERR(obj))
    781		return PTR_ERR(obj);
    782
    783	err = i915_gem_object_pin_pages_unlocked(obj);
    784	if (err)
    785		goto out_put;
    786
    787	i915_gem_object_unpin_pages(obj);
    788out_put:
    789	i915_gem_object_put(obj);
    790
    791	return err;
    792}
    793
    794static int igt_lmem_create_with_ps(void *arg)
    795{
    796	struct drm_i915_private *i915 = arg;
    797	int err = 0;
    798	u32 ps;
    799
    800	for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) {
    801		struct drm_i915_gem_object *obj;
    802		dma_addr_t daddr;
    803
    804		obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0);
    805		if (IS_ERR(obj)) {
    806			err = PTR_ERR(obj);
    807			if (err == -ENXIO || err == -E2BIG) {
    808				pr_info("%s not enough lmem for ps(%u) err=%d\n",
    809					__func__, ps, err);
    810				err = 0;
    811			}
    812
    813			break;
    814		}
    815
    816		if (obj->base.size != ps) {
    817			pr_err("%s size(%zu) != ps(%u)\n",
    818			       __func__, obj->base.size, ps);
    819			err = -EINVAL;
    820			goto out_put;
    821		}
    822
    823		i915_gem_object_lock(obj, NULL);
    824		err = i915_gem_object_pin_pages(obj);
    825		if (err) {
    826			if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
    827				pr_info("%s not enough lmem for ps(%u) err=%d\n",
    828					__func__, ps, err);
    829				err = 0;
    830			}
    831			goto out_put;
    832		}
    833
    834		daddr = i915_gem_object_get_dma_address(obj, 0);
    835		if (!IS_ALIGNED(daddr, ps)) {
    836			pr_err("%s daddr(%pa) not aligned with ps(%u)\n",
    837			       __func__, &daddr, ps);
    838			err = -EINVAL;
    839			goto out_unpin;
    840		}
    841
    842out_unpin:
    843		i915_gem_object_unpin_pages(obj);
    844		__i915_gem_object_put_pages(obj);
    845out_put:
    846		i915_gem_object_unlock(obj);
    847		i915_gem_object_put(obj);
    848
    849		if (err)
    850			break;
    851	}
    852
    853	return err;
    854}
    855
    856static int igt_lmem_create_cleared_cpu(void *arg)
    857{
    858	struct drm_i915_private *i915 = arg;
    859	I915_RND_STATE(prng);
    860	IGT_TIMEOUT(end_time);
    861	u32 size, i;
    862	int err;
    863
    864	i915_gem_drain_freed_objects(i915);
    865
    866	size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
    867	size = round_up(size, PAGE_SIZE);
    868	i = 0;
    869
    870	do {
    871		struct drm_i915_gem_object *obj;
    872		unsigned int flags;
    873		u32 dword, val;
    874		void *vaddr;
    875
    876		/*
     877		 * Alternate between cleared and uncleared allocations, while
     878		 * also dirtying the pages each time, to check that the pages are
     879		 * always cleared when requested; since we are the only user, we
     880		 * should see some overlap of the underlying pages, if not all of
     881		 * them.
    882		 */
    883
    884		flags = I915_BO_ALLOC_CPU_CLEAR;
    885		if (i & 1)
    886			flags = 0;
    887
    888		obj = i915_gem_object_create_lmem(i915, size, flags);
    889		if (IS_ERR(obj))
    890			return PTR_ERR(obj);
    891
    892		i915_gem_object_lock(obj, NULL);
    893		err = i915_gem_object_pin_pages(obj);
    894		if (err)
    895			goto out_put;
    896
    897		dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
    898						   &prng);
    899
    900		if (flags & I915_BO_ALLOC_CPU_CLEAR) {
    901			err = igt_cpu_check(obj, dword, 0);
    902			if (err) {
    903				pr_err("%s failed with size=%u, flags=%u\n",
    904				       __func__, size, flags);
    905				goto out_unpin;
    906			}
    907		}
    908
    909		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
    910		if (IS_ERR(vaddr)) {
    911			err = PTR_ERR(vaddr);
    912			goto out_unpin;
    913		}
    914
    915		val = prandom_u32_state(&prng);
    916
    917		memset32(vaddr, val, obj->base.size / sizeof(u32));
    918
    919		i915_gem_object_flush_map(obj);
    920		i915_gem_object_unpin_map(obj);
    921out_unpin:
    922		i915_gem_object_unpin_pages(obj);
    923		__i915_gem_object_put_pages(obj);
    924out_put:
    925		i915_gem_object_unlock(obj);
    926		i915_gem_object_put(obj);
    927
    928		if (err)
    929			break;
    930		++i;
    931	} while (!__igt_timeout(end_time, NULL));
    932
    933	pr_info("%s completed (%u) iterations\n", __func__, i);
    934
    935	return err;
    936}
    937
    938static int igt_lmem_write_gpu(void *arg)
    939{
    940	struct drm_i915_private *i915 = arg;
    941	struct drm_i915_gem_object *obj;
    942	struct i915_gem_context *ctx;
    943	struct file *file;
    944	I915_RND_STATE(prng);
    945	u32 sz;
    946	int err;
    947
    948	file = mock_file(i915);
    949	if (IS_ERR(file))
    950		return PTR_ERR(file);
    951
    952	ctx = live_context(i915, file);
    953	if (IS_ERR(ctx)) {
    954		err = PTR_ERR(ctx);
    955		goto out_file;
    956	}
    957
    958	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
    959
    960	obj = i915_gem_object_create_lmem(i915, sz, 0);
    961	if (IS_ERR(obj)) {
    962		err = PTR_ERR(obj);
    963		goto out_file;
    964	}
    965
    966	err = i915_gem_object_pin_pages_unlocked(obj);
    967	if (err)
    968		goto out_put;
    969
    970	err = igt_gpu_write(ctx, obj);
    971	if (err)
    972		pr_err("igt_gpu_write failed(%d)\n", err);
    973
    974	i915_gem_object_unpin_pages(obj);
    975out_put:
    976	i915_gem_object_put(obj);
    977out_file:
    978	fput(file);
    979	return err;
    980}
    981
    982static struct intel_engine_cs *
    983random_engine_class(struct drm_i915_private *i915,
    984		    unsigned int class,
    985		    struct rnd_state *prng)
    986{
    987	struct intel_engine_cs *engine;
    988	unsigned int count;
    989
    990	count = 0;
    991	for (engine = intel_engine_lookup_user(i915, class, 0);
    992	     engine && engine->uabi_class == class;
    993	     engine = rb_entry_safe(rb_next(&engine->uabi_node),
    994				    typeof(*engine), uabi_node))
    995		count++;
    996
    997	count = i915_prandom_u32_max_state(count, prng);
    998	return intel_engine_lookup_user(i915, class, count);
    999}
   1000
   1001static int igt_lmem_write_cpu(void *arg)
   1002{
   1003	struct drm_i915_private *i915 = arg;
   1004	struct drm_i915_gem_object *obj;
   1005	I915_RND_STATE(prng);
   1006	IGT_TIMEOUT(end_time);
   1007	u32 bytes[] = {
   1008		0, /* rng placeholder */
   1009		sizeof(u32),
   1010		sizeof(u64),
   1011		64, /* cl */
   1012		PAGE_SIZE,
   1013		PAGE_SIZE - sizeof(u32),
   1014		PAGE_SIZE - sizeof(u64),
   1015		PAGE_SIZE - 64,
   1016	};
   1017	struct intel_engine_cs *engine;
   1018	struct i915_request *rq;
   1019	u32 *vaddr;
   1020	u32 sz;
   1021	u32 i;
   1022	int *order;
   1023	int count;
   1024	int err;
   1025
   1026	engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
   1027	if (!engine)
   1028		return 0;
   1029
   1030	pr_info("%s: using %s\n", __func__, engine->name);
   1031
   1032	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
   1033	sz = max_t(u32, 2 * PAGE_SIZE, sz);
   1034
   1035	obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
   1036	if (IS_ERR(obj))
   1037		return PTR_ERR(obj);
   1038
   1039	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
   1040	if (IS_ERR(vaddr)) {
   1041		err = PTR_ERR(vaddr);
   1042		goto out_put;
   1043	}
   1044
   1045	i915_gem_object_lock(obj, NULL);
   1046
   1047	err = dma_resv_reserve_fences(obj->base.resv, 1);
   1048	if (err) {
   1049		i915_gem_object_unlock(obj);
   1050		goto out_put;
   1051	}
   1052
   1053	/* Put the pages into a known state -- from the gpu for added fun */
   1054	intel_engine_pm_get(engine);
   1055	err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
   1056					  obj->mm.pages->sgl, I915_CACHE_NONE,
   1057					  true, 0xdeadbeaf, &rq);
   1058	if (rq) {
   1059		dma_resv_add_fence(obj->base.resv, &rq->fence,
   1060				   DMA_RESV_USAGE_WRITE);
   1061		i915_request_put(rq);
   1062	}
   1063
   1064	intel_engine_pm_put(engine);
   1065	if (!err)
   1066		err = i915_gem_object_set_to_wc_domain(obj, true);
   1067	i915_gem_object_unlock(obj);
   1068	if (err)
   1069		goto out_unpin;
   1070
   1071	count = ARRAY_SIZE(bytes);
   1072	order = i915_random_order(count * count, &prng);
   1073	if (!order) {
   1074		err = -ENOMEM;
   1075		goto out_unpin;
   1076	}
   1077
   1078	/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
   1079	bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
   1080	GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));
   1081
   1082	i = 0;
   1083	do {
   1084		u32 offset;
   1085		u32 align;
   1086		u32 dword;
   1087		u32 size;
   1088		u32 val;
   1089
   1090		size = bytes[order[i] % count];
   1091		i = (i + 1) % (count * count);
   1092
   1093		align = bytes[order[i] % count];
   1094		i = (i + 1) % (count * count);
   1095
   1096		align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));
   1097
   1098		offset = igt_random_offset(&prng, 0, obj->base.size,
   1099					   size, align);
   1100
   1101		val = prandom_u32_state(&prng);
   1102		memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
   1103			 size / sizeof(u32));
   1104
   1105		/*
   1106		 * Sample random dw -- don't waste precious time reading every
   1107		 * single dw.
   1108		 */
   1109		dword = igt_random_offset(&prng, offset,
   1110					  offset + size,
   1111					  sizeof(u32), sizeof(u32));
   1112		dword /= sizeof(u32);
   1113		if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
   1114			pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
   1115			       __func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
   1116			       size, align, offset);
   1117			err = -EINVAL;
   1118			break;
   1119		}
   1120	} while (!__igt_timeout(end_time, NULL));
   1121
   1122out_unpin:
   1123	i915_gem_object_unpin_map(obj);
   1124out_put:
   1125	i915_gem_object_put(obj);
   1126
   1127	return err;
   1128}
   1129
   1130static const char *repr_type(u32 type)
   1131{
   1132	switch (type) {
   1133	case I915_MAP_WB:
   1134		return "WB";
   1135	case I915_MAP_WC:
   1136		return "WC";
   1137	}
   1138
   1139	return "";
   1140}
   1141
   1142static struct drm_i915_gem_object *
   1143create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
   1144			  void **out_addr)
   1145{
   1146	struct drm_i915_gem_object *obj;
   1147	void *addr;
   1148
   1149	obj = i915_gem_object_create_region(mr, size, 0, 0);
   1150	if (IS_ERR(obj)) {
   1151		if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
   1152			return ERR_PTR(-ENODEV);
   1153		return obj;
   1154	}
   1155
   1156	addr = i915_gem_object_pin_map_unlocked(obj, type);
   1157	if (IS_ERR(addr)) {
   1158		i915_gem_object_put(obj);
   1159		if (PTR_ERR(addr) == -ENXIO)
   1160			return ERR_PTR(-ENODEV);
   1161		return addr;
   1162	}
   1163
   1164	*out_addr = addr;
   1165	return obj;
   1166}
   1167
   1168static int wrap_ktime_compare(const void *A, const void *B)
   1169{
   1170	const ktime_t *a = A, *b = B;
   1171
   1172	return ktime_compare(*a, *b);
   1173}
   1174
   1175static void igt_memcpy_long(void *dst, const void *src, size_t size)
   1176{
   1177	unsigned long *tmp = dst;
   1178	const unsigned long *s = src;
   1179
   1180	size = size / sizeof(unsigned long);
   1181	while (size--)
   1182		*tmp++ = *s++;
   1183}
   1184
   1185static inline void igt_memcpy(void *dst, const void *src, size_t size)
   1186{
   1187	memcpy(dst, src, size);
   1188}
   1189
   1190static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
   1191{
   1192	i915_memcpy_from_wc(dst, src, size);
   1193}
   1194
   1195static int _perf_memcpy(struct intel_memory_region *src_mr,
   1196			struct intel_memory_region *dst_mr,
   1197			u64 size, u32 src_type, u32 dst_type)
   1198{
   1199	struct drm_i915_private *i915 = src_mr->i915;
   1200	const struct {
   1201		const char *name;
   1202		void (*copy)(void *dst, const void *src, size_t size);
   1203		bool skip;
   1204	} tests[] = {
   1205		{
   1206			"memcpy",
   1207			igt_memcpy,
   1208		},
   1209		{
   1210			"memcpy_long",
   1211			igt_memcpy_long,
   1212		},
   1213		{
   1214			"memcpy_from_wc",
   1215			igt_memcpy_from_wc,
   1216			!i915_has_memcpy_from_wc(),
   1217		},
   1218	};
   1219	struct drm_i915_gem_object *src, *dst;
   1220	void *src_addr, *dst_addr;
   1221	int ret = 0;
   1222	int i;
   1223
   1224	src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
   1225	if (IS_ERR(src)) {
   1226		ret = PTR_ERR(src);
   1227		goto out;
   1228	}
   1229
   1230	dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
   1231	if (IS_ERR(dst)) {
   1232		ret = PTR_ERR(dst);
   1233		goto out_unpin_src;
   1234	}
   1235
   1236	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
   1237		ktime_t t[5];
   1238		int pass;
   1239
   1240		if (tests[i].skip)
   1241			continue;
   1242
   1243		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
   1244			ktime_t t0, t1;
   1245
   1246			t0 = ktime_get();
   1247
   1248			tests[i].copy(dst_addr, src_addr, size);
   1249
   1250			t1 = ktime_get();
   1251			t[pass] = ktime_sub(t1, t0);
   1252		}
   1253
   1254		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
   1255		if (t[0] <= 0) {
   1256			/* ignore the impossible to protect our sanity */
   1257			pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
   1258				 __func__,
   1259				 src_mr->name, repr_type(src_type),
   1260				 dst_mr->name, repr_type(dst_type),
   1261				 tests[i].name, size >> 10,
   1262				 t[0], t[4]);
   1263			continue;
   1264		}
   1265
   1266		pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
   1267			__func__,
   1268			src_mr->name, repr_type(src_type),
   1269			dst_mr->name, repr_type(dst_type),
   1270			tests[i].name, size >> 10,
   1271			div64_u64(mul_u32_u32(4 * size,
   1272					      1000 * 1000 * 1000),
   1273				  t[1] + 2 * t[2] + t[3]) >> 20);
   1274
   1275		cond_resched();
   1276	}
   1277
   1278	i915_gem_object_unpin_map(dst);
   1279	i915_gem_object_put(dst);
   1280out_unpin_src:
   1281	i915_gem_object_unpin_map(src);
   1282	i915_gem_object_put(src);
   1283
   1284	i915_gem_drain_freed_objects(i915);
   1285out:
   1286	if (ret == -ENODEV)
   1287		ret = 0;
   1288
   1289	return ret;
   1290}
   1291
   1292static int perf_memcpy(void *arg)
   1293{
   1294	struct drm_i915_private *i915 = arg;
   1295	static const u32 types[] = {
   1296		I915_MAP_WB,
   1297		I915_MAP_WC,
   1298	};
   1299	static const u32 sizes[] = {
   1300		SZ_4K,
   1301		SZ_64K,
   1302		SZ_4M,
   1303	};
   1304	struct intel_memory_region *src_mr, *dst_mr;
   1305	int src_id, dst_id;
   1306	int i, j, k;
   1307	int ret;
   1308
   1309	for_each_memory_region(src_mr, i915, src_id) {
   1310		for_each_memory_region(dst_mr, i915, dst_id) {
   1311			for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
   1312				for (j = 0; j < ARRAY_SIZE(types); ++j) {
   1313					for (k = 0; k < ARRAY_SIZE(types); ++k) {
   1314						ret = _perf_memcpy(src_mr,
   1315								   dst_mr,
   1316								   sizes[i],
   1317								   types[j],
   1318								   types[k]);
   1319						if (ret)
   1320							return ret;
   1321					}
   1322				}
   1323			}
   1324		}
   1325	}
   1326
   1327	return 0;
   1328}
   1329
   1330int intel_memory_region_mock_selftests(void)
   1331{
   1332	static const struct i915_subtest tests[] = {
   1333		SUBTEST(igt_mock_reserve),
   1334		SUBTEST(igt_mock_fill),
   1335		SUBTEST(igt_mock_contiguous),
   1336		SUBTEST(igt_mock_splintered_region),
   1337		SUBTEST(igt_mock_max_segment),
   1338		SUBTEST(igt_mock_io_size),
   1339	};
   1340	struct intel_memory_region *mem;
   1341	struct drm_i915_private *i915;
   1342	int err;
   1343
   1344	i915 = mock_gem_device();
   1345	if (!i915)
   1346		return -ENOMEM;
   1347
   1348	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
   1349	if (IS_ERR(mem)) {
   1350		pr_err("failed to create memory region\n");
   1351		err = PTR_ERR(mem);
   1352		goto out_unref;
   1353	}
   1354
   1355	err = i915_subtests(tests, mem);
   1356
   1357	intel_memory_region_destroy(mem);
   1358out_unref:
   1359	mock_destroy_device(i915);
   1360	return err;
   1361}
   1362
   1363int intel_memory_region_live_selftests(struct drm_i915_private *i915)
   1364{
   1365	static const struct i915_subtest tests[] = {
   1366		SUBTEST(igt_lmem_create),
   1367		SUBTEST(igt_lmem_create_with_ps),
   1368		SUBTEST(igt_lmem_create_cleared_cpu),
   1369		SUBTEST(igt_lmem_write_cpu),
   1370		SUBTEST(igt_lmem_write_gpu),
   1371	};
   1372
   1373	if (!HAS_LMEM(i915)) {
   1374		pr_info("device lacks LMEM support, skipping\n");
   1375		return 0;
   1376	}
   1377
   1378	if (intel_gt_is_wedged(to_gt(i915)))
   1379		return 0;
   1380
   1381	return i915_live_subtests(tests, i915);
   1382}
   1383
   1384int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
   1385{
   1386	static const struct i915_subtest tests[] = {
   1387		SUBTEST(perf_memcpy),
   1388	};
   1389
   1390	if (intel_gt_is_wedged(to_gt(i915)))
   1391		return 0;
   1392
   1393	return i915_live_subtests(tests, i915);
   1394}