cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

radeon_object.c (21643B)


/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <linux/io.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <drm/drm_cache.h>
#include <drm/drm_prime.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_trace.h"
#include "radeon_ttm.h"

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions are calling it.
 */
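/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * callers are expected to hold the BO reservation around any state access,
 * typically via the radeon_bo_reserve()/radeon_bo_unreserve() helpers from
 * radeon_object.h, e.g.:
 *
 *	r = radeon_bo_reserve(bo, false);
 *	if (unlikely(r != 0))
 *		return r;
 *	... access or validate the BO ...
 *	radeon_bo_unreserve(bo);
 */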

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON_ONCE(!list_empty(&bo->va));
	if (bo->tbo.base.import_attach)
		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
	drm_gem_object_release(&bo->tbo.base);
	kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c].mem_type = TTM_PL_VRAM;
			rbo->placements[c++].flags = 0;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_VRAM;
		rbo->placements[c++].flags = 0;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_TT;
		rbo->placements[c++].flags = 0;
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
		rbo->placements[c++].flags = 0;
	}
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
		rbo->placements[c++].flags = 0;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].mem_type == TTM_PL_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}
}
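/*
 * Example (editor's addition, illustrative only): calling
 * radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
 * RADEON_GEM_DOMAIN_GTT) fills rbo->placements[] with a VRAM entry followed
 * by a GTT entry, so ttm_bo_validate() can fall back to GTT when VRAM is
 * exhausted; the order of the domain checks above defines that preference.
 */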

int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct dma_resv *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size);
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
				       RADEON_GEM_DOMAIN_GTT |
				       RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

	/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
	 */
	if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */
#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & RADEON_GEM_GTT_WC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, sg, resv,
			&radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}
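/*
 * Usage sketch (editor's addition, illustrative only): a typical
 * kernel-internal allocation creates the BO, pins it and maps it for CPU
 * access; error handling is omitted for brevity.
 *
 *	struct radeon_bo *bo;
 *	u64 gpu_addr;
 *	void *cpu_ptr;
 *
 *	radeon_bo_create(rdev, 4096, PAGE_SIZE, true,
 *			 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &bo);
 *	radeon_bo_reserve(bo, false);
 *	radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
 *	radeon_bo_kmap(bo, &cpu_ptr);
 *	radeon_bo_unreserve(bo);
 */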

int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	long r;

	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
				  false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_get(&bo->tbo);
	return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;
	tbo = &((*bo)->tbo);
	ttm_bo_put(tbo);
	*bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->rdev, bo->tbo.ttm))
		return -EPERM;

	if (bo->tbo.pin_count) {
		ttm_bo_pin(&bo->tbo);
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
		/* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
		return -EINVAL;
	}

	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].mem_type == TTM_PL_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (likely(r == 0)) {
		ttm_bo_pin(&bo->tbo);
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

void radeon_bo_unpin(struct radeon_bo *bo)
{
	ttm_bo_unpin(&bo->tbo);
	if (!bo->tbo.pin_count) {
		if (bo->tbo.resource->mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	}
}

int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	struct ttm_device *bdev = &rdev->mman.bdev;
	struct ttm_resource_manager *man;

	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
#ifndef CONFIG_HIBERNATION
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
#endif
	man = ttm_manager_type(bdev, TTM_PL_VRAM);
	if (!man)
		return 0;
	return ttm_resource_manager_evict_all(bdev, man);
}

void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->tbo.base, bo, (unsigned long)bo->tbo.base.size,
			*((unsigned long *)&bo->tbo.base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_put(&bo->tbo.base);
	}
}

int radeon_bo_init(struct radeon_device *rdev)
{
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(rdev->mc.aper_base,
				   rdev->mc.aper_size);

	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
	}
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		rdev->mc.mc_vram_size >> 20,
		(unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
			rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
	arch_phys_wc_del(rdev->mc.vram_mtrr);
	arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
}
/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	struct ttm_resource_manager *man =
		ttm_manager_type(&rdev->mman.bdev, TTM_PL_VRAM);
	u64 vram_usage = ttm_resource_manager_usage(man);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */
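	/* Worked example (editor's addition, illustrative only): with
	 * real_vram_size = 2 GiB and vram_usage = 512 MiB we get
	 * half_vram = 1 GiB, half_free_vram = 512 MiB and a resulting
	 * bytes_moved_threshold of 256 MiB; once usage reaches 1 GiB the
	 * threshold bottoms out at the 1 MB minimum below.
	 */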

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct radeon_bo_list *lobj;
	struct list_head duplicates;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		struct radeon_bo *bo = lobj->robj;
		if (!bo->tbo.pin_count) {
			u32 domain = lobj->preferred_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.resource->mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}

	list_for_each_entry(lobj, &duplicates, tv.head) {
		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
		lobj->tiling_flags = lobj->robj->tiling_flags;
	}

	return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	dma_resv_assert_held(bo->tbo.base.resv);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->tbo.pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.resource->start << PAGE_SHIFT,
			       bo->tbo.base.size);
	return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
				uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	dma_resv_assert_held(bo->tbo.base.resv);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
				bool force_drop)
{
	if (!force_drop)
		dma_resv_assert_held(bo->tbo.base.resv);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.resource->mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);
}

vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->resource->mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->resource->num_pages << PAGE_SHIFT;
	offset = bo->resource->start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (rbo->tbo.pin_count > 0)
		return VM_FAULT_SIGBUS;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].mem_type == TTM_PL_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	} else if (likely(!r)) {
		offset = bo->resource->start << PAGE_SHIFT;
		/* this should never happen */
		if ((offset + size) > rdev->mc.visible_vram_size)
			return VM_FAULT_SIGBUS;
	}

	if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
		return VM_FAULT_NOPAGE;
	else if (unlikely(r))
		return VM_FAULT_SIGBUS;

	ttm_bo_move_to_lru_tail_unlocked(bo);
	return 0;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
		     bool shared)
{
	struct dma_resv *resv = bo->tbo.base.resv;
	int r;

	r = dma_resv_reserve_fences(resv, 1);
	if (r) {
		/* As last resort on OOM we block for the fence */
		dma_fence_wait(&fence->base, false);
		return;
	}

	dma_resv_add_fence(resv, &fence->base, shared ?
			   DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
}