cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

radeon_sa.c (11494B)


/*
 * Copyright 2011 Red Hat Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
/* Algorithm:
 *
 * We store the last allocated bo in "hole" and always try to allocate
 * after it. The principle is that in a linear GPU ring progression,
 * what comes after the last allocation is the oldest bo we allocated
 * and thus the first one that should no longer be in use by the GPU.
 *
 * If that is not the case, we skip over the bo after last to the
 * closest done bo, if one exists. If none exists and we are not asked
 * to block, we report failure to allocate.
 *
 * If we are asked to block, we wait on the oldest fence of each ring
 * and return as soon as any of those fences completes.
 */
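/* Worked example (illustrative, not part of the original file): with a
 * 64 byte manager and olist A[0,16) -> B[40,64), a hole pointing at A
 * gives the candidate gap [A->eoffset, B->soffset) = [16,40); a hole
 * pointing at B gives [B->eoffset, size) = [64,64), i.e. nothing, so
 * the search wraps back to the list head and, failing that, waits for
 * fences to retire.
 */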

#include "radeon.h"

static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);

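/**
 * radeon_sa_bo_manager_init - init the sub-allocator manager
 *
 * @rdev: radeon device
 * @sa_manager: manager to initialize
 * @size: size of the backing buffer object in bytes
 * @align: alignment for the backing buffer object
 * @domain: memory domain for the backing buffer object
 * @flags: flags for the backing buffer object creation
 *
 * Initialize the allocation list, the per-ring fence lists and the
 * hole pointer, then create the backing buffer object.
 */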
int radeon_sa_bo_manager_init(struct radeon_device *rdev,
			      struct radeon_sa_manager *sa_manager,
			      unsigned size, u32 align, u32 domain, u32 flags)
{
	int i, r;

	init_waitqueue_head(&sa_manager->wq);
	sa_manager->bo = NULL;
	sa_manager->size = size;
	sa_manager->domain = domain;
	sa_manager->align = align;
	sa_manager->hole = &sa_manager->olist;
	INIT_LIST_HEAD(&sa_manager->olist);
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		INIT_LIST_HEAD(&sa_manager->flist[i]);
	}

	r = radeon_bo_create(rdev, size, align, true,
			     domain, flags, NULL, NULL, &sa_manager->bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
		return r;
	}

	return r;
}

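/**
 * radeon_sa_bo_manager_fini - tear down the sub-allocator manager
 *
 * @rdev: radeon device
 * @sa_manager: manager to tear down
 *
 * Free whatever allocations can still be freed, warn if any remain in
 * use, then release them anyway and drop the backing buffer object.
 */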
void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
			       struct radeon_sa_manager *sa_manager)
{
	struct radeon_sa_bo *sa_bo, *tmp;

	if (!list_empty(&sa_manager->olist)) {
		sa_manager->hole = &sa_manager->olist;
		radeon_sa_bo_try_free(sa_manager);
		if (!list_empty(&sa_manager->olist)) {
			dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
		}
	}
	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
		radeon_sa_bo_remove_locked(sa_bo);
	}
	radeon_bo_unref(&sa_manager->bo);
	sa_manager->size = 0;
}

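/**
 * radeon_sa_bo_manager_start - pin and map the backing buffer object
 *
 * @rdev: radeon device
 * @sa_manager: manager to start
 *
 * Pin the backing buffer object into its memory domain and map it
 * into the CPU address space so sub-allocations can be written to.
 */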
int radeon_sa_bo_manager_start(struct radeon_device *rdev,
			       struct radeon_sa_manager *sa_manager)
{
	int r;

	if (sa_manager->bo == NULL) {
		dev_err(rdev->dev, "no bo for sa manager\n");
		return -EINVAL;
	}

	/* map the buffer */
	r = radeon_bo_reserve(sa_manager->bo, false);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to reserve manager bo\n", r);
		return r;
	}
	r = radeon_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
	if (r) {
		radeon_bo_unreserve(sa_manager->bo);
		dev_err(rdev->dev, "(%d) failed to pin manager bo\n", r);
		return r;
	}
	r = radeon_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
	radeon_bo_unreserve(sa_manager->bo);
	return r;
}

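/**
 * radeon_sa_bo_manager_suspend - unmap and unpin the backing buffer object
 *
 * @rdev: radeon device
 * @sa_manager: manager to suspend
 *
 * Counterpart to radeon_sa_bo_manager_start(): unmap the buffer from
 * the CPU and unpin it so it can be evicted from its domain.
 */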
int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
				 struct radeon_sa_manager *sa_manager)
{
	int r;

	if (sa_manager->bo == NULL) {
		dev_err(rdev->dev, "no bo for sa manager\n");
		return -EINVAL;
	}

	r = radeon_bo_reserve(sa_manager->bo, false);
	if (!r) {
		radeon_bo_kunmap(sa_manager->bo);
		radeon_bo_unpin(sa_manager->bo);
		radeon_bo_unreserve(sa_manager->bo);
	}
	return r;
}

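/* Unlink @sa_bo from the allocation and fence lists, drop its fence
 * reference and free it. If the hole currently points at this bo,
 * move the hole to the previous entry first. Caller holds wq.lock.
 */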
static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
{
	struct radeon_sa_manager *sa_manager = sa_bo->manager;
	if (sa_manager->hole == &sa_bo->olist) {
		sa_manager->hole = sa_bo->olist.prev;
	}
	list_del_init(&sa_bo->olist);
	list_del_init(&sa_bo->flist);
	radeon_fence_unref(&sa_bo->fence);
	kfree(sa_bo);
}

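/* Starting right after the hole, free every allocation whose fence
 * has already signaled; stop at the first one that is still in
 * flight. Caller holds wq.lock.
 */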
static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
{
	struct radeon_sa_bo *sa_bo, *tmp;

	if (sa_manager->hole->next == &sa_manager->olist)
		return;

	sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
		if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
			return;
		}
		radeon_sa_bo_remove_locked(sa_bo);
	}
}

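/* Start offset of the current hole: the end offset of the bo the hole
 * points at, or 0 when the hole is the list head.
 */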
static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole != &sa_manager->olist) {
		return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
	}
	return 0;
}

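/* End offset of the current hole: the start offset of the next bo in
 * the list, or the manager size when the hole is the last entry.
 */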
static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole->next != &sa_manager->olist) {
		return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
	}
	return sa_manager->size;
}

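/* Try to place @size bytes (aligned to @align) into the current hole.
 * On success, fill in @sa_bo, insert it after the hole and make it
 * the new hole; return false if the hole is too small. Caller holds
 * wq.lock.
 */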
static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
				   struct radeon_sa_bo *sa_bo,
				   unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;

	soffset = radeon_sa_bo_hole_soffset(sa_manager);
	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		soffset += wasted;

		sa_bo->manager = sa_manager;
		sa_bo->soffset = soffset;
		sa_bo->eoffset = soffset + size;
		list_add(&sa_bo->olist, sa_manager->hole);
		INIT_LIST_HEAD(&sa_bo->flist);
		sa_manager->hole = &sa_bo->olist;
		return true;
	}
	return false;
}

/**
 * radeon_sa_event - Check if we can stop waiting
 *
 * @sa_manager: pointer to the sa_manager
 * @size: number of bytes we want to allocate
 * @align: alignment we need to match
 *
 * Check if either there is a fence we can wait for or
 * enough free memory to satisfy the allocation directly
 */
static bool radeon_sa_event(struct radeon_sa_manager *sa_manager,
			    unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;
	int i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!list_empty(&sa_manager->flist[i])) {
			return true;
		}
	}

	soffset = radeon_sa_bo_hole_soffset(sa_manager);
	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		return true;
	}

	return false;
}

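/* Try to advance the hole past allocations that have already
 * finished. For each ring, look at the oldest fenced allocation:
 * collect still unsignaled fences into @fences for the caller to wait
 * on, and use @tries to limit how often each ring may push the hole
 * forward. Move the hole just before the closest signaled bo (offsets
 * wrap around the end of the buffer), remove that bo and return true
 * so the caller retries; return false if nothing could be freed.
 */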
static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
				   struct radeon_fence **fences,
				   unsigned *tries)
{
	struct radeon_sa_bo *best_bo = NULL;
	unsigned i, soffset, best, tmp;

	/* if hole points to the end of the buffer */
	if (sa_manager->hole->next == &sa_manager->olist) {
		/* try again with its beginning */
		sa_manager->hole = &sa_manager->olist;
		return true;
	}

	soffset = radeon_sa_bo_hole_soffset(sa_manager);
	/* to handle wrap around we add sa_manager->size */
	best = sa_manager->size * 2;
	/* go over all fence lists and try to find the sa_bo
	 * closest to the current last
	 */
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		struct radeon_sa_bo *sa_bo;

		if (list_empty(&sa_manager->flist[i])) {
			continue;
		}

		sa_bo = list_first_entry(&sa_manager->flist[i],
					 struct radeon_sa_bo, flist);

		if (!radeon_fence_signaled(sa_bo->fence)) {
			fences[i] = sa_bo->fence;
			continue;
		}

		/* limit the number of tries each ring gets */
		if (tries[i] > 2) {
			continue;
		}

		tmp = sa_bo->soffset;
		if (tmp < soffset) {
			/* wrap around, pretend it's after */
			tmp += sa_manager->size;
		}
		tmp -= soffset;
		if (tmp < best) {
			/* this sa bo is the closest one */
			best = tmp;
			best_bo = sa_bo;
		}
	}

	if (best_bo) {
		++tries[best_bo->fence->ring];
		sa_manager->hole = best_bo->olist.prev;

		/* we know that this one is signaled,
		 * so it's safe to remove it
		 */
		radeon_sa_bo_remove_locked(best_bo);
		return true;
	}
	return false;
}

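/**
 * radeon_sa_bo_new - allocate a new sub-allocation
 *
 * @rdev: radeon device
 * @sa_manager: manager to allocate from
 * @sa_bo: resulting sub-allocation
 * @size: number of bytes to allocate
 * @align: alignment the allocation needs to match
 *
 * Try the fast path first; if the hole is too small, free finished
 * allocations and skip over busy ones, and as a last resort block on
 * the rings' oldest fences until space frees up.
 */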
int radeon_sa_bo_new(struct radeon_device *rdev,
		     struct radeon_sa_manager *sa_manager,
		     struct radeon_sa_bo **sa_bo,
		     unsigned size, unsigned align)
{
	struct radeon_fence *fences[RADEON_NUM_RINGS];
	unsigned tries[RADEON_NUM_RINGS];
	int i, r;

	BUG_ON(align > sa_manager->align);
	BUG_ON(size > sa_manager->size);

	*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
	if ((*sa_bo) == NULL) {
		return -ENOMEM;
	}
	(*sa_bo)->manager = sa_manager;
	(*sa_bo)->fence = NULL;
	INIT_LIST_HEAD(&(*sa_bo)->olist);
	INIT_LIST_HEAD(&(*sa_bo)->flist);

	spin_lock(&sa_manager->wq.lock);
	do {
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			fences[i] = NULL;
			tries[i] = 0;
		}

		do {
			radeon_sa_bo_try_free(sa_manager);

			if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
						   size, align)) {
				spin_unlock(&sa_manager->wq.lock);
				return 0;
			}

			/* see if we can skip over some allocations */
		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));

		for (i = 0; i < RADEON_NUM_RINGS; ++i)
			radeon_fence_ref(fences[i]);

		spin_unlock(&sa_manager->wq.lock);
		r = radeon_fence_wait_any(rdev, fences, false);
		for (i = 0; i < RADEON_NUM_RINGS; ++i)
			radeon_fence_unref(&fences[i]);
		spin_lock(&sa_manager->wq.lock);
		/* if we have nothing to wait for block */
		if (r == -ENOENT) {
			r = wait_event_interruptible_locked(
				sa_manager->wq,
				radeon_sa_event(sa_manager, size, align)
			);
		}

	} while (!r);

	spin_unlock(&sa_manager->wq.lock);
	kfree(*sa_bo);
	*sa_bo = NULL;
	return r;
}

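/**
 * radeon_sa_bo_free - free a sub-allocation
 *
 * @rdev: radeon device
 * @sa_bo: sub-allocation to free
 * @fence: fence protecting the allocation, or NULL
 *
 * If @fence has not signaled yet, keep the allocation on the fence's
 * ring list so it is freed once the fence signals; otherwise free it
 * immediately. Wake up anyone waiting for space.
 */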
void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
		       struct radeon_fence *fence)
{
	struct radeon_sa_manager *sa_manager;

	if (sa_bo == NULL || *sa_bo == NULL) {
		return;
	}

	sa_manager = (*sa_bo)->manager;
	spin_lock(&sa_manager->wq.lock);
	if (fence && !radeon_fence_signaled(fence)) {
		(*sa_bo)->fence = radeon_fence_ref(fence);
		list_add_tail(&(*sa_bo)->flist,
			      &sa_manager->flist[fence->ring]);
	} else {
		radeon_sa_bo_remove_locked(*sa_bo);
	}
	wake_up_all_locked(&sa_manager->wq);
	spin_unlock(&sa_manager->wq.lock);
	*sa_bo = NULL;
}

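/* Dump the allocation list to debugfs; '>' marks the current hole. */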
#if defined(CONFIG_DEBUG_FS)
void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
				  struct seq_file *m)
{
	struct radeon_sa_bo *i;

	spin_lock(&sa_manager->wq.lock);
	list_for_each_entry(i, &sa_manager->olist, olist) {
		uint64_t soffset = i->soffset + sa_manager->gpu_addr;
		uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;
		if (&i->olist == sa_manager->hole) {
			seq_printf(m, ">");
		} else {
			seq_printf(m, " ");
		}
		seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
			   soffset, eoffset, eoffset - soffset);
		if (i->fence) {
			seq_printf(m, " protected by 0x%016llx on ring %d",
				   i->fence->seq, i->fence->ring);
		}
		seq_printf(m, "\n");
	}
	spin_unlock(&sa_manager->wq.lock);
}
#endif