cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

i915_syncmap.c (11361B)


      1/*
      2 * Copyright © 2017 Intel Corporation
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 */
     24
     25#include <linux/slab.h>
     26
     27#include "i915_syncmap.h"
     28
     29#include "i915_gem.h" /* GEM_BUG_ON() */
     30#include "i915_selftest.h"
     31
     32#define SHIFT ilog2(KSYNCMAP)
     33#define MASK (KSYNCMAP - 1)
     34
     35/*
     36 * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
     37 * context id to the last u32 fence seqno waited upon from that context.
     38 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
     39 * the root. This allows us to access the whole tree via a single pointer
     40 * to the most recently used layer. We expect fence contexts to be dense
     41 * and most reuse to be on the same i915_gem_context but on neighbouring
     42 * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
     43 * effective lookup cache. If the new lookup is not on the same leaf, we
     44 * expect it to be on the neighbouring branch.
     45 *
     46 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
     47 * allows us to store whether a particular seqno is valid (i.e. allows us
     48 * to distinguish unset from 0).
     49 *
     50 * A branch holds an array of layer pointers, and has height > 0, and always
     51 * has at least 2 layers (either branches or leaves) below it.
     52 *
     53 * For example,
     54 *	for x in
     55 *	  0 1 2 0x10 0x11 0x200 0x201
     56 *	  0x500000 0x500001 0x503000 0x503001
     57 *	  0xE<<60:
     58 *		i915_syncmap_set(&sync, x, lower_32_bits(x));
     59 * will build a tree like:
     60 *	0xXXXXXXXXXXXXXXXX
     61 *	0-> 0x0000000000XXXXXX
     62 *	|   0-> 0x0000000000000XXX
     63 *	|   |   0-> 0x00000000000000XX
     64 *	|   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
     65 *	|   |   |   1-> 0x000000000000001X 0:10, 1:11
     66 *	|   |   2-> 0x000000000000020X 0:200, 1:201
     67 *	|   5-> 0x000000000050XXXX
     68 *	|       0-> 0x000000000050000X 0:500000, 1:500001
     69 *	|       3-> 0x000000000050300X 0:503000, 1:503001
     70 *	e-> 0xe00000000000000X e:e
     71 */
     72
     73struct i915_syncmap {
     74	u64 prefix;
     75	unsigned int height;
     76	unsigned int bitmap;
     77	struct i915_syncmap *parent;
     78	/*
     79	 * Following this header is an array of either seqno or child pointers:
     80	 * union {
     81	 *	u32 seqno[KSYNCMAP];
     82	 *	struct i915_syncmap *child[KSYNCMAP];
     83	 * };
     84	 */
     85};
     86
     87/**
     88 * i915_syncmap_init -- initialise the #i915_syncmap
     89 * @root: pointer to the #i915_syncmap
     90 */
     91void i915_syncmap_init(struct i915_syncmap **root)
     92{
     93	BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
     94	BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
     95	BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap));
     96	*root = NULL;
     97}
     98
     99static inline u32 *__sync_seqno(struct i915_syncmap *p)
    100{
    101	GEM_BUG_ON(p->height);
    102	return (u32 *)(p + 1);
    103}
    104
    105static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
    106{
    107	GEM_BUG_ON(!p->height);
    108	return (struct i915_syncmap **)(p + 1);
    109}
    110
    111static inline unsigned int
    112__sync_branch_idx(const struct i915_syncmap *p, u64 id)
    113{
    114	return (id >> p->height) & MASK;
    115}
    116
    117static inline unsigned int
    118__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
    119{
    120	GEM_BUG_ON(p->height);
    121	return id & MASK;
    122}
    123
    124static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
    125{
    126	return id >> p->height >> SHIFT;
    127}
    128
    129static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
    130{
    131	GEM_BUG_ON(p->height);
    132	return id >> SHIFT;
    133}
    134
    135static inline bool seqno_later(u32 a, u32 b)
    136{
    137	return (s32)(a - b) >= 0;
    138}
    139
    140/**
    141 * i915_syncmap_is_later -- compare against the last know sync point
    142 * @root: pointer to the #i915_syncmap
    143 * @id: the context id (other timeline) we are synchronising to
    144 * @seqno: the sequence number along the other timeline
    145 *
    146 * If we have already synchronised this @root timeline with another (@id) then
    147 * we can omit any repeated or earlier synchronisation requests. If the two
    148 * timelines are already coupled, we can also omit the dependency between the
    149 * two as that is already known via the timeline.
    150 *
    151 * Returns true if the two timelines are already synchronised wrt to @seqno,
    152 * false if not and the synchronisation must be emitted.
    153 */
    154bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
    155{
    156	struct i915_syncmap *p;
    157	unsigned int idx;
    158
    159	p = *root;
    160	if (!p)
    161		return false;
    162
    163	if (likely(__sync_leaf_prefix(p, id) == p->prefix))
    164		goto found;
    165
    166	/* First climb the tree back to a parent branch */
    167	do {
    168		p = p->parent;
    169		if (!p)
    170			return false;
    171
    172		if (__sync_branch_prefix(p, id) == p->prefix)
    173			break;
    174	} while (1);
    175
    176	/* And then descend again until we find our leaf */
    177	do {
    178		if (!p->height)
    179			break;
    180
    181		p = __sync_child(p)[__sync_branch_idx(p, id)];
    182		if (!p)
    183			return false;
    184
    185		if (__sync_branch_prefix(p, id) != p->prefix)
    186			return false;
    187	} while (1);
    188
    189	*root = p;
    190found:
    191	idx = __sync_leaf_idx(p, id);
    192	if (!(p->bitmap & BIT(idx)))
    193		return false;
    194
    195	return seqno_later(__sync_seqno(p)[idx], seqno);
    196}
    197
    198static struct i915_syncmap *
    199__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
    200{
    201	struct i915_syncmap *p;
    202
    203	p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
    204	if (unlikely(!p))
    205		return NULL;
    206
    207	p->parent = parent;
    208	p->height = 0;
    209	p->bitmap = 0;
    210	p->prefix = __sync_leaf_prefix(p, id);
    211	return p;
    212}
    213
    214static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
    215{
    216	unsigned int idx = __sync_leaf_idx(p, id);
    217
    218	p->bitmap |= BIT(idx);
    219	__sync_seqno(p)[idx] = seqno;
    220}
    221
    222static inline void __sync_set_child(struct i915_syncmap *p,
    223				    unsigned int idx,
    224				    struct i915_syncmap *child)
    225{
    226	p->bitmap |= BIT(idx);
    227	__sync_child(p)[idx] = child;
    228}
    229
    230static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
    231{
    232	struct i915_syncmap *p = *root;
    233	unsigned int idx;
    234
    235	if (!p) {
    236		p = __sync_alloc_leaf(NULL, id);
    237		if (unlikely(!p))
    238			return -ENOMEM;
    239
    240		goto found;
    241	}
    242
    243	/* Caller handled the likely cached case */
    244	GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
    245
    246	/* Climb back up the tree until we find a common prefix */
    247	do {
    248		if (!p->parent)
    249			break;
    250
    251		p = p->parent;
    252
    253		if (__sync_branch_prefix(p, id) == p->prefix)
    254			break;
    255	} while (1);
    256
    257	/*
    258	 * No shortcut, we have to descend the tree to find the right layer
    259	 * containing this fence.
    260	 *
    261	 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
    262	 * or lower layers. Leaf nodes (height = 0) contain the fences, all
    263	 * other nodes (height > 0) are internal layers that point to a lower
    264	 * node. Each internal layer has at least 2 descendents.
    265	 *
    266	 * Starting at the top, we check whether the current prefix matches. If
    267	 * it doesn't, we have gone past our target and need to insert a join
    268	 * into the tree, and a new leaf node for the target as a descendent
    269	 * of the join, as well as the original layer.
    270	 *
    271	 * The matching prefix means we are still following the right branch
    272	 * of the tree. If it has height 0, we have found our leaf and just
    273	 * need to replace the fence slot with ourselves. If the height is
    274	 * not zero, our slot contains the next layer in the tree (unless
    275	 * it is empty, in which case we can add ourselves as a new leaf).
    276	 * As descend the tree the prefix grows (and height decreases).
    277	 */
    278	do {
    279		struct i915_syncmap *next;
    280
    281		if (__sync_branch_prefix(p, id) != p->prefix) {
    282			unsigned int above;
    283
    284			/* Insert a join above the current layer */
    285			next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
    286				       GFP_KERNEL);
    287			if (unlikely(!next))
    288				return -ENOMEM;
    289
    290			/* Compute the height at which these two diverge */
    291			above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
    292			above = round_up(above, SHIFT);
    293			next->height = above + p->height;
    294			next->prefix = __sync_branch_prefix(next, id);
    295
    296			/* Insert the join into the parent */
    297			if (p->parent) {
    298				idx = __sync_branch_idx(p->parent, id);
    299				__sync_child(p->parent)[idx] = next;
    300				GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
    301			}
    302			next->parent = p->parent;
    303
    304			/* Compute the idx of the other branch, not our id! */
    305			idx = p->prefix >> (above - SHIFT) & MASK;
    306			__sync_set_child(next, idx, p);
    307			p->parent = next;
    308
    309			/* Ascend to the join */
    310			p = next;
    311		} else {
    312			if (!p->height)
    313				break;
    314		}
    315
    316		/* Descend into the next layer */
    317		GEM_BUG_ON(!p->height);
    318		idx = __sync_branch_idx(p, id);
    319		next = __sync_child(p)[idx];
    320		if (!next) {
    321			next = __sync_alloc_leaf(p, id);
    322			if (unlikely(!next))
    323				return -ENOMEM;
    324
    325			__sync_set_child(p, idx, next);
    326			p = next;
    327			break;
    328		}
    329
    330		p = next;
    331	} while (1);
    332
    333found:
    334	GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
    335	__sync_set_seqno(p, id, seqno);
    336	*root = p;
    337	return 0;
    338}
    339
    340/**
    341 * i915_syncmap_set -- mark the most recent syncpoint between contexts
    342 * @root: pointer to the #i915_syncmap
    343 * @id: the context id (other timeline) we have synchronised to
    344 * @seqno: the sequence number along the other timeline
    345 *
    346 * When we synchronise this @root timeline with another (@id), we also know
    347 * that we have synchronized with all previous seqno along that timeline. If
    348 * we then have a request to synchronise with the same seqno or older, we can
    349 * omit it, see i915_syncmap_is_later()
    350 *
    351 * Returns 0 on success, or a negative error code.
    352 */
    353int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
    354{
    355	struct i915_syncmap *p = *root;
    356
    357	/*
    358	 * We expect to be called in sequence following is_later(id), which
    359	 * should have preloaded the root for us.
    360	 */
    361	if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
    362		__sync_set_seqno(p, id, seqno);
    363		return 0;
    364	}
    365
    366	return __sync_set(root, id, seqno);
    367}
    368
    369static void __sync_free(struct i915_syncmap *p)
    370{
    371	if (p->height) {
    372		unsigned int i;
    373
    374		while ((i = ffs(p->bitmap))) {
    375			p->bitmap &= ~0u << i;
    376			__sync_free(__sync_child(p)[i - 1]);
    377		}
    378	}
    379
    380	kfree(p);
    381}
    382
    383/**
    384 * i915_syncmap_free -- free all memory associated with the syncmap
    385 * @root: pointer to the #i915_syncmap
    386 *
    387 * Either when the timeline is to be freed and we no longer need the sync
    388 * point tracking, or when the fences are all known to be signaled and the
    389 * sync point tracking is redundant, we can free the #i915_syncmap to recover
    390 * its allocations.
    391 *
    392 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
    393 * reuse.
    394 */
    395void i915_syncmap_free(struct i915_syncmap **root)
    396{
    397	struct i915_syncmap *p;
    398
    399	p = *root;
    400	if (!p)
    401		return;
    402
    403	while (p->parent)
    404		p = p->parent;
    405
    406	__sync_free(p);
    407	*root = NULL;
    408}
    409
    410#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
    411#include "selftests/i915_syncmap.c"
    412#endif