cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sync.h (7823B)


/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed.  That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older stores
 *      to complete.
 */
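
/*
 * A minimal illustration of the wmb case described above (a sketch only,
 * assuming the kernel's generic WRITE_ONCE() and smp_wmb() helpers; the
 * function and variable names are hypothetical): a producer publishing data
 * through a flag only needs its two stores ordered, so a store-only ordering
 * barrier suffices and loads may proceed unhindered.
 */
#if 0	/* illustrative sketch, never compiled */
static int example_data, example_flag;

static void example_publish(void)
{
	WRITE_ONCE(example_data, 42);	/* older store */
	smp_wmb();			/* order the data store before the flag store */
	WRITE_ONCE(example_flag, 1);	/* younger store */
}
#endif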

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14
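
/*
 * A rough usage sketch for the GINV sync type (hedged: it assumes the
 * ginvt_full() helper from asm/ginvt.h and the sync_ginv() wrapper from
 * asm/barrier.h, which emits __SYNC(ginv, always)): issue the global
 * invalidation, then a ginv-type sync to wait until every coherent CPU has
 * observed it.
 */
#if 0	/* illustrative sketch, never compiled */
static void example_global_tlb_flush(void)
{
	ginvt_full();	/* broadcast a global TLB invalidation */
	sync_ginv();	/* sync 0x14: all coherent CPUs have now observed it */
}
#endif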

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The cases described above cause an error in the cache coherence protocol:
 * the Invalidate from a competing LL/SC sequence goes 'missing', so the SC
 * erroneously observes that its core still holds the line in Exclusive state
 * and allows the SC to proceed.
 *
 * Therefore the error only occurs on SMP systems. A rough sketch of an LL/SC
 * loop with the barriers this workaround implies follows the definition below.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif
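
/*
 * Rough sketch of where the workaround barriers land in a cmpxchg-style
 * LL/SC loop, based purely on the two cases described above (register names
 * and operands are illustrative only):
 *
 *	__SYNC(full, loongson3_war)		# case 1: before the LL
 * 1:	ll	t0, 0(a0)			# load-linked the old value
 *	bne	t0, a1, 2f			# branch out on value mismatch
 *	move	t1, a2
 *	sc	t1, 0(a0)			# store-conditional the new value
 *	beqz	t1, 1b				# retry if the SC failed
 * 2:	__SYNC(full, loongson3_war)		# case 2: at the branch target
 */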

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
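
/*
 * Worked example of the expression above: for __SYNC_rpt(__SYNC_wmb) on
 * Octeon the assembler evaluates (type == __SYNC_wmb) to -1, giving
 * 1 - (-1) = 2, so the .rept below emits the sync twice; for every other
 * type the comparison yields 0 and 1 - 0 = 1 emits a single sync.
 */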

/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif
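
/*
 * For illustration, a rough sketch of the expansion (assuming
 * CONFIG_CPU_HAS_SYNC=y and a non-Octeon CPU): ____SYNC(__SYNC_full,
 * __SYNC_always, ) assembles to approximately
 *
 *	.set	push
 *	.set	MIPS_ISA_LEVEL_RAW
 *	.rept	1
 *	sync	0x00
 *	.endr
 *	.set	pop
 *
 * whereas a zero reason - e.g. __SYNC_weak_ordering on a strongly ordered
 * configuration - fails the .if and only the (possibly empty) _else text is
 * emitted; a type of __SYNC_none (-1) likewise emits nothing.
 */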

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
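
/*
 * Usage sketch (hedged; the function name is hypothetical, though this is
 * roughly how the barrier wrappers in asm/barrier.h use these macros): from C
 * the stringified expansion drops straight into inline assembly, while
 * assembly sources that include this header can write __SYNC(full, always)
 * directly, since the unstringified expansion is plain assembler text.
 */
#if 0	/* illustrative sketch, never compiled */
static inline void example_full_barrier(void)
{
	asm volatile(__SYNC(full, always) ::: "memory");
}
#endif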

#endif /* __MIPS_ASM_SYNC_H__ */