cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ring_buffer.h (2454B)


#ifndef _TOOLS_LINUX_RING_BUFFER_H_
#define _TOOLS_LINUX_RING_BUFFER_H_

#include <asm/barrier.h>
#include <linux/perf_event.h>

/*
 * Contract with kernel for walking the perf ring buffer from
 * user space requires the following barrier pairing (quote
 * from kernel/events/ring_buffer.c):
 *
 *   Since the mmap() consumer (userspace) can run on a
 *   different CPU:
 *
 *   kernel                             user
 *
 *   if (LOAD ->data_tail) {            LOAD ->data_head
 *                      (A)             smp_rmb()       (C)
 *      STORE $data                     LOAD $data
 *      smp_wmb()       (B)             smp_mb()        (D)
 *      STORE ->data_head               STORE ->data_tail
 *   }
 *
 *   Where A pairs with D, and B pairs with C.
 *
 *   In our case A is a control dependency that separates the
 *   load of the ->data_tail and the stores of $data. In case
 *   ->data_tail indicates there is no room in the buffer to
 *   store $data we do not.
 *
 *   D needs to be a full barrier since it separates the data
 *   READ from the tail WRITE.
 *
 *   For B a WMB is sufficient since it separates two WRITEs,
 *   and for C an RMB is sufficient since it separates two READs.
 *
 * Note, instead of B, C, D we could also use smp_store_release()
 * in B and D as well as smp_load_acquire() in C.
 *
 * However, this optimization does not make sense for all kernel
 * supported architectures since for a fair number it would
 * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
 * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
 *
 * Thus for those smp_wmb() in B and smp_rmb() in C would still
 * be less expensive. For the case of D this has either the same
 * cost or is less expensive, for example, due to TSO x86 can
 * avoid the CPU barrier entirely.
 */

static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
{
/*
 * Architectures where smp_load_acquire() does not fallback to
 * READ_ONCE() + smp_mb() pair.
 */
#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
	return smp_load_acquire(&base->data_head);
#else
	u64 head = READ_ONCE(base->data_head);

	smp_rmb();
	return head;
#endif
}

static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
					  u64 tail)
{
	smp_store_release(&base->data_tail, tail);
}

#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
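
For illustration only (not part of the header above): a minimal sketch of how a userspace consumer might pair these two helpers when draining the mmap()ed perf ring buffer. It assumes the header above is included; the names consume_ring, data and buf_size are hypothetical, and a real consumer must also copy out records whose payload wraps past the end of the data area before using them.

/* Hypothetical helper: drain all records currently published by the kernel.
 * base points at the mmap()ed struct perf_event_mmap_page, data at the data
 * pages that follow it, buf_size is the power-of-two size of the data area. */
static void consume_ring(struct perf_event_mmap_page *base,
			 unsigned char *data, u64 buf_size)
{
	/* Barrier C: acquire ->data_head before reading any record data. */
	u64 head = ring_buffer_read_head(base);
	u64 tail = base->data_tail;

	while (tail != head) {
		struct perf_event_header *hdr;

		hdr = (struct perf_event_header *)(data + (tail & (buf_size - 1)));
		/* ... process one record here; handle payload wrap-around ... */
		tail += hdr->size;
	}

	/* Barrier D: release ->data_tail so the kernel may reuse the space. */
	ring_buffer_write_tail(base, tail);
}

In terms of the contract quoted in the header, ring_buffer_read_head() supplies barrier C when loading ->data_head and ring_buffer_write_tail() supplies barrier D when storing ->data_tail, so a loop like this needs no explicit fences of its own.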