cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

rcu.c (12722B)


/*
 * urcu-mb.c
 *
 * Userspace RCU library with explicit memory barriers
 *
 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
 * Copyright 2015 Red Hat, Inc.
 *
 * Ported to QEMU by Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
 */

#include "qemu/osdep.h"
#include "qemu/rcu.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/lockable.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
#endif

/*
 * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
 * Bits 1 and above are defined in synchronize_rcu.
 */
#define RCU_GP_LOCKED           (1UL << 0)
#define RCU_GP_CTR              (1UL << 1)

unsigned long rcu_gp_ctr = RCU_GP_LOCKED;

QemuEvent rcu_gp_event;
static QemuMutex rcu_registry_lock;
static QemuMutex rcu_sync_lock;

/*
 * Check whether a quiescent state was crossed between the beginning of
 * wait_for_readers() and now.
 */
static inline int rcu_gp_ongoing(unsigned long *ctr)
{
    unsigned long v;

    v = qatomic_read(ctr);
    return v && (v != rcu_gp_ctr);
}
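
/*
 * Illustrative sketch: the reader side lives in include/qemu/rcu.h.
 * Roughly, and ignoring nesting depth and the wakeup of a waiting
 * synchronize_rcu(), rcu_read_lock()/rcu_read_unlock() do:
 *
 *     rcu_read_lock:    qatomic_set(&rcu_reader.ctr, qatomic_read(&rcu_gp_ctr));
 *     rcu_read_unlock:  qatomic_set(&rcu_reader.ctr, 0);
 *
 * With that, rcu_gp_ongoing() above is true exactly for a reader that is
 * inside a critical section (ctr != 0) which began before the current
 * grace period (ctr != rcu_gp_ctr).
 */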

/* Written to only by each individual reader. Read by both the reader and the
 * writers.
 */
__thread struct rcu_reader_data rcu_reader;

/* Protected by rcu_registry_lock.  */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);

/* Wait for previous parity/grace period to be empty of readers.  */
static void wait_for_readers(void)
{
    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
    struct rcu_reader_data *index, *tmp;

    for (;;) {
        /* We want to be notified of changes made to rcu_gp_ongoing
         * while we walk the list.
         */
        qemu_event_reset(&rcu_gp_event);

        /* Instead of using qatomic_mb_set for index->waiting, and
         * qatomic_mb_read for index->ctr, memory barriers are placed
         * manually since writes to different threads are independent.
         * qemu_event_reset has acquire semantics, so no memory barrier
         * is needed here.
         */
        QLIST_FOREACH(index, &registry, node) {
            qatomic_set(&index->waiting, true);
        }

        /* Here, order the stores to index->waiting before the loads of
         * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
         * ensuring that the loads of index->ctr are sequentially consistent.
         */
        smp_mb_global();

        QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
            if (!rcu_gp_ongoing(&index->ctr)) {
                QLIST_REMOVE(index, node);
                QLIST_INSERT_HEAD(&qsreaders, index, node);

                /* No need for mb_set here; at worst we get some
                 * extra futex wakeups.
                 */
                qatomic_set(&index->waiting, false);
            }
        }

        if (QLIST_EMPTY(&registry)) {
            break;
        }

        /* Wait for one thread to report a quiescent state and try again.
         * Release rcu_registry_lock so that rcu_(un)register_thread() does
         * not have to wait too long.
         *
         * rcu_register_thread() may add nodes to &registry; it will not
         * wake up synchronize_rcu, but that is okay because at least another
         * thread must exit its RCU read-side critical section before
         * synchronize_rcu is done.  The next iteration of the loop will
         * move the new thread's rcu_reader from &registry to &qsreaders,
         * because rcu_gp_ongoing() will return false.
         *
         * rcu_unregister_thread() may remove nodes from &qsreaders instead
         * of &registry if it runs during qemu_event_wait.  That's okay;
         * the node then will not be added back to &registry by QLIST_SWAP
         * below.  The invariant is that the node is part of one list when
         * rcu_registry_lock is released.
         */
        qemu_mutex_unlock(&rcu_registry_lock);
        qemu_event_wait(&rcu_gp_event);
        qemu_mutex_lock(&rcu_registry_lock);
    }

    /* put back the reader list in the registry */
    QLIST_SWAP(&registry, &qsreaders, node);
}
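
/*
 * Illustrative trace (simplified): one round of the handshake above with a
 * single registered reader R that is inside a read-side critical section.
 *
 *   writer: index->waiting = true             announce that we are waiting
 *   writer: smp_mb_global()
 *   writer: reads R->ctr, sees a stale value  -> R stays on &registry
 *   writer: qemu_event_wait(&rcu_gp_event)
 *   R:      rcu_read_unlock(): sets ctr = 0, notices waiting == true and
 *           calls qemu_event_set(&rcu_gp_event)
 *   writer: wakes up, next iteration sees rcu_gp_ongoing() == false, moves
 *           R to &qsreaders and leaves the loop once &registry is empty
 */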

void synchronize_rcu(void)
{
    QEMU_LOCK_GUARD(&rcu_sync_lock);

    /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
     * Pairs with smp_mb_placeholder() in rcu_read_lock().
     */
    smp_mb_global();

    QEMU_LOCK_GUARD(&rcu_registry_lock);
    if (!QLIST_EMPTY(&registry)) {
        /* In either case, the qatomic_mb_set below blocks stores that free
         * old RCU-protected pointers.
         */
        if (sizeof(rcu_gp_ctr) < 8) {
            /* For architectures with 32-bit longs, a two-subphase algorithm
             * ensures we do not encounter overflow bugs.
             *
             * Switch parity: 0 -> 1, 1 -> 0.
             */
            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
            wait_for_readers();
            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
        } else {
            /* Increment current grace period.  */
            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
        }

        wait_for_readers();
    }
}
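
#if 0
/* Illustrative sketch (not compiled): the classic writer-side pattern that
 * synchronize_rcu() enables.  "Cfg" and "global_cfg" are hypothetical names;
 * the point is the publish / wait / reclaim sequence.
 */
static Cfg *global_cfg;

static void cfg_replace(Cfg *new_cfg)
{
    Cfg *old = qatomic_rcu_read(&global_cfg);

    qatomic_rcu_set(&global_cfg, new_cfg);   /* publish the new version */
    synchronize_rcu();                       /* wait for readers of "old" */
    g_free(old);                             /* no reader can hold it now */
}
#endif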


#define RCU_CALL_MIN_SIZE        30

/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
 * from liburcu.  Note that head is only used by the consumer.
 */
static struct rcu_head dummy;
static struct rcu_head *head = &dummy, **tail = &dummy.next;
static int rcu_call_count;
static QemuEvent rcu_call_ready_event;

static void enqueue(struct rcu_head *node)
{
    struct rcu_head **old_tail;

    node->next = NULL;
    old_tail = qatomic_xchg(&tail, &node->next);
    qatomic_mb_set(old_tail, node);
}

static struct rcu_head *try_dequeue(void)
{
    struct rcu_head *node, *next;

retry:
    /* Test for an empty list, which we do not expect.  Note that for
     * the consumer, head and tail are always consistent.  The head is
     * consistent because only the consumer reads/writes it; the tail is
     * consistent because writing it is the first step of enqueuing.
     * It is only the next pointers that might be inconsistent.
     */
    if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
        abort();
    }

    /* If the head node has NULL in its next pointer, the value is
     * wrong and we need to wait until its enqueuer finishes the update.
     */
    node = head;
    next = qatomic_mb_read(&head->next);
    if (!next) {
        return NULL;
    }

    /* Since we are the sole consumer, and we excluded the empty case
     * above, the queue will always have at least two nodes: the
     * dummy node, and the one being removed.  So we do not need to update
     * the tail pointer.
     */
    head = next;

    /* If we dequeued the dummy node, add it back at the end and retry.  */
    if (node == &dummy) {
        enqueue(node);
        goto retry;
    }

    return node;
}
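
/*
 * Illustrative trace (simplified): two producers A and B racing in
 * enqueue() are serialized by the qatomic_xchg() on &tail; each then links
 * its node into the slot it claimed.
 *
 *   A: old_tail = xchg(&tail, &a.next)    tail now points to a.next
 *   B: old_tail = xchg(&tail, &b.next)    tail now points to b.next
 *   B: *old_tail = &b                     a.next = &b
 *   A: *old_tail = &a                     the slot before a (e.g. dummy.next)
 *
 * Until A performs its final store, the consumer sees a NULL next pointer
 * at the head and try_dequeue() simply returns NULL; nodes are never lost
 * or reordered relative to the xchg order.
 */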

static void *call_rcu_thread(void *opaque)
{
    struct rcu_head *node;

    rcu_register_thread();

    for (;;) {
        int tries = 0;
        int n = qatomic_read(&rcu_call_count);

        /* Heuristically wait for a decent number of callbacks to pile up.
         * Fetch rcu_call_count now; we must only process elements that were
         * added before synchronize_rcu() starts.
         */
        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
            g_usleep(10000);
            if (n == 0) {
                qemu_event_reset(&rcu_call_ready_event);
                n = qatomic_read(&rcu_call_count);
                if (n == 0) {
#if defined(CONFIG_MALLOC_TRIM)
                    malloc_trim(4 * 1024 * 1024);
#endif
                    qemu_event_wait(&rcu_call_ready_event);
                }
            }
            n = qatomic_read(&rcu_call_count);
        }

        qatomic_sub(&rcu_call_count, n);
        synchronize_rcu();
        qemu_mutex_lock_iothread();
        while (n > 0) {
            node = try_dequeue();
            while (!node) {
                qemu_mutex_unlock_iothread();
                qemu_event_reset(&rcu_call_ready_event);
                node = try_dequeue();
                if (!node) {
                    qemu_event_wait(&rcu_call_ready_event);
                    node = try_dequeue();
                }
                qemu_mutex_lock_iothread();
            }

            n--;
            node->func(node);
        }
        qemu_mutex_unlock_iothread();
    }
    abort();
}

void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
{
    node->func = func;
    enqueue(node);
    qatomic_inc(&rcu_call_count);
    qemu_event_set(&rcu_call_ready_event);
}
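
#if 0
/* Illustrative sketch (not compiled): a typical call_rcu1() caller embeds a
 * struct rcu_head in its object and recovers the object with container_of()
 * in the callback.  "Foo", "foo_free_rcu" and "foo_unpublish" are
 * hypothetical names.
 */
typedef struct Foo {
    struct rcu_head rcu;
    char *data;
} Foo;

static void foo_free_rcu(struct rcu_head *head)
{
    Foo *foo = container_of(head, Foo, rcu);

    g_free(foo->data);
    g_free(foo);
}

static void foo_unpublish(Foo *foo)
{
    /* ... after removing every globally visible pointer to foo ... */
    call_rcu1(&foo->rcu, foo_free_rcu);
}
#endif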


struct rcu_drain {
    struct rcu_head rcu;
    QemuEvent drain_complete_event;
};

static void drain_rcu_callback(struct rcu_head *node)
{
    struct rcu_drain *event = (struct rcu_drain *)node;
    qemu_event_set(&event->drain_complete_event);
}

/*
 * This function ensures that all pending RCU callbacks registered on the
 * current thread have finished executing.
 *
 * It drops the big QEMU lock during the wait so that the RCU thread can
 * process the callbacks.
 */
void drain_call_rcu(void)
{
    struct rcu_drain rcu_drain;
    bool locked = qemu_mutex_iothread_locked();

    memset(&rcu_drain, 0, sizeof(struct rcu_drain));
    qemu_event_init(&rcu_drain.drain_complete_event, false);

    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    /*
     * RCU callbacks are invoked in the same order in which they are
     * registered, so we can be sure that when 'drain_rcu_callback' is
     * called, all RCU callbacks that were registered on this thread prior
     * to calling this function have completed.
     *
     * Note that since we have only one global queue of RCU callbacks, we
     * also end up waiting for most of the RCU callbacks that were
     * registered on the other threads, but this is a side effect that
     * shouldn't be relied upon.
     */

    call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
    qemu_event_wait(&rcu_drain.drain_complete_event);

    if (locked) {
        qemu_mutex_lock_iothread();
    }
}

void rcu_register_thread(void)
{
    assert(rcu_reader.ctr == 0);
    qemu_mutex_lock(&rcu_registry_lock);
    QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
    qemu_mutex_unlock(&rcu_registry_lock);
}

void rcu_unregister_thread(void)
{
    qemu_mutex_lock(&rcu_registry_lock);
    QLIST_REMOVE(&rcu_reader, node);
    qemu_mutex_unlock(&rcu_registry_lock);
}
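
#if 0
/* Illustrative sketch (not compiled): a thread that wants to enter RCU
 * read-side critical sections brackets its lifetime with the registration
 * calls above.  "worker" is a hypothetical function name.
 */
static void *worker(void *opaque)
{
    rcu_register_thread();

    rcu_read_lock();
    /* ... dereference RCU-protected pointers here ... */
    rcu_read_unlock();

    rcu_unregister_thread();
    return NULL;
}
#endif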

static void rcu_init_complete(void)
{
    QemuThread thread;

    qemu_mutex_init(&rcu_registry_lock);
    qemu_mutex_init(&rcu_sync_lock);
    qemu_event_init(&rcu_gp_event, true);

    qemu_event_init(&rcu_call_ready_event, false);

    /* The caller is assumed to hold the iothread lock, so the call_rcu
     * thread must have been quiescent even after forking; just recreate it.
     */
    qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
                       NULL, QEMU_THREAD_DETACHED);

    rcu_register_thread();
}

static int atfork_depth = 1;

void rcu_enable_atfork(void)
{
    atfork_depth++;
}

void rcu_disable_atfork(void)
{
    atfork_depth--;
}

#ifdef CONFIG_POSIX
static void rcu_init_lock(void)
{
    if (atfork_depth < 1) {
        return;
    }

    qemu_mutex_lock(&rcu_sync_lock);
    qemu_mutex_lock(&rcu_registry_lock);
}

static void rcu_init_unlock(void)
{
    if (atfork_depth < 1) {
        return;
    }

    qemu_mutex_unlock(&rcu_registry_lock);
    qemu_mutex_unlock(&rcu_sync_lock);
}

static void rcu_init_child(void)
{
    if (atfork_depth < 1) {
        return;
    }

    memset(&registry, 0, sizeof(registry));
    rcu_init_complete();
}
#endif

static void __attribute__((__constructor__)) rcu_init(void)
{
    smp_mb_global_init();
#ifdef CONFIG_POSIX
    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
#endif
    rcu_init_complete();
}