ring.c (5946B)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
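
/* Worked example for need_event(): with prev == 10 and next == 14, entries
 * 10..13 have been written since the last notification.  event == 12 falls in
 * that window: (14 - 12 - 1) == 1 < (14 - 10) == 4, so we notify.  event == 14
 * does not: (14 - 14 - 1) wraps to 0xffff, which is not < 4, so we skip it.
 * The unsigned short arithmetic keeps the comparison correct across index
 * wrap-around, e.g. prev == 0xfffe, next == 0x0002, event == 0xffff still
 * notifies: 2 < 4.
 */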

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		perror("Unable to allocate ring buffer.\n");
		exit(3);
	}
	event = calloc(1, sizeof(*event));
	if (!event) {
		perror("Unable to allocate event buffer.\n");
		exit(3);
	}
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = calloc(ring_size, sizeof(*data));
	if (!data) {
		perror("Unable to allocate data buffer.\n");
		exit(3);
	}
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void *)buf;
	ring[head].len = len;
	/* read below might bypass write above. That is OK because it's just an
	 * optimization. If this happens, we will get the cache line in a
	 * shared state which is unfortunate, but probably not worth it to
	 * add an explicit full barrier to avoid this.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

bool used_empty()
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	return (ring[head].flags & DESC_HW);
}

void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call()
{
	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return used_empty();
}

void kick_available(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	need = need_event(event->kick_index,
			  guest.avail_idx,
			  guest.kicked_avail_idx);

	guest.kicked_avail_idx = guest.avail_idx;
	if (need)
		kick();
}

/* host side */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick()
{
	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return avail_empty();
}

bool avail_empty()
{
	unsigned head = (ring_size - 1) & host.used_idx;

	return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: alternative is to write len and flags in one access -
	 * possible on 64 bit architectures but wmb is free on Intel anyway
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();

	need = need_event(event->call_index,
			  host.used_idx,
			  host.called_used_idx);

	host.called_used_idx = host.used_idx;

	if (need)
		call();
}
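
/* Illustrative sketch of one buffer's round trip through the ring above,
 * assuming the ringtest harness (main.h/main.c) supplies ring_size and the
 * kick()/call() notification hooks.  example_round_trip() is hypothetical and
 * runs both sides in one thread purely for illustration; a real harness would
 * drive guest and host concurrently, arming enable_kick()/enable_call()
 * before sleeping.  Guarded out so the file builds unchanged.
 */
#if 0
static void example_round_trip(void)
{
	void *buf = malloc(0x80);
	unsigned len;
	void *bufp, *datap;

	alloc_ring();

	/* guest: publish one descriptor (DESC_HW is set last) and maybe kick */
	if (add_inbuf(0x80, buf, buf) == 0)
		kick_available();

	/* host: complete the descriptor in place and maybe call */
	if (use_buf(&len, &bufp))
		call_used();

	/* guest: DESC_HW is clear now, so reclaim the buffer and its data */
	datap = get_buf(&len, &bufp);
	/* here datap == buf and bufp == buf; len holds the host-updated length */
	free(datap);
}
#endif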