/* atomic_add-bench.c (4215B) */
1#include "qemu/osdep.h" 2#include "qemu/thread.h" 3#include "qemu/host-utils.h" 4#include "qemu/processor.h" 5 6struct thread_info { 7 uint64_t r; 8} QEMU_ALIGNED(64); 9 10struct count { 11 QemuMutex lock; 12 unsigned long val; 13} QEMU_ALIGNED(64); 14 15static QemuThread *threads; 16static struct thread_info *th_info; 17static unsigned int n_threads = 1; 18static unsigned int n_ready_threads; 19static struct count *counts; 20static unsigned int duration = 1; 21static unsigned int range = 1024; 22static bool use_mutex; 23static bool test_start; 24static bool test_stop; 25 26static const char commands_string[] = 27 " -n = number of threads\n" 28 " -m = use mutexes instead of atomic increments\n" 29 " -p = enable sync profiler\n" 30 " -d = duration in seconds\n" 31 " -r = range (will be rounded up to pow2)"; 32 33static void usage_complete(char *argv[]) 34{ 35 fprintf(stderr, "Usage: %s [options]\n", argv[0]); 36 fprintf(stderr, "options:\n%s\n", commands_string); 37} 38 39/* 40 * From: https://en.wikipedia.org/wiki/Xorshift 41 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only 42 * guaranteed to be >= INT_MAX). 
43 */ 44static uint64_t xorshift64star(uint64_t x) 45{ 46 x ^= x >> 12; /* a */ 47 x ^= x << 25; /* b */ 48 x ^= x >> 27; /* c */ 49 return x * UINT64_C(2685821657736338717); 50} 51 52static void *thread_func(void *arg) 53{ 54 struct thread_info *info = arg; 55 56 qatomic_inc(&n_ready_threads); 57 while (!qatomic_read(&test_start)) { 58 cpu_relax(); 59 } 60 61 while (!qatomic_read(&test_stop)) { 62 unsigned int index; 63 64 info->r = xorshift64star(info->r); 65 index = info->r & (range - 1); 66 if (use_mutex) { 67 qemu_mutex_lock(&counts[index].lock); 68 counts[index].val += 1; 69 qemu_mutex_unlock(&counts[index].lock); 70 } else { 71 qatomic_inc(&counts[index].val); 72 } 73 } 74 return NULL; 75} 76 77static void run_test(void) 78{ 79 unsigned int i; 80 81 while (qatomic_read(&n_ready_threads) != n_threads) { 82 cpu_relax(); 83 } 84 85 qatomic_set(&test_start, true); 86 g_usleep(duration * G_USEC_PER_SEC); 87 qatomic_set(&test_stop, true); 88 89 for (i = 0; i < n_threads; i++) { 90 qemu_thread_join(&threads[i]); 91 } 92} 93 94static void create_threads(void) 95{ 96 unsigned int i; 97 98 threads = g_new(QemuThread, n_threads); 99 th_info = g_new(struct thread_info, n_threads); 100 counts = qemu_memalign(64, sizeof(*counts) * range); 101 memset(counts, 0, sizeof(*counts) * range); 102 for (i = 0; i < range; i++) { 103 qemu_mutex_init(&counts[i].lock); 104 } 105 106 for (i = 0; i < n_threads; i++) { 107 struct thread_info *info = &th_info[i]; 108 109 info->r = (i + 1) ^ time(NULL); 110 qemu_thread_create(&threads[i], NULL, thread_func, info, 111 QEMU_THREAD_JOINABLE); 112 } 113} 114 115static void pr_params(void) 116{ 117 printf("Parameters:\n"); 118 printf(" # of threads: %u\n", n_threads); 119 printf(" duration: %u\n", duration); 120 printf(" ops' range: %u\n", range); 121} 122 123static void pr_stats(void) 124{ 125 unsigned long long val = 0; 126 unsigned int i; 127 double tx; 128 129 for (i = 0; i < range; i++) { 130 val += counts[i].val; 131 } 132 tx = val / 
duration / 1e6; 133 134 printf("Results:\n"); 135 printf("Duration: %u s\n", duration); 136 printf(" Throughput: %.2f Mops/s\n", tx); 137 printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads); 138} 139 140static void parse_args(int argc, char *argv[]) 141{ 142 int c; 143 144 for (;;) { 145 c = getopt(argc, argv, "hd:n:mpr:"); 146 if (c < 0) { 147 break; 148 } 149 switch (c) { 150 case 'h': 151 usage_complete(argv); 152 exit(0); 153 case 'd': 154 duration = atoi(optarg); 155 break; 156 case 'n': 157 n_threads = atoi(optarg); 158 break; 159 case 'm': 160 use_mutex = true; 161 break; 162 case 'p': 163 qsp_enable(); 164 break; 165 case 'r': 166 range = pow2ceil(atoi(optarg)); 167 break; 168 } 169 } 170} 171 172int main(int argc, char *argv[]) 173{ 174 parse_args(argc, argv); 175 pr_params(); 176 create_threads(); 177 run_test(); 178 pr_stats(); 179 return 0; 180}