qdist.c (9469B)
1/* 2 * qdist.c - QEMU helpers for handling frequency distributions of data. 3 * 4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> 5 * 6 * License: GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9#include "qemu/osdep.h" 10#include "qemu/qdist.h" 11 12#include <math.h> 13#ifndef NAN 14#define NAN (0.0 / 0.0) 15#endif 16 17#define QDIST_EMPTY_STR "(empty)" 18 19void qdist_init(struct qdist *dist) 20{ 21 dist->entries = g_new(struct qdist_entry, 1); 22 dist->size = 1; 23 dist->n = 0; 24} 25 26void qdist_destroy(struct qdist *dist) 27{ 28 g_free(dist->entries); 29} 30 31static inline int qdist_cmp_double(double a, double b) 32{ 33 if (a > b) { 34 return 1; 35 } else if (a < b) { 36 return -1; 37 } 38 return 0; 39} 40 41static int qdist_cmp(const void *ap, const void *bp) 42{ 43 const struct qdist_entry *a = ap; 44 const struct qdist_entry *b = bp; 45 46 return qdist_cmp_double(a->x, b->x); 47} 48 49void qdist_add(struct qdist *dist, double x, long count) 50{ 51 struct qdist_entry *entry = NULL; 52 53 if (dist->n) { 54 struct qdist_entry e; 55 56 e.x = x; 57 entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); 58 } 59 60 if (entry) { 61 entry->count += count; 62 return; 63 } 64 65 if (unlikely(dist->n == dist->size)) { 66 dist->size *= 2; 67 dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size); 68 } 69 dist->n++; 70 entry = &dist->entries[dist->n - 1]; 71 entry->x = x; 72 entry->count = count; 73 qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); 74} 75 76void qdist_inc(struct qdist *dist, double x) 77{ 78 qdist_add(dist, x, 1); 79} 80 81/* 82 * Unicode for block elements. See: 83 * https://en.wikipedia.org/wiki/Block_Elements 84 */ 85static const gunichar qdist_blocks[] = { 86 0x2581, 87 0x2582, 88 0x2583, 89 0x2584, 90 0x2585, 91 0x2586, 92 0x2587, 93 0x2588 94}; 95 96#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) 97 98/* 99 * Print a distribution into a string. 100 * 101 * This function assumes that appropriate binning has been done on the input; 102 * see qdist_bin__internal() and qdist_pr_plain(). 103 * 104 * Callers must free the returned string with g_free(). 105 */ 106static char *qdist_pr_internal(const struct qdist *dist) 107{ 108 double min, max; 109 GString *s = g_string_new(""); 110 size_t i; 111 112 /* if only one entry, its printout will be either full or empty */ 113 if (dist->n == 1) { 114 if (dist->entries[0].count) { 115 g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); 116 } else { 117 g_string_append_c(s, ' '); 118 } 119 goto out; 120 } 121 122 /* get min and max counts */ 123 min = dist->entries[0].count; 124 max = min; 125 for (i = 0; i < dist->n; i++) { 126 struct qdist_entry *e = &dist->entries[i]; 127 128 if (e->count < min) { 129 min = e->count; 130 } 131 if (e->count > max) { 132 max = e->count; 133 } 134 } 135 136 for (i = 0; i < dist->n; i++) { 137 struct qdist_entry *e = &dist->entries[i]; 138 int index; 139 140 /* make an exception with 0; instead of using block[0], print a space */ 141 if (e->count) { 142 /* divide first to avoid loss of precision when e->count == max */ 143 index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); 144 g_string_append_unichar(s, qdist_blocks[index]); 145 } else { 146 g_string_append_c(s, ' '); 147 } 148 } 149 out: 150 return g_string_free(s, FALSE); 151} 152 153/* 154 * Bin the distribution in @from into @n bins of consecutive, non-overlapping 155 * intervals, copying the result to @to. 156 * 157 * This function is internal to qdist: only this file and test code should 158 * ever call it. 159 * 160 * Note: calling this function on an already-binned qdist is a bug. 161 * 162 * If @n == 0 or @from->n == 1, use @from->n. 163 */ 164void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) 165{ 166 double xmin, xmax; 167 double step; 168 size_t i, j; 169 170 qdist_init(to); 171 172 if (from->n == 0) { 173 return; 174 } 175 if (n == 0 || from->n == 1) { 176 n = from->n; 177 } 178 179 /* set equally-sized bins between @from's left and right */ 180 xmin = qdist_xmin(from); 181 xmax = qdist_xmax(from); 182 step = (xmax - xmin) / n; 183 184 if (n == from->n) { 185 /* if @from's entries are equally spaced, no need to re-bin */ 186 for (i = 0; i < from->n; i++) { 187 if (from->entries[i].x != xmin + i * step) { 188 goto rebin; 189 } 190 } 191 /* they're equally spaced, so copy the dist and bail out */ 192 to->entries = g_renew(struct qdist_entry, to->entries, n); 193 to->n = from->n; 194 memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); 195 return; 196 } 197 198 rebin: 199 j = 0; 200 for (i = 0; i < n; i++) { 201 double x; 202 double left, right; 203 204 left = xmin + i * step; 205 right = xmin + (i + 1) * step; 206 207 /* Add x, even if it might not get any counts later */ 208 x = left; 209 qdist_add(to, x, 0); 210 211 /* 212 * To avoid double-counting we capture [left, right) ranges, except for 213 * the righmost bin, which captures a [left, right] range. 214 */ 215 while (j < from->n && (from->entries[j].x < right || i == n - 1)) { 216 struct qdist_entry *o = &from->entries[j]; 217 218 qdist_add(to, x, o->count); 219 j++; 220 } 221 } 222} 223 224/* 225 * Print @dist into a string, after re-binning it into @n bins of consecutive, 226 * non-overlapping intervals. 227 * 228 * If @n == 0, use @orig->n. 229 * 230 * Callers must free the returned string with g_free(). 231 */ 232char *qdist_pr_plain(const struct qdist *dist, size_t n) 233{ 234 struct qdist binned; 235 char *ret; 236 237 if (dist->n == 0) { 238 return g_strdup(QDIST_EMPTY_STR); 239 } 240 qdist_bin__internal(&binned, dist, n); 241 ret = qdist_pr_internal(&binned); 242 qdist_destroy(&binned); 243 return ret; 244} 245 246static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, 247 uint32_t opt, bool is_left) 248{ 249 const char *percent; 250 const char *lparen; 251 const char *rparen; 252 GString *s; 253 double x1, x2, step; 254 double x; 255 double n; 256 int dec; 257 258 s = g_string_new(""); 259 if (!(opt & QDIST_PR_LABELS)) { 260 goto out; 261 } 262 263 dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; 264 percent = opt & QDIST_PR_PERCENT ? "%" : ""; 265 266 n = n_bins ? n_bins : dist->n; 267 x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); 268 step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; 269 270 if (opt & QDIST_PR_100X) { 271 x *= 100.0; 272 step *= 100.0; 273 } 274 if (opt & QDIST_PR_NOBINRANGE) { 275 lparen = rparen = ""; 276 x1 = x; 277 x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ 278 } else { 279 lparen = "["; 280 rparen = is_left ? ")" : "]"; 281 if (is_left) { 282 x1 = x; 283 x2 = x + step; 284 } else { 285 x1 = x - step; 286 x2 = x; 287 } 288 } 289 g_string_append_printf(s, "%s%.*f", lparen, dec, x1); 290 if (!(opt & QDIST_PR_NOBINRANGE)) { 291 g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); 292 } 293 g_string_append(s, percent); 294 out: 295 return g_string_free(s, FALSE); 296} 297 298/* 299 * Print the distribution's histogram into a string. 300 * 301 * See also: qdist_pr_plain(). 302 * 303 * Callers must free the returned string with g_free(). 304 */ 305char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) 306{ 307 const char *border = opt & QDIST_PR_BORDER ? "|" : ""; 308 char *llabel, *rlabel; 309 char *hgram; 310 GString *s; 311 312 if (dist->n == 0) { 313 return g_strdup(QDIST_EMPTY_STR); 314 } 315 316 s = g_string_new(""); 317 318 llabel = qdist_pr_label(dist, n_bins, opt, true); 319 rlabel = qdist_pr_label(dist, n_bins, opt, false); 320 hgram = qdist_pr_plain(dist, n_bins); 321 g_string_append_printf(s, "%s%s%s%s%s", 322 llabel, border, hgram, border, rlabel); 323 g_free(llabel); 324 g_free(rlabel); 325 g_free(hgram); 326 327 return g_string_free(s, FALSE); 328} 329 330static inline double qdist_x(const struct qdist *dist, int index) 331{ 332 if (dist->n == 0) { 333 return NAN; 334 } 335 return dist->entries[index].x; 336} 337 338double qdist_xmin(const struct qdist *dist) 339{ 340 return qdist_x(dist, 0); 341} 342 343double qdist_xmax(const struct qdist *dist) 344{ 345 return qdist_x(dist, dist->n - 1); 346} 347 348size_t qdist_unique_entries(const struct qdist *dist) 349{ 350 return dist->n; 351} 352 353unsigned long qdist_sample_count(const struct qdist *dist) 354{ 355 unsigned long count = 0; 356 size_t i; 357 358 for (i = 0; i < dist->n; i++) { 359 struct qdist_entry *e = &dist->entries[i]; 360 361 count += e->count; 362 } 363 return count; 364} 365 366static double qdist_pairwise_avg(const struct qdist *dist, size_t index, 367 size_t n, unsigned long count) 368{ 369 /* amortize the recursion by using a base case > 2 */ 370 if (n <= 8) { 371 size_t i; 372 double ret = 0; 373 374 for (i = 0; i < n; i++) { 375 struct qdist_entry *e = &dist->entries[index + i]; 376 377 ret += e->x * e->count / count; 378 } 379 return ret; 380 } else { 381 size_t n2 = n / 2; 382 383 return qdist_pairwise_avg(dist, index, n2, count) + 384 qdist_pairwise_avg(dist, index + n2, n - n2, count); 385 } 386} 387 388double qdist_avg(const struct qdist *dist) 389{ 390 unsigned long count; 391 392 count = qdist_sample_count(dist); 393 if (!count) { 394 return NAN; 395 } 396 return qdist_pairwise_avg(dist, 0, dist->n, count); 397}