398 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			398 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			C
		
	
	
	
| /*
 | |
|  * qdist.c - QEMU helpers for handling frequency distributions of data.
 | |
|  *
 | |
|  * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
 | |
|  *
 | |
|  * License: GNU GPL, version 2 or later.
 | |
|  *   See the COPYING file in the top-level directory.
 | |
|  */
 | |
| #include "qemu/osdep.h"
 | |
| #include "qemu/qdist.h"
 | |
| 
 | |
| #include <math.h>
 | |
| #ifndef NAN
 | |
| #define NAN (0.0 / 0.0)
 | |
| #endif
 | |
| 
 | |
| #define QDIST_EMPTY_STR "(empty)"
 | |
| 
 | |
| void qdist_init(struct qdist *dist)
 | |
| {
 | |
|     dist->entries = g_new(struct qdist_entry, 1);
 | |
|     dist->size = 1;
 | |
|     dist->n = 0;
 | |
| }
 | |
| 
 | |
| void qdist_destroy(struct qdist *dist)
 | |
| {
 | |
|     g_free(dist->entries);
 | |
| }
 | |
| 
 | |
| static inline int qdist_cmp_double(double a, double b)
 | |
| {
 | |
|     if (a > b) {
 | |
|         return 1;
 | |
|     } else if (a < b) {
 | |
|         return -1;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static int qdist_cmp(const void *ap, const void *bp)
 | |
| {
 | |
|     const struct qdist_entry *a = ap;
 | |
|     const struct qdist_entry *b = bp;
 | |
| 
 | |
|     return qdist_cmp_double(a->x, b->x);
 | |
| }
 | |
| 
 | |
| void qdist_add(struct qdist *dist, double x, long count)
 | |
| {
 | |
|     struct qdist_entry *entry = NULL;
 | |
| 
 | |
|     if (dist->n) {
 | |
|         struct qdist_entry e;
 | |
| 
 | |
|         e.x = x;
 | |
|         entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
 | |
|     }
 | |
| 
 | |
|     if (entry) {
 | |
|         entry->count += count;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (unlikely(dist->n == dist->size)) {
 | |
|         dist->size *= 2;
 | |
|         dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
 | |
|     }
 | |
|     dist->n++;
 | |
|     entry = &dist->entries[dist->n - 1];
 | |
|     entry->x = x;
 | |
|     entry->count = count;
 | |
|     qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
 | |
| }
 | |
| 
 | |
| void qdist_inc(struct qdist *dist, double x)
 | |
| {
 | |
|     qdist_add(dist, x, 1);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Unicode for block elements. See:
 | |
|  *   https://en.wikipedia.org/wiki/Block_Elements
 | |
|  */
 | |
| static const gunichar qdist_blocks[] = {
 | |
|     0x2581,
 | |
|     0x2582,
 | |
|     0x2583,
 | |
|     0x2584,
 | |
|     0x2585,
 | |
|     0x2586,
 | |
|     0x2587,
 | |
|     0x2588
 | |
| };
 | |
| 
 | |
| #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
 | |
| 
 | |
| /*
 | |
|  * Print a distribution into a string.
 | |
|  *
 | |
|  * This function assumes that appropriate binning has been done on the input;
 | |
|  * see qdist_bin__internal() and qdist_pr_plain().
 | |
|  *
 | |
|  * Callers must free the returned string with g_free().
 | |
|  */
 | |
| static char *qdist_pr_internal(const struct qdist *dist)
 | |
| {
 | |
|     double min, max;
 | |
|     GString *s = g_string_new("");
 | |
|     size_t i;
 | |
| 
 | |
|     /* if only one entry, its printout will be either full or empty */
 | |
|     if (dist->n == 1) {
 | |
|         if (dist->entries[0].count) {
 | |
|             g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
 | |
|         } else {
 | |
|             g_string_append_c(s, ' ');
 | |
|         }
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
|     /* get min and max counts */
 | |
|     min = dist->entries[0].count;
 | |
|     max = min;
 | |
|     for (i = 0; i < dist->n; i++) {
 | |
|         struct qdist_entry *e = &dist->entries[i];
 | |
| 
 | |
|         if (e->count < min) {
 | |
|             min = e->count;
 | |
|         }
 | |
|         if (e->count > max) {
 | |
|             max = e->count;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < dist->n; i++) {
 | |
|         struct qdist_entry *e = &dist->entries[i];
 | |
|         int index;
 | |
| 
 | |
|         /* make an exception with 0; instead of using block[0], print a space */
 | |
|         if (e->count) {
 | |
|             /* divide first to avoid loss of precision when e->count == max */
 | |
|             index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
 | |
|             g_string_append_unichar(s, qdist_blocks[index]);
 | |
|         } else {
 | |
|             g_string_append_c(s, ' ');
 | |
|         }
 | |
|     }
 | |
|  out:
 | |
|     return g_string_free(s, FALSE);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Bin the distribution in @from into @n bins of consecutive, non-overlapping
 | |
|  * intervals, copying the result to @to.
 | |
|  *
 | |
|  * This function is internal to qdist: only this file and test code should
 | |
|  * ever call it.
 | |
|  *
 | |
|  * Note: calling this function on an already-binned qdist is a bug.
 | |
|  *
 | |
|  * If @n == 0 or @from->n == 1, use @from->n.
 | |
|  */
 | |
| void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
 | |
| {
 | |
|     double xmin, xmax;
 | |
|     double step;
 | |
|     size_t i, j;
 | |
| 
 | |
|     qdist_init(to);
 | |
| 
 | |
|     if (from->n == 0) {
 | |
|         return;
 | |
|     }
 | |
|     if (n == 0 || from->n == 1) {
 | |
|         n = from->n;
 | |
|     }
 | |
| 
 | |
|     /* set equally-sized bins between @from's left and right */
 | |
|     xmin = qdist_xmin(from);
 | |
|     xmax = qdist_xmax(from);
 | |
|     step = (xmax - xmin) / n;
 | |
| 
 | |
|     if (n == from->n) {
 | |
|         /* if @from's entries are equally spaced, no need to re-bin */
 | |
|         for (i = 0; i < from->n; i++) {
 | |
|             if (from->entries[i].x != xmin + i * step) {
 | |
|                 goto rebin;
 | |
|             }
 | |
|         }
 | |
|         /* they're equally spaced, so copy the dist and bail out */
 | |
|         to->entries = g_renew(struct qdist_entry, to->entries, n);
 | |
|         to->n = from->n;
 | |
|         memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|  rebin:
 | |
|     j = 0;
 | |
|     for (i = 0; i < n; i++) {
 | |
|         double x;
 | |
|         double left, right;
 | |
| 
 | |
|         left = xmin + i * step;
 | |
|         right = xmin + (i + 1) * step;
 | |
| 
 | |
|         /* Add x, even if it might not get any counts later */
 | |
|         x = left;
 | |
|         qdist_add(to, x, 0);
 | |
| 
 | |
|         /*
 | |
|          * To avoid double-counting we capture [left, right) ranges, except for
 | |
|          * the righmost bin, which captures a [left, right] range.
 | |
|          */
 | |
|         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
 | |
|             struct qdist_entry *o = &from->entries[j];
 | |
| 
 | |
|             qdist_add(to, x, o->count);
 | |
|             j++;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Print @dist into a string, after re-binning it into @n bins of consecutive,
 | |
|  * non-overlapping intervals.
 | |
|  *
 | |
|  * If @n == 0, use @orig->n.
 | |
|  *
 | |
|  * Callers must free the returned string with g_free().
 | |
|  */
 | |
| char *qdist_pr_plain(const struct qdist *dist, size_t n)
 | |
| {
 | |
|     struct qdist binned;
 | |
|     char *ret;
 | |
| 
 | |
|     if (dist->n == 0) {
 | |
|         return g_strdup(QDIST_EMPTY_STR);
 | |
|     }
 | |
|     qdist_bin__internal(&binned, dist, n);
 | |
|     ret = qdist_pr_internal(&binned);
 | |
|     qdist_destroy(&binned);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
 | |
|                             uint32_t opt, bool is_left)
 | |
| {
 | |
|     const char *percent;
 | |
|     const char *lparen;
 | |
|     const char *rparen;
 | |
|     GString *s;
 | |
|     double x1, x2, step;
 | |
|     double x;
 | |
|     double n;
 | |
|     int dec;
 | |
| 
 | |
|     s = g_string_new("");
 | |
|     if (!(opt & QDIST_PR_LABELS)) {
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
|     dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
 | |
|     percent = opt & QDIST_PR_PERCENT ? "%" : "";
 | |
| 
 | |
|     n = n_bins ? n_bins : dist->n;
 | |
|     x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
 | |
|     step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
 | |
| 
 | |
|     if (opt & QDIST_PR_100X) {
 | |
|         x *= 100.0;
 | |
|         step *= 100.0;
 | |
|     }
 | |
|     if (opt & QDIST_PR_NOBINRANGE) {
 | |
|         lparen = rparen = "";
 | |
|         x1 = x;
 | |
|         x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
 | |
|     } else {
 | |
|         lparen = "[";
 | |
|         rparen = is_left ? ")" : "]";
 | |
|         if (is_left) {
 | |
|             x1 = x;
 | |
|             x2 = x + step;
 | |
|         } else {
 | |
|             x1 = x - step;
 | |
|             x2 = x;
 | |
|         }
 | |
|     }
 | |
|     g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
 | |
|     if (!(opt & QDIST_PR_NOBINRANGE)) {
 | |
|         g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
 | |
|     }
 | |
|     g_string_append(s, percent);
 | |
|  out:
 | |
|     return g_string_free(s, FALSE);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Print the distribution's histogram into a string.
 | |
|  *
 | |
|  * See also: qdist_pr_plain().
 | |
|  *
 | |
|  * Callers must free the returned string with g_free().
 | |
|  */
 | |
| char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
 | |
| {
 | |
|     const char *border = opt & QDIST_PR_BORDER ? "|" : "";
 | |
|     char *llabel, *rlabel;
 | |
|     char *hgram;
 | |
|     GString *s;
 | |
| 
 | |
|     if (dist->n == 0) {
 | |
|         return g_strdup(QDIST_EMPTY_STR);
 | |
|     }
 | |
| 
 | |
|     s = g_string_new("");
 | |
| 
 | |
|     llabel = qdist_pr_label(dist, n_bins, opt, true);
 | |
|     rlabel = qdist_pr_label(dist, n_bins, opt, false);
 | |
|     hgram = qdist_pr_plain(dist, n_bins);
 | |
|     g_string_append_printf(s, "%s%s%s%s%s",
 | |
|                            llabel, border, hgram, border, rlabel);
 | |
|     g_free(llabel);
 | |
|     g_free(rlabel);
 | |
|     g_free(hgram);
 | |
| 
 | |
|     return g_string_free(s, FALSE);
 | |
| }
 | |
| 
 | |
| static inline double qdist_x(const struct qdist *dist, int index)
 | |
| {
 | |
|     if (dist->n == 0) {
 | |
|         return NAN;
 | |
|     }
 | |
|     return dist->entries[index].x;
 | |
| }
 | |
| 
 | |
| double qdist_xmin(const struct qdist *dist)
 | |
| {
 | |
|     return qdist_x(dist, 0);
 | |
| }
 | |
| 
 | |
| double qdist_xmax(const struct qdist *dist)
 | |
| {
 | |
|     return qdist_x(dist, dist->n - 1);
 | |
| }
 | |
| 
 | |
| size_t qdist_unique_entries(const struct qdist *dist)
 | |
| {
 | |
|     return dist->n;
 | |
| }
 | |
| 
 | |
| unsigned long qdist_sample_count(const struct qdist *dist)
 | |
| {
 | |
|     unsigned long count = 0;
 | |
|     size_t i;
 | |
| 
 | |
|     for (i = 0; i < dist->n; i++) {
 | |
|         struct qdist_entry *e = &dist->entries[i];
 | |
| 
 | |
|         count += e->count;
 | |
|     }
 | |
|     return count;
 | |
| }
 | |
| 
 | |
| static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
 | |
|                                  size_t n, unsigned long count)
 | |
| {
 | |
|     /* amortize the recursion by using a base case > 2 */
 | |
|     if (n <= 8) {
 | |
|         size_t i;
 | |
|         double ret = 0;
 | |
| 
 | |
|         for (i = 0; i < n; i++) {
 | |
|             struct qdist_entry *e = &dist->entries[index + i];
 | |
| 
 | |
|             ret += e->x * e->count / count;
 | |
|         }
 | |
|         return ret;
 | |
|     } else {
 | |
|         size_t n2 = n / 2;
 | |
| 
 | |
|         return qdist_pairwise_avg(dist, index, n2, count) +
 | |
|                qdist_pairwise_avg(dist, index + n2, n - n2, count);
 | |
|     }
 | |
| }
 | |
| 
 | |
| double qdist_avg(const struct qdist *dist)
 | |
| {
 | |
|     unsigned long count;
 | |
| 
 | |
|     count = qdist_sample_count(dist);
 | |
|     if (!count) {
 | |
|         return NAN;
 | |
|     }
 | |
|     return qdist_pairwise_avg(dist, 0, dist->n, count);
 | |
| }
 |