David Blume committed on 2019-02-03 19:18:33
Showing 2 changed files, with 282 additions and 0 deletions.
| ... | ... |
@@ -0,0 +1,70 @@ |
| 1 |
+# Make Chart |
|
| 2 |
+ |
|
| 3 |
+This is a sample Python 3 script that generates a chart in a few different ways. |
|
| 4 |
+ |
|
| 5 |
+### Getting the project |
|
| 6 |
+ |
|
| 7 |
+You can get a copy of this project by clicking on the [ZIP](http://git.dlma.com/make_chart.git/zipball/master) |
|
| 8 |
+or [TAR](http://git.dlma.com/make_chart.git/tarball/master) buttons near the top right of the GitList web page. |
|
| 9 |
+ |
|
| 10 |
+You can clone from the origin with: |
|
| 11 |
+ |
|
| 12 |
+ git clone ssh://USERNAME@dlma.com/~/git/make_chart.git |
|
| 13 |
+ |
|
| 14 |
+### What Does it Do? |
|
| 15 |
+ |
|
| 16 |
+You specify one of four renderers, and it analyzes some (currently hard-coded) data. |
|
| 17 |
+ |
|
| 18 |
+* **none**: Summary only, just print the mean, median, and standard deviation. |
|
| 19 |
+* **gnuplot**: (default) Print an ASCII art graph. |
|
| 20 |
+* **matplotlib**: Create a PNG file suitable for static reports and email. |
|
| 21 |
+* **google**: A Google chart webpage with responsive features. |
|
| 22 |
+ |
|
| 23 |
+Here's the help text: |
|
| 24 |
+ |
|
| 25 |
+ usage: make\_chart.py [\-h] [-r {none,gnuplot,matplotlib,google}]
|
|
| 26 |
+ |
|
| 27 |
+ Makes histograms from raw samples or prepared buckets. |
|
| 28 |
+ |
|
| 29 |
+ optional arguments: |
|
| 30 |
+ -h, --help show this help message and exit |
|
| 31 |
+ -r {none,gnuplot,matplotlib,google}, --renderer {none,gnuplot,matplotlib,google}
|
|
| 32 |
+ Choose a renderer: summary only, text, png, or |
|
| 33 |
+ webpage. |
|
| 34 |
+ |
|
| 35 |
+#### Example text rendering |
|
| 36 |
+ |
|
| 37 |
+ 40 +-------------------------------------------------------+ |
|
| 38 |
+ | + * * + + + + + + | |
|
| 39 |
+ | * * Pizzas ***A*** | |
|
| 40 |
+ 35 |-+ * * +-| |
|
| 41 |
+ | A * median = 80.00 | |
|
| 42 |
+ 30 |-+ * A*** mean = 81.67 sdev=38.09 +-| |
|
| 43 |
+ | * A** | |
|
| 44 |
+ | * A*** | |
|
| 45 |
+ 25 |-+ A A** +-| |
|
| 46 |
+ | * A*** | |
|
| 47 |
+ 20 |-+* A** +-| |
|
| 48 |
+ | * | |
|
| 49 |
+ | * A*** | |
|
| 50 |
+ 15 |-* A** +-| |
|
| 51 |
+ | * A*** | |
|
| 52 |
+ |* A** | |
|
| 53 |
+ 10 |*+ A +-| |
|
| 54 |
+ | * | |
|
| 55 |
+ 5 |-+ * +-| |
|
| 56 |
+ | * | |
|
| 57 |
+ | + + + + + + + A | |
|
| 58 |
+ 0 +-------------------------------------------------------+ |
|
| 59 |
+ 20 40 60 80 100 120 140 160 180 |
|
| 60 |
+ |
|
| 61 |
+### Is it any good? |
|
| 62 |
+ |
|
| 63 |
+[Yes](https://news.ycombinator.com/item?id=3067434). |
|
| 64 |
+ |
|
| 65 |
+### To Do |
|
| 66 |
+ |
|
| 67 |
+* Make it more general. |
|
| 68 |
+* Allow data to be specified in different ways. |
|
| 69 |
+* Support other chart types. |
|
| 70 |
+ |
| ... | ... |
@@ -0,0 +1,212 @@ |
| 1 |
+#!/usr/bin/env python3 |
|
| 2 |
+import os |
|
| 3 |
+import sys |
|
| 4 |
+from argparse import ArgumentParser |
|
| 5 |
+import math |
|
| 6 |
+import operator |
|
| 7 |
+import subprocess |
|
| 8 |
+import platform |
|
| 9 |
+import webbrowser |
|
| 10 |
+ |
|
| 11 |
+ |
|
| 12 |
# Page template used by the 'google' renderer.
#
# main() copies this template line by line into the generated .html file:
#   * the line containing REPLACEME_WITH_BITRATES is replaced by the data rows,
#   * the line containing REPLACE_ME_WITH_OVERVIEW is replaced by the summary,
#   * SET_SHOWTEXTEVERY is substituted in place with the label stride.
#
# The charset is declared explicitly because the generated summary contains
# non-ASCII text (the sigma character).
#
# NOTE(review): the chart title hard-codes "2%" and "Bitrates" while main()
# currently trims 5% per side and labels the data "Pizzas" - confirm which
# wording is intended before changing the runtime text.
html = """<html>
  <head>
    <meta charset="utf-8">
    <style>
      h1, h3 { text-align: center; }
    </style>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script>
      google.charts.load('current', {'packages':['corechart']});
      google.charts.setOnLoadCallback(chartLoaded);
      function chartLoaded() {
        let data = google.visualization.arrayToDataTable([
          REPLACEME_WITH_BITRATES
        ]);

        let options = {
          title: 'Bitrates (excluding top and bottom 2%)',
          width: window.innerWidth,
          height: window.innerHeight - 120,
          chartArea: { top: '100', left: '80', width: '86%' },
          hAxis: {
            showTextEvery: SET_SHOWTEXTEVERY
          },
          vAxis: { }
        };
        let chart = new google.visualization.AreaChart(document.getElementById('chart_div'));
        chart.draw(data, options);
      }
    </script>
  </head>

  <body>
    <h1>Bitrates</h1>
    REPLACE_ME_WITH_OVERVIEW
    <div id="chart_div"></div>
  </body>
</html>"""
|
| 48 |
+ |
|
| 49 |
+ |
|
| 50 |
def calculate_median_mean_stddev_from_samples(values):
    """Return the median, mean, and standard deviation of values.

    values is any iterable of numbers. It is left untouched: the statistics
    are computed from a sorted copy, so the caller's ordering is preserved.

    Returns a (median, mean, std_dev) tuple, where std_dev is the population
    standard deviation (the squared differences are divided by the sample
    count). Empty input yields (0.0, 0, 0.0).
    """
    ordered = sorted(values)
    count = len(ordered)
    if count == 0:
        return 0.0, 0, 0.0

    # Median: the middle sample, or the average of the two middle samples.
    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    # Mean and standard deviation.
    mean = sum(ordered) / count
    mean_of_squares = sum((value - mean) ** 2 for value in ordered) / count
    return median, mean, math.sqrt(mean_of_squares)
|
| 71 |
+ |
|
| 72 |
+ |
|
| 73 |
def calculate_median_mean_stddev(x, y):
    """Return the median, mean, and standard deviation of bucketed samples.

    x is a sequence of values in ascending order and y the matching sequence
    of counts: y[i] samples have the value x[i].

    The result agrees with calculate_median_mean_stddev_from_samples() run
    on the expanded sample list: for an even number of samples the median is
    the average of the two middle samples, even when they fall into
    different buckets.

    Returns a (median, mean, std_dev) tuple, where std_dev is the population
    standard deviation. When there are no samples, (0.0, 0, 0.0) is returned.
    """
    total = sum(y)
    if total <= 0:
        return 0.0, 0, 0.0

    # 1-based ranks of the two middle samples; they coincide for odd totals.
    lower_rank = (total + 1) // 2
    upper_rank = total // 2 + 1

    # Median: walk the cumulative counts until both middle ranks are covered.
    lower = upper = None
    seen = 0
    for value, count in zip(x, y):
        seen += count
        if lower is None and seen >= lower_rank:
            lower = value
        if seen >= upper_rank:
            upper = value
            break
    median = (lower + upper) / 2.0

    # Weighted mean and standard deviation.
    mean = sum(value * count for value, count in zip(x, y)) / total
    mean_of_squares = sum(
        (value - mean) ** 2 * count for value, count in zip(x, y)) / total
    return median, mean, math.sqrt(mean_of_squares)
|
| 92 |
+ |
|
| 93 |
+ |
|
| 94 |
def remove_outliers_by_idx(x, y, outlier_count, idx):
    """Remove outlier_count samples from the idx side of the buckets.

    x and y are parallel lists of values and sample counts; both are
    modified in place. idx selects the side to trim: 0 for the beginning,
    -1 for the end.

    Buckets that are consumed completely are deleted from both lists,
    including a bucket whose count exactly matches what is left to remove.
    The boundary bucket that is only partially consumed keeps its remaining
    count. If outlier_count exceeds the number of samples available, the
    lists are simply emptied.
    """
    remaining = outlier_count
    # Drop whole buckets while they fit entirely inside the outlier budget.
    while remaining > 0 and y and y[idx] <= remaining:
        remaining -= y[idx]
        del x[idx]
        del y[idx]
    # Trim the boundary bucket that is only partially consumed.
    if remaining > 0 and y:
        y[idx] -= remaining
|
| 103 |
+ |
|
| 104 |
+ |
|
| 105 |
def remove_outliers(x, y, outlier_percentile):
    """Trim outlier_percentile percent of the samples from each end.

    x and y are parallel lists of values and sample counts; both are
    modified in place. The number of samples to drop per side is computed
    once, from the total before any trimming, and rounded down.
    """
    total_samples = sum(y)
    per_side = total_samples * outlier_percentile // 100
    # Front of the buckets first (index 0), then the back (index -1).
    for edge in (0, -1):
        remove_outliers_by_idx(x, y, per_side, edge)
|
| 111 |
+ |
|
| 112 |
+ |
|
| 113 |
def acquire_data(buckets):
    """Generate the hard-coded sample data set.

    Returns a (samples, x, y) tuple:
      x       - the bucket values 10, 20, ..., 10 * buckets
      y       - the sample count of each bucket: rising in steps of 8 over
                the first quarter of the buckets, then falling by 2 per
                bucket
      samples - the flat list with every x[i] repeated y[i] times
    """
    quarter = buckets // 4
    x = [10 * n for n in range(1, buckets + 1)]

    rising = [8 * (i + 1) for i in range(quarter)]
    falling = [quarter * 8 - 2 * i for i in range(quarter, buckets)]
    y = rising + falling

    # Expand the buckets into individual samples.
    samples = []
    for value, count in zip(x, y):
        samples.extend([value] * count)
    return samples, x, y
|
| 126 |
+ |
|
| 127 |
+ |
|
| 128 |
def _file_uri(path):
    """Return a file:// URI for the absolute filesystem path."""
    # Local import: pathlib is not among the file's top-level imports.
    from pathlib import Path
    return Path(path).as_uri()


def _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile, png_path):
    """Plot the bucket counts with matplotlib, save them to png_path, open it."""
    # Imported here so that main() also works when this module is imported
    # rather than run as a script.
    import matplotlib.pyplot as plt
    import matplotlib.ticker

    def thousands(value, _pos):
        return format(int(value), ',')

    plt.figure(figsize=(10, 8))
    plt.plot(x, y)
    ax = plt.gca()
    ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(thousands))
    ax.get_yaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(thousands))
    ax.yaxis.grid(linestyle='--', alpha=0.4)
    ax.xaxis.grid(linestyle='--', alpha=0.4)
    plt.title(f"Histogram of sample data less outter {outlier_percentile}%")
    plt.figtext(0.14, 0.82, f'median = {median:,.2f}\nmean = {mean:,.2f} sdev={std_dev:,.2f}')
    plt.xlabel("Pizzas")
    plt.ylabel("Count of samples")
    plt.savefig(png_path)
    print('Attemtping to display %s' % png_path)
    webbrowser.open_new_tab(_file_uri(png_path))


def _render_google(x, y, median, mean, std_dev, outlier_percentile, filename, html_path):
    """Fill in the html template, write it to html_path, and open it."""
    # The page contains non-ASCII text (sigma), so the encoding is explicit.
    with open(html_path, 'w', encoding='utf-8') as f:
        for line in html.splitlines():
            if 'REPLACEME_WITH_BITRATES' in line:
                f.write('["Bitrate bucket", "Count of reported rates"],\n')
                for value, count in zip(x, y):
                    f.write(f'["{value:,} Pizzas", {count}],\n')
            elif 'REPLACE_ME_WITH_OVERVIEW' in line:
                f.write(f'<h3>{filename} less outter {outlier_percentile}%</h3>\n')
                f.write(f'median = {median:,.3f} Pizzas<br />\nmean = {mean:,.3f} σ={std_dev:,.3f} Pizzas<br />\n')
            elif 'NUMBER_ROWS' in line:
                f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n')
            elif 'SET_SHOWTEXTEVERY' in line:
                f.write(line.replace('SET_SHOWTEXTEVERY', str(len(x) // 6)) + '\n')
            else:
                f.write(line + '\n')
    print('Attemtping to display %s' % html_path)
    webbrowser.open_new_tab(_file_uri(html_path))


def _render_gnuplot(x, y, median, mean, std_dev):
    """Pipe the buckets to gnuplot, which draws an ASCII chart on the terminal."""
    # Local import: shutil is not among the file's top-level imports.
    import shutil

    # Prefer whatever gnuplot is on PATH; fall back to the conventional
    # per-platform install location.
    if platform.system() == 'Darwin':
        fallback_path = '/usr/local/bin/gnuplot'
    else:
        fallback_path = '/usr/bin/gnuplot'
    gnuplot_path = shutil.which('gnuplot') or fallback_path

    with subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8') as gnuplot:
        gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n")
        gnuplot.stdin.write(f'set label "median = {median:,.2f}\\nmean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n')
        gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n")
        for value, count in zip(x, y):
            gnuplot.stdin.write("%f %f\n" % (value, count))
        gnuplot.stdin.write("e\n")
        gnuplot.stdin.flush()


def main(renderer):
    """Analyze the sample data and chart it with the chosen renderer.

    renderer is one of 'none', 'gnuplot', 'matplotlib' or 'google'.

    The outer 5% of the samples are dropped from each end, the median, mean
    and standard deviation are computed both from the buckets and from the
    raw samples (and cross-checked), the chart is rendered, and a summary is
    printed. Generated .png/.html files are written next to the script.
    """
    buckets = 20
    samples, x, y = acquire_data(buckets)

    outlier_percentile = 5
    outlier_count = sum(y) * outlier_percentile // 100
    # An explicit end index keeps the slice correct when outlier_count is 0
    # (samples[0:-0] would be empty).
    middle_samples = samples[outlier_count:len(samples) - outlier_count]
    if __debug__:
        print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, len(middle_sample) = {len(middle_samples)}')
    remove_outliers(x, y, outlier_percentile)
    if __debug__:
        print(f'x (value) = {x}, len={len(x)}')
        print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}')

    # Sanity check: the bucket-based and the sample-based statistics agree.
    median, mean, std_dev = calculate_median_mean_stddev(x, y)
    s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples)
    assert math.isclose(median, s_median)
    assert math.isclose(mean, s_mean)
    assert math.isclose(std_dev, s_std_dev)

    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    filename = os.path.basename(sys.argv[0])
    stem = os.path.splitext(filename)[0]
    if renderer == 'matplotlib':
        _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile,
                           os.path.join(localdir, stem + '.png'))
    elif renderer == 'google':
        _render_google(x, y, median, mean, std_dev, outlier_percentile,
                       filename, os.path.join(localdir, stem + '.html'))
    # NOTE(review): the text chart is drawn for every renderer except 'none',
    # i.e. also after the matplotlib and google renderers - confirm that this
    # is intended rather than a missing "elif renderer == 'gnuplot'".
    if renderer != 'none':
        _render_gnuplot(x, y, median, mean, std_dev)
    print(f'Processed {len(middle_samples)} individual samples.')
    print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}')
|
|
| 203 |
+ |
|
| 204 |
if __name__ == '__main__':
    # Command-line entry point: pick the renderer and run the analysis.
    renderer_choices = ['none', 'gnuplot', 'matplotlib', 'google']
    arg_parser = ArgumentParser(
        description='Makes histograms from raw samples or prepared buckets.')
    arg_parser.add_argument(
        '-r', '--renderer',
        choices=renderer_choices,
        default='gnuplot',
        help='Choose a renderer: summary only, text, png, or webpage.')
    options = arg_parser.parse_args()

    if options.renderer == "matplotlib":
        # matplotlib is only imported on demand, so the other renderers work
        # without it installed. The imports happen at module scope, which
        # makes plt and matplotlib module-level names.
        import matplotlib.pyplot as plt
        import matplotlib.ticker

    main(options.renderer)
|
| 0 | 213 |