David Blume committed on 2019-02-03 19:18:33
Showing 2 changed files, with 282 additions and 0 deletions.
... | ... |
@@ -0,0 +1,70 @@ |
1 |
+# Make Chart |
|
2 |
+ |
|
3 |
+This is a sample Python 3 script that generates a chart in a few different ways. |
|
4 |
+ |
|
5 |
+### Getting the project |
|
6 |
+ |
|
7 |
+You can get a copy of this project by clicking on the [ZIP](http://git.dlma.com/make_chart.git/zipball/master) |
|
8 |
+or [TAR](http://git.dlma.com/make_chart.git/tarball/master) buttons near the top right of the GitList web page. |
|
9 |
+ |
|
10 |
+You can clone from the origin with: |
|
11 |
+ |
|
12 |
+ git clone ssh://USERNAME@dlma.com/~/git/make_chart.git |
|
13 |
+ |
|
14 |
+### What Does it Do? |
|
15 |
+ |
|
16 |
+You specify one of four renderers, and it analyzes some (currently hardcoded) data. |
|
17 |
+ |
|
18 |
+* **none**: Summary only, just print the mean, median, and standard deviation. |
|
19 |
+* **gnuplot**: (default) Print an ASCII art graph. |
|
20 |
+* **matplotlib**: Create a PNG file suitable for static reports and email. |
|
21 |
+* **google**: A Google chart webpage with responsive features. |
|
22 |
+ |
|
23 |
+Here's the help text: |
|
24 |
+ |
|
25 |
+    usage: make_chart.py [-h] [-r {none,gnuplot,matplotlib,google}] |
|
26 |
+ |
|
27 |
+ Makes histograms from raw samples or prepared buckets. |
|
28 |
+ |
|
29 |
+ optional arguments: |
|
30 |
+ -h, --help show this help message and exit |
|
31 |
+ -r {none,gnuplot,matplotlib,google}, --renderer {none,gnuplot,matplotlib,google} |
|
32 |
+ Choose a renderer: summary only, text, png, or |
|
33 |
+ webpage. |
|
34 |
+ |
|
35 |
+#### Example text rendering |
|
36 |
+ |
|
37 |
+ 40 +-------------------------------------------------------+ |
|
38 |
+ | + * * + + + + + + | |
|
39 |
+ | * * Pizzas ***A*** | |
|
40 |
+ 35 |-+ * * +-| |
|
41 |
+ | A * median = 80.00 | |
|
42 |
+ 30 |-+ * A*** mean = 81.67 sdev=38.09 +-| |
|
43 |
+ | * A** | |
|
44 |
+ | * A*** | |
|
45 |
+ 25 |-+ A A** +-| |
|
46 |
+ | * A*** | |
|
47 |
+ 20 |-+* A** +-| |
|
48 |
+ | * | |
|
49 |
+ | * A*** | |
|
50 |
+ 15 |-* A** +-| |
|
51 |
+ | * A*** | |
|
52 |
+ |* A** | |
|
53 |
+ 10 |*+ A +-| |
|
54 |
+ | * | |
|
55 |
+ 5 |-+ * +-| |
|
56 |
+ | * | |
|
57 |
+ | + + + + + + + A | |
|
58 |
+ 0 +-------------------------------------------------------+ |
|
59 |
+ 20 40 60 80 100 120 140 160 180 |
|
60 |
+ |
|
61 |
+### Is it any good? |
|
62 |
+ |
|
63 |
+[Yes](https://news.ycombinator.com/item?id=3067434). |
|
64 |
+ |
|
65 |
+### To Do |
|
66 |
+ |
|
67 |
+* Make it more general. |
|
68 |
+* Allow data to be specified different ways. |
|
69 |
+* Support other chart types. |
|
70 |
+ |
... | ... |
@@ -0,0 +1,212 @@ |
1 |
+#!/usr/bin/env python3 |
|
2 |
+import os |
|
3 |
+import sys |
|
4 |
+from argparse import ArgumentParser |
|
5 |
+import math |
|
6 |
+import operator |
|
7 |
+import subprocess |
|
8 |
+import platform |
|
9 |
+import webbrowser |
|
10 |
+ |
|
11 |
+ |
|
12 |
# Page template for the Google Charts renderer. It is processed line by line;
# a line containing one of these placeholders is replaced or filled in:
#   REPLACEME_WITH_BITRATES   -> the rows of the chart's data table
#   SET_SHOWTEXTEVERY         -> the hAxis label stride
#   REPLACE_ME_WITH_OVERVIEW  -> a short statistics summary
# The charset is declared explicitly so non-ASCII text in the overview renders
# correctly, and the chart title no longer hard-codes an outlier percentage
# that the template has no way of knowing.
html = """<html>
    <head>
        <meta charset="utf-8">
        <style>
            h1, h3 { text-align: center; }
        </style>
        <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
        <script>
            google.charts.load('current', {'packages':['corechart']});
            google.charts.setOnLoadCallback(chartLoaded);
            function chartLoaded() {
                let data = google.visualization.arrayToDataTable([
                    REPLACEME_WITH_BITRATES
                ]);

                let options = {
                    title: 'Bitrates (outliers excluded)',
                    width: window.innerWidth,
                    height: window.innerHeight - 120,
                    chartArea: { top: '100', left: '80', width: '86%' },
                    hAxis: {
                        showTextEvery: SET_SHOWTEXTEVERY
                    },
                    vAxis: { }
                };
                let chart = new google.visualization.AreaChart(document.getElementById('chart_div'));
                chart.draw(data, options);
            }
        </script>
    </head>

    <body>
        <h1>Bitrates</h1>
        REPLACE_ME_WITH_OVERVIEW
        <div id="chart_div"></div>
    </body>
</html>"""
|
48 |
+ |
|
49 |
+ |
|
50 |
def calculate_median_mean_stddev_from_samples(values):
    """Return the median, mean, and standard deviation of raw samples.

    values may be any iterable of numbers in any order. It is not modified:
    the samples are copied into a new sorted list instead of being sorted in
    place.

    Returns a (median, mean, std_dev) tuple. The standard deviation is the
    population form (the squared deviations are divided by the sample
    count). An empty input yields (0.0, 0, 0.0).
    """
    ordered = sorted(values)
    count = len(ordered)
    if count == 0:
        return 0.0, 0, 0.0

    # Median: the middle sample, or the average of the two middle samples.
    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    mean = sum(ordered) / count
    mean_of_squares = sum(pow(value - mean, 2) for value in ordered) / count
    return median, mean, math.sqrt(mean_of_squares)
|
71 |
+ |
|
72 |
+ |
|
73 |
def calculate_median_mean_stddev(x, y):
    """Return the median, mean, and standard deviation of bucketed samples.

    x is a sequence of values and y the matching sequence of sample counts,
    so value x[i] occurs y[i] times. The median walk assumes x is in
    ascending order and the counts are non-negative.

    Returns a (median, mean, std_dev) tuple. The median is that of the
    expanded sample list: the middle sample for an odd total, the average of
    the two middle samples for an even total. The standard deviation is the
    population form. When the total count is zero the result is
    (0.0, 0, 0.0) instead of a division by zero.
    """
    total = sum(y)
    if total <= 0:
        return 0.0, 0, 0.0

    # 0-based positions of the middle sample(s) in the expanded sample list;
    # both are the same position when the total is odd.
    lower_pos = (total - 1) // 2
    upper_pos = total // 2
    lower = upper = None
    seen = 0
    for value, count in zip(x, y):
        seen += count
        # A bucket holds position p once the running count exceeds p.
        if lower is None and seen > lower_pos:
            lower = value
        if seen > upper_pos:
            upper = value
            break
    median = (lower + upper) / 2.0

    mean = sum(map(operator.mul, x, y)) / total
    squares_of_diffs = (pow(value - mean, 2) * count for value, count in zip(x, y))
    mean_of_squares = sum(squares_of_diffs) / total
    return median, mean, math.sqrt(mean_of_squares)
|
92 |
+ |
|
93 |
+ |
|
94 |
def remove_outliers_by_idx(x, y, outlier_count, idx):
    """Remove outlier_count samples from one end of the buckets, in place.

    x holds the bucket values and y the matching sample counts. idx selects
    the end to trim: 0 for the front, -1 for the back. Buckets smaller than
    the remaining quota are deleted from both lists; the next bucket then has
    its count reduced by whatever is left of the quota. A bucket whose count
    is reduced exactly to zero is kept, with a count of 0.

    If outlier_count exceeds the total number of samples, every bucket is
    removed and both lists end up empty.
    """
    removed = 0
    # The "y and" guard stops the walk once every bucket has been consumed,
    # instead of indexing into an empty list.
    while y and removed + y[idx] < outlier_count:
        removed += y[idx]
        del x[idx]
        del y[idx]
    if y and removed < outlier_count:
        y[idx] -= outlier_count - removed
|
103 |
+ |
|
104 |
+ |
|
105 |
def remove_outliers(x, y, outlier_percentile):
    """Trim outlier_percentile percent of the samples off each end, in place.

    x holds the bucket values and y the matching sample counts. The number of
    samples to drop per side is computed once, from the total count before
    any trimming, and rounded down.
    """
    per_side = sum(y) * outlier_percentile // 100
    # Front of the buckets first (index 0), then the back (index -1).
    for side in (0, -1):
        remove_outliers_by_idx(x, y, per_side, side)
|
111 |
+ |
|
112 |
+ |
|
113 |
def acquire_data(buckets):
    """Generate the hardcoded sample data set.

    Builds `buckets` buckets with values 10, 20, ..., 10 * buckets. The
    sample counts rise in steps of 8 over the first quarter of the buckets
    and are then computed as (buckets // 4) * 8 - 2 * i for bucket index i.
    Counts are clamped at zero, so bucket totals for which that formula dips
    below zero never produce a negative count and y always agrees with the
    expanded sample list.

    Returns (samples, x, y): samples is the expanded list in which each
    value x[i] is repeated y[i] times, in ascending value order; x is the
    list of bucket values and y the list of per-bucket sample counts.
    """
    quarter = buckets // 4
    peak = quarter * 8
    x = [i * 10 for i in range(1, buckets + 1)]
    y = [(i + 1) * 8 for i in range(quarter)]
    y += [max(0, peak - i * 2) for i in range(quarter, buckets)]

    samples = []
    for value, count in zip(x, y):
        samples.extend([value] * count)
    return samples, x, y
|
126 |
+ |
|
127 |
+ |
|
128 |
def _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile, png_path):
    """Plot the buckets with matplotlib, save the figure to png_path, and open it."""
    # Imported here so the other renderers do not need matplotlib and so this
    # code does not rely on names bound elsewhere in the module.
    import matplotlib.pyplot as plt
    import matplotlib.ticker

    plt.figure(figsize=(10, 8))
    plt.plot(x, y)
    ax = plt.gca()
    # Show tick labels as integers with thousands separators.
    ax.get_xaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda value, pos: format(int(value), ',')))
    ax.get_yaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda value, pos: format(int(value), ',')))
    ax.yaxis.grid(linestyle='--', alpha=0.4)
    ax.xaxis.grid(linestyle='--', alpha=0.4)
    plt.title(f"Histogram of sample data less outer {outlier_percentile}%")
    plt.figtext(0.14, 0.82, f'median = {median:,.2f}\nmean = {mean:,.2f} sdev={std_dev:,.2f}')
    plt.xlabel("Pizzas")
    plt.ylabel("Count of samples")
    plt.savefig(png_path)
    print('Attempting to display %s' % png_path)
    webbrowser.open_new_tab('file://%s' % png_path)


def _render_google(x, y, median, mean, std_dev, outlier_percentile, filename, html_path):
    """Fill in the html template, write it to html_path, and open it in a browser."""
    # len(x) // 6 is 0 for fewer than six buckets; use a stride of at least 1.
    show_text_every = max(1, len(x) // 6)
    # Explicit encoding so the output does not depend on the platform default.
    with open(html_path, 'w', encoding='utf-8') as f:
        for line in html.splitlines():
            if 'REPLACEME_WITH_BITRATES' in line:
                f.write('["Bitrate bucket", "Count of reported rates"],\n')
                for value, count in zip(x, y):
                    f.write(f'["{value:,} Pizzas", {count}],\n')
            elif 'REPLACE_ME_WITH_OVERVIEW' in line:
                f.write(f'<h3>{filename} less outer {outlier_percentile}%</h3>\n')
                # Sigma is written as an HTML entity so it renders correctly
                # whatever charset the browser assumes for the page.
                f.write(f'median = {median:,.3f} Pizzas<br />\n'
                        f'mean = {mean:,.3f} &sigma;={std_dev:,.3f} Pizzas<br />\n')
            elif 'NUMBER_ROWS' in line:
                f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n')
            elif 'SET_SHOWTEXTEVERY' in line:
                f.write(line.replace('SET_SHOWTEXTEVERY', str(show_text_every)) + '\n')
            else:
                f.write(line + '\n')
    print('Attempting to display %s' % html_path)
    webbrowser.open_new_tab('file://%s' % html_path)


def _render_gnuplot(x, y, median, mean, std_dev):
    """Draw an ASCII chart of the buckets by piping commands to gnuplot."""
    import shutil

    # Prefer whatever gnuplot is on PATH; fall back to the conventional
    # per-platform install location.
    if platform.system() == 'Darwin':
        fallback_path = '/usr/local/bin/gnuplot'
    else:
        fallback_path = '/usr/bin/gnuplot'
    gnuplot_path = shutil.which('gnuplot') or fallback_path
    try:
        gnuplot = subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8')
    except OSError as err:
        # A missing gnuplot should not prevent the summary from being printed.
        print(f'Could not run gnuplot ({gnuplot_path}): {err}', file=sys.stderr)
        return
    with gnuplot:
        gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n")
        gnuplot.stdin.write(f'set label "median = {median:,.2f}\\nmean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n')
        gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n")
        for value, count in zip(x, y):
            gnuplot.stdin.write("%f %f\n" % (value, count))
        gnuplot.stdin.write("e\n")
        gnuplot.stdin.flush()


def main(renderer):
    """Analyze the sample data and render it with the chosen renderer.

    renderer is one of 'none', 'gnuplot', 'matplotlib' or 'google'. The outer
    5% of the samples is trimmed from each end, the statistics are computed
    both from the buckets and from the raw samples (and checked against each
    other in debug mode), the selected chart is produced, and a summary is
    printed. Output files are written next to the script (sys.argv[0]).
    """
    buckets = 20
    samples, x, y = acquire_data(buckets)

    outlier_percentile = 5
    outlier_count = sum(y) * outlier_percentile // 100
    # An explicit end index is used because samples[n:-n] is empty when n == 0.
    middle_samples = samples[outlier_count:len(samples) - outlier_count]
    if __debug__:
        print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, len(middle_sample) = {len(middle_samples)}')
    remove_outliers(x, y, outlier_percentile)
    if __debug__:
        print(f'x (value) = {x}, len={len(x)}')
        print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}')

    median, mean, std_dev = calculate_median_mean_stddev(x, y)
    s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples)
    # Internal consistency check: both computations must agree.
    assert math.isclose(median, s_median)
    assert math.isclose(mean, s_mean)
    assert math.isclose(std_dev, s_std_dev)

    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    filename = os.path.basename(sys.argv[0])
    stem = os.path.splitext(filename)[0]
    if renderer == 'matplotlib':
        _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile,
                           os.path.join(localdir, stem + '.png'))
    elif renderer == 'google':
        _render_google(x, y, median, mean, std_dev, outlier_percentile, filename,
                       os.path.join(localdir, stem + '.html'))
    # NOTE(review): the text chart is drawn for every renderer except 'none',
    # not only for 'gnuplot' -- confirm that is intended.
    if renderer != 'none':
        _render_gnuplot(x, y, median, mean, std_dev)
    print(f'Processed {len(middle_samples)} individual samples.')
    print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}')
|
203 |
+ |
|
204 |
if __name__ == '__main__':
    # Command-line entry point: pick a renderer and run the analysis.
    renderer_choices = ['none', 'gnuplot', 'matplotlib', 'google']
    parser = ArgumentParser(
        description='Makes histograms from raw samples or prepared buckets.')
    parser.add_argument(
        '-r', '--renderer',
        choices=renderer_choices,
        default='gnuplot',
        help='Choose a renderer: summary only, text, png, or webpage.')
    chosen = parser.parse_args().renderer
    if chosen == "matplotlib":
        # Imported only for this renderer; these imports bind plt and
        # matplotlib at module level.
        import matplotlib.pyplot as plt
        import matplotlib.ticker
    main(chosen)
|
0 | 213 |