927efbff9956d1de796c65764104c60cfd8660a0
David Blume First commit.

David Blume authored 5 years ago

1) #!/usr/bin/env python3
2) import os
3) import sys
4) from argparse import ArgumentParser
5) import math
6) import operator
7) import subprocess
8) import platform
9) import webbrowser
David Blume Add type hints.

David Blume authored 3 years ago

10) from typing import List, Tuple
David Blume First commit.

David Blume authored 5 years ago

11) 
12) 
# HTML page template for the 'google' renderer. main() writes it out line by
# line and treats three marker lines specially:
#   REPLACEME_WITH_BITRATES  - replaced by the header row and one
#                              ["<value> Pizzas", <count>] row per bucket.
#   REPLACE_ME_WITH_OVERVIEW - replaced by an <h3> heading plus the
#                              median / mean / sigma summary.
#   SET_SHOWTEXTEVERY        - substituted in place with len(x) // 6.
# NOTE(review): the chart title below hard-codes "2%", while main() trims
# outlier_percentile = 5 percent from each end -- confirm which is intended.
html = """<html>
  <head>
    <style>
      h1, h3 { text-align: center; }
    </style>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script>
  google.charts.load('current', {'packages':['corechart']});
  google.charts.setOnLoadCallback(chartLoaded);
  function chartLoaded() {
    let data = google.visualization.arrayToDataTable([
        REPLACEME_WITH_BITRATES
    ]);

    let options = {
      title: 'Bitrates (excluding top and bottom 2%)',
      width: window.innerWidth,
      height: window.innerHeight - 120,
      chartArea: { top: '100', left: '80', width: '86%' },
      hAxis: {
        showTextEvery: SET_SHOWTEXTEVERY
      },
      vAxis: { }
    };
    let chart = new google.visualization.AreaChart(document.getElementById('chart_div'));
    chart.draw(data, options);
  }
    </script>
  </head>

  <body>
    <h1>Bitrates</h1>
    REPLACE_ME_WITH_OVERVIEW
    <div id="chart_div"></div>
  </body>
</html>"""
49) 
50) 
David Blume Add type hints.

David Blume authored 3 years ago

def calculate_median_mean_stddev_from_samples(values: List[int]) -> Tuple[float, float, float]:
    """Return the median, mean, and population standard deviation of values.

    Args:
        values: The individual samples, in any order. The list is not
            modified; a sorted copy is used to locate the median.

    Returns:
        A (median, mean, std_dev) tuple. All three are 0.0 when values is
        empty. The standard deviation divides by len(values) (population
        form), not len(values) - 1.
    """
    count = len(values)
    if count == 0:
        return 0.0, 0.0, 0.0

    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(values)
    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        # Even count: average the two samples straddling the midpoint.
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    mean = sum(values) / count
    variance = sum(pow(value - mean, 2) for value in values) / count
    return median, mean, math.sqrt(variance)
72) 
73) 
David Blume Add type hints.

David Blume authored 3 years ago

def calculate_median_mean_stddev(x: List[float], y: List[int]) -> Tuple[float, float, float]:
    """Return the median, mean, and population standard deviation of a histogram.

    Args:
        x: The distinct sample values, one per bucket. The median walk
            assumes they are in ascending order.
        y: The number of samples observed for the value at the same index
            in x.

    Returns:
        A (median, mean, std_dev) tuple equal to what would be computed from
        the expanded list of individual samples. All three are 0.0 when the
        histogram holds no samples.
    """
    total = sum(y)
    if total <= 0:
        return 0.0, 0.0, 0.0

    # 0-based ranks of the middle sample(s) in the expanded, sorted sample
    # list. They coincide for an odd total and are adjacent for an even one.
    lower_rank = (total - 1) // 2
    upper_rank = total // 2
    lower_value = None
    upper_value = None
    seen = 0
    for value, count in zip(x, y):
        seen += count
        # A bucket holds rank r once the running count exceeds r.
        if lower_value is None and seen > lower_rank:
            lower_value = value
        if seen > upper_rank:
            upper_value = value
            break
    median = (lower_value + upper_value) / 2.0

    mean = sum(map(operator.mul, x, y)) / total
    weighted_squares = (pow(value - mean, 2) * count for value, count in zip(x, y))
    std_dev = math.sqrt(sum(weighted_squares) / total)
    return median, mean, std_dev
93) 
94) 
David Blume Add type hints.

David Blume authored 3 years ago

def remove_outliers_by_idx(x: List[float], y: List[int], outlier_count: int, idx: int) -> None:
    """Remove outlier_count samples from one end of the buckets, in place.

    Args:
        x: Bucket values; entries are deleted along with their counts.
        y: Sample count per bucket; modified in place.
        outlier_count: Number of samples to discard.
        idx: Which end to trim: 0 for the front, -1 for the back.

    Buckets holding fewer samples than are still owed are deleted outright;
    the next bucket then has the remainder subtracted from its count. A
    bucket that is consumed exactly is kept with a count of 0. If
    outlier_count exceeds the samples available, both lists end up empty.
    """
    removed = 0
    # `y and` stops the walk once every bucket has been consumed instead of
    # indexing into an empty list.
    while y and removed + y[idx] < outlier_count:
        removed += y[idx]
        del x[idx]
        del y[idx]
    if y and removed < outlier_count:
        y[idx] -= outlier_count - removed
104) 
105) 
David Blume Add type hints.

David Blume authored 3 years ago

def remove_outliers(x: List[float], y: List[int], outlier_percentile: int) -> None:
    """Trim outlier_percentile percent of the samples from each end, in place.

    The number of samples to drop is computed once from the untrimmed total
    (rounded down), then that same amount is removed from the front of the
    buckets and from the back.
    """
    total_samples = sum(y)
    trim = total_samples * outlier_percentile // 100
    for end in (0, -1):
        remove_outliers_by_idx(x, y, trim, end)
112) 
113) 
David Blume Add type hints.

David Blume authored 3 years ago

def acquire_data(buckets: int) -> Tuple[List[float], List[float], List[int]]:
    """Manufacture fake histogram data.

    Returns a (samples, x, y) tuple where x holds the bucket values
    10, 20, ..., buckets * 10, y holds the sample count for each bucket
    (rising by 8 per bucket over the first quarter, then falling by 2 per
    bucket), and samples is the expanded list with each x value repeated
    its y count times.
    """
    quarter = buckets // 4
    x = [10 * n for n in range(1, buckets + 1)]
    # Ramp up over the first quarter of the buckets...
    y = [8 * (n + 1) for n in range(quarter)]
    # ...then taper off across the remaining ones.
    y.extend(quarter * 8 - n * 2 for n in range(quarter, buckets))
    samples = [value for value, count in zip(x, y) for _ in range(count)]
    return samples, x, y
128) 
129) 
David Blume Add type hints.

David Blume authored 3 years ago

def main(renderer: str) -> None:
    """Build fake histogram data, trim the outliers, and render the result.

    Args:
        renderer: 'matplotlib' saves a PNG next to the script and opens it in
            a browser tab; 'google' writes an HTML page from the html
            template and opens it. Any value other than 'none' additionally
            pipes a text plot through gnuplot. A summary is always printed.

    The statistics are computed twice -- from the trimmed buckets and from
    the trimmed list of individual samples -- and asserted to agree.

    NOTE(review): the 'matplotlib' branch uses the names `plt` and
    `matplotlib`, which are not imported in the visible part of this file;
    presumably they are bound as module globals elsewhere -- confirm.
    """
    buckets = 20
    samples, x, y = acquire_data(buckets)

    outlier_percentile = 5
    outlier_count = sum(y) * outlier_percentile // 100
    # Use an explicit end index: samples[n:-n] is empty when n == 0.
    middle_samples = samples[outlier_count:len(samples) - outlier_count]
    if __debug__:
        print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, '
              f'len(middle_sample) = {len(middle_samples)}')
    remove_outliers(x, y, outlier_percentile)
    if __debug__:
        print(f'x (value) = {x}, len={len(x)}')
        print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}')

    median, mean, std_dev = calculate_median_mean_stddev(x, y)
    s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples)
    # Sanity check: the bucketed and per-sample computations must agree.
    assert math.isclose(median, s_median)
    assert math.isclose(mean, s_mean)
    assert math.isclose(std_dev, s_std_dev)

    # Output files are written next to the script, named after it.
    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    filename = os.path.basename(sys.argv[0])
    if renderer == 'matplotlib':
        plt.figure(figsize=(10, 8))
        plt.plot(x, y)
        ax = plt.gca()
        # Show thousands separators on both axes.
        ax.get_xaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda v, p: format(int(v), ',')))
        ax.get_yaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda v, p: format(int(v), ',')))
        ax.yaxis.grid(linestyle='--', alpha=0.4)
        ax.xaxis.grid(linestyle='--', alpha=0.4)
        plt.title(f"Histogram of sample data less outer {outlier_percentile}%")
        plt.figtext(0.14, 0.82, f'median = {median:,.2f}\n'
                                f'mean = {mean:,.2f} sdev={std_dev:,.2f}')
        plt.xlabel("Pizzas")
        plt.ylabel("Count of samples")
        pngpath = os.path.join(localdir, os.path.splitext(filename)[0] + '.png')
        plt.savefig(pngpath)
        print('Attempting to display %s' % (pngpath))
        webbrowser.open_new_tab('file://%s' % (pngpath))
    elif renderer == 'google':
        htmlpath = os.path.join(localdir, os.path.splitext(filename)[0] + '.html')
        with open(htmlpath, 'w') as f:
            # Copy the template through, expanding each placeholder line.
            for line in html.splitlines():
                if 'REPLACEME_WITH_BITRATES' in line:
                    f.write('["Bitrate bucket", "Count of reported rates"],\n')
                    for value, count in zip(x, y):
                        f.write(f'["{value:,} Pizzas", {count}],\n')
                elif 'REPLACE_ME_WITH_OVERVIEW' in line:
                    f.write(f'<h3>{filename} less outer {outlier_percentile}%</h3>\n')
                    f.write(f'median = {median:,.3f} Pizzas<br />\n'
                            f'mean = {mean:,.3f} &sigma;={std_dev:,.3f} Pizzas<br />\n')
                elif 'NUMBER_ROWS' in line:
                    f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n')
                elif 'SET_SHOWTEXTEVERY' in line:
                    f.write(line.replace('SET_SHOWTEXTEVERY', str(len(x) // 6)) + '\n')
                else:
                    f.write(line + '\n')
        print('Attempting to display %s' % (htmlpath))
        webbrowser.open_new_tab('file://%s' % (htmlpath))
    if renderer != 'none':
        if platform.system() == 'Darwin':
            gnuplot_path = '/usr/local/bin/gnuplot'
        else:
            gnuplot_path = '/usr/bin/gnuplot'
        # Feed the script and the inline data to gnuplot over stdin.
        with subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8') as gnuplot:
            gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n")
            gnuplot.stdin.write(f'set label "median = {median:,.2f}\\n'
                                f'mean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n')
            gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n")
            for value, count in zip(x, y):
                gnuplot.stdin.write("%f %f\n" % (value, count))
            # "e" terminates the inline data block.
            gnuplot.stdin.write("e\n")
            gnuplot.stdin.flush()
    print(f'Processed {len(middle_samples)} individual samples.')
    print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}')
209) 
210) if __name__ == '__main__':
211)     parser = ArgumentParser(description='Makes histograms from raw samples or prepared buckets.')
David Blume Minor style tweaks.

David Blume authored 5 years ago

212)     parser.add_argument('-r', '--renderer',
213)                         choices=['none', 'gnuplot', 'matplotlib', 'google'],
214)                         default='gnuplot',
215)                         help='Choose a renderer: summary only, text, png, or webpage.')