First commit.
David Blume

David Blume committed on 2019-02-03 19:18:33
Showing 2 changed files, with 282 additions and 0 deletions.

... ...
@@ -0,0 +1,70 @@
1
+# Make Chart
2
+
3
+This is a sample Python 3 script that generates a chart in a few different ways.
4
+
5
+### Getting the project
6
+
7
+You can get a copy of this project by clicking on the [ZIP](http://git.dlma.com/make_chart.git/zipball/master)
8
+or [TAR](http://git.dlma.com/make_chart.git/tarball/master) buttons near the top right of the GitList web page.
9
+
10
+You can clone from the origin with:
11
+
12
+    git clone ssh://USERNAME@dlma.com/~/git/make_chart.git
13
+
14
+### What Does it Do?
15
+
16
+You specify one of four renderers, and it analyzes (currently) some hardcoded data.
17
+
18
+* **none**: Summary only, just print the mean, median, and standard deviation.
19
+* **gnuplot**: (default) Print an ASCII art graph.
20
+* **matplotlib**: Create a PNG file suitable for static reports and email.
21
+* **google**: A Google chart webpage with responsive features.
22
+
23
+Here's the help text:
24
+
25
+    usage: make_chart.py [-h] [-r {none,gnuplot,matplotlib,google}]
26
+
27
+    Makes histograms from raw samples or prepared buckets.
28
+
29
+    optional arguments:
30
+      -h, --help            show this help message and exit
31
+      -r {none,gnuplot,matplotlib,google}, --renderer {none,gnuplot,matplotlib,google}
32
+                            Choose a renderer: summary only, text, png, or
33
+                            webpage.
34
+
35
+#### Example text rendering
36
+
37
+    40 +-------------------------------------------------------+
38
+       |      +  * * +      +      +      +      +      +      |
39
+       |        *  *                            Pizzas ***A*** |
40
+    35 |-+     *    *                                        +-|
41
+       |      A     *                median = 80.00            |
42
+    30 |-+   *       A***            mean = 81.67 sdev=38.09 +-|
43
+       |     *           A**                                   |
44
+       |    *               A***                               |
45
+    25 |-+ A                    A**                          +-|
46
+       |   *                       A***                        |
47
+    20 |-+*                            A**                   +-|
48
+       |  *                                                    |
49
+       | *                                A***                 |
50
+    15 |-*                                    A**            +-|
51
+       | *                                       A***          |
52
+       |*                                            A**       |
53
+    10 |*+                                              A    +-|
54
+       |                                                 *     |
55
+     5 |-+                                                *  +-|
56
+       |                                                   *   |
57
+       |      +      +      +      +      +      +      +   A  |
58
+     0 +-------------------------------------------------------+
59
+       20     40     60     80    100    120    140    160    180
60
+
61
+### Is it any good?
62
+
63
+[Yes](https://news.ycombinator.com/item?id=3067434).
64
+
65
+### To Do
66
+
67
+* Make it more general.
68
+* Allow data to be specified different ways.
69
+* Support other chart types.
70
+
... ...
@@ -0,0 +1,212 @@
1
+#!/usr/bin/env python3
2
+import os
3
+import sys
4
+from argparse import ArgumentParser
5
+import math
6
+import operator
7
+import subprocess
8
+import platform
9
+import webbrowser
10
+
11
+
12
# HTML page template used by the 'google' renderer. main() copies it line by
# line and substitutes the lines containing the REPLACEME_WITH_BITRATES,
# REPLACE_ME_WITH_OVERVIEW and SET_SHOWTEXTEVERY placeholders.
html = """<html>
  <head>
    <style>
      h1, h3 { text-align: center; }
    </style>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script>
  google.charts.load('current', {'packages':['corechart']});
  google.charts.setOnLoadCallback(chartLoaded);
  function chartLoaded() {
    let data = google.visualization.arrayToDataTable([
        REPLACEME_WITH_BITRATES
    ]);

    let options = {
      title: 'Bitrates (excluding top and bottom 2%)',
      width: window.innerWidth,
      height: window.innerHeight - 120,
      chartArea: { top: '100', left: '80', width: '86%' },
      hAxis: {
        showTextEvery: SET_SHOWTEXTEVERY
      },
      vAxis: { }
    };
    let chart = new google.visualization.AreaChart(document.getElementById('chart_div'));
    chart.draw(data, options);
  }
    </script>
  </head>

  <body>
    <h1>Bitrates</h1>
    REPLACE_ME_WITH_OVERVIEW
    <div id="chart_div"></div>
  </body>
</html>"""
48
+
49
+
50
def calculate_median_mean_stddev_from_samples(values):
    """Return the median, mean, and population standard deviation of values.

    Args:
        values: Numeric samples in any order. The caller's sequence is left
            untouched.

    Returns:
        A (median, mean, std_dev) tuple. For empty input the result is
        (0.0, 0, 0.0).
    """
    # Sort a copy: sorting in place would silently reorder the caller's list.
    ordered = sorted(values)
    count = len(ordered)
    if count == 0:
        return 0.0, 0, 0.0

    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        # Even number of samples: average the two central ones.
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    mean = sum(ordered) / count
    # Population variance (divide by count, not count - 1).
    mean_of_squares = sum((value - mean) ** 2 for value in ordered) / count
    return median, mean, math.sqrt(mean_of_squares)
71
+
72
+
73
def calculate_median_mean_stddev(x, y):
    """Return the median, mean, and population standard deviation of buckets.

    Args:
        x: Bucket values. The median walk assumes they are in ascending
            order, i.e. the order the expanded samples would sort into.
        y: Sample count for each value in x (same length as x).

    Returns:
        A (median, mean, std_dev) tuple that matches what
        calculate_median_mean_stddev_from_samples() returns for the expanded
        sample list. When there are no samples the result is (0.0, 0, 0.0).
    """
    total = sum(y)
    if total <= 0:
        # No samples at all: mirror the sample-based variant instead of
        # dividing by zero below.
        return 0.0, 0, 0.0

    def value_at(rank):
        """Return the value of the sample at 0-based position rank."""
        seen = 0
        for value, count in zip(x, y):
            seen += count
            # Strictly greater: the bucket holds ranks seen-count .. seen-1.
            if seen > rank:
                return value
        return None

    upper_middle = value_at(total // 2)
    if total % 2:
        median = float(upper_middle)
    else:
        # Even total: the two central samples may sit in different buckets.
        median = (value_at(total // 2 - 1) + upper_middle) / 2.0

    mean = sum(map(operator.mul, x, y)) / total
    weighted_squares = sum((value - mean) ** 2 * count for value, count in zip(x, y))
    std_dev = math.sqrt(weighted_squares / total)
    return median, mean, std_dev
92
+
93
+
94
def remove_outliers_by_idx(x, y, outlier_count, idx):
    """Remove outlier_count samples from one end of the buckets, in place.

    Args:
        x: List of bucket values; an entry is deleted together with its count
            when the whole bucket is removed.
        y: List of sample counts, parallel to x.
        outlier_count: Number of samples to discard from that end.
        idx: 0 to trim from the front of the lists, -1 to trim from the back.

    If outlier_count is at least the total number of samples, both lists end
    up empty.
    """
    remaining = outlier_count
    # Drop every bucket that is consumed entirely, including one that is
    # consumed exactly, so no zero-count bucket is left at the edge.
    while y and remaining > 0 and y[idx] <= remaining:
        remaining -= y[idx]
        del x[idx]
        del y[idx]
    # Shave whatever is still owed off the first bucket that survives.
    if y and remaining > 0:
        y[idx] -= remaining
103
+
104
+
105
def remove_outliers(x, y, outlier_percentile):
    """Trim outlier_percentile percent of the samples off each end, in place.

    The number of samples to trim is computed once from the untrimmed total
    of y, then that many samples are removed first from the front of the
    buckets and then from the back.
    """
    per_side = (sum(y) * outlier_percentile) // 100
    for edge in (0, -1):
        remove_outliers_by_idx(x, y, per_side, edge)
111
+
112
+
113
def acquire_data(buckets):
    """Build the hardcoded sample data set.

    Returns a (samples, x, y) tuple: x holds the bucket values 10, 20, ...,
    y holds the sample count of each bucket (rising by 8 over the first
    quarter of the buckets, then falling by 2 per bucket), and samples is the
    flat list obtained by repeating every x value y times.
    """
    quarter = buckets // 4
    x = [10 * step for step in range(1, buckets + 1)]
    rising = [8 * (i + 1) for i in range(quarter)]
    falling = [8 * quarter - 2 * i for i in range(quarter, buckets)]
    y = rising + falling

    samples = []
    for value, count in zip(x, y):
        samples.extend([value] * count)
    return samples, x, y
126
+
127
+
128
def _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile, png_path):
    """Plot the buckets with matplotlib, save the figure to png_path, open it."""
    # Imported here rather than relying on the __main__ guard, so main() also
    # works when this module is imported, and so the other renderers do not
    # need matplotlib installed.
    import matplotlib.pyplot as plt
    import matplotlib.ticker

    def with_thousands(value, _pos):
        return format(int(value), ',')

    plt.figure(figsize=(10, 8))
    plt.plot(x, y)
    ax = plt.gca()
    ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(with_thousands))
    ax.get_yaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(with_thousands))
    ax.yaxis.grid(linestyle='--', alpha=0.4)
    ax.xaxis.grid(linestyle='--', alpha=0.4)
    plt.title(f"Histogram of sample data less outter {outlier_percentile}%")
    plt.figtext(0.14, 0.82, f'median = {median:,.2f}\nmean = {mean:,.2f} sdev={std_dev:,.2f}')
    plt.xlabel("Pizzas")
    plt.ylabel("Count of samples")
    plt.savefig(png_path)
    print('Attemtping to display %s' % (png_path))
    webbrowser.open_new_tab('file://%s' % (png_path))


def _render_google(x, y, median, mean, std_dev, outlier_percentile, filename, html_path):
    """Fill in the html template, write it to html_path, and open it."""
    with open(html_path, 'w') as f:
        for line in html.splitlines():
            if 'REPLACEME_WITH_BITRATES' in line:
                f.write('["Bitrate bucket", "Count of reported rates"],\n')
                for value, count in zip(x, y):
                    f.write(f'["{value:,} Pizzas", {count}],\n')
            elif 'REPLACE_ME_WITH_OVERVIEW' in line:
                f.write(f'<h3>{filename} less outter {outlier_percentile}%</h3>\n')
                f.write(f'median = {median:,.3f} Pizzas<br />\nmean = {mean:,.3f} &sigma;={std_dev:,.3f} Pizzas<br />\n')
            elif 'NUMBER_ROWS' in line:
                f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n')
            elif 'SET_SHOWTEXTEVERY' in line:
                # Never emit 0 when there are fewer than six buckets.
                # NOTE(review): Google Charts documents 1 as "every label";
                # confirm how it treats 0.
                label_step = max(1, len(x) // 6)
                f.write(line.replace('SET_SHOWTEXTEVERY', str(label_step)) + '\n')
            else:
                f.write(line + '\n')
    print('Attemtping to display %s' % (html_path))
    webbrowser.open_new_tab('file://%s' % (html_path))


def _render_gnuplot(x, y, median, mean, std_dev):
    """Pipe the buckets to gnuplot, which draws an ASCII chart on the terminal."""
    if platform.system() == 'Darwin':
        gnuplot_path = '/usr/local/bin/gnuplot'
    else:
        gnuplot_path = '/usr/bin/gnuplot'
    with subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8') as gnuplot:
        gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n")
        gnuplot.stdin.write(f'set label "median = {median:,.2f}\\nmean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n')
        gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n")
        for value, count in zip(x, y):
            gnuplot.stdin.write("%f %f\n" % (value, count))
        gnuplot.stdin.write("e\n")
        gnuplot.stdin.flush()


def main(renderer):
    """Analyze the hardcoded sample data and render it.

    Args:
        renderer: One of 'none', 'gnuplot', 'matplotlib' or 'google'.

    The outer 5% of the samples are dropped from each end, the statistics are
    computed both from the buckets and from the raw samples (and asserted to
    agree), the requested chart is produced, and a summary is printed. Output
    files are written next to the script, named after it.
    """
    buckets = 20
    samples, x, y = acquire_data(buckets)

    outlier_percentile = 5
    outlier_count = sum(y) * outlier_percentile // 100
    # Explicit end index: samples[n:-n] would be empty when n == 0.
    middle_samples = samples[outlier_count:len(samples) - outlier_count]
    if __debug__:
        print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, len(middle_sample) = {len(middle_samples)}')
    remove_outliers(x, y, outlier_percentile)
    if __debug__:
        print(f'x (value) = {x}, len={len(x)}')
        print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}')

    # Cross-check the bucket-based statistics against the raw samples.
    median, mean, std_dev = calculate_median_mean_stddev(x, y)
    s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples)
    assert math.isclose(median, s_median)
    assert math.isclose(mean, s_mean)
    assert math.isclose(std_dev, s_std_dev)

    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    filename = os.path.basename(sys.argv[0])
    stem = os.path.splitext(filename)[0]
    if renderer == 'matplotlib':
        _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile,
                           os.path.join(localdir, stem + '.png'))
    elif renderer == 'google':
        _render_google(x, y, median, mean, std_dev, outlier_percentile,
                       filename, os.path.join(localdir, stem + '.html'))
    # NOTE(review): the text chart is drawn for every renderer except 'none',
    # i.e. also after the matplotlib and google output -- confirm intended.
    if renderer != 'none':
        _render_gnuplot(x, y, median, mean, std_dev)
    print(f'Processed {len(middle_samples)} individual samples.')
    print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}')
203
+
204
if __name__ == '__main__':
    # Command-line entry point: pick a renderer and run the analysis.
    cli = ArgumentParser(description='Makes histograms from raw samples or prepared buckets.')
    cli.add_argument(
        '-r', '--renderer',
        choices=['none', 'gnuplot', 'matplotlib', 'google'],
        default='gnuplot',
        help='Choose a renderer: summary only, text, png, or webpage.',
    )
    chosen_renderer = cli.parse_args().renderer
    if chosen_renderer == "matplotlib":
        # Imported only on request so the other renderers run without
        # matplotlib installed; the names are bound at module level.
        import matplotlib.pyplot as plt
        import matplotlib.ticker
    main(chosen_renderer)
0 213