David Blume's GitList
Repositories
make_chart.git
Code
Commits
Branches
Tags
Search
Tree:
927efbf
Branches
Tags
main
make_chart.git
make_chart.py
Add type hints.
David Blume
committed
927efbf
at 2020-12-30 22:07:01
make_chart.py
Blame
History
Raw
#!/usr/bin/env python3 import os import sys from argparse import ArgumentParser import math import operator import subprocess import platform import webbrowser from typing import List, Tuple html = """<html> <head> <style> h1, h3 { text-align: center; } </style> <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script> <script> google.charts.load('current', {'packages':['corechart']}); google.charts.setOnLoadCallback(chartLoaded); function chartLoaded() { let data = google.visualization.arrayToDataTable([ REPLACEME_WITH_BITRATES ]); let options = { title: 'Bitrates (excluding top and bottom 2%)', width: window.innerWidth, height: window.innerHeight - 120, chartArea: { top: '100', left: '80', width: '86%' }, hAxis: { showTextEvery: SET_SHOWTEXTEVERY }, vAxis: { } }; let chart = new google.visualization.AreaChart(document.getElementById('chart_div')); chart.draw(data, options); } </script> </head> <body> <h1>Bitrates</h1> REPLACE_ME_WITH_OVERVIEW <div id="chart_div"></div> </body> </html>""" def calculate_median_mean_stddev_from_samples(values: List[int]) -> Tuple[float, float, float]: """ returns the median, mean, and standard deviation of values """ # Calculate the median values.sort() count = len(values) median = 0.0 if count % 2: median = float(values[count//2]) elif count > 0: median = (values[count // 2 - 1] + values[count // 2]) / 2.0 # Calculate the mean and standard deviation if count > 0: mean = sum(values) / len(values) squares_of_diffs = map(lambda x: pow(x - mean, 2), values) mean_of_squares = sum(squares_of_diffs) / len(values) else: mean = 0 mean_of_squares = 0 std_dev = math.sqrt(mean_of_squares) return median, mean, std_dev def calculate_median_mean_stddev(x: List[float], y: List[int]) -> Tuple[float, float, float]: """ returns the median, mean, and standard deviation given an array of values, x, and an array of counts of those values, y. """ # Median: Walk the sample counts (y) halfway to the sum of sample counts. 
median_pos = sum(y) // 2 cur_pos = 0 median = None mean = None for i in range(len(y)): cur_pos += y[i] if cur_pos >= median_pos: median = x[i] break # mean = sum(map(lambda x: x[0] * x[1], zip(x,y))) / sum(y) mean = sum(map(operator.mul, x, y)) / sum(y) squares_of_diffs = map(lambda x, y: pow(x - mean, 2) * y, x, y) mean_of_squares = sum(squares_of_diffs) / sum(y) std_dev = math.sqrt(mean_of_squares) return median, mean, std_dev def remove_outliers_by_idx(x: List[float], y: List[int], outlier_count: int, idx: int) -> None: """Removes outlier_count samples from idx side of the buckets.""" cur_count = 0 while cur_count + y[idx] < outlier_count: cur_count += y[idx] del x[idx] del y[idx] if cur_count < outlier_count: y[idx] = y[idx] - (outlier_count - cur_count) def remove_outliers(x: List[float], y: List[int], outlier_percentile: int) -> None: """Removes outlier_percentile samples from the beginning and end of the sample set.""" outlier_count = sum(y) * outlier_percentile // 100 remove_outliers_by_idx(x, y, outlier_count, 0) remove_outliers_by_idx(x, y, outlier_count, -1) def acquire_data(buckets: int) -> Tuple[List[float], List[float], List[int]]: """Manufactures some fake data.""" x = [i*10 for i in range(1,buckets+1)] y = [] for i in range(0, buckets // 4): y.append((i+1) * 8) for i in range(buckets // 4, buckets): y.append((((buckets // 4) + 0) * 8) - (i * 2)) # print(f'x (value) = {x}, len={len(x)}') # print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}') samples = [] for i in range(len(x)): samples += [x[i]] * y[i] return samples, x, y def main(renderer: str) -> None: buckets = 20 samples, x, y = acquire_data(buckets) outlier_percentile = 5 outlier_count = sum(y) * outlier_percentile // 100 middle_samples = samples[outlier_count:-outlier_count] if __debug__: print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, ' f'len(middle_sample) = {len(middle_samples)}') remove_outliers(x, y, outlier_percentile) if __debug__: print(f'x (value) = {x}, 
len={len(x)}') print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}') median, mean, std_dev = calculate_median_mean_stddev(x, y) s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples) assert(math.isclose(median, s_median)) assert(math.isclose(mean, s_mean)) assert(math.isclose(std_dev, s_std_dev)) localdir = os.path.abspath(os.path.dirname(sys.argv[0])) filename = os.path.basename(sys.argv[0]) if renderer == 'matplotlib': plt.figure(figsize=(10,8)) plt.plot(x, y) ax = plt.gca() ax.get_xaxis().set_major_formatter( matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ','))) ax.get_yaxis().set_major_formatter( matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ','))) ax.yaxis.grid(linestyle='--', alpha=0.4) ax.xaxis.grid(linestyle='--', alpha=0.4) plt.title(f"Histogram of sample data less outter {outlier_percentile}%") plt.figtext(0.14, 0.82, f'median = {median:,.2f}\n' f'mean = {mean:,.2f} sdev={std_dev:,.2f}') plt.xlabel("Pizzas") plt.ylabel("Count of samples") pngname = os.path.splitext(filename)[0] + '.png' plt.savefig(os.path.join(localdir, pngname)) print('Attemtping to display %s' % (os.path.join(localdir, pngname))) webbrowser.open_new_tab('file://%s' % (os.path.join(localdir, pngname))) elif renderer == 'google': localdir = os.path.abspath(os.path.dirname(sys.argv[0])) htmlname = os.path.splitext(filename)[0] + '.html' with open(os.path.join(localdir, htmlname), 'w') as f: for line in html.splitlines(): if line.find('REPLACEME_WITH_BITRATES') != -1: f.write('["Bitrate bucket", "Count of reported rates"],\n') for i in range(len(x)): f.write(f'["{x[i]:,} Pizzas", {y[i]}],\n') elif line.find('REPLACE_ME_WITH_OVERVIEW') != -1: f.write(f'<h3>{filename} less outter {outlier_percentile}%</h3>\n') f.write(f'median = {median:,.3f} Pizzas<br />\n' f'mean = {mean:,.3f} σ={std_dev:,.3f} Pizzas<br />\n') elif line.find('NUMBER_ROWS') != -1: f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n') elif 
line.find('SET_SHOWTEXTEVERY') != -1: f.write(line.replace('SET_SHOWTEXTEVERY', str(len(x) // 6)) + '\n'); else: f.write(line + '\n') print('Attemtping to display %s' % (os.path.join(localdir, htmlname))) webbrowser.open_new_tab('file://%s' % (os.path.join(localdir, htmlname))) if renderer != 'none': if platform.system() == 'Darwin': gnuplot_path = '/usr/local/bin/gnuplot' else: gnuplot_path = '/usr/bin/gnuplot' with subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8') as gnuplot: gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n") gnuplot.stdin.write(f'set label "median = {median:,.2f}\\n' f'mean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n') gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n") for i, j in zip(x, y): gnuplot.stdin.write("%f %f\n" % (i, j)) gnuplot.stdin.write("e\n") gnuplot.stdin.flush() print(f'Processed {len(middle_samples)} individual samples.') print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}') if __name__ == '__main__': parser = ArgumentParser(description='Makes histograms from raw samples or prepared buckets.') parser.add_argument('-r', '--renderer', choices=['none', 'gnuplot', 'matplotlib', 'google'], default='gnuplot', help='Choose a renderer: summary only, text, png, or webpage.') args = parser.parse_args() if args.renderer == "matplotlib": import matplotlib.pyplot as plt import matplotlib.ticker main(args.renderer)