#!/usr/bin/env python3
import os
import sys
from argparse import ArgumentParser
import math
import operator
import subprocess
import platform
import webbrowser
from typing import List, Tuple
# Page template used by the 'google' renderer in main(). main() copies it to
# the output file line by line; a line containing a known placeholder is
# replaced with generated content, every other line is written unchanged.
# NOTE(review): REPLACE_ME_WITH_OVERVIEW is the only placeholder present in
# the template as it stands here, while main() also searches for
# REPLACEME_WITH_BITRATES, NUMBER_ROWS and SET_SHOWTEXTEVERY -- confirm the
# template is complete and that the placeholder spellings match.
html = """
Bitrates
REPLACE_ME_WITH_OVERVIEW
"""
def calculate_median_mean_stddev_from_samples(values: List[int]) -> Tuple[float, float, float]:
    """Return the median, mean, and population standard deviation of values.

    Args:
        values: The raw samples, in any order. The sequence is not modified.

    Returns:
        A ``(median, mean, std_dev)`` tuple of floats. For an even number of
        samples the median is the average of the two middle samples. An empty
        input yields ``(0.0, 0.0, 0.0)``.
    """
    count = len(values)
    if count == 0:
        return 0.0, 0.0, 0.0

    # Work on a sorted copy so the caller's list keeps its original order.
    ordered = sorted(values)

    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    mean = sum(ordered) / count
    # Population variance: divide by the sample count, not count - 1.
    variance = sum((value - mean) ** 2 for value in ordered) / count
    return median, mean, math.sqrt(variance)
def calculate_median_mean_stddev(x: List[float], y: List[int]) -> Tuple[float, float, float]:
    """Return the median, mean, and population standard deviation of a histogram.

    The result matches what calculate_median_mean_stddev_from_samples() gives
    for the expanded sample list (each x[i] repeated y[i] times).

    Args:
        x: Bucket values. The median walk visits buckets in list order, so
            the values must be in ascending order for the median to be right.
        y: Number of samples in each bucket; y[i] is the count for x[i].

    Returns:
        A ``(median, mean, std_dev)`` tuple of floats. When the total sample
        count is even, the median is the average of the two middle samples,
        which may sit in different buckets. A histogram with no samples
        yields ``(0.0, 0.0, 0.0)``.
    """
    total = sum(y)
    if total <= 0:
        # No samples: avoid dividing by zero and mirror the sample-based helper.
        return 0.0, 0.0, 0.0

    # Zero-based ranks of the middle sample(s); they coincide when total is odd.
    lower_rank = (total - 1) // 2
    upper_rank = total // 2

    lower_value = None
    upper_value = None
    seen = 0
    for value, count in zip(x, y):
        seen += count
        # A bucket holds rank r once the running count exceeds r.
        if lower_value is None and seen > lower_rank:
            lower_value = value
        if seen > upper_rank:
            upper_value = value
            break
    median = (lower_value + upper_value) / 2.0

    mean = sum(map(operator.mul, x, y)) / total
    # Weight each squared deviation by the number of samples in its bucket.
    variance = sum((value - mean) ** 2 * count for value, count in zip(x, y)) / total
    return median, mean, math.sqrt(variance)
def remove_outliers_by_idx(x: List[float], y: List[int], outlier_count: int, idx: int) -> None:
    """Trim outlier_count samples off one end of the histogram, in place.

    idx selects the end to trim: 0 for the front of the lists, -1 for the
    back. Buckets that hold fewer samples than are still to be removed are
    deleted from both x and y; the next bucket then has the remainder
    subtracted from its count. A bucket whose count exactly equals the
    remainder is kept with a count of zero.
    """
    removed = 0
    while True:
        bucket_size = y[idx]
        if removed + bucket_size >= outlier_count:
            break
        # The whole bucket is an outlier: drop it from both lists.
        removed += bucket_size
        del x[idx]
        del y[idx]

    still_to_remove = outlier_count - removed
    if still_to_remove > 0:
        y[idx] -= still_to_remove
def remove_outliers(x: List[float], y: List[int], outlier_percentile: int) -> None:
    """Trim outlier_percentile percent of the samples from each end, in place.

    The number of samples to drop per side is computed once from the
    untrimmed total (rounded down), then removed first from the front and
    then from the back of the bucket lists.
    """
    per_side = (sum(y) * outlier_percentile) // 100
    for side in (0, -1):
        remove_outliers_by_idx(x, y, per_side, side)
def acquire_data(buckets: int) -> Tuple[List[float], List[float], List[int]]:
    """Generate a synthetic histogram and its expanded sample list.

    Bucket values are 10, 20, ..., 10 * buckets. Counts climb in steps of 8
    over the first quarter of the buckets and then fall by 2 per bucket.

    Returns:
        ``(samples, x, y)`` where x holds the bucket values, y the count for
        each bucket, and samples every x[i] repeated y[i] times.
    """
    quarter = buckets // 4
    x = [10 * n for n in range(1, buckets + 1)]

    rising = [8 * (i + 1) for i in range(quarter)]
    falling = [quarter * 8 - 2 * i for i in range(quarter, buckets)]
    y = rising + falling

    samples = []
    for value, count in zip(x, y):
        samples.extend([value] * count)
    return samples, x, y
def main(renderer: str) -> None:
    """Build the fake histogram, summarise it, and display it.

    The outer 5% of samples are trimmed from each end, the median / mean /
    standard deviation are computed both from the buckets and from the raw
    samples (and asserted to agree), and the trimmed histogram is rendered.

    Args:
        renderer: One of
            'none'       -- print the summary only;
            'gnuplot'    -- text plot in the terminal;
            'matplotlib' -- save a PNG next to the script and open it;
            'google'     -- write an HTML page next to the script and open it.
            The gnuplot text plot is drawn for every renderer except 'none'.

    Raises:
        AssertionError: If the bucket-based and sample-based statistics
            disagree (only when assertions are enabled).
    """
    buckets = 20
    samples, x, y = acquire_data(buckets)
    outlier_percentile = 5
    outlier_count = sum(y) * outlier_percentile // 100
    # Use an explicit end index: samples[n:-n] would be empty when n == 0.
    middle_samples = samples[outlier_count:len(samples) - outlier_count]
    if __debug__:
        print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, '
              f'len(middle_sample) = {len(middle_samples)}')
    remove_outliers(x, y, outlier_percentile)
    if __debug__:
        print(f'x (value) = {x}, len={len(x)}')
        print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}')

    # Cross-check the bucket-based statistics against the raw samples.
    median, mean, std_dev = calculate_median_mean_stddev(x, y)
    s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples)
    assert math.isclose(median, s_median)
    assert math.isclose(mean, s_mean)
    assert math.isclose(std_dev, s_std_dev)

    # Output files are written next to the script, named after it.
    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    filename = os.path.basename(sys.argv[0])

    if renderer == 'matplotlib':
        # Imported here so main() works when called from another module and
        # matplotlib is only required when this renderer is selected.
        import matplotlib.pyplot as plt
        import matplotlib.ticker

        plt.figure(figsize=(10, 8))
        plt.plot(x, y)
        ax = plt.gca()
        # Show axis ticks as integers with thousands separators.
        ax.get_xaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
        ax.get_yaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
        ax.yaxis.grid(linestyle='--', alpha=0.4)
        ax.xaxis.grid(linestyle='--', alpha=0.4)
        plt.title(f"Histogram of sample data less outter {outlier_percentile}%")
        plt.figtext(0.14, 0.82, f'median = {median:,.2f}\n'
                                f'mean = {mean:,.2f} sdev={std_dev:,.2f}')
        plt.xlabel("Pizzas")
        plt.ylabel("Count of samples")
        pngname = os.path.splitext(filename)[0] + '.png'
        plt.savefig(os.path.join(localdir, pngname))
        print('Attemtping to display %s' % (os.path.join(localdir, pngname)))
        webbrowser.open_new_tab('file://%s' % (os.path.join(localdir, pngname)))
    elif renderer == 'google':
        htmlname = os.path.splitext(filename)[0] + '.html'
        # Explicit encoding so the output does not depend on the platform's
        # default (the script name written below may be non-ASCII).
        with open(os.path.join(localdir, htmlname), 'w', encoding='utf-8') as f:
            # Copy the template, replacing each placeholder line as we go.
            for line in html.splitlines():
                if 'REPLACEME_WITH_BITRATES' in line:
                    f.write('["Bitrate bucket", "Count of reported rates"],\n')
                    for value, count in zip(x, y):
                        f.write(f'["{value:,} Pizzas", {count}],\n')
                elif 'REPLACE_ME_WITH_OVERVIEW' in line:
                    # <br> keeps each overview line on its own row in the
                    # browser; &sigma; keeps the markup pure ASCII.
                    f.write(f'{filename} less outter {outlier_percentile}%<br>\n')
                    f.write(f'median = {median:,.3f} Pizzas<br>\n'
                            f'mean = {mean:,.3f} &sigma;={std_dev:,.3f} Pizzas<br>\n')
                elif 'NUMBER_ROWS' in line:
                    f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n')
                elif 'SET_SHOWTEXTEVERY' in line:
                    f.write(line.replace('SET_SHOWTEXTEVERY', str(len(x) // 6)) + '\n')
                else:
                    f.write(line + '\n')
        print('Attemtping to display %s' % (os.path.join(localdir, htmlname)))
        webbrowser.open_new_tab('file://%s' % (os.path.join(localdir, htmlname)))

    # NOTE(review): this is a separate `if`, so the text plot is also drawn
    # after the matplotlib and google renderers -- confirm that is intended.
    if renderer != 'none':
        import shutil

        # Prefer whatever gnuplot is on PATH; fall back to the usual
        # per-platform install locations.
        gnuplot_path = shutil.which('gnuplot')
        if gnuplot_path is None:
            if platform.system() == 'Darwin':
                gnuplot_path = '/usr/local/bin/gnuplot'
            else:
                gnuplot_path = '/usr/bin/gnuplot'
        with subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8') as gnuplot:
            # The backticks are evaluated by gnuplot to size the plot to the terminal.
            gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n")
            gnuplot.stdin.write(f'set label "median = {median:,.2f}\\n'
                                f'mean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n')
            gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n")
            for i, j in zip(x, y):
                gnuplot.stdin.write("%f %f\n" % (i, j))
            # "e" ends the inline data block started by plot '-'.
            gnuplot.stdin.write("e\n")
            gnuplot.stdin.flush()

    print(f'Processed {len(middle_samples)} individual samples.')
    print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}')
if __name__ == '__main__':
    # Command-line entry point: pick a renderer and run the demo.
    cli = ArgumentParser(
        description='Makes histograms from raw samples or prepared buckets.')
    cli.add_argument(
        '-r', '--renderer',
        default='gnuplot',
        choices=['none', 'gnuplot', 'matplotlib', 'google'],
        help='Choose a renderer: summary only, text, png, or webpage.',
    )
    chosen_renderer = cli.parse_args().renderer

    # matplotlib is optional; load it only when that renderer is requested.
    if chosen_renderer == "matplotlib":
        import matplotlib.pyplot as plt
        import matplotlib.ticker

    main(chosen_renderer)