make_chart.py (927efbf) - make_chart.git

make_chart.py

#!/usr/bin/env python3
import math
import operator
import os
import platform
import shutil
import subprocess
import sys
import webbrowser
from argparse import ArgumentParser
from pathlib import Path
from typing import List, Tuple

# HTML page template for the 'google' renderer.  main() copies it line by
# line and replaces any line that contains one of these markers:
#   REPLACEME_WITH_BITRATES   -> the header row plus one data row per bucket
#   REPLACE_ME_WITH_OVERVIEW  -> an <h3> title and the median/mean/sigma text
# main() also substitutes NUMBER_ROWS and SET_SHOWTEXTEVERY in place when a
# template line contains them; this template uses neither.
html = """<html>
  <head>
    <style>
      h1, h3 { text-align: center; }
    </style>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script>
  google.charts.load('current', {'packages':['corechart']});
  google.charts.setOnLoadCallback(chartLoaded);
  function chartLoaded() {
    let data = google.visualization.arrayToDataTable([
        REPLACEME_WITH_BITRATES
    ]);
    let options = {
      legend: { position: 'none' },
      height: 500,
    };
    let chart = new google.visualization.ColumnChart(
        document.getElementById('chart_div'));
    chart.draw(data, options);
  }
    </script>
  </head>
  <body>
    <h1>Bitrates</h1>
    REPLACE_ME_WITH_OVERVIEW
    <div id="chart_div"></div>
  </body>
</html>"""

def calculate_median_mean_stddev_from_samples(values: List[int]) -> Tuple[float, float, float]:
    """Return the median, mean, and standard deviation of values.

    The standard deviation is the population form (the squared deviations
    are divided by N, not N - 1).  The caller's list is not modified; the
    median is taken from a sorted copy.  An empty list yields
    (0.0, 0.0, 0.0).
    """
    count = len(values)
    if count == 0:
        return 0.0, 0.0, 0.0

    # Work on a sorted copy so the caller's ordering is preserved.
    ordered = sorted(values)
    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        # Even count: average the two samples that straddle the middle.
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    mean = sum(ordered) / count
    variance = sum((value - mean) ** 2 for value in ordered) / count
    return median, mean, math.sqrt(variance)

def calculate_median_mean_stddev(x: List[float], y: List[int]) -> Tuple[float, float, float]:
    """Return the median, mean, and standard deviation of a histogram.

    x holds the bucket values and y the number of samples in each bucket;
    the two lists must have the same length.  x is expected to be in
    ascending order, since the median is found by walking the buckets in
    list order.  The standard deviation is the population form (divide by
    the total sample count).

    The median matches what the expanded sample list would give: the middle
    sample for an odd total, the average of the two middle samples for an
    even total.  A histogram with no samples yields (0.0, 0.0, 0.0).

    Raises:
        ValueError: if x and y differ in length.
    """
    if len(x) != len(y):
        raise ValueError(f'x and y must have the same length '
                         f'({len(x)} != {len(y)})')
    total = sum(y)
    if total <= 0:
        return 0.0, 0.0, 0.0

    # 0-based ranks of the sample(s) that define the median.
    upper_rank = total // 2
    lower_rank = upper_rank if total % 2 else upper_rank - 1
    lower = upper = None
    seen = 0  # number of samples in the buckets walked so far
    for value, count in zip(x, y):
        seen += count
        # A bucket contains rank r once the running count exceeds r.
        if lower is None and seen > lower_rank:
            lower = value
        if seen > upper_rank:
            upper = value
            break
    median = (lower + upper) / 2.0

    mean = sum(value * count for value, count in zip(x, y)) / total
    variance = sum((value - mean) ** 2 * count
                   for value, count in zip(x, y)) / total
    return median, mean, math.sqrt(variance)

def remove_outliers_by_idx(x: List[float], y: List[int], outlier_count: int, idx: int) -> None:
    """Remove outlier_count samples from one end of the buckets, in place.

    x holds the bucket values and y the sample count of each bucket.  idx
    selects the end to trim: 0 for the front, -1 for the back.  Buckets that
    consist entirely of outliers are deleted from both lists; the bucket
    that is only partly consumed has its count reduced.

    If outlier_count is at least the total number of samples, every bucket
    is removed and both lists end up empty.  A count of zero or less leaves
    the lists unchanged.
    """
    remaining = outlier_count
    # Drop whole buckets while they still fit inside the outlier budget.
    # Using <= (not <) means a bucket consumed exactly is removed rather
    # than left behind with a count of zero.
    while y and remaining > 0 and y[idx] <= remaining:
        remaining -= y[idx]
        del x[idx]
        del y[idx]
    # Take what is left of the budget from the new edge bucket.
    if y and remaining > 0:
        y[idx] -= remaining

def remove_outliers(x: List[float], y: List[int], outlier_percentile: int) -> None:
    """Trim outlier_percentile percent of the samples from each end.

    x holds the bucket values and y the per-bucket sample counts; both
    lists are modified in place.  The number of samples trimmed per side is
    the total sample count times outlier_percentile / 100, rounded down,
    and it is computed once, before either end is trimmed.
    """
    per_side = sum(y) * outlier_percentile // 100
    # 0 trims the front of the bucket lists, -1 trims the back.
    for edge in (0, -1):
        remove_outliers_by_idx(x, y, per_side, edge)

def acquire_data(buckets: int) -> Tuple[List[float], List[float], List[int]]:
    """Build a synthetic histogram with `buckets` buckets.

    Returns (samples, x, y): x are the bucket values 10, 20, ...,
    10 * buckets; y are the per-bucket sample counts; samples is the
    histogram expanded into one entry per sample, in bucket order.

    Counts climb in steps of 8 over the first quarter of the buckets and
    drop by 2 per bucket over the rest.
    """
    quarter = buckets // 4
    values = [10 * k for k in range(1, buckets + 1)]
    rising = [8 * (k + 1) for k in range(quarter)]
    falling = [quarter * 8 - 2 * k for k in range(quarter, buckets)]
    counts = rising + falling

    samples = []
    for value, count in zip(values, counts):
        samples.extend([value] * count)
    return samples, values, counts

def _open_in_browser(path: str) -> None:
    """Announce path on stdout and open it in a new browser tab."""
    print('Attemtping to display %s' % (path))
    # as_uri() yields a properly escaped file:// URL (path must be absolute).
    webbrowser.open_new_tab(Path(path).as_uri())


def _render_matplotlib(x: List[float], y: List[int], median: float, mean: float,
                       std_dev: float, outlier_percentile: int, png_path: str) -> None:
    """Plot the buckets with matplotlib, save the figure to png_path, open it."""
    # Imported here so main() works no matter how the module was loaded, and
    # so matplotlib is only required when this renderer is selected.
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter

    def thousands(value, _pos):
        return format(int(value), ',')

    plt.figure(figsize=(10, 8))
    plt.plot(x, y)
    ax = plt.gca()
    # One formatter instance per axis.
    ax.get_xaxis().set_major_formatter(FuncFormatter(thousands))
    ax.get_yaxis().set_major_formatter(FuncFormatter(thousands))
    ax.yaxis.grid(linestyle='--', alpha=0.4)
    ax.xaxis.grid(linestyle='--', alpha=0.4)
    plt.title(f"Histogram of sample data less outter {outlier_percentile}%")
    plt.figtext(0.14, 0.82, f'median = {median:,.2f}\n'
                            f'mean = {mean:,.2f} sdev={std_dev:,.2f}')
    plt.xlabel("Pizzas")
    plt.ylabel("Count of samples")
    plt.savefig(png_path)
    _open_in_browser(png_path)


def _render_google(x: List[float], y: List[int], median: float, mean: float,
                   std_dev: float, outlier_percentile: int, filename: str,
                   html_path: str) -> None:
    """Fill in the module-level html template, write it to html_path, open it."""
    with open(html_path, 'w', encoding='utf-8') as f:
        for line in html.splitlines():
            if 'REPLACEME_WITH_BITRATES' in line:
                # The marker line is replaced by the header row and the data rows.
                f.write('["Bitrate bucket", "Count of reported rates"],\n')
                for value, count in zip(x, y):
                    f.write(f'["{value:,} Pizzas", {count}],\n')
            elif 'REPLACE_ME_WITH_OVERVIEW' in line:
                f.write(f'<h3>{filename} less outter {outlier_percentile}%</h3>\n')
                f.write(f'median = {median:,.3f} Pizzas<br />\n'
                        f'mean = {mean:,.3f} &sigma;={std_dev:,.3f} Pizzas<br />\n')
            elif 'NUMBER_ROWS' in line:
                f.write(line.replace('NUMBER_ROWS', str(len(x))) + '\n')
            elif 'SET_SHOWTEXTEVERY' in line:
                f.write(line.replace('SET_SHOWTEXTEVERY', str(len(x) // 6)) + '\n')
            else:
                f.write(line + '\n')
    _open_in_browser(html_path)


def _render_gnuplot(x: List[float], y: List[int], median: float, mean: float,
                    std_dev: float) -> None:
    """Draw the buckets as a text plot in the terminal by piping to gnuplot."""
    # Prefer whatever gnuplot is on PATH; otherwise fall back to the
    # conventional install location for the platform.
    gnuplot_path = shutil.which('gnuplot')
    if gnuplot_path is None:
        if platform.system() == 'Darwin':
            gnuplot_path = '/usr/local/bin/gnuplot'
        else:
            gnuplot_path = '/usr/bin/gnuplot'
    with subprocess.Popen([gnuplot_path], stdin=subprocess.PIPE, encoding='utf8') as gnuplot:
        # The backticks are expanded by gnuplot: size the plot to the terminal.
        gnuplot.stdin.write("set term dumb `tput cols` `tput lines`*2/3\n")
        gnuplot.stdin.write(f'set label "median = {median:,.2f}\\n'
                            f'mean = {mean:,.2f} sdev={std_dev:,.2f}" at graph 0.03, 0.9\n')
        gnuplot.stdin.write("plot '-' using 1:2 title 'Pizzas' with linespoints \n")
        for value, count in zip(x, y):
            gnuplot.stdin.write("%f %f\n" % (value, count))
        # 'e' ends the inline data block started by plot '-'.
        gnuplot.stdin.write("e\n")
        gnuplot.stdin.flush()


def main(renderer: str) -> None:
    """Generate sample data, trim outliers, chart it, and print a summary.

    renderer is one of 'none', 'gnuplot', 'matplotlib' or 'google'.
    'matplotlib' writes a PNG and 'google' an HTML page next to the script
    (named after it) and opens the result in a browser.  Every renderer
    except 'none' also draws a text plot in the terminal with gnuplot.

    The statistics are computed twice, from the trimmed buckets and from
    the trimmed raw samples, and the two results are asserted to agree.
    """
    buckets = 20
    samples, x, y = acquire_data(buckets)

    outlier_percentile = 5
    outlier_count = sum(y) * outlier_percentile // 100
    # Use an explicit end index: samples[n:-n] would be empty for n == 0.
    middle_samples = samples[outlier_count:len(samples) - outlier_count]
    if __debug__:
        print(f'len(samples) = {len(samples)}, outlier_count={outlier_count}, '
              f'len(middle_sample) = {len(middle_samples)}')
    remove_outliers(x, y, outlier_percentile)
    if __debug__:
        print(f'x (value) = {x}, len={len(x)}')
        print(f'y (sample count) = {y}, len={len(y)}, sum={sum(y)}')

    median, mean, std_dev = calculate_median_mean_stddev(x, y)
    s_median, s_mean, s_std_dev = calculate_median_mean_stddev_from_samples(middle_samples)
    # Self-check: the bucket-based and sample-based statistics must agree.
    assert math.isclose(median, s_median)
    assert math.isclose(mean, s_mean)
    assert math.isclose(std_dev, s_std_dev)

    # Output files are written next to the script and named after it.
    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
    filename = os.path.basename(sys.argv[0])
    stem = os.path.splitext(filename)[0]
    if renderer == 'matplotlib':
        _render_matplotlib(x, y, median, mean, std_dev, outlier_percentile,
                           os.path.join(localdir, stem + '.png'))
    elif renderer == 'google':
        _render_google(x, y, median, mean, std_dev, outlier_percentile,
                       filename, os.path.join(localdir, stem + '.html'))
    # NOTE(review): the text plot runs for 'matplotlib' and 'google' as well
    # as 'gnuplot' -- kept as written; confirm this is intended.
    if renderer != 'none':
        _render_gnuplot(x, y, median, mean, std_dev)
    print(f'Processed {len(middle_samples)} individual samples.')
    print(f'Median = {median:,.2f}\nmean = {mean:,.2f} std dev = {std_dev:,.2f}')

if __name__ == '__main__':
    # Command-line entry point: pick a renderer and hand it to main().
    cli = ArgumentParser(
        description='Makes histograms from raw samples or prepared buckets.')
    cli.add_argument(
        '-r', '--renderer',
        default='gnuplot',
        choices=['none', 'gnuplot', 'matplotlib', 'google'],
        help='Choose a renderer: summary only, text, png, or webpage.')
    chosen = cli.parse_args().renderer
    if chosen == "matplotlib":
        # matplotlib is only imported when that renderer is requested; the
        # names become module globals.
        import matplotlib.pyplot as plt
        import matplotlib.ticker
    main(chosen)

make_chart.git

Add type hints.