David Blume's GitList
techcrunch.git / techcrunch.py
David Blume committed 297a766 at 2018-01-21 14:55:04: Moved SMTP credentials to smtp_creds
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Testing without affecting the yaml file and saving the updated one aside:
# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
# cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml

import feedparser
import yaml
import sys
import os
import time
import StringIO
import codecs
import traceback
import calendar
import pickle
import exceptions
import urllib
import urllib2
import httplib
import shutil
import glob
import smtplib
import analysis
import json
import xml
import texttime
import operator
from datetime import timedelta
import cgi
import smtp_creds  # Your own credentials, used in send_email()

debug = True
any_entry_added = False
tags_to_post = set(['apple', 'google'])
authors_to_post = ['michael arrington',]

# TODO 2018-01-18: Maybe combine fb_likes with fb_shares or something...
rhs_metric = 'fb_likes'
rhs_metric_times = 'comment_times'

localdir = ''

html_head = """
<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
<HTML><HEAD>
  <title>TechCrunch Feed Filter</title>
  <!-- <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://techcrunch.dlma.com/rss_feed.xml" /> -->
  <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://feeds.feedburner.com/TrendingAtTechcrunch" />
  <style type="text/css">
    body { font-family: "Arial", sans-serif; }
    .author { font-size: smaller; }
    .h3 { font-size: larger; }
    a { text-decoration: none; }
    /* table { border: none; border-collapse:collapse; font-size: large } */
    table { border-collapse: collapse; }
    table.legend { border:1px solid LightSlateGray; font-size: medium; border-collapse:separate; }
    table.legend th { border: 1px solid LightSlateGray; background-color: #E0E0E0; }
    table.legend td { border: 1px solid LightSlateGray; }
    tr.even { background:#%s; padding: 2em; }
    tr.odd { background:#%s; padding-bottom: 2em; }
  </style>
</HEAD>
<BODY>
<div align='center'><h3>TechCrunch Feed Filter</h3></div>
This page shows what analysis is done to filter the noise away from the Techcrunch feed into
<a href="http://feeds.feedburner.com/TrendingAtTechcrunch">a more concise feed</a>.
<a href="http://david.dlma.com/blog/my-techcrunch-feed-filter">Learn more about the Feed Filter</a>.<br /><br /> """ html_footer = """ </table> </div><br /> <div align='center'>Thanks to <a href="http://www.feedparser.org/">The Universal Feed Parser module</a>, <a href="http://pyyaml.org/">PyYAML</a> and <a href="http://code.google.com/apis/chart/">Google Charts</a>.<br /><a href="techcrunch.yaml">raw data</a> • <a href="stats.txt">status</a><br />© 2011 <a href="http://david.dlma.com">David Blume</a></div><br /> </BODY> </HTML> """ img_width = 300 img_height = 50 series_1_color = "0000FF" series_2_color = "00AA00" threshold_color = "FF8C00" tag_color = "F01000" even_background = "F8F8F8" odd_background = "E8E8E8" even_watermark = "E0E0FF" odd_watermark = "D0D0F0" def asciiize(s): try: return s.encode('ascii') except UnicodeEncodeError as e: return s except exceptions.AttributeError as e: return s def send_email(subject, message, toaddrs, fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)): """ Sends Email """ smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port) smtp.login(smtp_creds.user, smtp_creds.passw) smtp.sendmail(fromaddr, \ toaddrs, \ "Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \ (fromaddr, ", ".join(toaddrs), subject, message)) smtp.quit() def index_id(a_list, op, elem): try: return (index for index, item in enumerate(a_list) if op(item, elem)).next() except: return -1 def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, rhs_values, threshold_value, is_odd_row, tag_hit): # lhs_times, lhs_values = zip(*comments) # rhs_times, rhs_values = zip(*rhs) if not len(lhs_times): lhs_times = [time_posted,] if not len(lhs_values): lhs_values = [0,] if not len(rhs_times): rhs_times = [time_posted,] if not len(rhs_values): rhs_values = [0,] lhs_times = [(i - time_posted) / 1800 for i in lhs_times] rhs_times = [(i - time_posted) / 1800 for i in rhs_times] min_comment_time = min(lhs_times) max_comment_time = max(lhs_times) min_comment_value = min(lhs_values) max_comment_value = max(lhs_values) min_rhs_time = min(rhs_times) max_rhs_time = max(rhs_times) min_rhs_value = min(rhs_values) max_rhs_value = max(rhs_values) met_threshold_pt = -1 if threshold_value != -1: met_threshold_pt = index_id(rhs_values, operator.ge, threshold_value) if met_threshold_pt == -1 or tag_hit: # This can happen if threshold_value was set to a number # because the author or a tag was matched, but the article # was unpopular. We choose to put a marker at point index 0. 
            met_threshold_pt = 0

    if is_odd_row != 0:
        bg_color = even_background
        watermark_color = even_watermark
    else:
        bg_color = odd_background
        watermark_color = odd_watermark

    if len(lhs_values) < 8 and len(lhs_values) > 1:
#        max_comment_value *= 2
        pass
    elif len(lhs_values) == 1:
        min_comment_value = 0

    if len(rhs_values) < 8 and len(rhs_values) > 1:
#        max_rhs_value *= 2
        pass
    elif len(rhs_values) == 1:
        min_rhs_value = 0

    min_comment_value = 0
    min_rhs_value = 0

    chart_url = "http://chart.apis.google.com/chart?cht=lxy&chco=%s,%s&chs=%dx%d&chxs=0,%s|1,%s" % \
                (series_1_color, series_2_color, img_width, img_height, series_1_color, series_2_color)
    chart_url += "&chd=t:%s|%s|%s|%s" % (','.join([str(n) for n in lhs_times]),
                                         ','.join([str(n) for n in lhs_values]),
                                         ','.join([str(n) for n in rhs_times]),
                                         ','.join([str(n) for n in rhs_values]))

    # TODO: Consider watermark levels, like:
    # chm=h,B0B0B0,1,0.3,1|r,E0E0E0,0,0,0.5
    if max_rhs_value > 0:
        threshold_percent = max(0, min((float(threshold_value) / max_rhs_value) - 0.01, 1.0))
    else:
        threshold_percent = 1.0
    chart_url += "&chm=r,%s,0,0,%1.3f" % (watermark_color, threshold_percent)
    if met_threshold_pt != -1:
        if tag_hit:
            dot_color = tag_color
            dot_shape = 'd'
        else:
            dot_color = threshold_color
            dot_shape = 'o'
        chart_url += "|%s,%s,1,%d,10" % (dot_shape, dot_color, met_threshold_pt)
    chart_url += "&chxt=y,r&chxl=0:|%d|%d|1:|%d|%d&chds=%d,%d,%d,%d,%d,%d,%d,%d" % \
                 (min_comment_value, max_comment_value,
                  min_rhs_value, max_rhs_value,
                  0, max(7, max_comment_time),
                  min_comment_value, max_comment_value,
                  0, max(7, max_rhs_time),
                  min_comment_value, max_rhs_value)
    chart_url += "&chf=bg,s,%s&chdl=comments|shares" % (bg_color,)
    return chart_url


def process_feed(yaml_items):
    """Retrieve the url and process it.

    feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
    """
    feed = feedparser.parse('http://feeds.feedburner.com/TechCrunch')
    if hasattr(feed, 'status'):
        if feed.status == 304:
            pass
        else:
            feed_is_modified = True
            if feed.status != 200 and feed.status != 307 and feed.status != 301 and feed.status != 302:
                if feed.status == 503:
                    print "the feed is temporarily unavailable."
                elif feed.status == 400:
                    print "the feed says we made a bad request."
                elif feed.status == 502:
                    print "the feed reported a bad gateway error."
                elif feed.status == 404:
                    print "the feed says the page was not found."
                elif feed.status == 500:
                    print "the feed had an internal server error."
                elif feed.status == 403:
                    print "Access to the feed was forbidden."
                else:
                    print "the feed returned feed.status %d." % ( feed.status, )
            else:
                # Save off this
                if hasattr(feed, 'bozo_exception') and isinstance(feed.bozo_exception, xml.sax._exceptions.SAXParseException):
                    print "Didn't pickle TechCrunch feed because it had a bozo_exception: %s" % (str(feed.bozo_exception))
                else:
                    try:
                        with open(os.path.join(localdir, 'techcrunch_feed.pickle'), 'wb') as f:
                            pickle.dump(feed, f)
                    except(pickle.PicklingError, exceptions.TypeError) as e:
                        print "An error occurred while pickling the feed: %s." % \
                              (# str(e.__class__),
                               str(e))
                        traceback.print_exc(3, file=sys.stdout)
                        feed_is_modified = False

                for i in reversed(feed.entries):
                    process_item(i, yaml_items)

                # If we have more than 200 items, remove the old ones.
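                # New items are inserted at the front of yaml_items, so pop()
                # below discards the oldest entries once the list exceeds 200.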
                while len(yaml_items) > 200:
                    yaml_items.pop()

                for i in yaml_items:
#                    i['title'] = asciiize(i['title'])
#                    i['tags'] = map(asciiize, i['tags'])
                    process_yaml_item(i)
    else:
        if hasattr(feed, 'bozo_exception'):
            e = feed.bozo_exception
            if isinstance(e, urllib2.URLError):
                print_last_line = True
                if hasattr(e, 'reason'):
                    if e.reason[0] == 110:
                        print "the feed's connection timed out."
                        print_last_line = False
                    elif e.reason[0] == 111:
                        print "the feed's connection was refused."
                        print_last_line = False
                    elif e.reason[0] == 104:
                        print "the feed reset the connection."
                        print_last_line = False
                    else:
                        print "the feed had a URLError with reason %s." % (str(e.reason),)
                        print_last_line = False
                if print_last_line:
                    print "the feed had a URLError %s" % (str(e),)
            elif isinstance(e, httplib.BadStatusLine):
                print "the feed gave a bad status line. (%s)" % (str(e),)
            else:
                if len(str(e)):
                    print "the feed bozo_exception: %s \"%s\"" % (str(e.__class__), str(e))
                else:
                    print "the feed bozo_exception: %s %s" % (str(e.__class__), repr(e))
        else:
            print "the feed returned class %s, %s" % (str(feed.__class__), str(feed))


def process_item(feed_item, yaml_items):
    """Processes an RSS feed item, and converts it to a YAML item"""
    # Get the time
    global any_entry_added
    timecode_now = int(time.time())
    date_parsed = time.gmtime()
    if hasattr(feed_item, 'issued_parsed'):
        date_parsed = feed_item.issued_parsed
        date_set = True
    elif hasattr(feed_item, 'date_parsed'):
        date_parsed = feed_item.date_parsed
        date_set = True
    else:
        print "process_item found no timestamp for", asciiize(feed_item.link)
    timecode_parsed = calendar.timegm(date_parsed)

    link = feed_item.link
    if hasattr(feed_item, 'feedburner_origlink'):
        link = feed_item.feedburner_origlink

    # TODO 2018-01-18: Leave in the ncid for URL clicks, but remove during processing.
#    suffix_to_remove = '?ncid=rss'
#    if link.endswith(suffix_to_remove):
#        link = link[:-len(suffix_to_remove)]

    # Look for i.feedburner_origlink in yaml_items
    yaml_item = None
    for i in yaml_items:
        if link == i['link']:
            yaml_item = i
            break

    if yaml_item is None:
        author = ''
        if hasattr(feed_item, 'author'):
            author = asciiize(feed_item.author)

        # Make a new yaml_item
        yaml_item = {'title'               : asciiize(feed_item.title),
                     'link'                : asciiize(link),
                     'author'              : author,
                     'tags'                : [],
                     'orig_posted'         : timecode_parsed,
                     'qualified'           : -1,
                     'comment_times'       : [],
                     'fb_comments'         : [],
                     'fb_shares'           : [],
                     'fb_likes'            : [],
                     'slash_comment_times' : [],
                     'slash_comments'      : []
                    }
        if hasattr(feed_item, 'tags'):
            for i in feed_item.tags:
                yaml_item['tags'].append(asciiize(i.term))

        yaml_items.insert(0, yaml_item)
        any_entry_added = True

    # Maybe check to ensure that this item isn't too old.
    if timecode_parsed < timecode_now - 60 * 30 * 9:
        return

    # Now, add the new values
    if hasattr(feed_item, 'slash_comments') and len(yaml_item['slash_comments']) < 8:
        any_entry_added = True
        yaml_item['slash_comment_times'].append(timecode_now)
        yaml_item['slash_comments'].append(int(feed_item.slash_comments))


def process_yaml_item(yaml_item):
    global any_entry_added

    # Related to TODO 2018-01-18: Remove ncid only during processing.
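    # Strip feedburner's tracking suffix so the canonical article URL is what
    # gets sent to the Facebook Graph lookup below.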
    link = yaml_item['link']
    suffix_to_remove = '?ncid=rss'
    # Maybe we should find() it instead, in case feedburner adds other options
    if link.endswith(suffix_to_remove):
        link = link[:-len(suffix_to_remove)]

    timecode_now = int(time.time())
    if len(yaml_item['fb_comments']) < 8:
        num_shares, num_comments, num_likes = Get_fb_stats(link)
        if num_comments != -1:
            any_entry_added = True
            yaml_item['comment_times'].append(timecode_now)
            yaml_item['fb_shares'].append(num_shares)
            yaml_item['fb_comments'].append(num_comments)
            yaml_item['fb_likes'].append(num_likes)

#    if len(yaml_item['reddit_']) < 8:
#        num_ = Get_reddit_stats(link)
#        if num_ != -1:
#            any_entry_added = True
#            yaml_item['reddit_times'].append(timecode_now)
#            yaml_item['reddit_'].append(num_)


def Get_reddit_stats(url_string):
    """ Consider curl "https://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg" """
    return -1


def Get_fb_stats(url_string):
    """Use graph's "engagement" field to get reactions and shares."""
    shares = -1
    comments = -1
    likes = -1

    url_string = url_string.encode('utf-8')

    try:
        encoded = urllib.urlencode({'access_token': facebook_token})
        url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s'
        f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded))
        data = f.read()
        f.close()
    except (urllib2.URLError, httplib.BadStatusLine) as e:
        if hasattr(e, 'reason'):  # URLError
            if hasattr(e, 'code'):
                print "Get_fb_stats got an error (1):", e.code, e.reason, url_string
            else:
                print "Get_fb_stats got an error (2):", e.reason, url_string
        elif hasattr(e, 'code'):  # URLError
            print "Get_fb_stats got an error. Code:", e.code, url_string
        else:
            print "Get_fb_stats got an error (3):", str(e)
        return shares, comments, likes
    except KeyError as e:
        print "Get_fb_stats got a key error 1e (%s)" % (str(e), )
        print "Get_fb_stats got a key error 2.2 enc (%s)" % (encoded, )
        print url_string.encode('utf-8')
        print u"Get_fb_stats got a key error 2.1 url (%s)" % (url_string, )
        print "Get_fb_stats got a key error 3q (%s)" % (urllib.quote_plus(url_string))
        print "Get_fb_stats got a key error 4 (%s)" % (url % (urllib.quote_plus(url_string), encoded))
        print "Get_fb_stats got a key error 5 (%s) for url %s:" % (str(e), url % (urllib.quote_plus(url_string), encoded))
        return shares, comments, likes

    if len(data) > 20:
        d = json.loads(data)['engagement']
        try:
            shares = d['share_count']
        except KeyError:
            shares = 0

        try:
            likes = d['reaction_count']
        except KeyError:
            likes = 0

        # TODO 2018-01-18: og_object metric was likes + shares + comments
        # Here we'll combine likes and shares, and comments with plugin_comments
        likes += shares

        try:
            comments = d['comment_plugin_count'] + d['comment_count']
        except KeyError:
            comments = 0
    else:
        print "Get_fb_stats got too little data for ", url_string

    return shares, comments, likes


def save_image(url_string, file_path):
    try:
        f = urllib2.urlopen(url_string)
        data = f.read()
        f.close()
    except (urllib2.URLError, httplib.BadStatusLine) as e:
        if hasattr(e, 'reason'):  # URLError
            print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason
        elif hasattr(e, 'code'):  # URLError
            print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code
        else:
            print "save_image: Error from urlopen", e
        return url_string

    if len(data) > 50:
        with open(file_path, 'wb') as f:
            f.write(data)
        return 'cache/' + os.path.basename(file_path)
    return url_string


def make_index_html(yaml_items, weekend_stats, weekday_stats):
    """Writes a static index.html file from the YAML items."""
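    # Renders a chart for each of the newest 40 items, caches the chart images
    # under cache/, writes index.html_new, then swaps it into place as index.html.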
    cur_time = int(time.time())
    new_index_fullpath = os.path.join(localdir, 'index.html_new')
    index_fullpath = os.path.join(localdir, 'index.html')
    cache_path = os.path.join(localdir, 'cache')

    files_to_delete = glob.glob(os.path.join(cache_path, '*.png'))

    with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
        f.write(html_head % (even_background, odd_background))
        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' %
                (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2]))
        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' %
                (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2]))
        f.write('</table></div>\n<br />\n')
        f.write('<div align="center">\n<table>\n')
        for image_index, image in enumerate(yaml_items[:40]):
            tag_hit = False
            if image['author'].lower() in authors_to_post:
                tag_hit = True
            elif len(set([j.lower() for j in image['tags']]) & tags_to_post) > 0:
                tag_hit = True
            chart_url = make_chart_url(image['orig_posted'],
                                       image['comment_times'],
                                       image['fb_comments'],
                                       image[rhs_metric_times],
                                       image[rhs_metric],
                                       image['qualified'],
                                       image_index % 2,
                                       tag_hit
                                      )
            image_url = save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index)))
            f.write('<tr valign="center" class="%s">\n <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \
                    (image_index % 2 and "even" or "odd",
                     image['link'],
                     image['title'].encode('ascii', 'xmlcharrefreplace'),
                     image['author'].encode('ascii', 'xmlcharrefreplace'),
                    )
                   )
            f.write(' <td>%s<td>\n' % (image['qualified'] != -1 and '<img src="star_30.png" width="30" height="29" />' or ''))
            f.write(' <td><img src="%s" width="%d" height="%d" border="0" /></td></tr>\n' % \
                    (image_url,
                     img_width,
                     img_height
                    )
                   )
        f.write(html_footer)

    if os.path.exists(index_fullpath):
        os.unlink(index_fullpath)
    shutil.move(new_index_fullpath, index_fullpath)
    for fname in files_to_delete:
        os.unlink(fname)


def make_feed_file(yaml_items):
    """Writes the RSS feed file with the YAML items."""
    with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f:
        f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\">\n<channel>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>")
        f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
        count = 0
        for item in yaml_items:
            now = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(item['orig_posted']))
            if item['qualified'] != -1:
                escaped_title = cgi.escape(item['title']).encode('ascii', 'xmlcharrefreplace')
                escaped_author = cgi.escape(item['author']).encode('ascii', 'xmlcharrefreplace')
                f.write("<item><title>%s</title><pubDate>%s</pubDate><link>%s</link><guid isPermaLink=\"false\">%s</guid><description><![CDATA[By: %s]]></description></item>\n" % \
                        (escaped_title, now, item['link'], item['link'], escaped_author))
                count += 1
                if count > 14:
                    break
        f.write("</channel></rss>")


if __name__=='__main__':
    start_time = time.time()
    progress_text = []

    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = sys.stderr = StringIO.StringIO()

    try:
        localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
        #
        # Read in techcrunch.yaml
        #
        # [ { 'title'               : 'Title Text',
        #     'link'                : u'http://techcrunch.com/2010/08/17/google-buzz-who-to-follow/',
        #     'author'              : u'MG Siegler',
        #     'orig_posted'         : 1282197199
        #     'tags'                : [ u'Google', u'privacy' ]
        #     'qualified'           : -1
        #     'comment_times'       : [ 1282197199, 1282197407 ]
        #     'fb_comments'         : [ 0, 5 ]
        #     'fb_shares'           : [ 0, 300 ]
        #     'fb_likes'            : [ 0, 19 ]
        #     'slash_comment_times' : [ 1282197199, 1282197407 ]
        #     'slash_comments'      : [ 0, 5 ]
        #   },
        #   { ... }
        # ]
        #
        yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
        if os.path.exists(yaml_fullpath):
            with open(yaml_fullpath, 'rb') as f:
                items = yaml.load(f)

            if items is None:
                print yaml_fullpath, "exists, but was empty."
                items = []

            # Do any dictionary item updating that might be necessary
#            for item in items:
#                if not item.has_key('fb_shares'):
#                    item['fb_shares'] = []
        else:
            print "could not open", yaml_fullpath
            items = []

        with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f:
            facebook_token = f.read()

        progress_text = ["read techcrunch.yaml"]
        process_feed(items)

        #
        # If any work was done, then write files.
        #
        if any_entry_added:
            weekend_stats, weekday_stats = analysis.Process_feed(items, rhs_metric, rhs_metric_times)

            # We'll only look at the stats for the time 1:00 to 1:30 after posting.
            weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
            weekend_threshold = weekend_mean + weekend_sigma
            weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
            weekday_threshold = weekday_mean + weekday_sigma
            for item in items:
                wday = time.localtime(item['orig_posted']).tm_wday
                if wday == 5 or wday == 6:
                    threshold = weekend_threshold
                else:
                    threshold = weekday_threshold
                if item['qualified'] == -1:
                    for i in range(len(item[rhs_metric_times])):
                        r_time = item[rhs_metric_times][i]
                        if r_time - item['orig_posted'] < 5400:
                            if item[rhs_metric][i] >= threshold:
                                item['qualified'] = threshold
                            if r_time - item['orig_posted'] >= 3600:
                                break

            # Automatically add those items whose authors and tags I like
            for item in items:
                if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0:
                    if item['author'].lower() in authors_to_post:
                        item['qualified'] = threshold
                    elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0:
                        item['qualified'] = threshold

            #
            # Write out the updated yaml file.
            #

            # For the one file we really use, write to a file on the side, then move it.
            yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
            with open(yaml_newfile_fullpath, 'wb') as f:
                yaml.dump(items, f, width=120)
            try:
                os.rename(yaml_newfile_fullpath, yaml_fullpath)
            except OSError as e:
                print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
            with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
                yaml.dump(items, f, width=120)
            with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
                yaml.dump(items, f, encoding='utf-8', width=120)

            make_feed_file(items)

            make_index_html(items, weekend_stats, weekday_stats)
        else:
            print "No entries were added this time."

    except Exception as e:
        exceptional_text = "An exception occurred: " + str(e.__class__) + " " + str(e)
        print exceptional_text, ' '.join(progress_text)
        traceback.print_exc(file=sys.stdout)
        try:
            send_email('Exception thrown in techcrunch.py',
                       exceptional_text + "\n" + traceback.format_exc(),
                       ('david.blume@gmail.com',))
        except Exception as e:
            print "Could not send email to notify you of the exception. :("
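    # Restore the real stdout/stderr, then record this run's captured output
    # (or "OK") in stats.txt.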
    message = sys.stdout.getvalue()
    sys.stdout = old_stdout
    sys.stderr = old_stderr
    if not debug:
        print message

    # Finally, let's save this to a statistics page
    if os.path.exists(os.path.join(localdir, 'stats.txt')):
        with open(os.path.join(localdir, 'stats.txt')) as f:
            lines = f.readlines()
    else:
        lines = []
    lines = lines[:672]  # Just keep the past week's worth
    # status = len(message.strip()) and message.strip().replace('\n', ' - ') or "OK"
    status = len(message.strip()) and '\n '.join(message.splitlines()) or "OK"
    lines.insert(0, "%s %3.0fs %s\n" % (time.strftime('%Y-%m-%d, %H:%M', time.localtime()), time.time() - start_time, status))
    with open(os.path.join(localdir, 'stats.txt'), 'w') as f:
        f.writelines(lines)
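# Note (an assumption, not stated in the original source): keeping 672 lines of
# stats.txt corresponds to one week of runs at 15-minute intervals, so this
# script is presumably driven by a scheduler such as cron, e.g.:
#     */15 * * * * /path/to/techcrunch.py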