techcrunch.py
4218b421
 #!/usr/bin/env python
7d91adf2
 #
297a7665
 # Testing without affecting the yaml file and saving the updated one aside:
 # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
 # cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
7d91adf2
 import feedparser
 import yaml
 import sys
 import os
 import time
 import codecs
 import traceback
 import calendar
 import pickle
 import exceptions
 import urllib
 import urllib2
 import httplib
 import shutil
 import smtplib
 import analysis
25b161eb
 import json
b3d09671
 import xml
2b4c980d
 import operator
25b161eb
 import cgi
a2a0ee8d
 import cStringIO
297a7665
 import smtp_creds  # Your own credentials, used in send_email()
7d91adf2
 
 debug = True
 any_entry_added = False
a2a0ee8d
 tags_to_post = {'apple', 'google', 'roku'}
1406ec59
 authors_to_post = ['michael arrington',]
 
4218b421
 # TODO 2018-01-18: Maybe combine fb_likes with bf_shares or something...
 rhs_metric = 'fb_likes'
1406ec59
 rhs_metric_times = 'comment_times'
7d91adf2
 
 localdir = ''
 
a2a0ee8d
 html_head = """<!DOCTYPE html>
 <html><head>
7d91adf2
   <title>TechCrunch Feed Filter</title>
   <!-- <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://techcrunch.dlma.com/rss_feed.xml" /> -->
   <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://feeds.feedburner.com/TrendingAtTechcrunch" />
   <style type="text/css">
     body { font-family: "Arial", san-serif; }
06ebb890
     .author { font-size: smaller; color:gray; }
     .date { font-size: smaller; color:gray; }
7d91adf2
     .h3 { font-size: larger; }
     a { text-decoration: none; }
     /* table { border: none; border-collapse:collapse; font-size: large } */
     table { border-collapse: collapse; }
     table.legend { border:1px solid LightSlateGray; font-size: medium; border-collapse:separated; }
     table.legend th { border: 1px solid LightSlateGray; background-color: #E0E0E0; }
     table.legend td { border: 1px solid LightSlateGray; }
     tr.even { background:#%s; padding: 2em; }
     tr.odd { background:#%s; padding-bottom: 2em; }
39cf9b42
     td div { height: 68px; }
7d91adf2
   </style>
a2a0ee8d
   <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
   <script type="text/javascript">
     google.charts.load('current', {'packages':['corechart']});
     google.charts.setOnLoadCallback(drawChart);
     function drawChart() {
       var options = {
06ebb890
         width:%d,
a2a0ee8d
         height:68,
         pointSize:0.1,
         dataOpacity:1.0,
         series: { 0: {targetAxisIndex:0}, 1: {targetAxisIndex:1, color:'limegreen'} },
         vAxis: { gridlines: {count: 0}, maxValue: 1 },
         hAxis: { gridlines: {count: 0}, ticks: [] },
         vAxes: { 0: {textStyle: {fontSize: 11, color: 'blue'} }, 1: {viewWindowMode: 'maximized', baselineColor: '#A0D0A0', textStyle: {fontSize: 11, color: 'limegreen'} } },
       };
 %s
     }
   </script>
 </head>
 <body>
7d91adf2
 <div align='center'><h3>TechCrunch Feed Filter</h3></div>
fc5273ae
 This page shows what analysis is done to filter the noise away from the Techcrunch feed into
 <a href="http://feeds.feedburner.com/TrendingAtTechcrunch"> a more concise feed <img src="feed.png" alt="feed" height="14" width="14"></a>.
a2a0ee8d
 <a href="https://david.dlma.com/blog/my-techcrunch-feed-filter">Learn more about the Feed Filter</a>.<br /><br />
7d91adf2
 """
 
 html_footer = """
 </table>
 </div><br />
 <div align='center'>Thanks to <a href="http://www.feedparser.org/">The Universal Feed Parser module</a>,
fc5273ae
 <a href="http://pyyaml.org/">PyYAML</a> and <a href="http://code.google.com/apis/chart/">Google Charts</a>.<br />
a2a0ee8d
 <a href="http://git.dlma.com/techcrunch.git/">source</a> &bull; <a href="techcrunch.yaml">raw data</a> &bull; <a href="stats.txt">status</a><br />&copy; 2011 <a href="https://david.dlma.com">David Blume</a></div><br />
 </body>
 </html>
 """
 
 chart_data_header = """      var data = google.visualization.arrayToDataTable([
         ['', 'Comments', 'Shares', {'type': 'string', 'role': 'style'}],
 """
 chart_data_middle = """      ]);
       var chart = new google.visualization.LineChart(document.getElementById('chart%d'));
       options.backgroundColor = '#%s';
7d91adf2
 """
 
06ebb890
 img_width = 240
a2a0ee8d
 img_height = 68
7d91adf2
 
 series_1_color = "0000FF"
 series_2_color = "00AA00"
 threshold_color = "FF8C00"
b3d09671
 tag_color = "F01000"
7d91adf2
 
 even_background = "F8F8F8"
 odd_background = "E8E8E8"
 
2b4c980d
 even_watermark = "E0E0FF"
 odd_watermark = "D0D0F0"
 
 
1406ec59
 def asciiize(s):
7d91adf2
     try:
1406ec59
         return s.encode('ascii')
ea0e1e38
     except UnicodeEncodeError:
7d91adf2
         return s
ea0e1e38
     except exceptions.AttributeError:
7d91adf2
         return s
 
ef6f5ca2
 
297a7665
 def send_email(subject, message, toaddrs,
         fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)):
     """ Sends Email """
     smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port)
     smtp.login(smtp_creds.user, smtp_creds.passw)
ea0e1e38
     smtp.sendmail(fromaddr,
                   toaddrs,
1406ec59
                   "Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \
                   (fromaddr, ", ".join(toaddrs), subject, message))
7d91adf2
     smtp.quit()
 
ef6f5ca2
 
1406ec59
 def index_id(a_list, op, elem):
b3d09671
     try:
1406ec59
         return (index for index, item in enumerate(a_list) if op(item, elem)).next()
b3d09671
     except:
         return -1
 
ef6f5ca2
 
a2a0ee8d
 def write_chart_data(time_posted, lhs_times, lhs_values, rhs_times,
                    rhs_values, threshold_value, image_index, tag_hit, chart_io):
1406ec59
 #    lhs_times, lhs_values = zip(*comments)
 #    rhs_times, rhs_values = zip(*rhs)
a2a0ee8d
     is_odd_row = image_index % 2
7d91adf2
 
1406ec59
     if not len(lhs_times):
         lhs_times = [time_posted,]
     if not len(lhs_values):
         lhs_values = [0,]
     if not len(rhs_times):
         rhs_times = [time_posted,]
     if not len(rhs_values):
         rhs_values = [0,]
 
     lhs_times = [(i - time_posted) / 1800 for i in lhs_times]
     rhs_times = [(i - time_posted) / 1800 for i in rhs_times]
 
2b4c980d
     met_threshold_pt = -1
     if threshold_value != -1:
1406ec59
         met_threshold_pt = index_id(rhs_values, operator.ge, threshold_value)
2b4c980d
         if met_threshold_pt == -1 or tag_hit:
             # This can happen if threshold_value was set to a number
             # because the author or a tag was matched, but the article
             # was unpopular. We choose to put a marker at point index 0.
             met_threshold_pt = 0
 
     if is_odd_row != 0:
         bg_color = even_background
     else:
         bg_color = odd_background
a2a0ee8d
 
     chart_io.write(chart_data_header)
     for i in range(8):
         if i == met_threshold_pt:
             if tag_hit:
                 style = "'point { size: 5; fill-color: #FF0000; shape-type: diamond}'"
             else:
                 style = "'point { size: 5; fill-color: #FF8C00; }'"
         else:
             style = "null"
         if i < len(lhs_values):
             lhs_value = str(lhs_values[i])
         else:
             lhs_value = "null"
         if i < len(rhs_values):
             rhs_value = str(rhs_values[i])
b3d09671
         else:
a2a0ee8d
             rhs_value = "null"
         chart_io.write("        [%d,  %s,        %s, %s],\n" % (i, lhs_value, rhs_value, style))
     chart_io.write(chart_data_middle % (image_index, bg_color))
     if met_threshold_pt == -1 and not tag_hit:
         chart_io.write("      delete options.vAxes[1].baseline;\n")
     else:
         chart_io.write("      options.vAxes[1].baseline = %d;\n" % (threshold_value,))
     chart_io.write("      chart.draw(data, options);\n\n")
7d91adf2
 
ef6f5ca2
 
1406ec59
 def process_feed(yaml_items):
297a7665
     """Retrieve the url and process it.
7d91adf2
     feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
     """
1406ec59
     feed = feedparser.parse('http://feeds.feedburner.com/TechCrunch')
     if hasattr(feed, 'status'):
7d91adf2
         if feed.status == 304:
             pass
         else:
             if feed.status != 200 and feed.status != 307 and feed.status != 301 and feed.status != 302:
                 if feed.status == 503:
                     print "the feed is temporarily unavailable."
                 elif feed.status == 400:
                     print "the feed says we made a bad request."
                 elif feed.status == 502:
                     print "the feed reported a bad gateway error."
                 elif feed.status == 404:
                     print "the feed says the page was not found."
                 elif feed.status == 500:
                     print "the feed had an internal server error."
                 elif feed.status == 403:
                     print "Access to the feed was forbidden."
                 else:
                     print "the feed returned feed.status %d." % ( feed.status, )
             else:
                 # Save off this
1406ec59
                 if hasattr(feed, 'bozo_exception') and isinstance(feed.bozo_exception, xml.sax._exceptions.SAXParseException):
                     print "Didn't pickle TechCrunch feed because it had a bozo_exception: %s" % (str(feed.bozo_exception))
b3d09671
                 else:
                     try:
1406ec59
                         with open(os.path.join(localdir, 'techcrunch_feed.pickle'), 'wb') as f:
                             pickle.dump(feed, f)
4218b421
                     except(pickle.PicklingError, exceptions.TypeError) as e:
b3d09671
                         print "An error occurred while pickling the feed: %s." % \
1406ec59
                               (# str(e.__class__),
                                str(e))
                         traceback.print_exc(3, file=sys.stdout)
7d91adf2
 
1406ec59
             for i in reversed(feed.entries):
                 process_item(i, yaml_items)
7d91adf2
 
             # If we have more than 200 items, remove the old ones.
1406ec59
             while len(yaml_items) > 200:
7d91adf2
                 yaml_items.pop()
 
             for i in yaml_items:
1406ec59
                 # i['title'] = asciiize(i['title'])
                 # i['tags'] = map(asciiize, i['tags'])
                 process_yaml_item(i)
7d91adf2
 
     else:
         if hasattr(feed, 'bozo_exception'):
             e = feed.bozo_exception
1406ec59
             if isinstance(e, urllib2.URLError):
7d91adf2
                 print_last_line = True
                 if hasattr(e, 'reason'):
                     if e.reason[0] == 110:
                         print "the feed's connection timed out."
                         print_last_line = False
                     elif e.reason[0] == 111:
                         print "the feed's connection was refused."
                         print_last_line = False
                     elif e.reason[0] == 104:
                         print "the feed reset the connection."
                         print_last_line = False
                     else:
1406ec59
                         print "the feed had a URLError with reason %s." % (str(e.reason),)
7d91adf2
                         print_last_line = False
                 if print_last_line:
1406ec59
                     print "the feed had a URLError %s" % (str(e),)
             elif isinstance(e, httplib.BadStatusLine):
                 print "the feed gave a bad status line. (%s)" % (str(e),)
7d91adf2
             else:
1406ec59
                 if len(str(e)):
                     print "the feed bozo_exception: %s \"%s\"" % (str(e.__class__), str(e))
7d91adf2
                 else:
1406ec59
                     print "the feed bozo_exception: %s %s" % (str(e.__class__), repr(e))
7d91adf2
         else:
1406ec59
             print "the feed returned class %s, %s" % (str(feed.__class__), str(feed))
7d91adf2
 
ef6f5ca2
 
1406ec59
 def process_item(feed_item, yaml_items):
4218b421
     """Processes an RSS feed item, and converts it to a YAML item"""
7d91adf2
     # Get the time
     global any_entry_added
1406ec59
     timecode_now = int(time.time())
7d91adf2
     date_parsed = time.gmtime()
1406ec59
     if hasattr(feed_item, 'issued_parsed'):
7d91adf2
         date_parsed = feed_item.issued_parsed
1406ec59
     elif hasattr(feed_item, 'date_parsed'):
7d91adf2
         date_parsed = feed_item.date_parsed
     else:
1406ec59
         print "process_item found no timestamp for", asciiize(feed_item.link)
     timecode_parsed = calendar.timegm(date_parsed)
7d91adf2
 
4218b421
     link = feed_item.link
     if hasattr(feed_item, 'feedburner_origlink'):
         link = feed_item.feedburner_origlink
 
     # TODO 2018-01-18: Leave in the ncid for URL clicks, but remove during processing.
 #    suffix_to_remove = '?ncid=rss'
 #    if link.endswith(suffix_to_remove):
 #        link = link[:-len(suffix_to_remove)]
 
7d91adf2
     # Look for i.feedburner_origlink in yaml_items
     yaml_item = None
     for i in yaml_items:
4218b421
         if link == i['link']:
b3d09671
             yaml_item = i
             break
4218b421
     if yaml_item is None:
7d91adf2
         author = ''
1406ec59
         if hasattr(feed_item, 'author'):
             author = asciiize(feed_item.author)
7d91adf2
 
4218b421
         # Make a new yaml_item
1406ec59
         yaml_item = {'title'               : asciiize(feed_item.title),
                      'link'                : asciiize(link),
                      'author'              : author,
                      'tags'                : [],
                      'orig_posted'         : timecode_parsed,
                      'qualified'           : -1,
                      'comment_times'       : [],
                      'fb_comments'         : [],
                      'fb_shares'           : [],
                      'fb_likes'            : [],
                      'slash_comment_times' : [],
                      'slash_comments'      : []
7d91adf2
                     }
1406ec59
         if hasattr(feed_item, 'tags'):
7d91adf2
             for i in feed_item.tags:
1406ec59
                 yaml_item['tags'].append(asciiize(i.term))
7d91adf2
 
1406ec59
         yaml_items.insert(0, yaml_item)
7d91adf2
         any_entry_added = True
 
     # Maybe check to ensure that this item isn't too old.
     if timecode_parsed < timecode_now - 60 * 30 * 9:
         return
 
     # Now, add the new values
1406ec59
     if hasattr(feed_item, 'slash_comments') and len(yaml_item['slash_comments']) < 8:
7d91adf2
         any_entry_added = True
1406ec59
         yaml_item['slash_comment_times'].append(timecode_now)
         yaml_item['slash_comments'].append(int(feed_item.slash_comments))
7d91adf2
 
ef6f5ca2
 
1406ec59
 def process_yaml_item(yaml_item):
7d91adf2
     global any_entry_added
 
4218b421
     # Related to TODO 2018-01-18: Remove ncid only during processing.
     link = yaml_item['link']
     suffix_to_remove = '?ncid=rss'
     # Maybe we should find() it instead, in case feedburner adds other options
     if link.endswith(suffix_to_remove):
         link = link[:-len(suffix_to_remove)]
 
1406ec59
     timecode_now = int(time.time())
     if len(yaml_item['fb_comments']) < 8:
4218b421
         num_shares, num_comments, num_likes = Get_fb_stats(link)
7d91adf2
         if num_comments != -1:
             any_entry_added = True
1406ec59
             yaml_item['comment_times'].append(timecode_now)
             yaml_item['fb_shares'].append(num_shares)
             yaml_item['fb_comments'].append(num_comments)
             yaml_item['fb_likes'].append(num_likes)
7d91adf2
 
1406ec59
 #    if len(yaml_item['reddit_']) < 8:
ea0e1e38
 #        num_ = get_reddit_stats(link)
1406ec59
 #        if num_ != -1:
 #            any_entry_added = True
 #            yaml_item['reddit_times'].append(timecode_now)
 #            yaml_item['reddit_'].append(num_)
7d91adf2
 
ef6f5ca2
 
ea0e1e38
 def get_reddit_stats(url_string):
4218b421
     """ Consider curl "https://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
1406ec59
     """
7d91adf2
     return -1
 
ef6f5ca2
 
1406ec59
 def Get_fb_stats(url_string):
297a7665
     """Use graph's "engagement" field to get reactions and shares."""
25b161eb
     shares = -1
     comments = -1
1406ec59
     likes = -1
 
4218b421
     url_string = url_string.encode('utf-8')
 
25b161eb
     try:
4218b421
         encoded = urllib.urlencode({'access_token': facebook_token})
         url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s'
         f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded))
25b161eb
         data = f.read()
         f.close()
4218b421
     except (urllib2.URLError, httplib.BadStatusLine) as e:
1406ec59
         if hasattr(e, 'reason'): # URLError
4218b421
             if hasattr(e, 'code'):
                 print "Get_fb_stats got an error (1):", e.code, e.reason, url_string
             else:
                 print "Get_fb_stats got an error (2):", e.reason, url_string
1406ec59
         elif hasattr(e, 'code'): #URLError
25b161eb
             print "Get_fb_stats got an error. Code:", e.code, url_string
         else:
4218b421
             print "Get_fb_stats got an error (3):", str(e)
         return shares, comments, likes
1406ec59
     if len(data) > 20:
4218b421
         d = json.loads(data)['engagement']
         try:
1406ec59
             shares = d['share_count']
4218b421
         except KeyError:
1406ec59
             shares = 0
4218b421
 
         try:
             likes = d['reaction_count']
         except KeyError:
             likes = 0
 
         # TODO 2018-01-18: og_object metric was likes + shares + comments
         # Here we'll combine likes and shares, and comments with plugin_comments
         likes += shares
 
         try:
             comments = d['comment_plugin_count'] + d['comment_count']
         except KeyError:
             comments = 0
25b161eb
     else:
         print "Get_fb_stats got too little data for ",  url_string
1406ec59
     return shares, comments, likes
b3d09671
 
 
297a7665
 def make_index_html(yaml_items, weekend_stats, weekday_stats):
     """Writes a static index.html file from the YAML items."""
1406ec59
     cur_time = int(time.time())
     new_index_fullpath = os.path.join(localdir, 'index.html_new')
     index_fullpath = os.path.join(localdir, 'index.html')
7d91adf2
 
a2a0ee8d
     chart_io = cStringIO.StringIO()
     for image_index, image in enumerate(yaml_items[:40]):
         tag_hit = False
         if image['author'].lower() in authors_to_post:
             tag_hit = True
         elif len(set([j.lower() for j in image['tags']]) & tags_to_post) > 0:
             tag_hit = True
         write_chart_data(image['orig_posted'],
                          image['comment_times'],
                          image['fb_comments'],
                          image[rhs_metric_times],
                          image[rhs_metric],
                          image['qualified'],
                          image_index,
                          tag_hit,
                          chart_io
                         )
4218b421
 
a2a0ee8d
     with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
06ebb890
         f.write(html_head % (even_background, odd_background, img_width, chart_io.getvalue()))
a2a0ee8d
         chart_io.close()
4218b421
         f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
         f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2]))
         f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2]))
         f.write('</table></div>\n<br />\n')
         f.write('<div align="center">\n<table>\n')
         for image_index, image in enumerate(yaml_items[:40]):
06ebb890
             f.write('<tr valign="center" class="%s">\n  <td><strong><a href="%s">%s</a></strong> <span class="date">at %s</span> <span class="author">by %s</span></td>\n' % \
4218b421
                      (image_index % 2 and "even" or "odd",
                       image['link'],
                       image['title'].encode('ascii', 'xmlcharrefreplace'),
06ebb890
                       time.strftime("%H:%M", time.localtime(image['orig_posted'])).encode('ascii', 'xmlcharrefreplace'),
4218b421
                       image['author'].encode('ascii', 'xmlcharrefreplace'),
                      )
                    )
             f.write('  <td>%s<td>\n' % (image['qualified'] != -1 and '<img src="star_30.png" width="30" height="29" />' or ''))
a2a0ee8d
             f.write('  <td><div id="chart%d" /></td></tr>\n' % (image_index, ))
4218b421
         f.write(html_footer)
 
1406ec59
     if os.path.exists(index_fullpath):
         os.unlink(index_fullpath)
     shutil.move(new_index_fullpath, index_fullpath)
7d91adf2
 
ef6f5ca2
 
297a7665
 def make_feed_file(yaml_items):
     """Writes the RSS feed file with the YAML items."""
4218b421
     with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f:
06ebb890
         f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n<channel>\n<atom:link href=\"http://techcrunch.dlma.com/rss_feed.xml\" rel=\"self\" type=\"application/rss+xml\"/>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>")
1406ec59
         f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
ef6f5ca2
         count = 0
         for item in yaml_items:
1406ec59
             now = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(item['orig_posted']))
ef6f5ca2
             if item['qualified'] != -1:
1406ec59
                 escaped_title = cgi.escape(item['title']).encode('ascii', 'xmlcharrefreplace')
                 escaped_author = cgi.escape(item['author']).encode('ascii', 'xmlcharrefreplace')
                 f.write("<item><title>%s</title><pubDate>%s</pubDate><link>%s</link><guid isPermaLink=\"false\">%s</guid><description><![CDATA[By: %s]]></description></item>\n" % \
                          (escaped_title, now, item['link'], item['link'], escaped_author))
ef6f5ca2
                 count += 1
                 if count > 14:
                     break
1406ec59
         f.write("</channel></rss>")
ef6f5ca2
 
7d91adf2
 
 if __name__=='__main__':
     start_time = time.time()
     progress_text = []
 
     old_stdout = sys.stdout
     old_stderr = sys.stderr
39cf9b42
     sys.stdout = sys.stderr = cStringIO.StringIO()
7d91adf2
 
     try:
1406ec59
         localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
7d91adf2
         #
         # Read in techcrunch.yaml
         #
         # [ { 'title'               : 'Title Text',
         #     'link'                : u'http://techcrunch.com/2010/08/17/google-buzz-who-to-follow/',
         #     'author'              : u'MG Siegler',
         #     'orig_posted'         : 1282197199
         #     'tags'                : [ u'Google', u'privacy' ]
         #     'qualified'           : -1
         #     'comment_times'       : [ 1282197199, 1282197407 ]
1406ec59
         #     'fb_comments'         : [ 0, 5 ]
         #     'fb_shares'           : [ 0, 300 ]
         #     'fb_likes'            : [ 0, 19 ]
7d91adf2
         #     'slash_comment_times' : [ 1282197199, 1282197407 ]
         #     'slash_comments'      : [ 0, 5 ]
         #    },
         #    { ... }
         #  ]
         #
1406ec59
         yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
         if os.path.exists(yaml_fullpath):
             with open(yaml_fullpath, 'rb') as f:
                 items = yaml.load(f)
4218b421
                 if items is None:
                     print yaml_fullpath, "exists, but was empty."
                     items = []
ef6f5ca2
 
                 # Do any dictionary item updating that might be necessary
 #                for item in items:
1406ec59
 #                    if not item.has_key('fb_shares'):
ef6f5ca2
 #                        item['fb_shares'] = []
7d91adf2
         else:
             print "could not open", yaml_fullpath
             items = []
 
297a7665
         with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f:
4218b421
             facebook_token = f.read()
 
1406ec59
         progress_text = ["read techcrunch.yaml"]
         process_feed(items)
7d91adf2
 
         #
         # If any work was done, then write files.
         #
b3d09671
         if any_entry_added:
ea0e1e38
             weekend_stats, weekday_stats = analysis.process_feed(items, rhs_metric, rhs_metric_times)
7d91adf2
 
06ebb890
             # We'll only look at the stats up to 2 hours after posting.
b3d09671
             weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
             weekend_threshold = weekend_mean + weekend_sigma
             weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
             weekday_threshold = weekday_mean + weekday_sigma
             for item in items:
1406ec59
                 wday = time.localtime(item['orig_posted']).tm_wday
b3d09671
                 if wday == 5 or wday == 6:
                     threshold = weekend_threshold
                 else:
                     threshold = weekday_threshold
                 if item['qualified'] == -1:
1406ec59
                     for i in range(len(item[rhs_metric_times])):
                         r_time = item[rhs_metric_times][i]
06ebb890
                         if r_time - item['orig_posted'] < 7200:
1406ec59
                             if item[rhs_metric][i] >= threshold:
2b4c980d
                                 item['qualified'] = threshold
b3d09671
                                 break
06ebb890
                         else:
                             break
b3d09671
 
             # Automatically add those items whose authors and tags I like
             for item in items:
1406ec59
                 if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0:
b3d09671
                     if item['author'].lower() in authors_to_post:
2b4c980d
                         item['qualified'] = threshold
1406ec59
                     elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0:
2b4c980d
                         item['qualified'] = threshold
7d91adf2
 
             #
             # Write out the updated yaml file.
             #
b3d09671
 
             # For the one file we really use, write to a file on the side, then move it.
1406ec59
             yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
             with open(yaml_newfile_fullpath, 'wb') as f:
                 yaml.dump(items, f, width=120)
ef6f5ca2
             try:
1406ec59
                 os.rename(yaml_newfile_fullpath, yaml_fullpath)
ef6f5ca2
             except OSError as e:
                 print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
1406ec59
             with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
                 yaml.dump(items, f, width=120)
             with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
                 yaml.dump(items, f, encoding='utf-8', width=120)
7d91adf2
 
297a7665
             make_feed_file(items)
7d91adf2
 
297a7665
             make_index_html(items, weekend_stats, weekday_stats)
7d91adf2
         else:
             print "No entries were added this time."
 
4218b421
     except Exception as e:
1406ec59
         exceptional_text = "An exception occurred: " + str(e.__class__) + " " + str(e)
         print exceptional_text, ' '.join(progress_text)
         traceback.print_exc(file=sys.stdout)
7d91adf2
         try:
e2d7af95
             send_email('Exception thrown in ' + os.path.basename(__file__),
1406ec59
                       exceptional_text + "\n" + traceback.format_exc(),
e2d7af95
                       (smtp_creds.default_recipient,))
4218b421
         except Exception as e:
7d91adf2
             print "Could not send email to notify you of the exception. :("
 
     message = sys.stdout.getvalue()
     sys.stdout = old_stdout
     sys.stderr = old_stderr
     if not debug:
         print message
 
     # Finally, let's save this to a statistics page
1406ec59
     if os.path.exists(os.path.join(localdir, 'stats.txt')):
         with open(os.path.join(localdir, 'stats.txt')) as f:
7d91adf2
             lines = f.readlines()
     else:
         lines = []
4218b421
     lines = lines[:672] # Just keep the past week's worth
1406ec59
     # status = len(message.strip()) and message.strip().replace('\n', ' - ') or "OK"
     status = len(message.strip()) and '\n                       '.join( message.splitlines()) or "OK"
     lines.insert(0, "%s %3.0fs %s\n" % (time.strftime('%Y-%m-%d, %H:%M', time.localtime()), time.time() - start_time, status))
     with open(os.path.join(localdir,'stats.txt' ), 'w') as f:
         f.writelines(lines)
4218b421