2013-08-04: Miscellaneous changes to techcrunch.py
David Blume

David Blume committed on 2018-01-20 20:24:48
Showing 1 changed file, with 88 additions and 48 deletions.

... ...
@@ -1,4 +1,4 @@
1
-#!/usr/bin/python2.5
1
+#!/usr/bin/python
2 2
 # chmod 755 me, and make sure I have UNIX style newlines.
3 3
 #
4 4
 # techcrunch.py
... ...
@@ -10,10 +10,11 @@
10 10
 # TODO:
11 11
 # 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry &raquo;</a>'
12 12
 #   link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/"
13
+# 2. Add Reddit counts: curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
13 14
 #
14 15
 # This file was coverted from tabs to spaces with the vim command %retab
15 16
 #
16
-# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; mv techcrunch.yaml_back techcrunch.yaml
17
+# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
17 18
 #
18 19
 
19 20
 import feedparser
... ...
@@ -35,12 +36,13 @@ import glob
35 36
 import smtplib
36 37
 import bisect
37 38
 import analysis
38
-import simplejson as json
39
+import json
39 40
 import cookielib
40 41
 import xml
41 42
 import texttime
42 43
 import operator
43 44
 from datetime import timedelta
45
+import cgi
44 46
 
45 47
 debug = True
46 48
 any_entry_added = False
... ...
@@ -78,7 +80,7 @@ html_footer = """
78 80
 </table>
79 81
 </div><br />
80 82
 <div align='center'>Thanks to <a href="http://www.feedparser.org/">The Universal Feed Parser module</a>,
81
-<a href="http://pyyaml.org/">PyYAML</a> and <a href="http://code.google.com/apis/chart/">Google Charts</a>.<br /><a href="techcrunch.yaml">raw data</a> &bull; <a href="stats.txt">status</a></div><br />
83
+<a href="http://pyyaml.org/">PyYAML</a> and <a href="http://code.google.com/apis/chart/">Google Charts</a>.<br /><a href="techcrunch.yaml">raw data</a> &bull; <a href="stats.txt">status</a><br />&copy; 2011 <a href="http://david.dlma.com">David Blume</a></div><br />
82 84
 </BODY>
83 85
 </HTML>
84 86
 """
... ...
@@ -109,6 +111,7 @@ def asciiize( s ):
109 111
 def sendEmail( subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@techcrunch.dlma.com>' ):
110 112
     """Sends Email"""
111 113
     smtp = smtplib.SMTP( 'localhost' )
114
+    smtp.login( user, passw )
112 115
     smtp.sendmail( fromaddr, \
113 116
                    toaddrs, \
114 117
                    "Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \
... ...
@@ -248,10 +251,8 @@ def process_feed( yaml_items ):
248 251
                     print "the feed returned feed.status %d." % ( feed.status, )
249 252
             else:
250 253
                 # Save off this
251
-                if hasattr( feed, 'bozo_exception' ) and type( feed.bozo_exception ) == xml.sax._exceptions.SAXParseException:
252
-                    print "Didn't pickle because of bozo_exception %s." % ( str( feed.bozo_exception ) )
253
-                elif hasattr( feed, 'bozo_exception' ) and isinstance( feed.bozo_exception, xml.sax._exceptions.SAXParseException ):
254
-                    print "Didn't pickle because of bozo_exception instance %s." % ( str( feed.bozo_exception ) )
254
+                if hasattr( feed, 'bozo_exception' ) and isinstance( feed.bozo_exception, xml.sax._exceptions.SAXParseException ):
255
+                    print "Didn't pickle TechCrunch feed because it had a bozo_exception: %s" % ( str( feed.bozo_exception ) )
255 256
                 else:
256 257
                     f = file( os.path.join( localdir, 'techcrunch_feed.pickle' ), 'wb' )
257 258
                     try:
... ...
@@ -271,12 +272,12 @@ def process_feed( yaml_items ):
271 272
             while len( yaml_items ) > 200:
272 273
                 yaml_items.pop()
273 274
 
274
-            cookie = Get_cookie( urllib2.Request( 'http://mediacdn.disqus.com/1078/build/system/count.js' ) )
275
+#            cookie = Get_cookie( urllib2.Request( 'http://mediacdn.disqus.com/1078/build/system/count.js' ) )
275 276
 
276 277
             for i in yaml_items:
277 278
                 # i['title'] = asciiize( i['title'] )
278 279
                 # i['tags'] = map( asciiize, i['tags'] )
279
-                process_yaml_item( i, cookie )
280
+                process_yaml_item( i )
280 281
 
281 282
     else:
282 283
         if hasattr(feed, 'bozo_exception'):
... ...
@@ -299,10 +300,7 @@ def process_feed( yaml_items ):
299 300
                 if print_last_line:
300 301
                     print "the feed had a URLError %s" % ( str(e), )
301 302
             elif isinstance( e, httplib.BadStatusLine ):
302
-                if hasattr(e, 'message'):
303
-                    print "the feed gave a bad status line %s." % ( str(e.message ), )
304
-                else:
305
-                    print "the feed gave a bad status line."
303
+                print "the feed gave a bad status line. (%s)" % ( str(e), )
306 304
             else:
307 305
                 if len( str(e) ):
308 306
                     print "the feed bozo_exception: %s \"%s\"" % ( str(e.__class__), str(e) )
... ...
@@ -352,6 +350,7 @@ def process_item( feed_item, yaml_items ):
352 350
                       'qualified'           : -1,
353 351
                       'comment_times'       : [],
354 352
                       'comments'            : [],
353
+                      'fb_shares'           : [],
355 354
                       'slash_comment_times' : [],
356 355
                       'slash_comments'      : [],
357 356
                       'retweet_times'       : [],
... ...
@@ -374,17 +373,19 @@ def process_item( feed_item, yaml_items ):
374 373
         yaml_item['slash_comment_times'].append( timecode_now )
375 374
         yaml_item['slash_comments'].append( int( feed_item.slash_comments ) )
376 375
 
377
-def process_yaml_item( yaml_item, cookie ):
376
+def process_yaml_item( yaml_item ):
378 377
     global any_entry_added
379 378
 
380 379
     timecode_now = int( time.time() )
381 380
     if len( yaml_item['comments'] ) < 8:
382
-        disqus_id = Get_disqus_id( yaml_item )
383
-        num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie )
381
+        num_shares, num_comments = Get_fb_stats( yaml_item['link'] )
382
+#        disqus_id = Get_disqus_id( yaml_item )
383
+#        num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie )
384 384
         if num_comments != -1:
385 385
             any_entry_added = True
386 386
             yaml_item['comment_times'].append( timecode_now )
387 387
             yaml_item['comments'].append( num_comments )
388
+            yaml_item['fb_shares'].append( num_shares )
388 389
 
389 390
     if len( yaml_item['retweets'] ) < 8:
390 391
         num_retweets = Get_num_retweets( yaml_item )
... ...
@@ -522,14 +523,17 @@ def Get_num_retweets_unused( yaml_item ):
522 523
 def Get_num_retweets( yaml_item ):
523 524
     url_string = yaml_item['link']
524 525
     try:
525
-        f = urllib2.urlopen( 'http://urls.api.twitter.com/1/urls/count.json?url=%s&callback=twttr.receiveCount' % urllib.quote_plus( url_string ) )
526
+        f = urllib2.urlopen( 'http://urls.api.twitter.com/1/urls/count.json?url=%s&callback=twttr.receiveCount' % \
527
+                             urllib.quote_plus( url_string ) )
526 528
         data = f.read()
527 529
         f.close()
528
-    except urllib2.URLError, e:
530
+    except (urllib2.URLError, httplib.BadStatusLine), e:
529 531
         if hasattr( e, 'reason' ):
530 532
             print "Get_num_retweets got an error:", e.reason
531 533
         elif hasattr( e, 'code' ):
532 534
             print "Get_num_retweets got an error. Code:", e.code
535
+        else:
536
+            print "Get_num_retweets got an error:", str( e )
533 537
         return -1
534 538
     tag_to_find = '"count":'
535 539
     offset = data.find( tag_to_find )
... ...
@@ -549,6 +553,35 @@ def Get_num_retweets( yaml_item ):
549 553
                           )
550 554
     return -1
551 555
 
556
+def Get_fb_stats( url_string ):
557
+    """ Returns shares and comments """
558
+    shares = -1
559
+    comments = -1
560
+    try:
561
+        f = urllib2.urlopen( 'https://graph.facebook.com/?ids=' + url_string )
562
+        data = f.read()
563
+        f.close()
564
+    except (urllib2.URLError, httplib.BadStatusLine), e:
565
+        if hasattr( e, 'reason' ): # URLError
566
+            print "Get_fb_stats got an error:", e.reason, url_string
567
+        elif hasattr( e, 'code' ): #URLError
568
+            print "Get_fb_stats got an error. Code:", e.code, url_string
569
+        else:
570
+            print "Get_fb_stats got an error:", str( e )
571
+        return -1, -1
572
+    if len( data ) > len( url_string ):
573
+        d = json.loads( data ).values()[0]
574
+        if d.has_key( 'shares' ):
575
+            shares = d['shares']
576
+        else:
577
+            shares = 0
578
+        if d.has_key( 'comments' ):
579
+            comments = d['comments']
580
+        else:
581
+            comments = 0
582
+    else:
583
+        print "Get_fb_stats got too little data for ",  url_string
584
+    return shares, comments
552 585
 
553 586
 
554 587
 def Save_image( url_string, file_path ):
... ...
@@ -556,12 +589,15 @@ def Save_image( url_string, file_path ):
556 589
         f = urllib2.urlopen( url_string )
557 590
         data = f.read()
558 591
         f.close()
559
-    except urllib2.URLError, e:
560
-        if hasattr( e, 'reason' ):
561
-            print "Save_image got an error:", e.reason
562
-        elif hasattr( e, 'code' ):
563
-            print "Save_image got an error. Code:", e.code
592
+    except (urllib2.URLError, httplib.BadStatusLine), e:
593
+        if hasattr( e, 'reason' ): # URLError
594
+            print "Save_image got an error attempting to create", file_path, "Reason:", e.reason
595
+        elif hasattr( e, 'code' ): # URLError
596
+            print "Save_image got an error attempting to create", file_path, "Code:", e.code
597
+        else:
598
+            print "Save_image got an error from urlopen", e
564 599
         return url_string
600
+
565 601
     if len( data ) > 50:
566 602
         f = open( file_path, 'wb' )
567 603
         f.write( data )
... ...
@@ -575,7 +611,7 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
575 611
     index_fullpath = os.path.join( localdir, 'index.html' )
576 612
     cache_path = os.path.join( localdir, 'cache' )
577 613
 
578
-    files_to_delete = glob.glob( cache_path + '*.png' )
614
+    files_to_delete = glob.glob( os.path.join( cache_path, '*.png' ) )
579 615
 
580 616
     f = file( new_index_fullpath, 'w' )
581 617
     f.write( html_head % ( even_background, odd_background ) )
... ...
@@ -588,41 +624,39 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
588 624
 
589 625
 
590 626
     f.write( '<div align="center">\n<table>\n' )
591
-    image_index = 0
592
-    for i in yaml_items[:40]:
627
+    for image_index, image in enumerate(yaml_items[:40]):
593 628
         tag_hit = False
594
-        if i['author'].lower() in authors_to_post:
629
+        if image['author'].lower() in authors_to_post:
595 630
             tag_hit = True
596
-        elif len( set([j.lower() for j in i['tags']]) & tags_to_post ) > 0:
631
+        elif len( set([j.lower() for j in image['tags']]) & tags_to_post ) > 0:
597 632
             tag_hit = True
598
-        chart_url = make_chart_url( i['orig_posted'],
599
-                                    i['comment_times'],
600
-                                    i['comments'],
601
-                                    i['retweet_times'],
602
-                                    i['retweets'],
603
-                                    i['qualified'],
633
+        chart_url = make_chart_url( image['orig_posted'],
634
+                                    image['comment_times'],
635
+                                    image['comments'],
636
+                                    image['retweet_times'],
637
+                                    image['retweets'],
638
+                                    image['qualified'],
604 639
                                     image_index % 2,
605 640
                                     tag_hit
606 641
                                   )
607
-#        if i['title'].startswith( 'Too ' ):
608
-#            print i['title'], i['qualified'], i['retweet_times']
642
+#        if image['title'].startswith( 'Too ' ):
643
+#            print image['title'], image['qualified'], image['retweet_times']
609 644
 #            print chart_url
610 645
         image_url = Save_image( chart_url, os.path.join( cache_path, '%d_%d.png' % ( cur_time, image_index ) ) )
611 646
         f.write( '<tr valign="center" class="%s">\n  <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \
612 647
                  ( image_index % 2 and "even" or "odd",
613
-                   i['link'],
614
-                   i['title'].encode( 'ascii', 'xmlcharrefreplace' ),
615
-                   i['author'].encode( 'ascii', 'xmlcharrefreplace' ),
648
+                   image['link'],
649
+                   image['title'].encode( 'ascii', 'xmlcharrefreplace' ),
650
+                   image['author'].encode( 'ascii', 'xmlcharrefreplace' ),
616 651
                  )
617 652
                )
618
-        f.write( '  <td>%s<td>\n' % ( i['qualified'] != -1 and '<img src="star_30.png" width="30" height="29" />' or '' ) )
653
+        f.write( '  <td>%s<td>\n' % ( image['qualified'] != -1 and '<img src="star_30.png" width="30" height="29" />' or '' ) )
619 654
         f.write( '  <td><img src="%s" width="%d" height="%d" border="0" /></td></tr>\n' % \
620 655
                  ( image_url,
621 656
                    img_width,
622 657
                    img_height
623 658
                  )
624 659
                )
625
-        image_index += 1
626 660
     f.write( html_footer )
627 661
     f.close()
628 662
     if os.path.exists( index_fullpath ):
... ...
@@ -634,13 +668,15 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
634 668
 def Make_feed_file( yaml_items ):
635 669
     f = open( os.path.join( localdir, 'rss_feed.xml' ), 'wb' )
636 670
     f.write( "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n<rss version=\"2.0\">\n<channel>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>" )
637
-    f.write( "<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>" % ( time.strftime( "%a, %d %b %Y %H:%M:%S +0000", time.gmtime() ) ) )
671
+    f.write( "<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % ( time.strftime( "%a, %d %b %Y %H:%M:%S +0000", time.gmtime() ) ) )
638 672
     count = 0
639 673
     for item in yaml_items:
640 674
         now = time.strftime( "%a, %d %b %Y %H:%M:%S +0000", time.gmtime( item['orig_posted'] ) )
641 675
         if item['qualified'] != -1:
676
+            escaped_title = cgi.escape( item['title'] ).encode( 'ascii', 'xmlcharrefreplace' )
677
+            escaped_author = cgi.escape( item['author'] ).encode( 'ascii', 'xmlcharrefreplace' )
642 678
             f.write( "<item><title>%s</title><pubDate>%s</pubDate><link>%s</link><guid isPermaLink=\"false\">%s</guid><description><![CDATA[By: %s]]></description></item>\n" % \
643
-                     ( item['title'].encode( 'ascii', 'xmlcharrefreplace' ), now, item['link'], item['link'], item['author'].encode( 'ascii', 'xmlcharrefreplace' ) ) )
679
+                     ( escaped_title, now, item['link'], item['link'], escaped_author ) )
644 680
             count += 1
645 681
             if count > 14:
646 682
                 break
... ...
@@ -668,10 +704,9 @@ if __name__=='__main__':
668 704
         #     'qualified'           : -1
669 705
         #     'comment_times'       : [ 1282197199, 1282197407 ]
670 706
         #     'comments'            : [ 0, 15 ]
707
+        #     'fb_shares'           : [ 0, 3 ]
671 708
         #     'slash_comment_times' : [ 1282197199, 1282197407 ]
672 709
         #     'slash_comments'      : [ 0, 5 ]
673
-        #     'slash_comment_times' : [ 1282197199, 1282197407 ]
674
-        #     'slash_comments'      : [ 0, 3 ]
675 710
         #     'retweet_times'       : [ 1282197199, 1282197407 ]
676 711
         #     'retweets'            : [ 0, 43 ]
677 712
         #    },
... ...
@@ -682,6 +717,11 @@ if __name__=='__main__':
682 717
         if os.path.exists( yaml_fullpath ):
683 718
             f = file( yaml_fullpath, 'rb' )
684 719
             items = yaml.load( f )
720
+
721
+            # Do any dictionary item updating that might be necessary
722
+#            for item in items:
723
+#                if not item.has_key( 'fb_shares' ):
724
+#                    item['fb_shares'] = []
685 725
             f.close()
686 726
         else:
687 727
             print "could not open", yaml_fullpath
... ...
@@ -754,7 +794,7 @@ if __name__=='__main__':
754 794
         traceback.print_exc( file = sys.stdout )
755 795
         try:
756 796
             sendEmail( 'Exception thrown in techcrunch.py',
757
-                       exceptional_text,
797
+                       exceptional_text + "\n" + traceback.format_exc(),
758 798
                        ( 'david.blume@gmail.com', ) )
759 799
         except Exception, e:
760 800
             print "Could not send email to notify you of the exception. :("
... ...
@@ -777,7 +817,7 @@ if __name__=='__main__':
777 817
     lines = lines[:168] # Just keep the past week's worth
778 818
     # status = len( message.strip() ) and message.strip().replace( '\n', ' - ' ) or "OK"
779 819
     status = len( message.strip() ) and '\n                       '.join( message.splitlines() ) or "OK"
780
-    lines.insert( 0, "%s %3.0fs %s\n" % ( time.strftime('%H:%M, %Y-%m-%d', time.localtime()), time.time() - start_time, status ))
820
+    lines.insert( 0, "%s %3.0fs %s\n" % ( time.strftime('%Y-%m-%d, %H:%M', time.localtime()), time.time() - start_time, status ))
781 821
     f = open( os.path.join( localdir,'stats.txt' ), 'w' )
782 822
     f.writelines( lines )
783 823
     f.close()
784 824