2011-02-04: Algorithm changes (tags and author checked), new chart drawing, spaces used instead of tabs.
David Blume

David Blume committed on 2018-01-20 20:16:24
Showing 1 changed file, with 141 additions and 19 deletions.

... ...
@@ -10,6 +10,11 @@
10 10
 # TODO:
11 11
 # 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry &raquo;</a>'
12 12
 #   link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/"
13
+#
14
+# This file was converted from tabs to spaces with the vim command %retab
15
+#
16
+# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; mv techcrunch.yaml_back techcrunch.yaml
17
+#
13 18
 
14 19
 import feedparser
15 20
 import yaml
... ...
@@ -32,9 +37,14 @@ import bisect
32 37
 import analysis
33 38
 import simplejson as json
34 39
 import cookielib
40
+import xml
41
+import texttime
42
+from datetime import timedelta
35 43
 
36 44
 debug = True
37 45
 any_entry_added = False
46
+tags_to_post = set([ 'apple', 'google'])
47
+authors_to_post = [ 'michael arrington', ]
38 48
 
39 49
 localdir = ''
40 50
 
... ...
@@ -78,6 +88,7 @@ img_height = 50
78 88
 series_1_color = "0000FF"
79 89
 series_2_color = "00AA00"
80 90
 threshold_color = "FF8C00"
91
+tag_color = "F01000"
81 92
 
82 93
 even_background = "F8F8F8"
83 94
 #even_background = "FFFFFF"
... ...
@@ -100,7 +111,21 @@ def sendEmail( subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@
100 111
                    ( fromaddr, ", ".join( toaddrs ), subject, message ) )
101 112
     smtp.quit()
102 113
 
103
-def make_chart_url( time_posted, comment_times, comment_values, retweet_times, retweet_values, met_threshold_pt, bg_color ):
114
+def index_id(a_list, elem):
115
+    try:
116
+        return (index for index, item in enumerate( a_list ) if item == elem).next()
117
+    except:
118
+        return -1
119
+
120
+def index_id_simple(a_list, elem):
121
+    index = 0
122
+    for item in a_list:
123
+        if item == elem:
124
+            return index
125
+        index += 1
126
+    return -1
127
+
128
+def make_chart_url( time_posted, comment_times, comment_values, retweet_times, retweet_values, met_threshold_pt, bg_color, tag_hit ):
104 129
 #    comment_times, comment_values = zip( *comments )
105 130
 #    retweet_times, retweet_values = zip( *retweets )
106 131
 
... ...
@@ -149,8 +174,16 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
149 174
                                           ','.join( [ str( n ) for n in comment_values ] ),
150 175
                                           ','.join( [ str( n ) for n in retweet_times ] ),
151 176
                                           ','.join( [ str( n ) for n in retweet_values ] ) )
177
+    # TODO: Consider watermark levels, like:
178
+    # chm=h,B0B0B0,1,0.3,1|r,E0E0E0,0,0,0.5
152 179
     if met_threshold_pt != -1:
153
-        chart_url += "&chm=o,%s,1,%d,10" % ( threshold_color, met_threshold_pt )
180
+        if tag_hit:
181
+            dot_color = tag_color
182
+            dot_shape = 'd'
183
+        else:
184
+            dot_color = threshold_color
185
+            dot_shape = 'o'
186
+        chart_url += "&chm=%s,%s,1,%d,10" % ( dot_shape, dot_color, met_threshold_pt )
154 187
     chart_url += "&chxt=y,r&chxl=0:|%d|%d|1:|%d|%d&chds=%d,%d,%d,%d,%d,%d,%d,%d" % \
155 188
                  ( min_comment_value, max_comment_value, min_retweet_value, max_retweet_value,
156 189
                    0, max( 7, max_comment_time ),
... ...
@@ -189,6 +222,11 @@ def process_feed( yaml_items ):
189 222
                     print "the feed returned feed.status %d." % ( feed.status, )
190 223
             else:
191 224
                 # Save off this
225
+                if hasattr( feed, 'bozo_exception' ) and type( feed.bozo_exception ) == xml.sax._exceptions.SAXParseException:
226
+                    print "Didn't pickle because of bozo_exception %s." % ( str( feed.bozo_exception ) )
227
+                elif hasattr( feed, 'bozo_exception' ) and isinstance( feed.bozo_exception, xml.sax._exceptions.SAXParseException ):
228
+                    print "Didn't pickle because of bozo_exception instance %s." % ( str( feed.bozo_exception ) )
229
+                else:
192 230
                     f = file( os.path.join( localdir, 'techcrunch_feed.pickle' ), 'wb' )
193 231
                     try:
194 232
                         pickle.dump( feed, f )
... ...
@@ -196,7 +234,7 @@ def process_feed( yaml_items ):
196 234
                         print "An error occurred while pickling the feed: %s." % \
197 235
                               ( # str(e.__class__),
198 236
                                 str(e) )
199
-                    traceback.print_exc( file = sys.stdout )
237
+                        traceback.print_exc( 3, file = sys.stdout )
200 238
                         feed_is_modified = False
201 239
                     f.close()
202 240
 
... ...
@@ -265,7 +303,10 @@ def process_item( feed_item, yaml_items ):
265 303
     # Look for i.feedburner_origlink in yaml_items
266 304
     yaml_item = None
267 305
     for i in yaml_items:
268
-        if feed_item.feedburner_origlink == i['link']:
306
+        if hasattr( feed_item, 'feedburner_origlink' ) and feed_item.feedburner_origlink == i['link']:
307
+            yaml_item = i
308
+            break
309
+        elif feed_item.link == i['link']:
269 310
             yaml_item = i
270 311
             break
271 312
     if not yaml_item:
... ...
@@ -320,7 +361,7 @@ def process_yaml_item( yaml_item, cookie ):
320 361
             yaml_item['comments'].append( num_comments )
321 362
 
322 363
     if len( yaml_item['retweets'] ) < 8:
323
-        num_retweets = Get_num_retweets( yaml_item['link'] )
364
+        num_retweets = Get_num_retweets( yaml_item )
324 365
         if num_retweets != -1:
325 366
             any_entry_added = True
326 367
             yaml_item['retweet_times'].append( timecode_now )
... ...
@@ -375,7 +416,10 @@ def Get_disqus_id( yaml_item ):
375 416
         if hasattr( e, 'reason' ):
376 417
             print "Get_disqus_id got an error:", e.reason
377 418
         elif hasattr( e, 'code' ):
378
-            print "Get_disqus_id got an error. Code:", e.code
419
+            print "Get_disqus_id got an error. Code:", e.code, yaml_item['link']
420
+        return url_get_data
421
+    except httplib.BadStatusLine, e:
422
+        print "Get_discus_id got a BadStatusLine:", str( e )
379 423
         return url_get_data
380 424
 
381 425
     tag_to_find = '<a href="#comments" rel="nofollow"><span class="dsq-postid" rel="'
... ...
@@ -407,6 +451,7 @@ def Get_num_disqus_comments( url_string, disqus_id, cookie ):
407 451
             print "Get_num_disqus_comments got an error getting the count:", e.reason
408 452
         elif hasattr( e, 'code' ):
409 453
             print "Get_num_disqus_comments got an error getting the count. Code:", e.code
454
+        return -1
410 455
     disqus_tag_to_find = 'displayCount('
411 456
     disqus_offset = disqus_data.find( disqus_tag_to_find )
412 457
     if disqus_offset != -1:
... ...
@@ -418,7 +463,8 @@ def Get_num_disqus_comments( url_string, disqus_id, cookie ):
418 463
         print "Get_num_disqus_comments found no disqus tag for", url_string
419 464
     return -1
420 465
 
421
-def Get_num_retweets( url_string ):
466
+def Get_num_retweets_unused( yaml_item ):
467
+    url_string = yaml_item['link']
422 468
     try:
423 469
         f = urllib2.urlopen( 'http://api.tweetmeme.com/button.js?url=%s' % ( url_string ) )
424 470
         data = f.read()
... ...
@@ -435,9 +481,50 @@ def Get_num_retweets( url_string ):
435 481
         start_pos = offset + len( tag_to_find )
436 482
         end_pos = data.find( '<', start_pos )
437 483
         if end_pos != -1:
484
+            try:
438 485
                 return int( data[ start_pos:end_pos ] )
486
+            except ValueError, e:
487
+                if data[ start_pos:end_pos ] != '?':
488
+                    print "Get_num_retweets expected a number but got \"%s\"" % ( data[ start_pos:end_pos ], )
489
+                else:
490
+                    print "Get_num_retweets got '?' for \"%s...\", posted %s ago." % \
491
+                          ( yaml_item['title'][:20],
492
+                            texttime.stringify( timedelta( seconds = time.time() - yaml_item['orig_posted'] ) )
493
+                          )
439 494
     return -1
440 495
 
496
+def Get_num_retweets( yaml_item ):
497
+    url_string = yaml_item['link']
498
+    try:
499
+        f = urllib2.urlopen( 'http://urls.api.twitter.com/1/urls/count.json?url=%s&callback=twttr.receiveCount' % urllib.quote_plus( url_string ) )
500
+        data = f.read()
501
+        f.close()
502
+    except urllib2.URLError, e:
503
+        if hasattr( e, 'reason' ):
504
+            print "Get_num_retweets got an error:", e.reason
505
+        elif hasattr( e, 'code' ):
506
+            print "Get_num_retweets got an error. Code:", e.code
507
+        return -1
508
+    tag_to_find = '"count":'
509
+    offset = data.find( tag_to_find )
510
+    if offset != -1:
511
+        start_pos = offset + len( tag_to_find )
512
+        end_pos = data.find( ',', start_pos )
513
+        if end_pos != -1:
514
+            try:
515
+                return int( data[ start_pos:end_pos ] )
516
+            except ValueError, e:
517
+                if data[ start_pos:end_pos ] != '?':
518
+                    print "Get_num_retweets expected a number but got \"%s\"" % ( data[ start_pos:end_pos ], )
519
+                else:
520
+                    print "Get_num_retweets got '?' for \"%s...\", posted %s ago." % \
521
+                          ( yaml_item['title'][:20],
522
+                            texttime.stringify( timedelta( seconds = time.time() - yaml_item['orig_posted'] ) )
523
+                          )
524
+    return -1
525
+
526
+
527
+
441 528
 def Save_image( url_string, file_path ):
442 529
     try:
443 530
         f = urllib2.urlopen( url_string )
... ...
@@ -456,7 +543,7 @@ def Save_image( url_string, file_path ):
456 543
         return 'cache/' + os.path.basename( file_path )
457 544
     return url_string
458 545
 
459
-def Make_index_html( yaml_items, stats ):
546
+def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
460 547
     cur_time = int( time.time() )
461 548
     new_index_fullpath = os.path.join( localdir, 'index.html_new' )
462 549
     index_fullpath = os.path.join( localdir, 'index.html' )
... ...
@@ -470,14 +557,25 @@ def Make_index_html( yaml_items, stats ):
470 557
     f.write( html_head % ( even_background, odd_background ) )
471 558
 #    f.write( '<div align="center">\n<table cellpadding="4">' )
472 559
 
473
-    f.write( '<div align="center">\n<table class="legend">\n<tr><th>0:30</th><th>1:00</th><th>1:30</th><th>2:00</th><th>2:30</th><th>3:00</th><th>3:30</th><th>4:00</th></tr><tr>' )
474
-    for median, mean, std_dev in stats:
475
-    f.write( '<td>med=%1.1f &#956;=%1.1f &#963;=%1.1f&nbsp;</td> ' % ( median, mean, std_dev ) )
476
-    f.write( '</tr>\n</table></div>\n<br />\n' )
560
+#    f.write( '<div align="center">\n<table class="legend">\n<tr><th>0:30</th><th>1:00</th><th>1:30</th><th>2:00</th><th>2:30</th><th>3:00</th><th>3:30</th><th>4:00</th></tr><tr>' )
561
+#    for median, mean, std_dev in weekday_stats:
562
+#        f.write( '<td>med=%1.1f &#956;=%1.1f &#963;=%1.1f&nbsp;</td> ' % ( median, mean, std_dev ) )
563
+#    f.write( '</tr>\n</table></div>\n<br />\n' )
564
+
565
+    f.write( '<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n' )
566
+    f.write( '<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % ( weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] ) )
567
+    f.write( '<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % ( weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2] ) )
568
+    f.write( '</table></div>\n<br />\n' )
569
+
477 570
 
478 571
     f.write( '<div align="center">\n<table>\n' )
479 572
     image_index = 0
480 573
     for i in yaml_items[:40]:
574
+        tag_hit = False
575
+        if i['author'].lower() in authors_to_post:
576
+            tag_hit = True
577
+        elif len( set([j.lower() for j in i['tags']]) & tags_to_post ) > 0:
578
+            tag_hit = True
481 579
         chart_url = make_chart_url( i['orig_posted'],
482 580
                                     i['comment_times'],
483 581
                                     i['comments'],
... ...
@@ -485,7 +583,11 @@ def Make_index_html( yaml_items, stats ):
485 583
                                     i['retweets'],
486 584
                                     i['qualified'],
487 585
                                     image_index % 2 and even_background or odd_background,
586
+                                    tag_hit
488 587
                                   )
588
+#        if i['title'].startswith( 'Verizon To' ):
589
+#            print i['title'], i['qualified'], i['retweet_times']
590
+#            print chart_url
489 591
         image_url = Save_image( chart_url, os.path.join( cache_path, '%d_%d.png' % ( cur_time, image_index ) ) )
490 592
         f.write( '<tr valign="center" class="%s">\n  <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \
491 593
                  ( image_index % 2 and "even" or "odd",
... ...
@@ -572,14 +674,21 @@ if __name__=='__main__':
572 674
         #
573 675
         # If any work was done, then write files.
574 676
         #
575
-        if True or any_entry_added:
677
+        if any_entry_added:
576 678
 
577
-            stats = analysis.Process_retweets_for_feed( items )
679
+            weekend_stats, weekday_stats = analysis.Process_retweets_for_feed( items )
578 680
 
579 681
             # We'll only look at the stats for the time 1:00 to 1:30 after posting.
580
-        median, mean, sigma = stats[2]
581
-        threshold = median + sigma
682
+            weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
683
+            weekend_threshold = weekend_mean + weekend_sigma
684
+            weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
685
+            weekday_threshold = weekday_mean + weekday_sigma
582 686
             for item in items:
687
+                wday = time.localtime( item['orig_posted'] ).tm_wday
688
+                if wday == 5 or wday == 6:
689
+                    threshold = weekend_threshold
690
+                else:
691
+                    threshold = weekday_threshold
583 692
                 if item['qualified'] == -1:
584 693
                     for i in range( len( item['retweet_times'] ) ):
585 694
                         r_time = item['retweet_times'][i]
... ...
@@ -589,12 +698,24 @@ if __name__=='__main__':
589 698
                             if r_time - item['orig_posted'] >= 3600:
590 699
                                 break
591 700
 
701
+            # Automatically add those items whose authors and tags I like
702
+            for item in items:
703
+                if item['qualified'] == -1 and len( item['retweet_times'] ) > 0:
704
+                    if item['author'].lower() in authors_to_post:
705
+                        item['qualified'] = 0
706
+                    elif len( set([j.lower() for j in item['tags']]) & tags_to_post ) > 0:
707
+                        item['qualified'] = 0
708
+
592 709
             #
593 710
             # Write out the updated yaml file.
594 711
             #
595
-            f = file( yaml_fullpath, 'wb' )
712
+
713
+            # For the one file we really use, write to a file on the side, then move it.
714
+            yaml_newfile_fullpath = os.path.join( localdir, 'techcrunch_temp_writable.yaml' )
715
+            f = file( yaml_newfile_fullpath, 'wb' )
596 716
             yaml.dump( items, f, width=120 )
597 717
             f.close()
718
+            os.rename( yaml_newfile_fullpath, yaml_fullpath )
598 719
             f = file( os.path.join( localdir, 'techcrunch_text.yaml' ), 'w' )
599 720
             yaml.dump( items, f, width=120 )
600 721
             f.close()
... ...
@@ -604,7 +725,7 @@ if __name__=='__main__':
604 725
 
605 726
             Make_feed_file( items )
606 727
 
607
-            Make_index_html( items, stats )
728
+            Make_index_html( items, weekend_stats, weekday_stats )
608 729
         else:
609 730
             print "No entries were added this time."
610 731
 
... ...
@@ -635,7 +756,8 @@ if __name__=='__main__':
635 756
     else:
636 757
         lines = []
637 758
     lines = lines[:168] # Just keep the past week's worth
638
-    status = len( message.strip() ) and message.strip().replace( '\n', ' - ' ) or "OK"
759
+    # status = len( message.strip() ) and message.strip().replace( '\n', ' - ' ) or "OK"
760
+    status = len( message.strip() ) and '\n                       '.join( message.splitlines() ) or "OK"
639 761
     lines.insert( 0, "%s %3.0fs %s\n" % ( time.strftime('%H:%M, %Y-%m-%d', time.localtime()), time.time() - start_time, status ))
640 762
     f = open( os.path.join( localdir,'stats.txt' ), 'w' )
641 763
     f.writelines( lines )
642 764