2011-02-04: Update to the chart drawing algorithm. (2b4c980) - techcrunch.git

techcrunch.py

@@ -39,6 +39,7 @@ import simplejson as json
 import cookielib
 import xml
 import texttime
+import operator
 from datetime import timedelta
 
 debug = True
@@ -91,9 +92,12 @@ threshold_color = "FF8C00"
 tag_color = "F01000"
 
 even_background = "F8F8F8"
-#even_background = "FFFFFF"
 odd_background = "E8E8E8"
 
+even_watermark = "E0E0FF"
+odd_watermark = "D0D0F0"
+
+
 def asciiize( s ):
     try:
         return s.encode( 'ascii' )
@@ -111,9 +115,9 @@ def sendEmail( subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@
                    ( fromaddr, ", ".join( toaddrs ), subject, message ) )
     smtp.quit()
 
-def index_id(a_list, elem):
+def index_id( a_list, op, elem ):
     try:
-        return (index for index, item in enumerate( a_list ) if item == elem).next()
+        return (index for index, item in enumerate( a_list ) if op( item, elem ) ).next()
     except:
         return -1
 
@@ -125,7 +129,8 @@ def index_id_simple(a_list, elem):
         index += 1
     return -1
 
-def make_chart_url( time_posted, comment_times, comment_values, retweet_times, retweet_values, met_threshold_pt, bg_color, tag_hit ):
+def make_chart_url( time_posted, comment_times, comment_values, retweet_times,
+                    retweet_values, threshold_value, is_odd_row, tag_hit ):
 #    comment_times, comment_values = zip( *comments )
 #    retweet_times, retweet_values = zip( *retweets )
 
@@ -154,6 +159,22 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
     min_retweet_value = min( retweet_values )
     max_retweet_value = max( retweet_values )
 
+    met_threshold_pt = -1
+    if threshold_value != -1:
+        met_threshold_pt = index_id( retweet_values, operator.ge, threshold_value )
+        if met_threshold_pt == -1 or tag_hit:
+            # This can happen if threshold_value was set to a number
+            # because the author or a tag was matched, but the article
+            # was unpopular. We choose to put a marker at point index 0.
+            met_threshold_pt = 0
+
+    if is_odd_row != 0:
+        bg_color = even_background
+        watermark_color = even_watermark
+    else:
+        bg_color = odd_background
+        watermark_color = odd_watermark
+
     if len( comment_values ) < 8 and len( comment_values ) > 1:
         # max_comment_value *= 2
         pass
@@ -176,6 +197,11 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
                                           ','.join( [ str( n ) for n in retweet_values ] ) )
     # TODO: Consider watermark levels, like:
     # chm=h,B0B0B0,1,0.3,1|r,E0E0E0,0,0,0.5
+    if max_retweet_value > 0:
+        threshold_percent = max( 0, min( (float(threshold_value) / max_retweet_value) - 0.01, 1.0  ) )
+    else:
+        threshold_percent = 1.0
+    chart_url += "&chm=r,%s,0,0,%1.3f" % ( watermark_color, threshold_percent )
     if met_threshold_pt != -1:
         if tag_hit:
             dot_color = tag_color
@@ -183,7 +209,7 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
         else:
             dot_color = threshold_color
             dot_shape = 'o'
-        chart_url += "&chm=%s,%s,1,%d,10" % ( dot_shape, dot_color, met_threshold_pt )
+        chart_url += "|%s,%s,1,%d,10" % ( dot_shape, dot_color, met_threshold_pt )
     chart_url += "&chxt=y,r&chxl=0:|%d|%d|1:|%d|%d&chds=%d,%d,%d,%d,%d,%d,%d,%d" % \
                  ( min_comment_value, max_comment_value, min_retweet_value, max_retweet_value,
                    0, max( 7, max_comment_time ),
@@ -550,17 +576,10 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
     cache_path = os.path.join( localdir, 'cache' )
 
     files_to_delete = glob.glob( cache_path + '*.png' )
-#    shutil.rmtree( cache_path )
-#    os.mkdir( cache_path )
 
     f = file( new_index_fullpath, 'w' )
     f.write( html_head % ( even_background, odd_background ) )
-#    f.write( '<div align="center">\n<table cellpadding="4">' )
 
-#    f.write( '<div align="center">\n<table class="legend">\n<tr><th>0:30</th><th>1:00</th><th>1:30</th><th>2:00</th><th>2:30</th><th>3:00</th><th>3:30</th><th>4:00</th></tr><tr>' )
-#    for median, mean, std_dev in weekday_stats:
-#        f.write( '<td>med=%1.1f &#956;=%1.1f &#963;=%1.1f&nbsp;</td> ' % ( median, mean, std_dev ) )
-#    f.write( '</tr>\n</table></div>\n<br />\n' )
 
     f.write( '<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n' )
     f.write( '<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % ( weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] ) )
@@ -582,10 +601,10 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
                                     i['retweet_times'],
                                     i['retweets'],
                                     i['qualified'],
-                                    image_index % 2 and even_background or odd_background,
+                                    image_index % 2,
                                     tag_hit
                                   )
-#        if i['title'].startswith( 'Verizon To' ):
+#        if i['title'].startswith( 'Too ' ):
 #            print i['title'], i['qualified'], i['retweet_times']
 #            print chart_url
         image_url = Save_image( chart_url, os.path.join( cache_path, '%d_%d.png' % ( cur_time, image_index ) ) )
@@ -694,7 +713,7 @@ if __name__=='__main__':
                         r_time = item['retweet_times'][i]
                         if r_time - item['orig_posted'] < 5400:
                             if item['retweets'][i] >= threshold:
-                                item['qualified'] = i
+                                item['qualified'] = threshold
                             if r_time - item['orig_posted'] >= 3600:
                                 break
 
@@ -702,9 +721,9 @@ if __name__=='__main__':
             for item in items:
                 if item['qualified'] == -1 and len( item['retweet_times'] ) > 0:
                     if item['author'].lower() in authors_to_post:
-                        item['qualified'] = 0
+                        item['qualified'] = threshold
                     elif len( set([j.lower() for j in item['tags']]) & tags_to_post ) > 0:
-                        item['qualified'] = 0
+                        item['qualified'] = threshold
 
             #
             # Write out the updated yaml file.


...	...	@@ -39,6 +39,7 @@ import simplejson as json
39	39	import cookielib
40	40	import xml
41	41	import texttime
	42	+import operator
42	43	from datetime import timedelta
43	44
44	45	debug = True
...	...	@@ -91,9 +92,12 @@ threshold_color = "FF8C00"
91	92	tag_color = "F01000"
92	93
93	94	even_background = "F8F8F8"
94		-#even_background = "FFFFFF"
95	95	odd_background = "E8E8E8"
96	96
	97	+even_watermark = "E0E0FF"
	98	+odd_watermark = "D0D0F0"
	99	+
	100	+
97	101	def asciiize( s ):
98	102	try:
99	103	return s.encode( 'ascii' )
...	...	@@ -111,9 +115,9 @@ def sendEmail( subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@
111	115	( fromaddr, ", ".join( toaddrs ), subject, message ) )
112	116	smtp.quit()
113	117
114		-def index_id(a_list, elem):
	118	+def index_id( a_list, op, elem ):
115	119	try:
116		- return (index for index, item in enumerate( a_list ) if item == elem).next()
	120	+ return (index for index, item in enumerate( a_list ) if op( item, elem ) ).next()
117	121	except:
118	122	return -1
119	123
...	...	@@ -125,7 +129,8 @@ def index_id_simple(a_list, elem):
125	129	index += 1
126	130	return -1
127	131
128		-def make_chart_url( time_posted, comment_times, comment_values, retweet_times, retweet_values, met_threshold_pt, bg_color, tag_hit ):
	132	+def make_chart_url( time_posted, comment_times, comment_values, retweet_times,
	133	+ retweet_values, threshold_value, is_odd_row, tag_hit ):
129	134	# comment_times, comment_values = zip( *comments )
130	135	# retweet_times, retweet_values = zip( *retweets )
131	136
...	...	@@ -154,6 +159,22 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
154	159	min_retweet_value = min( retweet_values )
155	160	max_retweet_value = max( retweet_values )
156	161
	162	+ met_threshold_pt = -1
	163	+ if threshold_value != -1:
	164	+ met_threshold_pt = index_id( retweet_values, operator.ge, threshold_value )
	165	+ if met_threshold_pt == -1 or tag_hit:
	166	+ # This can happen if threshold_value was set to a number
	167	+ # because the author or a tag was matched, but the article
	168	+ # was unpopular. We choose to put a marker at point index 0.
	169	+ met_threshold_pt = 0
	170	+
	171	+ if is_odd_row != 0:
	172	+ bg_color = even_background
	173	+ watermark_color = even_watermark
	174	+ else:
	175	+ bg_color = odd_background
	176	+ watermark_color = odd_watermark
	177	+
157	178	if len( comment_values ) < 8 and len( comment_values ) > 1:
158	179	# max_comment_value *= 2
159	180	pass
...	...	@@ -176,6 +197,11 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
176	197	','.join( [ str( n ) for n in retweet_values ] ) )
177	198	# TODO: Consider watermark levels, like:
177	198	# chm=h,B0B0B0,1,0.3,1|r,E0E0E0,0,0,0.5
	200	+ if max_retweet_value > 0:
	201	+ threshold_percent = max( 0, min( (float(threshold_value) / max_retweet_value) - 0.01, 1.0 ) )
	202	+ else:
	203	+ threshold_percent = 1.0
	204	+ chart_url += "&chm=r,%s,0,0,%1.3f" % ( watermark_color, threshold_percent )
179	205	if met_threshold_pt != -1:
180	206	if tag_hit:
181	207	dot_color = tag_color
...	...	@@ -183,7 +209,7 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, r
183	209	else:
184	210	dot_color = threshold_color
185	211	dot_shape = 'o'
186		- chart_url += "&chm=%s,%s,1,%d,10" % ( dot_shape, dot_color, met_threshold_pt )
	212	+ chart_url += "|%s,%s,1,%d,10" % ( dot_shape, dot_color, met_threshold_pt )
187	213	chart_url += "&chxt=y,r&chxl=0:|%d|%d|1:|%d|%d&chds=%d,%d,%d,%d,%d,%d,%d,%d" % \
188	214	( min_comment_value, max_comment_value, min_retweet_value, max_retweet_value,
189	215	0, max( 7, max_comment_time ),
...	...	@@ -550,17 +576,10 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
550	576	cache_path = os.path.join( localdir, 'cache' )
551	577
552	578	files_to_delete = glob.glob( cache_path + '*.png' )
553		-# shutil.rmtree( cache_path )
554		-# os.mkdir( cache_path )
555	579
556	580	f = file( new_index_fullpath, 'w' )
557	581	f.write( html_head % ( even_background, odd_background ) )
558		-# f.write( '<div align="center">\n<table cellpadding="4">' )
559	582
560		-# f.write( '<div align="center">\n<table class="legend">\n<tr><th>0:30</th><th>1:00</th><th>1:30</th><th>2:00</th><th>2:30</th><th>3:00</th><th>3:30</th><th>4:00</th></tr><tr>' )
561		-# for median, mean, std_dev in weekday_stats:
562		-# f.write( '<td>med=%1.1f &#956;=%1.1f &#963;=%1.1f&nbsp;</td> ' % ( median, mean, std_dev ) )
563		-# f.write( '</tr>\n</table></div>\n<br />\n' )
564	583
565	584	f.write( '<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n' )
566	585	f.write( '<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % ( weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] ) )
...	...	@@ -582,10 +601,10 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
582	601	i['retweet_times'],
583	602	i['retweets'],
584	603	i['qualified'],
585		- image_index % 2 and even_background or odd_background,
	604	+ image_index % 2,
586	605	tag_hit
587	606	)
588		-# if i['title'].startswith( 'Verizon To' ):
	607	+# if i['title'].startswith( 'Too ' ):
589	608	# print i['title'], i['qualified'], i['retweet_times']
590	609	# print chart_url
591	610	image_url = Save_image( chart_url, os.path.join( cache_path, '%d_%d.png' % ( cur_time, image_index ) ) )
...	...	@@ -694,7 +713,7 @@ if __name__=='__main__':
694	713	r_time = item['retweet_times'][i]
695	714	if r_time - item['orig_posted'] < 5400:
696	715	if item['retweets'][i] >= threshold:
697		- item['qualified'] = i
	716	+ item['qualified'] = threshold
698	717	if r_time - item['orig_posted'] >= 3600:
699	718	break
700	719
...	...	@@ -702,9 +721,9 @@ if __name__=='__main__':
702	721	for item in items:
703	722	if item['qualified'] == -1 and len( item['retweet_times'] ) > 0:
704	723	if item['author'].lower() in authors_to_post:
705		- item['qualified'] = 0
	724	+ item['qualified'] = threshold
706	725	elif len( set([j.lower() for j in item['tags']]) & tags_to_post ) > 0:
707		- item['qualified'] = 0
	726	+ item['qualified'] = threshold
708	727
709	728	#
710	729	# Write out the updated yaml file.
711	730